diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 8 | ||||
-rw-r--r-- | net/core/devlink.c | 259 | ||||
-rw-r--r-- | net/core/drop_monitor.c | 3 | ||||
-rw-r--r-- | net/core/filter.c | 640 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 48 | ||||
-rw-r--r-- | net/core/flow_offload.c | 7 | ||||
-rw-r--r-- | net/core/gro_cells.c | 3 | ||||
-rw-r--r-- | net/core/neighbour.c | 3 | ||||
-rw-r--r-- | net/core/netclassid_cgroup.c | 2 | ||||
-rw-r--r-- | net/core/netpoll.c | 4 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 23 | ||||
-rw-r--r-- | net/core/skbuff.c | 43 | ||||
-rw-r--r-- | net/core/sock.c | 134 | ||||
-rw-r--r-- | net/core/sock_map.c | 12 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 1 | ||||
-rw-r--r-- | net/core/xdp.c | 10 |
16 files changed, 605 insertions, 595 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 56c8b0921c9f..d66c73c1c734 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1100,7 +1100,7 @@ static int dev_alloc_name_ns(struct net *net, BUG_ON(!net); ret = __dev_alloc_name(net, name, buf); if (ret >= 0) - strlcpy(dev->name, buf, IFNAMSIZ); + strscpy(dev->name, buf, IFNAMSIZ); return ret; } @@ -1137,7 +1137,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev, else if (netdev_name_in_use(net, name)) return -EEXIST; else if (dev->name != name) - strlcpy(dev->name, name, IFNAMSIZ); + strscpy(dev->name, name, IFNAMSIZ); return 0; } @@ -10370,9 +10370,7 @@ void netdev_run_todo(void) BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); -#if IS_ENABLED(CONFIG_DECNET) - WARN_ON(dev->dn_ptr); -#endif + if (dev->priv_destructor) dev->priv_destructor(dev); if (dev->needs_free_netdev) diff --git a/net/core/devlink.c b/net/core/devlink.c index b50bcc18b8d9..7776dc82f88d 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1710,7 +1710,7 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct devlink *devlink = info->user_ptr[0]; u32 count; - if (!info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_SPLIT_COUNT)) return -EINVAL; if (!devlink->ops->port_split) return -EOPNOTSUPP; @@ -1838,7 +1838,7 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, if (!devlink->ops->port_del) return -EOPNOTSUPP; - if (!info->attrs[DEVLINK_ATTR_PORT_INDEX]) { + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_INDEX)) { NL_SET_ERR_MSG_MOD(extack, "Port index is not specified"); return -EINVAL; } @@ -2690,7 +2690,7 @@ static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, if (err) return err; - if (!info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_POOL_SIZE)) return -EINVAL; size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]); @@ -2900,7 +2900,7 @@ static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb, if (err) return err; - if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD)) return -EINVAL; threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]); @@ -3156,7 +3156,7 @@ static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, if (err) return err; - if (!info->attrs[DEVLINK_ATTR_SB_THRESHOLD]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD)) return -EINVAL; threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]); @@ -3845,7 +3845,7 @@ static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, struct devlink_dpipe_table *table; const char *table_name; - if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME)) return -EINVAL; table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); @@ -4029,8 +4029,9 @@ static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb, const char *table_name; bool counters_enable; - if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME] || - !info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME) || + GENL_REQ_ATTR_CHECK(info, + DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED)) return -EINVAL; table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); @@ -4119,8 +4120,8 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb, u64 size; int err; - if (!info->attrs[DEVLINK_ATTR_RESOURCE_ID] || - !info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_ID) || + GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_SIZE)) return -EINVAL; resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]); @@ -4742,10 +4743,76 @@ void devlink_flash_update_timeout_notify(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify); +struct devlink_info_req { + struct sk_buff *msg; + void (*version_cb)(const char *version_name, + enum devlink_info_version_type version_type, + void *version_cb_priv); + void *version_cb_priv; +}; + +struct devlink_flash_component_lookup_ctx { + const char *lookup_name; + bool lookup_name_found; +}; + +static void +devlink_flash_component_lookup_cb(const char *version_name, + enum devlink_info_version_type version_type, + void *version_cb_priv) +{ + struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv; + + if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT || + lookup_ctx->lookup_name_found) + return; + + lookup_ctx->lookup_name_found = + !strcmp(lookup_ctx->lookup_name, version_name); +} + +static int devlink_flash_component_get(struct devlink *devlink, + struct nlattr *nla_component, + const char **p_component, + struct netlink_ext_ack *extack) +{ + struct devlink_flash_component_lookup_ctx lookup_ctx = {}; + struct devlink_info_req req = {}; + const char *component; + int ret; + + if (!nla_component) + return 0; + + component = nla_data(nla_component); + + if (!devlink->ops->info_get) { + NL_SET_ERR_MSG_ATTR(extack, nla_component, + "component update is not supported by this device"); + return -EOPNOTSUPP; + } + + lookup_ctx.lookup_name = component; + req.version_cb = devlink_flash_component_lookup_cb; + req.version_cb_priv = &lookup_ctx; + + ret = devlink->ops->info_get(devlink, &req, NULL); + if (ret) + return ret; + + if (!lookup_ctx.lookup_name_found) { + NL_SET_ERR_MSG_ATTR(extack, nla_component, + "selected component is not supported by this device"); + return -EINVAL; + } + *p_component = component; + return 0; +} + static int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *nla_component, *nla_overwrite_mask, *nla_file_name; + struct nlattr *nla_overwrite_mask, *nla_file_name; struct devlink_flash_update_params params = {}; struct devlink *devlink = info->user_ptr[0]; const char *file_name; @@ -4755,20 +4822,16 @@ static int devlink_nl_cmd_flash_update(struct sk_buff *skb, if (!devlink->ops->flash_update) return -EOPNOTSUPP; - if (!info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME)) return -EINVAL; - supported_params = devlink->ops->supported_flash_update_params; + ret = devlink_flash_component_get(devlink, + info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT], + ¶ms.component, info->extack); + if (ret) + return ret; - nla_component = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT]; - if (nla_component) { - if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT)) { - NL_SET_ERR_MSG_ATTR(info->extack, nla_component, - "component update is not supported by this device"); - return -EOPNOTSUPP; - } - params.component = nla_data(nla_component); - } + supported_params = devlink->ops->supported_flash_update_params; nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK]; if (nla_overwrite_mask) { @@ -4936,10 +4999,8 @@ static int devlink_nl_cmd_selftests_run(struct sk_buff *skb, if (!devlink->ops->selftest_run || !devlink->ops->selftest_check) return -EOPNOTSUPP; - if (!info->attrs[DEVLINK_ATTR_SELFTESTS]) { - NL_SET_ERR_MSG_MOD(info->extack, "selftest required"); + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS)) return -EINVAL; - } attrs = info->attrs[DEVLINK_ATTR_SELFTESTS]; @@ -5393,7 +5454,7 @@ static int devlink_param_type_get_from_info(struct genl_info *info, enum devlink_param_type *param_type) { - if (!info->attrs[DEVLINK_ATTR_PARAM_TYPE]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_TYPE)) return -EINVAL; switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) { @@ -5470,7 +5531,7 @@ devlink_param_get_from_info(struct list_head *param_list, { char *param_name; - if (!info->attrs[DEVLINK_ATTR_PARAM_NAME]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_NAME)) return NULL; param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]); @@ -5536,7 +5597,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, return err; } - if (!info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_VALUE_CMODE)) return -EINVAL; cmode = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]); if (!devlink_param_cmode_is_supported(param, cmode)) @@ -5574,89 +5635,22 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { - struct devlink_param_item *param_item; - struct devlink_port *devlink_port; - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; - int idx = 0; - int err = 0; - - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - devl_lock(devlink); - list_for_each_entry(devlink_port, &devlink->port_list, list) { - list_for_each_entry(param_item, - &devlink_port->param_list, list) { - if (idx < start) { - idx++; - continue; - } - err = devlink_nl_param_fill(msg, - devlink_port->devlink, - devlink_port->index, param_item, - DEVLINK_CMD_PORT_PARAM_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); - if (err == -EOPNOTSUPP) { - err = 0; - } else if (err) { - devl_unlock(devlink); - devlink_put(devlink); - goto out; - } - idx++; - } - } - devl_unlock(devlink); - devlink_put(devlink); - } -out: - if (err != -EMSGSIZE) - return err; - - cb->args[0] = idx; + NL_SET_ERR_MSG_MOD(cb->extack, "Port params are not supported"); return msg->len; } static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[1]; - struct devlink_param_item *param_item; - struct sk_buff *msg; - int err; - - param_item = devlink_param_get_from_info(&devlink_port->param_list, - info); - if (!param_item) - return -EINVAL; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - err = devlink_nl_param_fill(msg, devlink_port->devlink, - devlink_port->index, param_item, - DEVLINK_CMD_PORT_PARAM_GET, - info->snd_portid, info->snd_seq, 0); - if (err) { - nlmsg_free(msg); - return err; - } - - return genlmsg_reply(msg, info); + NL_SET_ERR_MSG_MOD(info->extack, "Port params are not supported"); + return -EINVAL; } static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[1]; - - return __devlink_nl_cmd_param_set_doit(devlink_port->devlink, - devlink_port->index, - &devlink_port->param_list, info, - DEVLINK_CMD_PORT_PARAM_NEW); + NL_SET_ERR_MSG_MOD(info->extack, "Port params are not supported"); + return -EINVAL; } static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg, @@ -6056,7 +6050,7 @@ static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb, unsigned int index; int err; - if (!info->attrs[DEVLINK_ATTR_REGION_NAME]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) return -EINVAL; if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) { @@ -6189,8 +6183,8 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb, unsigned int index; u32 snapshot_id; - if (!info->attrs[DEVLINK_ATTR_REGION_NAME] || - !info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME) || + GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_SNAPSHOT_ID)) return -EINVAL; region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]); @@ -6238,7 +6232,7 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) u8 *data; int err; - if (!info->attrs[DEVLINK_ATTR_REGION_NAME]) { + if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) { NL_SET_ERR_MSG_MOD(info->extack, "No region name provided"); return -EINVAL; } @@ -6553,18 +6547,18 @@ out_unlock: return err; } -struct devlink_info_req { - struct sk_buff *msg; -}; - int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) { + if (!req->msg) + return 0; return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name); } EXPORT_SYMBOL_GPL(devlink_info_driver_name_put); int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) { + if (!req->msg) + return 0; return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn); } EXPORT_SYMBOL_GPL(devlink_info_serial_number_put); @@ -6572,6 +6566,8 @@ EXPORT_SYMBOL_GPL(devlink_info_serial_number_put); int devlink_info_board_serial_number_put(struct devlink_info_req *req, const char *bsn) { + if (!req->msg) + return 0; return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, bsn); } @@ -6579,11 +6575,19 @@ EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put); static int devlink_info_version_put(struct devlink_info_req *req, int attr, const char *version_name, - const char *version_value) + const char *version_value, + enum devlink_info_version_type version_type) { struct nlattr *nest; int err; + if (req->version_cb) + req->version_cb(version_name, version_type, + req->version_cb_priv); + + if (!req->msg) + return 0; + nest = nla_nest_start_noflag(req->msg, attr); if (!nest) return -EMSGSIZE; @@ -6612,7 +6616,8 @@ int devlink_info_version_fixed_put(struct devlink_info_req *req, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED, - version_name, version_value); + version_name, version_value, + DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put); @@ -6621,25 +6626,49 @@ int devlink_info_version_stored_put(struct devlink_info_req *req, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED, - version_name, version_value); + version_name, version_value, + DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_stored_put); +int devlink_info_version_stored_put_ext(struct devlink_info_req *req, + const char *version_name, + const char *version_value, + enum devlink_info_version_type version_type) +{ + return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED, + version_name, version_value, + version_type); +} +EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext); + int devlink_info_version_running_put(struct devlink_info_req *req, const char *version_name, const char *version_value) { return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING, - version_name, version_value); + version_name, version_value, + DEVLINK_INFO_VERSION_TYPE_NONE); } EXPORT_SYMBOL_GPL(devlink_info_version_running_put); +int devlink_info_version_running_put_ext(struct devlink_info_req *req, + const char *version_name, + const char *version_value, + enum devlink_info_version_type version_type) +{ + return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING, + version_name, version_value, + version_type); +} +EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext); + static int devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { - struct devlink_info_req req; + struct devlink_info_req req = {}; void *hdr; int err; @@ -9513,6 +9542,7 @@ static struct genl_family devlink_nl_family __ro_after_init = { .module = THIS_MODULE, .small_ops = devlink_nl_ops, .n_small_ops = ARRAY_SIZE(devlink_nl_ops), + .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1, .mcgrps = devlink_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps), }; @@ -9846,7 +9876,6 @@ int devl_port_register(struct devlink *devlink, INIT_LIST_HEAD(&devlink_port->reporter_list); mutex_init(&devlink_port->reporters_lock); list_add_tail(&devlink_port->list, &devlink->port_list); - INIT_LIST_HEAD(&devlink_port->param_list); INIT_LIST_HEAD(&devlink_port->region_list); INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn); @@ -12306,8 +12335,8 @@ EXPORT_SYMBOL_GPL(devl_trap_policers_unregister); static void __devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { + struct devlink_info_req req = {}; const struct nlattr *nlattr; - struct devlink_info_req req; struct sk_buff *msg; int rem, err; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 75501e1bdd25..f084a4a6b7ab 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -464,7 +464,7 @@ net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink, goto out; hw_entry = &hw_entries->entries[hw_entries->num_entries]; - strlcpy(hw_entry->trap_name, metadata->trap_name, + strscpy(hw_entry->trap_name, metadata->trap_name, NET_DM_MAX_HW_TRAP_NAME_LEN - 1); hw_entry->count = 1; hw_entries->num_entries++; @@ -1645,6 +1645,7 @@ static struct genl_family net_drop_monitor_family __ro_after_init = { .module = THIS_MODULE, .small_ops = dropmon_ops, .n_small_ops = ARRAY_SIZE(dropmon_ops), + .resv_start_op = NET_DM_CMD_STATS_GET + 1, .mcgrps = dropmon_mcgrps, .n_mcgrps = ARRAY_SIZE(dropmon_mcgrps), }; diff --git a/net/core/filter.c b/net/core/filter.c index c191db80ce93..31608801078e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3010,7 +3010,7 @@ BPF_CALL_0(bpf_get_cgroup_classid_curr) return __task_get_classid(current); } -static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = { +const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = { .func = bpf_get_cgroup_classid_curr, .gpl_only = false, .ret_type = RET_INTEGER, @@ -4489,7 +4489,8 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key void *to_orig = to; int err; - if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) { + if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6 | + BPF_F_TUNINFO_FLAGS)))) { err = -EINVAL; goto err_clear; } @@ -4521,7 +4522,10 @@ set_compat: to->tunnel_id = be64_to_cpu(info->key.tun_id); to->tunnel_tos = info->key.tos; to->tunnel_ttl = info->key.ttl; - to->tunnel_ext = 0; + if (flags & BPF_F_TUNINFO_FLAGS) + to->tunnel_flags = info->key.tun_flags; + else + to->tunnel_ext = 0; if (flags & BPF_F_TUNINFO_IPV6) { memcpy(to->remote_ipv6, &info->key.u.ipv6.src, @@ -5014,359 +5018,259 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static int __bpf_setsockopt(struct sock *sk, int level, int optname, - char *optval, int optlen) -{ - char devname[IFNAMSIZ]; - int val, valbool; - struct net *net; - int ifindex; - int ret = 0; - - if (!sk_fullsock(sk)) +static int sol_socket_sockopt(struct sock *sk, int optname, + char *optval, int *optlen, + bool getopt) +{ + switch (optname) { + case SO_REUSEADDR: + case SO_SNDBUF: + case SO_RCVBUF: + case SO_KEEPALIVE: + case SO_PRIORITY: + case SO_REUSEPORT: + case SO_RCVLOWAT: + case SO_MARK: + case SO_MAX_PACING_RATE: + case SO_BINDTOIFINDEX: + case SO_TXREHASH: + if (*optlen != sizeof(int)) + return -EINVAL; + break; + case SO_BINDTODEVICE: + break; + default: return -EINVAL; + } - if (level == SOL_SOCKET) { - if (optlen != sizeof(int) && optname != SO_BINDTODEVICE) + if (getopt) { + if (optname == SO_BINDTODEVICE) return -EINVAL; - val = *((int *)optval); - valbool = val ? 1 : 0; - - /* Only some socketops are supported */ - switch (optname) { - case SO_RCVBUF: - val = min_t(u32, val, READ_ONCE(sysctl_rmem_max)); - val = min_t(int, val, INT_MAX / 2); - sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - WRITE_ONCE(sk->sk_rcvbuf, - max_t(int, val * 2, SOCK_MIN_RCVBUF)); - break; - case SO_SNDBUF: - val = min_t(u32, val, READ_ONCE(sysctl_wmem_max)); - val = min_t(int, val, INT_MAX / 2); - sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - WRITE_ONCE(sk->sk_sndbuf, - max_t(int, val * 2, SOCK_MIN_SNDBUF)); - break; - case SO_MAX_PACING_RATE: /* 32bit version */ - if (val != ~0U) - cmpxchg(&sk->sk_pacing_status, - SK_PACING_NONE, - SK_PACING_NEEDED); - sk->sk_max_pacing_rate = (val == ~0U) ? - ~0UL : (unsigned int)val; - sk->sk_pacing_rate = min(sk->sk_pacing_rate, - sk->sk_max_pacing_rate); - break; - case SO_PRIORITY: - sk->sk_priority = val; - break; - case SO_RCVLOWAT: - if (val < 0) - val = INT_MAX; - if (sk->sk_socket && sk->sk_socket->ops->set_rcvlowat) - ret = sk->sk_socket->ops->set_rcvlowat(sk, val); - else - WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); - break; - case SO_MARK: - if (sk->sk_mark != val) { - sk->sk_mark = val; - sk_dst_reset(sk); - } - break; - case SO_BINDTODEVICE: - optlen = min_t(long, optlen, IFNAMSIZ - 1); - strncpy(devname, optval, optlen); - devname[optlen] = 0; + return sk_getsockopt(sk, SOL_SOCKET, optname, + KERNEL_SOCKPTR(optval), + KERNEL_SOCKPTR(optlen)); + } - ifindex = 0; - if (devname[0] != '\0') { - struct net_device *dev; + return sk_setsockopt(sk, SOL_SOCKET, optname, + KERNEL_SOCKPTR(optval), *optlen); +} - ret = -ENODEV; +static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname, + char *optval, int optlen) +{ + struct tcp_sock *tp = tcp_sk(sk); + unsigned long timeout; + int val; - net = sock_net(sk); - dev = dev_get_by_name(net, devname); - if (!dev) - break; - ifindex = dev->ifindex; - dev_put(dev); - } - fallthrough; - case SO_BINDTOIFINDEX: - if (optname == SO_BINDTOIFINDEX) - ifindex = val; - ret = sock_bindtoindex(sk, ifindex, false); - break; - case SO_KEEPALIVE: - if (sk->sk_prot->keepalive) - sk->sk_prot->keepalive(sk, valbool); - sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); - break; - case SO_REUSEPORT: - sk->sk_reuseport = valbool; - break; - case SO_TXREHASH: - if (val < -1 || val > 1) { - ret = -EINVAL; - break; - } - sk->sk_txrehash = (u8)val; - break; - default: - ret = -EINVAL; - } -#ifdef CONFIG_INET - } else if (level == SOL_IP) { - if (optlen != sizeof(int) || sk->sk_family != AF_INET) - return -EINVAL; + if (optlen != sizeof(int)) + return -EINVAL; - val = *((int *)optval); - /* Only some options are supported */ - switch (optname) { - case IP_TOS: - if (val < -1 || val > 0xff) { - ret = -EINVAL; - } else { - struct inet_sock *inet = inet_sk(sk); + val = *(int *)optval; - if (val == -1) - val = 0; - inet->tos = val; - } - break; - default: - ret = -EINVAL; - } -#if IS_ENABLED(CONFIG_IPV6) - } else if (level == SOL_IPV6) { - if (optlen != sizeof(int) || sk->sk_family != AF_INET6) + /* Only some options are supported */ + switch (optname) { + case TCP_BPF_IW: + if (val <= 0 || tp->data_segs_out > tp->syn_data) + return -EINVAL; + tcp_snd_cwnd_set(tp, val); + break; + case TCP_BPF_SNDCWND_CLAMP: + if (val <= 0) + return -EINVAL; + tp->snd_cwnd_clamp = val; + tp->snd_ssthresh = val; + break; + case TCP_BPF_DELACK_MAX: + timeout = usecs_to_jiffies(val); + if (timeout > TCP_DELACK_MAX || + timeout < TCP_TIMEOUT_MIN) + return -EINVAL; + inet_csk(sk)->icsk_delack_max = timeout; + break; + case TCP_BPF_RTO_MIN: + timeout = usecs_to_jiffies(val); + if (timeout > TCP_RTO_MIN || + timeout < TCP_TIMEOUT_MIN) return -EINVAL; + inet_csk(sk)->icsk_rto_min = timeout; + break; + default: + return -EINVAL; + } - val = *((int *)optval); - /* Only some options are supported */ - switch (optname) { - case IPV6_TCLASS: - if (val < -1 || val > 0xff) { - ret = -EINVAL; - } else { - struct ipv6_pinfo *np = inet6_sk(sk); + return 0; +} - if (val == -1) - val = 0; - np->tclass = val; - } - break; - default: - ret = -EINVAL; - } -#endif - } else if (level == SOL_TCP && - sk->sk_prot->setsockopt == tcp_setsockopt) { - if (optname == TCP_CONGESTION) { - char name[TCP_CA_NAME_MAX]; +static int sol_tcp_sockopt(struct sock *sk, int optname, + char *optval, int *optlen, + bool getopt) +{ + if (sk->sk_prot->setsockopt != tcp_setsockopt) + return -EINVAL; - strncpy(name, optval, min_t(long, optlen, - TCP_CA_NAME_MAX-1)); - name[TCP_CA_NAME_MAX-1] = 0; - ret = tcp_set_congestion_control(sk, name, false, true); - } else { - struct inet_connection_sock *icsk = inet_csk(sk); + switch (optname) { + case TCP_NODELAY: + case TCP_MAXSEG: + case TCP_KEEPIDLE: + case TCP_KEEPINTVL: + case TCP_KEEPCNT: + case TCP_SYNCNT: + case TCP_WINDOW_CLAMP: + case TCP_THIN_LINEAR_TIMEOUTS: + case TCP_USER_TIMEOUT: + case TCP_NOTSENT_LOWAT: + case TCP_SAVE_SYN: + if (*optlen != sizeof(int)) + return -EINVAL; + break; + case TCP_CONGESTION: + if (*optlen < 2) + return -EINVAL; + break; + case TCP_SAVED_SYN: + if (*optlen < 1) + return -EINVAL; + break; + default: + if (getopt) + return -EINVAL; + return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen); + } + + if (getopt) { + if (optname == TCP_SAVED_SYN) { struct tcp_sock *tp = tcp_sk(sk); - unsigned long timeout; - if (optlen != sizeof(int)) + if (!tp->saved_syn || + *optlen > tcp_saved_syn_len(tp->saved_syn)) return -EINVAL; + memcpy(optval, tp->saved_syn->data, *optlen); + /* It cannot free tp->saved_syn here because it + * does not know if the user space still needs it. + */ + return 0; + } - val = *((int *)optval); - /* Only some options are supported */ - switch (optname) { - case TCP_BPF_IW: - if (val <= 0 || tp->data_segs_out > tp->syn_data) - ret = -EINVAL; - else - tcp_snd_cwnd_set(tp, val); - break; - case TCP_BPF_SNDCWND_CLAMP: - if (val <= 0) { - ret = -EINVAL; - } else { - tp->snd_cwnd_clamp = val; - tp->snd_ssthresh = val; - } - break; - case TCP_BPF_DELACK_MAX: - timeout = usecs_to_jiffies(val); - if (timeout > TCP_DELACK_MAX || - timeout < TCP_TIMEOUT_MIN) - return -EINVAL; - inet_csk(sk)->icsk_delack_max = timeout; - break; - case TCP_BPF_RTO_MIN: - timeout = usecs_to_jiffies(val); - if (timeout > TCP_RTO_MIN || - timeout < TCP_TIMEOUT_MIN) - return -EINVAL; - inet_csk(sk)->icsk_rto_min = timeout; - break; - case TCP_SAVE_SYN: - if (val < 0 || val > 1) - ret = -EINVAL; - else - tp->save_syn = val; - break; - case TCP_KEEPIDLE: - ret = tcp_sock_set_keepidle_locked(sk, val); - break; - case TCP_KEEPINTVL: - if (val < 1 || val > MAX_TCP_KEEPINTVL) - ret = -EINVAL; - else - tp->keepalive_intvl = val * HZ; - break; - case TCP_KEEPCNT: - if (val < 1 || val > MAX_TCP_KEEPCNT) - ret = -EINVAL; - else - tp->keepalive_probes = val; - break; - case TCP_SYNCNT: - if (val < 1 || val > MAX_TCP_SYNCNT) - ret = -EINVAL; - else - icsk->icsk_syn_retries = val; - break; - case TCP_USER_TIMEOUT: - if (val < 0) - ret = -EINVAL; - else - icsk->icsk_user_timeout = val; - break; - case TCP_NOTSENT_LOWAT: - tp->notsent_lowat = val; - sk->sk_write_space(sk); - break; - case TCP_WINDOW_CLAMP: - ret = tcp_set_window_clamp(sk, val); - break; - default: - ret = -EINVAL; - } + if (optname == TCP_CONGESTION) { + if (!inet_csk(sk)->icsk_ca_ops) + return -EINVAL; + /* BPF expects NULL-terminated tcp-cc string */ + optval[--(*optlen)] = '\0'; } -#endif - } else { - ret = -EINVAL; + + return do_tcp_getsockopt(sk, SOL_TCP, optname, + KERNEL_SOCKPTR(optval), + KERNEL_SOCKPTR(optlen)); } - return ret; + + return do_tcp_setsockopt(sk, SOL_TCP, optname, + KERNEL_SOCKPTR(optval), *optlen); } -static int _bpf_setsockopt(struct sock *sk, int level, int optname, - char *optval, int optlen) +static int sol_ip_sockopt(struct sock *sk, int optname, + char *optval, int *optlen, + bool getopt) { - if (sk_fullsock(sk)) - sock_owned_by_me(sk); - return __bpf_setsockopt(sk, level, optname, optval, optlen); + if (sk->sk_family != AF_INET) + return -EINVAL; + + switch (optname) { + case IP_TOS: + if (*optlen != sizeof(int)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + if (getopt) + return do_ip_getsockopt(sk, SOL_IP, optname, + KERNEL_SOCKPTR(optval), + KERNEL_SOCKPTR(optlen)); + + return do_ip_setsockopt(sk, SOL_IP, optname, + KERNEL_SOCKPTR(optval), *optlen); } -static int __bpf_getsockopt(struct sock *sk, int level, int optname, - char *optval, int optlen) +static int sol_ipv6_sockopt(struct sock *sk, int optname, + char *optval, int *optlen, + bool getopt) { - if (!sk_fullsock(sk)) - goto err_clear; + if (sk->sk_family != AF_INET6) + return -EINVAL; - if (level == SOL_SOCKET) { - if (optlen != sizeof(int)) - goto err_clear; + switch (optname) { + case IPV6_TCLASS: + case IPV6_AUTOFLOWLABEL: + if (*optlen != sizeof(int)) + return -EINVAL; + break; + default: + return -EINVAL; + } - switch (optname) { - case SO_RCVBUF: - *((int *)optval) = sk->sk_rcvbuf; - break; - case SO_SNDBUF: - *((int *)optval) = sk->sk_sndbuf; - break; - case SO_MARK: - *((int *)optval) = sk->sk_mark; - break; - case SO_PRIORITY: - *((int *)optval) = sk->sk_priority; - break; - case SO_BINDTOIFINDEX: - *((int *)optval) = sk->sk_bound_dev_if; - break; - case SO_REUSEPORT: - *((int *)optval) = sk->sk_reuseport; - break; - case SO_TXREHASH: - *((int *)optval) = sk->sk_txrehash; - break; - default: - goto err_clear; - } -#ifdef CONFIG_INET - } else if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { - struct inet_connection_sock *icsk; - struct tcp_sock *tp; + if (getopt) + return ipv6_bpf_stub->ipv6_getsockopt(sk, SOL_IPV6, optname, + KERNEL_SOCKPTR(optval), + KERNEL_SOCKPTR(optlen)); - switch (optname) { - case TCP_CONGESTION: - icsk = inet_csk(sk); + return ipv6_bpf_stub->ipv6_setsockopt(sk, SOL_IPV6, optname, + KERNEL_SOCKPTR(optval), *optlen); +} - if (!icsk->icsk_ca_ops || optlen <= 1) - goto err_clear; - strncpy(optval, icsk->icsk_ca_ops->name, optlen); - optval[optlen - 1] = 0; - break; - case TCP_SAVED_SYN: - tp = tcp_sk(sk); +static int __bpf_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + if (!sk_fullsock(sk)) + return -EINVAL; - if (optlen <= 0 || !tp->saved_syn || - optlen > tcp_saved_syn_len(tp->saved_syn)) - goto err_clear; - memcpy(optval, tp->saved_syn->data, optlen); - break; - default: - goto err_clear; - } - } else if (level == SOL_IP) { - struct inet_sock *inet = inet_sk(sk); + if (level == SOL_SOCKET) + return sol_socket_sockopt(sk, optname, optval, &optlen, false); + else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) + return sol_ip_sockopt(sk, optname, optval, &optlen, false); + else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) + return sol_ipv6_sockopt(sk, optname, optval, &optlen, false); + else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) + return sol_tcp_sockopt(sk, optname, optval, &optlen, false); - if (optlen != sizeof(int) || sk->sk_family != AF_INET) - goto err_clear; + return -EINVAL; +} - /* Only some options are supported */ - switch (optname) { - case IP_TOS: - *((int *)optval) = (int)inet->tos; - break; - default: - goto err_clear; - } -#if IS_ENABLED(CONFIG_IPV6) - } else if (level == SOL_IPV6) { - struct ipv6_pinfo *np = inet6_sk(sk); +static int _bpf_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + if (sk_fullsock(sk)) + sock_owned_by_me(sk); + return __bpf_setsockopt(sk, level, optname, optval, optlen); +} - if (optlen != sizeof(int) || sk->sk_family != AF_INET6) - goto err_clear; +static int __bpf_getsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + int err, saved_optlen = optlen; - /* Only some options are supported */ - switch (optname) { - case IPV6_TCLASS: - *((int *)optval) = (int)np->tclass; - break; - default: - goto err_clear; - } -#endif -#endif - } else { - goto err_clear; + if (!sk_fullsock(sk)) { + err = -EINVAL; + goto done; } - return 0; -err_clear: - memset(optval, 0, optlen); - return -EINVAL; + + if (level == SOL_SOCKET) + err = sol_socket_sockopt(sk, optname, optval, &optlen, true); + else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) + err = sol_tcp_sockopt(sk, optname, optval, &optlen, true); + else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) + err = sol_ip_sockopt(sk, optname, optval, &optlen, true); + else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) + err = sol_ipv6_sockopt(sk, optname, optval, &optlen, true); + else + err = -EINVAL; + +done: + if (err) + optlen = 0; + if (optlen < saved_optlen) + memset(optval + optlen, 0, saved_optlen - optlen); + return err; } static int _bpf_getsockopt(struct sock *sk, int level, int optname, @@ -6469,6 +6373,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, int dif, int sdif, u8 family, u8 proto) { + struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo; bool refcounted = false; struct sock *sk = NULL; @@ -6477,7 +6382,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, __be32 dst4 = tuple->ipv4.daddr; if (proto == IPPROTO_TCP) - sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0, + sk = __inet_lookup(net, hinfo, NULL, 0, src4, tuple->ipv4.sport, dst4, tuple->ipv4.dport, dif, sdif, &refcounted); @@ -6491,7 +6396,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; if (proto == IPPROTO_TCP) - sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0, + sk = __inet6_lookup(net, hinfo, NULL, 0, src6, tuple->ipv6.sport, dst6, ntohs(tuple->ipv6.dport), dif, sdif, &refcounted); @@ -7667,34 +7572,23 @@ const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak; static const struct bpf_func_proto * sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { + const struct bpf_func_proto *func_proto; + + func_proto = cgroup_common_func_proto(func_id, prog); + if (func_proto) + return func_proto; + + func_proto = cgroup_current_func_proto(func_id, prog); + if (func_proto) + return func_proto; + switch (func_id) { - /* inet and inet6 sockets are created in a process - * context so there is always a valid uid/gid - */ - case BPF_FUNC_get_current_uid_gid: - return &bpf_get_current_uid_gid_proto; - case BPF_FUNC_get_local_storage: - return &bpf_get_local_storage_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_sock_proto; case BPF_FUNC_get_netns_cookie: return &bpf_get_netns_cookie_sock_proto; case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; - case BPF_FUNC_get_current_pid_tgid: - return &bpf_get_current_pid_tgid_proto; - case BPF_FUNC_get_current_comm: - return &bpf_get_current_comm_proto; -#ifdef CONFIG_CGROUPS - case BPF_FUNC_get_current_cgroup_id: - return &bpf_get_current_cgroup_id_proto; - case BPF_FUNC_get_current_ancestor_cgroup_id: - return &bpf_get_current_ancestor_cgroup_id_proto; -#endif -#ifdef CONFIG_CGROUP_NET_CLASSID - case BPF_FUNC_get_cgroup_classid: - return &bpf_get_cgroup_classid_curr_proto; -#endif case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_cg_sock_proto; case BPF_FUNC_ktime_get_coarse_ns: @@ -7707,12 +7601,17 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) static const struct bpf_func_proto * sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { + const struct bpf_func_proto *func_proto; + + func_proto = cgroup_common_func_proto(func_id, prog); + if (func_proto) + return func_proto; + + func_proto = cgroup_current_func_proto(func_id, prog); + if (func_proto) + return func_proto; + switch (func_id) { - /* inet and inet6 sockets are created in a process - * context so there is always a valid uid/gid - */ - case BPF_FUNC_get_current_uid_gid: - return &bpf_get_current_uid_gid_proto; case BPF_FUNC_bind: switch (prog->expected_attach_type) { case BPF_CGROUP_INET4_CONNECT: @@ -7725,24 +7624,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_socket_cookie_sock_addr_proto; case BPF_FUNC_get_netns_cookie: return &bpf_get_netns_cookie_sock_addr_proto; - case BPF_FUNC_get_local_storage: - return &bpf_get_local_storage_proto; case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; - case BPF_FUNC_get_current_pid_tgid: - return &bpf_get_current_pid_tgid_proto; - case BPF_FUNC_get_current_comm: - return &bpf_get_current_comm_proto; -#ifdef CONFIG_CGROUPS - case BPF_FUNC_get_current_cgroup_id: - return &bpf_get_current_cgroup_id_proto; - case BPF_FUNC_get_current_ancestor_cgroup_id: - return &bpf_get_current_ancestor_cgroup_id_proto; -#endif -#ifdef CONFIG_CGROUP_NET_CLASSID - case BPF_FUNC_get_cgroup_classid: - return &bpf_get_cgroup_classid_curr_proto; -#endif #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sock_addr_sk_lookup_tcp_proto; @@ -7823,9 +7706,13 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto __weak; static const struct bpf_func_proto * cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { + const struct bpf_func_proto *func_proto; + + func_proto = cgroup_common_func_proto(func_id, prog); + if (func_proto) + return func_proto; + switch (func_id) { - case BPF_FUNC_get_local_storage: - return &bpf_get_local_storage_proto; case BPF_FUNC_sk_fullsock: return &bpf_sk_fullsock_proto; case BPF_FUNC_sk_storage_get: @@ -8065,6 +7952,12 @@ const struct bpf_func_proto bpf_sock_hash_update_proto __weak; static const struct bpf_func_proto * sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { + const struct bpf_func_proto *func_proto; + + func_proto = cgroup_common_func_proto(func_id, prog); + if (func_proto) + return func_proto; + switch (func_id) { case BPF_FUNC_setsockopt: return &bpf_sock_ops_setsockopt_proto; @@ -8078,8 +7971,6 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_hash_update_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_sock_ops_proto; - case BPF_FUNC_get_local_storage: - return &bpf_get_local_storage_proto; case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; case BPF_FUNC_sk_storage_get: @@ -10812,14 +10703,13 @@ int sk_detach_filter(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_detach_filter); -int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, - unsigned int len) +int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len) { struct sock_fprog_kern *fprog; struct sk_filter *filter; int ret = 0; - lock_sock(sk); + sockopt_lock_sock(sk); filter = rcu_dereference_protected(sk->sk_filter, lockdep_sock_is_held(sk)); if (!filter) @@ -10844,7 +10734,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, goto out; ret = -EFAULT; - if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog))) + if (copy_to_sockptr(optval, fprog->filter, bpf_classic_proglen(fprog))) goto out; /* Instead of bytes, the API requests to return the number @@ -10852,7 +10742,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, */ ret = fprog->len; out: - release_sock(sk); + sockopt_release_sock(sk); return ret; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 5dc3860e9fc7..25cd35f5922e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -204,6 +204,30 @@ static void __skb_flow_dissect_icmp(const struct sk_buff *skb, skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen); } +static void __skb_flow_dissect_l2tpv3(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, const void *data, + int nhoff, int hlen) +{ + struct flow_dissector_key_l2tpv3 *key_l2tpv3; + struct { + __be32 session_id; + } *hdr, _hdr; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_L2TPV3)) + return; + + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); + if (!hdr) + return; + + key_l2tpv3 = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_L2TPV3, + target_container); + + key_l2tpv3->session_id = hdr->session_id; +} + void skb_flow_dissect_meta(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container) @@ -866,8 +890,8 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, } } -bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, - __be16 proto, int nhoff, int hlen, unsigned int flags) +u32 bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, + __be16 proto, int nhoff, int hlen, unsigned int flags) { struct bpf_flow_keys *flow_keys = ctx->flow_keys; u32 result; @@ -892,7 +916,7 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, flow_keys->thoff = clamp_t(u16, flow_keys->thoff, flow_keys->nhoff, hlen); - return result == BPF_OK; + return result; } static bool is_pppoe_ses_hdr_valid(const struct pppoe_hdr *hdr) @@ -1008,6 +1032,7 @@ bool __skb_flow_dissect(const struct net *net, }; __be16 n_proto = proto; struct bpf_prog *prog; + u32 result; if (skb) { ctx.skb = skb; @@ -1019,13 +1044,16 @@ bool __skb_flow_dissect(const struct net *net, } prog = READ_ONCE(run_array->items[0].prog); - ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, - hlen, flags); + result = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, + hlen, flags); + if (result == BPF_FLOW_DISSECTOR_CONTINUE) + goto dissect_continue; __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); rcu_read_unlock(); - return ret; + return result == BPF_OK; } +dissect_continue: rcu_read_unlock(); } @@ -1173,8 +1201,8 @@ proto_again: nhoff += sizeof(*vlan); } - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_NUM_OF_VLANS)) { + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_NUM_OF_VLANS) && + !(key_control->flags & FLOW_DIS_ENCAPSULATION)) { struct flow_dissector_key_num_of_vlans *key_nvs; key_nvs = skb_flow_dissector_target(flow_dissector, @@ -1497,6 +1525,10 @@ ip_proto_again: __skb_flow_dissect_icmp(skb, flow_dissector, target_container, data, nhoff, hlen); break; + case IPPROTO_L2TP: + __skb_flow_dissect_l2tpv3(skb, flow_dissector, target_container, + data, nhoff, hlen); + break; default: break; diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 8cfb63528d18..abe423fd5736 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -237,6 +237,13 @@ void flow_rule_match_pppoe(const struct flow_rule *rule, } EXPORT_SYMBOL(flow_rule_match_pppoe); +void flow_rule_match_l2tpv3(const struct flow_rule *rule, + struct flow_match_l2tpv3 *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_L2TPV3, out); +} +EXPORT_SYMBOL(flow_rule_match_l2tpv3); + struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)) diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index 21619c70a82b..ed5ec5de47f6 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -81,8 +81,7 @@ int gro_cells_init(struct gro_cells *gcells, struct net_device *dev) set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); - netif_napi_add(dev, &cell->napi, gro_cell_poll, - NAPI_POLL_WEIGHT); + netif_napi_add(dev, &cell->napi, gro_cell_poll); napi_enable(&cell->napi); } return 0; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 78cc8fb68814..e93edb810103 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1853,9 +1853,6 @@ static struct neigh_table *neigh_find_table(int family) case AF_INET6: tbl = neigh_tables[NEIGH_ND_TABLE]; break; - case AF_DECnet: - tbl = neigh_tables[NEIGH_DN_TABLE]; - break; } return tbl; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 1a6a86693b74..d6a70aeaa503 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -66,7 +66,7 @@ struct update_classid_context { #define UPDATE_CLASSID_BATCH 1000 -static int update_classid_sock(const void *v, struct file *file, unsigned n) +static int update_classid_sock(const void *v, struct file *file, unsigned int n) { struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5d27067b72d5..9be762e1d042 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -556,7 +556,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, ',')) == NULL) goto parse_failed; *delim = 0; - strlcpy(np->dev_name, cur, sizeof(np->dev_name)); + strscpy(np->dev_name, cur, sizeof(np->dev_name)); cur = delim; } cur++; @@ -610,7 +610,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) int err; np->dev = ndev; - strlcpy(np->dev_name, ndev->name, IFNAMSIZ); + strscpy(np->dev_name, ndev->name, IFNAMSIZ); if (ndev->priv_flags & IFF_DISABLE_NETPOLL) { np_err(np, "%s doesn't support polling, aborting\n", diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4b5b15c684ed..74864dc46a7e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -866,14 +866,12 @@ static void set_operstate(struct net_device *dev, unsigned char transition) break; case IF_OPER_TESTING: - if (operstate == IF_OPER_UP || - operstate == IF_OPER_UNKNOWN) + if (netif_oper_up(dev)) operstate = IF_OPER_TESTING; break; case IF_OPER_DORMANT: - if (operstate == IF_OPER_UP || - operstate == IF_OPER_UNKNOWN) + if (netif_oper_up(dev)) operstate = IF_OPER_DORMANT; break; } @@ -1059,6 +1057,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_CARRIER */ + nla_total_size(4) /* IFLA_PROMISCUITY */ + + nla_total_size(4) /* IFLA_ALLMULTI */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */ @@ -1767,6 +1766,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) || nla_put_u32(skb, IFLA_GROUP, dev->group) || nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || + nla_put_u32(skb, IFLA_ALLMULTI, dev->allmulti) || nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || @@ -1928,6 +1928,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 }, [IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT }, [IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT }, + [IFLA_ALLMULTI] = { .type = NLA_REJECT }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2776,13 +2777,6 @@ static int do_setlink(const struct sk_buff *skb, call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); } - if (ifm->ifi_flags || ifm->ifi_change) { - err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), - extack); - if (err < 0) - goto errout; - } - if (tb[IFLA_MASTER]) { err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); if (err) @@ -2790,6 +2784,13 @@ static int do_setlink(const struct sk_buff *skb, status |= DO_SETLINK_MODIFIED; } + if (ifm->ifi_flags || ifm->ifi_change) { + err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), + extack); + if (err < 0) + goto errout; + } + if (tb[IFLA_CARRIER]) { err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); if (err) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 417463da4fac..bbcfb1c7f59e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -781,9 +781,10 @@ EXPORT_SYMBOL(__kfree_skb); * hit zero. Meanwhile, pass the drop reason to 'kfree_skb' * tracepoint. */ -void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) +void __fix_address +kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) { - if (!skb_unref(skb)) + if (unlikely(!skb_unref(skb))) return; DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX); @@ -1187,7 +1188,7 @@ EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages); static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) { - struct ubuf_info *uarg; + struct ubuf_info_msgzc *uarg; struct sk_buff *skb; WARN_ON_ONCE(!in_task()); @@ -1205,19 +1206,19 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) return NULL; } - uarg->callback = msg_zerocopy_callback; + uarg->ubuf.callback = msg_zerocopy_callback; uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1; uarg->len = 1; uarg->bytelen = size; uarg->zerocopy = 1; - uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; - refcount_set(&uarg->refcnt, 1); + uarg->ubuf.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; + refcount_set(&uarg->ubuf.refcnt, 1); sock_hold(sk); - return uarg; + return &uarg->ubuf; } -static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg) +static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg) { return container_of((void *)uarg, struct sk_buff, cb); } @@ -1226,6 +1227,7 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg) { if (uarg) { + struct ubuf_info_msgzc *uarg_zc; const u32 byte_limit = 1 << 19; /* limit to a few TSO */ u32 bytelen, next; @@ -1241,8 +1243,9 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, return NULL; } - bytelen = uarg->bytelen + size; - if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) { + uarg_zc = uarg_to_msgzc(uarg); + bytelen = uarg_zc->bytelen + size; + if (uarg_zc->len == USHRT_MAX - 1 || bytelen > byte_limit) { /* TCP can create new skb to attach new uarg */ if (sk->sk_type == SOCK_STREAM) goto new_alloc; @@ -1250,11 +1253,11 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, } next = (u32)atomic_read(&sk->sk_zckey); - if ((u32)(uarg->id + uarg->len) == next) { - if (mm_account_pinned_pages(&uarg->mmp, size)) + if ((u32)(uarg_zc->id + uarg_zc->len) == next) { + if (mm_account_pinned_pages(&uarg_zc->mmp, size)) return NULL; - uarg->len++; - uarg->bytelen = bytelen; + uarg_zc->len++; + uarg_zc->bytelen = bytelen; atomic_set(&sk->sk_zckey, ++next); /* no extra ref when appending to datagram (MSG_MORE) */ @@ -1290,7 +1293,7 @@ static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len) return true; } -static void __msg_zerocopy_callback(struct ubuf_info *uarg) +static void __msg_zerocopy_callback(struct ubuf_info_msgzc *uarg) { struct sk_buff *tail, *skb = skb_from_uarg(uarg); struct sock_exterr_skb *serr; @@ -1343,19 +1346,21 @@ release: void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, bool success) { - uarg->zerocopy = uarg->zerocopy & success; + struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg); + + uarg_zc->zerocopy = uarg_zc->zerocopy & success; if (refcount_dec_and_test(&uarg->refcnt)) - __msg_zerocopy_callback(uarg); + __msg_zerocopy_callback(uarg_zc); } EXPORT_SYMBOL_GPL(msg_zerocopy_callback); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) { - struct sock *sk = skb_from_uarg(uarg)->sk; + struct sock *sk = skb_from_uarg(uarg_to_msgzc(uarg))->sk; atomic_dec(&sk->sk_zckey); - uarg->len--; + uarg_to_msgzc(uarg)->len--; if (have_uref) msg_zerocopy_callback(NULL, uarg, true); diff --git a/net/core/sock.c b/net/core/sock.c index 788c1372663c..eeb6cbac6f49 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -703,15 +703,17 @@ static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen) goto out; } - return sock_bindtoindex(sk, index, true); + sockopt_lock_sock(sk); + ret = sock_bindtoindex_locked(sk, index); + sockopt_release_sock(sk); out: #endif return ret; } -static int sock_getbindtodevice(struct sock *sk, char __user *optval, - int __user *optlen, int len) +static int sock_getbindtodevice(struct sock *sk, sockptr_t optval, + sockptr_t optlen, int len) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES @@ -735,12 +737,12 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval, len = strlen(devname) + 1; ret = -EFAULT; - if (copy_to_user(optval, devname, len)) + if (copy_to_sockptr(optval, devname, len)) goto out; zero: ret = -EFAULT; - if (put_user(len, optlen)) + if (copy_to_sockptr(optlen, &len, sizeof(int))) goto out; ret = 0; @@ -1036,17 +1038,51 @@ static int sock_reserve_memory(struct sock *sk, int bytes) return 0; } +void sockopt_lock_sock(struct sock *sk) +{ + /* When current->bpf_ctx is set, the setsockopt is called from + * a bpf prog. bpf has ensured the sk lock has been + * acquired before calling setsockopt(). + */ + if (has_current_bpf_ctx()) + return; + + lock_sock(sk); +} +EXPORT_SYMBOL(sockopt_lock_sock); + +void sockopt_release_sock(struct sock *sk) +{ + if (has_current_bpf_ctx()) + return; + + release_sock(sk); +} +EXPORT_SYMBOL(sockopt_release_sock); + +bool sockopt_ns_capable(struct user_namespace *ns, int cap) +{ + return has_current_bpf_ctx() || ns_capable(ns, cap); +} +EXPORT_SYMBOL(sockopt_ns_capable); + +bool sockopt_capable(int cap) +{ + return has_current_bpf_ctx() || capable(cap); +} +EXPORT_SYMBOL(sockopt_capable); + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. */ -int sock_setsockopt(struct socket *sock, int level, int optname, - sockptr_t optval, unsigned int optlen) +int sk_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct so_timestamping timestamping; + struct socket *sock = sk->sk_socket; struct sock_txtime sk_txtime; - struct sock *sk = sock->sk; int val; int valbool; struct linger ling; @@ -1067,11 +1103,11 @@ int sock_setsockopt(struct socket *sock, int level, int optname, valbool = val ? 1 : 0; - lock_sock(sk); + sockopt_lock_sock(sk); switch (optname) { case SO_DEBUG: - if (val && !capable(CAP_NET_ADMIN)) + if (val && !sockopt_capable(CAP_NET_ADMIN)) ret = -EACCES; else sock_valbool_flag(sk, SOCK_DBG, valbool); @@ -1115,7 +1151,7 @@ set_sndbuf: break; case SO_SNDBUFFORCE: - if (!capable(CAP_NET_ADMIN)) { + if (!sockopt_capable(CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1137,7 +1173,7 @@ set_sndbuf: break; case SO_RCVBUFFORCE: - if (!capable(CAP_NET_ADMIN)) { + if (!sockopt_capable(CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1164,8 +1200,8 @@ set_sndbuf: case SO_PRIORITY: if ((val >= 0 && val <= 6) || - ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || - ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) sk->sk_priority = val; else ret = -EPERM; @@ -1228,7 +1264,7 @@ set_sndbuf: case SO_RCVLOWAT: if (val < 0) val = INT_MAX; - if (sock->ops->set_rcvlowat) + if (sock && sock->ops->set_rcvlowat) ret = sock->ops->set_rcvlowat(sk, val); else WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); @@ -1310,8 +1346,8 @@ set_sndbuf: clear_bit(SOCK_PASSSEC, &sock->flags); break; case SO_MARK: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && - !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && + !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1319,8 +1355,8 @@ set_sndbuf: __sock_set_mark(sk, val); break; case SO_RCVMARK: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && - !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && + !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1354,7 +1390,7 @@ set_sndbuf: #ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: /* allow unprivileged users to decrease the value */ - if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) + if ((val > sk->sk_ll_usec) && !sockopt_capable(CAP_NET_ADMIN)) ret = -EPERM; else { if (val < 0) @@ -1364,13 +1400,13 @@ set_sndbuf: } break; case SO_PREFER_BUSY_POLL: - if (valbool && !capable(CAP_NET_ADMIN)) + if (valbool && !sockopt_capable(CAP_NET_ADMIN)) ret = -EPERM; else WRITE_ONCE(sk->sk_prefer_busy_poll, valbool); break; case SO_BUSY_POLL_BUDGET: - if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) { + if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) { ret = -EPERM; } else { if (val < 0 || val > U16_MAX) @@ -1441,7 +1477,7 @@ set_sndbuf: * scheduler has enough safe guards. */ if (sk_txtime.clockid != CLOCK_MONOTONIC && - !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1496,9 +1532,16 @@ set_sndbuf: ret = -ENOPROTOOPT; break; } - release_sock(sk); + sockopt_release_sock(sk); return ret; } + +int sock_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) +{ + return sk_setsockopt(sock->sk, level, optname, + optval, optlen); +} EXPORT_SYMBOL(sock_setsockopt); static const struct cred *sk_get_peer_cred(struct sock *sk) @@ -1525,22 +1568,25 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred, } } -static int groups_to_user(gid_t __user *dst, const struct group_info *src) +static int groups_to_user(sockptr_t dst, const struct group_info *src) { struct user_namespace *user_ns = current_user_ns(); int i; - for (i = 0; i < src->ngroups; i++) - if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i)) + for (i = 0; i < src->ngroups; i++) { + gid_t gid = from_kgid_munged(user_ns, src->gid[i]); + + if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid))) return -EFAULT; + } return 0; } -int sock_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) +int sk_getsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, sockptr_t optlen) { - struct sock *sk = sock->sk; + struct socket *sock = sk->sk_socket; union { int val; @@ -1557,7 +1603,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, int lv = sizeof(int); int len; - if (get_user(len, optlen)) + if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; if (len < 0) return -EINVAL; @@ -1692,7 +1738,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); spin_unlock(&sk->sk_peer_lock); - if (copy_to_user(optval, &peercred, len)) + if (copy_to_sockptr(optval, &peercred, len)) return -EFAULT; goto lenout; } @@ -1710,11 +1756,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname, if (len < n * sizeof(gid_t)) { len = n * sizeof(gid_t); put_cred(cred); - return put_user(len, optlen) ? -EFAULT : -ERANGE; + return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE; } len = n * sizeof(gid_t); - ret = groups_to_user((gid_t __user *)optval, cred->group_info); + ret = groups_to_user(optval, cred->group_info); put_cred(cred); if (ret) return ret; @@ -1730,7 +1776,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, return -ENOTCONN; if (lv < len) return -EINVAL; - if (copy_to_user(optval, address, len)) + if (copy_to_sockptr(optval, address, len)) return -EFAULT; goto lenout; } @@ -1747,7 +1793,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_PEERSEC: - return security_socket_getpeersec_stream(sock, optval, optlen, len); + return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len); case SO_MARK: v.val = sk->sk_mark; @@ -1779,7 +1825,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, return sock_getbindtodevice(sk, optval, optlen, len); case SO_GET_FILTER: - len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); + len = sk_get_filter(sk, optval, len); if (len < 0) return len; @@ -1827,7 +1873,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, sk_get_meminfo(sk, meminfo); len = min_t(unsigned int, len, sizeof(meminfo)); - if (copy_to_user(optval, &meminfo, len)) + if (copy_to_sockptr(optval, &meminfo, len)) return -EFAULT; goto lenout; @@ -1896,14 +1942,22 @@ int sock_getsockopt(struct socket *sock, int level, int optname, if (len > lv) len = lv; - if (copy_to_user(optval, &v, len)) + if (copy_to_sockptr(optval, &v, len)) return -EFAULT; lenout: - if (put_user(len, optlen)) + if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; return 0; } +int sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + return sk_getsockopt(sock->sk, level, optname, + USER_SOCKPTR(optval), + USER_SOCKPTR(optlen)); +} + /* * Initialize an sk_lock. * diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 9a9fb9487d63..a660baedd9e7 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -41,7 +41,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); - stab = kzalloc(sizeof(*stab), GFP_USER | __GFP_ACCOUNT); + stab = bpf_map_area_alloc(sizeof(*stab), NUMA_NO_NODE); if (!stab) return ERR_PTR(-ENOMEM); @@ -52,7 +52,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) sizeof(struct sock *), stab->map.numa_node); if (!stab->sks) { - kfree(stab); + bpf_map_area_free(stab); return ERR_PTR(-ENOMEM); } @@ -361,7 +361,7 @@ static void sock_map_free(struct bpf_map *map) synchronize_rcu(); bpf_map_area_free(stab->sks); - kfree(stab); + bpf_map_area_free(stab); } static void sock_map_release_progs(struct bpf_map *map) @@ -1085,7 +1085,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) if (attr->key_size > MAX_BPF_STACK) return ERR_PTR(-E2BIG); - htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT); + htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE); if (!htab) return ERR_PTR(-ENOMEM); @@ -1115,7 +1115,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) return &htab->map; free_htab: - kfree(htab); + bpf_map_area_free(htab); return ERR_PTR(err); } @@ -1168,7 +1168,7 @@ static void sock_hash_free(struct bpf_map *map) synchronize_rcu(); bpf_map_area_free(htab->buckets); - kfree(htab); + bpf_map_area_free(htab); } static void *sock_hash_lookup_sys(struct bpf_map *map, void *key) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 725891527814..5b1ce656baa1 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -29,7 +29,6 @@ static int int_3600 = 3600; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; -static long long_max __maybe_unused = LONG_MAX; static int net_msg_warn; /* Unused, but still a sysctl */ diff --git a/net/core/xdp.c b/net/core/xdp.c index 24420209bf0e..844c9d99dc0e 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -375,19 +375,17 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, struct xdp_buff *xdp) { - struct xdp_mem_allocator *xa; struct page *page; switch (mem->type) { case MEM_TYPE_PAGE_POOL: - rcu_read_lock(); - /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); page = virt_to_head_page(data); if (napi_direct && xdp_return_frame_no_direct()) napi_direct = false; - page_pool_put_full_page(xa->page_pool, page, napi_direct); - rcu_read_unlock(); + /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) + * as mem->type knows this a page_pool page + */ + page_pool_put_full_page(page->pp, page, napi_direct); break; case MEM_TYPE_PAGE_SHARED: page_frag_free(data); |