summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c46
-rw-r--r--net/9p/trans_virtio.c8
-rw-r--r--net/bluetooth/hci_conn.c63
-rw-r--r--net/bluetooth/hci_core.c42
-rw-r--r--net/bluetooth/hci_event.c82
-rw-r--r--net/bluetooth/hci_sync.c147
-rw-r--r--net/bluetooth/hci_sysfs.c14
-rw-r--r--net/bluetooth/iso.c57
-rw-r--r--net/bluetooth/l2cap_core.c5
-rw-r--r--net/bluetooth/l2cap_sock.c2
-rw-r--r--net/bluetooth/mgmt.c28
-rw-r--r--net/bluetooth/sco.c23
-rw-r--r--net/bridge/br_if.c5
-rw-r--r--net/can/bcm.c12
-rw-r--r--net/can/raw.c60
-rw-r--r--net/ceph/messenger.c1
-rw-r--r--net/ceph/messenger_v2.c41
-rw-r--r--net/ceph/osd_client.c20
-rw-r--r--net/core/bpf_sk_storage.c5
-rw-r--r--net/core/filter.c6
-rw-r--r--net/core/net-traces.c2
-rw-r--r--net/core/rtnetlink.c8
-rw-r--r--net/core/skbuff.c5
-rw-r--r--net/core/skmsg.c10
-rw-r--r--net/core/sock.c71
-rw-r--r--net/core/sock_map.c12
-rw-r--r--net/core/xdp.c2
-rw-r--r--net/dcb/dcbnl.c2
-rw-r--r--net/dccp/ipv6.c4
-rw-r--r--net/dccp/output.c2
-rw-r--r--net/dccp/proto.c10
-rw-r--r--net/dsa/port.c9
-rw-r--r--net/dsa/tag_sja1105.c90
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_diag.c4
-rw-r--r--net/ipv4/inet_hashtables.c17
-rw-r--r--net/ipv4/inet_timewait_sock.c8
-rw-r--r--net/ipv4/ip_gre.c6
-rw-r--r--net/ipv4/ip_output.c17
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/nexthop.c28
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/tcp.c57
-rw-r--r--net/ipv4/tcp_fastopen.c6
-rw-r--r--net/ipv4/tcp_input.c12
-rw-r--r--net/ipv4/tcp_ipv4.c14
-rw-r--r--net/ipv4/tcp_metrics.c70
-rw-r--r--net/ipv4/tcp_minisocks.c11
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_timer.c4
-rw-r--r--net/ipv4/udp.c8
-rw-r--r--net/ipv4/udp_offload.c23
-rw-r--r--net/ipv6/addrconf.c17
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/ip6_gre.c3
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ndisc.c3
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/raw.c6
-rw-r--r--net/ipv6/route.c7
-rw-r--r--net/ipv6/tcp_ipv6.c13
-rw-r--r--net/ipv6/udp.c16
-rw-r--r--net/ipv6/udp_offload.c10
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/llc/llc_conn.c49
-rw-r--r--net/llc/llc_if.c2
-rw-r--r--net/llc/llc_input.c3
-rw-r--r--net/llc/llc_sap.c18
-rw-r--r--net/mac80211/led.c2
-rw-r--r--net/mac80211/led.h8
-rw-r--r--net/mptcp/protocol.c14
-rw-r--r--net/mptcp/protocol.h1
-rw-r--r--net/mptcp/sockopt.c2
-rw-r--r--net/mptcp/subflow.c58
-rw-r--r--net/netfilter/nf_conntrack_core.c20
-rw-r--r--net/netfilter/nf_conntrack_helper.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c10
-rw-r--r--net/netfilter/nf_tables_api.c498
-rw-r--r--net/netfilter/nft_byteorder.c14
-rw-r--r--net/netfilter/nft_flow_offload.c6
-rw-r--r--net/netfilter/nft_immediate.c35
-rw-r--r--net/netfilter/nft_objref.c8
-rw-r--r--net/netfilter/nft_set_hash.c85
-rw-r--r--net/netfilter/nft_set_pipapo.c72
-rw-r--r--net/netfilter/nft_set_rbtree.c164
-rw-r--r--net/netfilter/nft_socket.c2
-rw-r--r--net/netfilter/xt_LED.c3
-rw-r--r--net/netfilter/xt_socket.c4
-rw-r--r--net/openvswitch/datapath.c8
-rw-r--r--net/packet/af_packet.c30
-rw-r--r--net/sched/act_api.c2
-rw-r--r--net/sched/act_ipt.c70
-rw-r--r--net/sched/act_pedit.c1
-rw-r--r--net/sched/cls_bpf.c99
-rw-r--r--net/sched/cls_flower.c114
-rw-r--r--net/sched/cls_fw.c11
-rw-r--r--net/sched/cls_matchall.c35
-rw-r--r--net/sched/cls_route.c1
-rw-r--r--net/sched/cls_u32.c105
-rw-r--r--net/sched/em_meta.c4
-rw-r--r--net/sched/sch_mqprio.c14
-rw-r--r--net/sched/sch_qfq.c18
-rw-r--r--net/sched/sch_taprio.c15
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/smc/af_smc.c79
-rw-r--r--net/smc/smc.h2
-rw-r--r--net/smc/smc_clc.c4
-rw-r--r--net/smc/smc_core.c25
-rw-r--r--net/smc/smc_sysctl.c10
-rw-r--r--net/sunrpc/Makefile2
-rw-r--r--net/sunrpc/auth.c2
-rw-r--r--net/sunrpc/auth_tls.c175
-rw-r--r--net/sunrpc/clnt.c22
-rw-r--r--net/sunrpc/rpcb_clnt.c39
-rw-r--r--net/sunrpc/sysfs.c1
-rw-r--r--net/sunrpc/sysfs.h7
-rw-r--r--net/sunrpc/xprtsock.c434
-rw-r--r--net/tipc/crypto.c3
-rw-r--r--net/tipc/node.c2
-rw-r--r--net/tls/tls_device.c64
-rw-r--r--net/tls/tls_main.c3
-rw-r--r--net/unix/af_unix.c25
-rw-r--r--net/wireless/nl80211.c5
-rw-r--r--net/wireless/scan.c2
-rw-r--r--net/wireless/util.c2
-rw-r--r--net/xdp/xsk.c8
-rw-r--r--net/xfrm/xfrm_policy.c2
131 files changed, 2621 insertions, 1197 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index a3340268ec8d..86bbc7147fc1 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -904,7 +904,7 @@ EXPORT_SYMBOL(do_trace_9p_fid_put);
static int p9_client_version(struct p9_client *c)
{
- int err = 0;
+ int err;
struct p9_req_t *req;
char *version = NULL;
int msize;
@@ -975,7 +975,6 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
struct p9_client *clnt;
char *client_id;
- err = 0;
clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
if (!clnt)
return ERR_PTR(-ENOMEM);
@@ -1094,7 +1093,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
const char *uname, kuid_t n_uname,
const char *aname)
{
- int err = 0;
+ int err;
struct p9_req_t *req;
struct p9_fid *fid;
struct p9_qid qid;
@@ -1147,7 +1146,6 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
struct p9_req_t *req;
u16 nwqids, count;
- err = 0;
wqids = NULL;
clnt = oldfid->clnt;
if (clone) {
@@ -1224,7 +1222,6 @@ int p9_client_open(struct p9_fid *fid, int mode)
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n",
p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);
- err = 0;
if (fid->mode != -1)
return -EINVAL;
@@ -1262,7 +1259,7 @@ EXPORT_SYMBOL(p9_client_open);
int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags,
u32 mode, kgid_t gid, struct p9_qid *qid)
{
- int err = 0;
+ int err;
struct p9_client *clnt;
struct p9_req_t *req;
int iounit;
@@ -1314,7 +1311,6 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",
fid->fid, name, perm, mode);
- err = 0;
clnt = fid->clnt;
if (fid->mode != -1)
@@ -1350,7 +1346,7 @@ EXPORT_SYMBOL(p9_client_fcreate);
int p9_client_symlink(struct p9_fid *dfid, const char *name,
const char *symtgt, kgid_t gid, struct p9_qid *qid)
{
- int err = 0;
+ int err;
struct p9_client *clnt;
struct p9_req_t *req;
@@ -1402,13 +1398,12 @@ EXPORT_SYMBOL(p9_client_link);
int p9_client_fsync(struct p9_fid *fid, int datasync)
{
- int err;
+ int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
fid->fid, datasync);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync);
@@ -1428,7 +1423,7 @@ EXPORT_SYMBOL(p9_client_fsync);
int p9_client_clunk(struct p9_fid *fid)
{
- int err;
+ int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
int retries = 0;
@@ -1436,7 +1431,6 @@ int p9_client_clunk(struct p9_fid *fid)
again:
p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n",
fid->fid, retries);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid);
@@ -1465,12 +1459,11 @@ EXPORT_SYMBOL(p9_client_clunk);
int p9_client_remove(struct p9_fid *fid)
{
- int err;
+ int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid);
@@ -1680,7 +1673,6 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
if (!ret)
return ERR_PTR(-ENOMEM);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid);
@@ -1733,7 +1725,6 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
if (!ret)
return ERR_PTR(-ENOMEM);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TGETATTR, "dq", fid->fid, request_mask);
@@ -1812,11 +1803,10 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = fid->clnt;
wst->size = p9_client_statsize(wst, clnt->proto_version);
p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n",
@@ -1851,11 +1841,10 @@ EXPORT_SYMBOL(p9_client_wstat);
int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid);
p9_debug(P9_DEBUG_9P, " valid=%x mode=%x uid=%d gid=%d size=%lld\n",
@@ -1887,7 +1876,6 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid);
@@ -1921,11 +1909,10 @@ EXPORT_SYMBOL(p9_client_statfs);
int p9_client_rename(struct p9_fid *fid,
struct p9_fid *newdirfid, const char *name)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
@@ -1949,11 +1936,10 @@ EXPORT_SYMBOL(p9_client_rename);
int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
struct p9_fid *newdirfid, const char *new_name)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = olddirfid->clnt;
p9_debug(P9_DEBUG_9P,
@@ -1986,7 +1972,6 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
struct p9_client *clnt;
struct p9_fid *attr_fid;
- err = 0;
clnt = file_fid->clnt;
attr_fid = p9_fid_create(clnt);
if (!attr_fid) {
@@ -2027,14 +2012,13 @@ EXPORT_SYMBOL_GPL(p9_client_xattrwalk);
int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
u64 attr_size, int flags)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
p9_debug(P9_DEBUG_9P,
">>> TXATTRCREATE fid %d name %s size %llu flag %d\n",
fid->fid, name, attr_size, flags);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd",
fid->fid, name, attr_size, flags);
@@ -2063,7 +2047,6 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
fid->fid, offset, count);
- err = 0;
clnt = fid->clnt;
rsize = fid->iounit;
@@ -2122,7 +2105,6 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TMKNOD fid %d name %s mode %d major %d minor %d\n",
@@ -2153,7 +2135,6 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n",
fid->fid, name, mode, from_kgid(&init_user_ns, gid));
@@ -2182,7 +2163,6 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TLOCK fid %d type %i flags %d start %lld length %lld proc_id %d client_id %s\n",
@@ -2214,7 +2194,6 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TGETLOCK fid %d, type %i start %lld length %lld proc_id %d client_id %s\n",
@@ -2251,7 +2230,6 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 3c27ffb781e3..e305071eb7b8 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -384,7 +384,7 @@ static void handle_rerror(struct p9_req_t *req, int in_hdr_len,
void *to = req->rc.sdata + in_hdr_len;
// Fits entirely into the static data? Nothing to do.
- if (req->rc.size < in_hdr_len)
+ if (req->rc.size < in_hdr_len || !pages)
return;
// Really long error message? Tough, truncate the reply. Might get
@@ -428,7 +428,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
struct page **in_pages = NULL, **out_pages = NULL;
struct virtio_chan *chan = client->trans;
struct scatterlist *sgs[4];
- size_t offs;
+ size_t offs = 0;
int need_drop = 0;
int kicked = 0;
@@ -501,8 +501,8 @@ req_retry_pinned:
if (in_pages) {
sgs[out_sgs + in_sgs++] = chan->sg + out + in;
- in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
- in_pages, in_nr_pages, offs, inlen);
+ pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
+ in_pages, in_nr_pages, offs, inlen);
}
BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 1ef952bda97d..76222565e2df 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -118,7 +118,7 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
*/
params->explicit_connect = false;
- list_del_init(&params->action);
+ hci_pend_le_list_del_init(params);
switch (params->auto_connect) {
case HCI_AUTO_CONN_EXPLICIT:
@@ -127,10 +127,10 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
return;
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
break;
case HCI_AUTO_CONN_REPORT:
- list_add(&params->action, &hdev->pend_le_reports);
+ hci_pend_le_list_add(params, &hdev->pend_le_reports);
break;
default:
break;
@@ -775,6 +775,11 @@ static void le_conn_timeout(struct work_struct *work)
hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
}
+struct iso_cig_params {
+ struct hci_cp_le_set_cig_params cp;
+ struct hci_cis_params cis[0x1f];
+};
+
struct iso_list_data {
union {
u8 cig;
@@ -786,10 +791,7 @@ struct iso_list_data {
u16 sync_handle;
};
int count;
- struct {
- struct hci_cp_le_set_cig_params cp;
- struct hci_cis_params cis[0x11];
- } pdu;
+ struct iso_cig_params pdu;
};
static void bis_list(struct hci_conn *conn, void *data)
@@ -1424,8 +1426,8 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev,
if (params->auto_connect == HCI_AUTO_CONN_DISABLED ||
params->auto_connect == HCI_AUTO_CONN_REPORT ||
params->auto_connect == HCI_AUTO_CONN_EXPLICIT) {
- list_del_init(&params->action);
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_del_init(params);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
}
params->explicit_connect = true;
@@ -1682,7 +1684,7 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
if (!link) {
hci_conn_drop(acl);
hci_conn_drop(sco);
- return NULL;
+ return ERR_PTR(-ENOLINK);
}
sco->setting = setting;
@@ -1764,10 +1766,33 @@ static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos)
return hci_send_cmd(hdev, HCI_OP_LE_CREATE_BIG, sizeof(cp), &cp);
}
+static void set_cig_params_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct iso_cig_params *pdu = data;
+
+ bt_dev_dbg(hdev, "");
+
+ if (err)
+ bt_dev_err(hdev, "Unable to set CIG parameters: %d", err);
+
+ kfree(pdu);
+}
+
+static int set_cig_params_sync(struct hci_dev *hdev, void *data)
+{
+ struct iso_cig_params *pdu = data;
+ u32 plen;
+
+ plen = sizeof(pdu->cp) + pdu->cp.num_cis * sizeof(pdu->cis[0]);
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS, plen, pdu,
+ HCI_CMD_TIMEOUT);
+}
+
static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
{
struct hci_dev *hdev = conn->hdev;
struct iso_list_data data;
+ struct iso_cig_params *pdu;
memset(&data, 0, sizeof(data));
@@ -1837,12 +1862,16 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET || !data.pdu.cp.num_cis)
return false;
- if (hci_send_cmd(hdev, HCI_OP_LE_SET_CIG_PARAMS,
- sizeof(data.pdu.cp) +
- (data.pdu.cp.num_cis * sizeof(*data.pdu.cis)),
- &data.pdu) < 0)
+ pdu = kmemdup(&data.pdu, sizeof(*pdu), GFP_KERNEL);
+ if (!pdu)
return false;
+ if (hci_cmd_sync_queue(hdev, set_cig_params_sync, pdu,
+ set_cig_params_complete) < 0) {
+ kfree(pdu);
+ return false;
+ }
+
return true;
}
@@ -2044,10 +2073,10 @@ static int create_big_sync(struct hci_dev *hdev, void *data)
flags |= MGMT_ADV_FLAG_SEC_2M;
/* Align intervals */
- interval = qos->bcast.out.interval / 1250;
+ interval = (qos->bcast.out.interval / 1250) * qos->bcast.sync_factor;
if (qos->bcast.bis)
- sync_interval = qos->bcast.sync_interval * 1600;
+ sync_interval = interval * 4;
err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->le_per_adv_data_len,
conn->le_per_adv_data, flags, interval,
@@ -2225,7 +2254,7 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
if (!link) {
hci_conn_drop(le);
hci_conn_drop(cis);
- return NULL;
+ return ERR_PTR(-ENOLINK);
}
/* If LE is already connected and CIS handle is already set proceed to
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 48917c68358d..1ec83985f1ab 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1972,6 +1972,7 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
struct adv_monitor *monitor)
{
int status = 0;
+ int handle;
switch (hci_get_adv_monitor_offload_ext(hdev)) {
case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */
@@ -1980,9 +1981,10 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
goto free_monitor;
case HCI_ADV_MONITOR_EXT_MSFT:
+ handle = monitor->handle;
status = msft_remove_monitor(hdev, monitor);
bt_dev_dbg(hdev, "%s remove monitor %d msft status %d",
- hdev->name, monitor->handle, status);
+ hdev->name, handle, status);
break;
}
@@ -2249,22 +2251,46 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
return NULL;
}
-/* This function requires the caller holds hdev->lock */
+/* This function requires the caller holds hdev->lock or rcu_read_lock */
struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
bdaddr_t *addr, u8 addr_type)
{
struct hci_conn_params *param;
- list_for_each_entry(param, list, action) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(param, list, action) {
if (bacmp(&param->addr, addr) == 0 &&
- param->addr_type == addr_type)
+ param->addr_type == addr_type) {
+ rcu_read_unlock();
return param;
+ }
}
+ rcu_read_unlock();
+
return NULL;
}
/* This function requires the caller holds hdev->lock */
+void hci_pend_le_list_del_init(struct hci_conn_params *param)
+{
+ if (list_empty(&param->action))
+ return;
+
+ list_del_rcu(&param->action);
+ synchronize_rcu();
+ INIT_LIST_HEAD(&param->action);
+}
+
+/* This function requires the caller holds hdev->lock */
+void hci_pend_le_list_add(struct hci_conn_params *param,
+ struct list_head *list)
+{
+ list_add_rcu(&param->action, list);
+}
+
+/* This function requires the caller holds hdev->lock */
struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
bdaddr_t *addr, u8 addr_type)
{
@@ -2297,14 +2323,15 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
return params;
}
-static void hci_conn_params_free(struct hci_conn_params *params)
+void hci_conn_params_free(struct hci_conn_params *params)
{
+ hci_pend_le_list_del_init(params);
+
if (params->conn) {
hci_conn_drop(params->conn);
hci_conn_put(params->conn);
}
- list_del(&params->action);
list_del(&params->list);
kfree(params);
}
@@ -2342,8 +2369,7 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev)
continue;
}
- list_del(&params->list);
- kfree(params);
+ hci_conn_params_free(params);
}
BT_DBG("All LE disabled connection parameters were removed");
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 09ba6d8987ee..31ca320ce38d 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1564,7 +1564,7 @@ static u8 hci_cc_le_set_privacy_mode(struct hci_dev *hdev, void *data,
params = hci_conn_params_lookup(hdev, &cp->bdaddr, cp->bdaddr_type);
if (params)
- params->privacy_mode = cp->mode;
+ WRITE_ONCE(params->privacy_mode, cp->mode);
hci_dev_unlock(hdev);
@@ -2784,6 +2784,9 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
hci_enable_advertising(hdev);
}
+ /* Inform sockets conn is gone before we delete it */
+ hci_disconn_cfm(conn, HCI_ERROR_UNSPECIFIED);
+
goto done;
}
@@ -2804,8 +2807,8 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_del_init(&params->action);
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_del_init(params);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
break;
default:
@@ -3423,8 +3426,8 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data,
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_del_init(&params->action);
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_del_init(params);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
hci_update_passive_scan(hdev);
break;
@@ -3812,7 +3815,8 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_CIG_PARAMS);
- if (!cp || rp->num_handles != cp->num_cis || rp->cig_id != cp->cig_id) {
+ if (!rp->status && (!cp || rp->num_handles != cp->num_cis ||
+ rp->cig_id != cp->cig_id)) {
bt_dev_err(hdev, "unexpected Set CIG Parameters response data");
status = HCI_ERROR_UNSPECIFIED;
}
@@ -5961,7 +5965,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
conn->dst_type);
if (params) {
- list_del_init(&params->action);
+ hci_pend_le_list_del_init(params);
if (params->conn) {
hci_conn_drop(params->conn);
hci_conn_put(params->conn);
@@ -6316,23 +6320,18 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
return;
}
- /* When receiving non-connectable or scannable undirected
- * advertising reports, this means that the remote device is
- * not connectable and then clearly indicate this in the
- * device found event.
- *
- * When receiving a scan response, then there is no way to
+ /* When receiving a scan response, then there is no way to
* know if the remote device is connectable or not. However
* since scan responses are merged with a previously seen
* advertising report, the flags field from that report
* will be used.
*
- * In the really unlikely case that a controller get confused
- * and just sends a scan response event, then it is marked as
- * not connectable as well.
+ * In the unlikely case that a controller just sends a scan
+ * response event that doesn't match the pending report, then
+ * it is marked as a standalone SCAN_RSP.
*/
if (type == LE_ADV_SCAN_RSP)
- flags = MGMT_DEV_FOUND_NOT_CONNECTABLE;
+ flags = MGMT_DEV_FOUND_SCAN_RSP;
/* If there's nothing pending either store the data from this
* event or send an immediate device found event if the data
@@ -6790,6 +6789,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
{
struct hci_evt_le_cis_established *ev = data;
struct hci_conn *conn;
+ struct bt_iso_qos *qos;
u16 handle = __le16_to_cpu(ev->handle);
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6811,21 +6811,39 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
- if (conn->role == HCI_ROLE_SLAVE) {
- __le32 interval;
-
- memset(&interval, 0, sizeof(interval));
-
- memcpy(&interval, ev->c_latency, sizeof(ev->c_latency));
- conn->iso_qos.ucast.in.interval = le32_to_cpu(interval);
- memcpy(&interval, ev->p_latency, sizeof(ev->p_latency));
- conn->iso_qos.ucast.out.interval = le32_to_cpu(interval);
- conn->iso_qos.ucast.in.latency = le16_to_cpu(ev->interval);
- conn->iso_qos.ucast.out.latency = le16_to_cpu(ev->interval);
- conn->iso_qos.ucast.in.sdu = le16_to_cpu(ev->c_mtu);
- conn->iso_qos.ucast.out.sdu = le16_to_cpu(ev->p_mtu);
- conn->iso_qos.ucast.in.phy = ev->c_phy;
- conn->iso_qos.ucast.out.phy = ev->p_phy;
+ qos = &conn->iso_qos;
+
+ /* Convert ISO Interval (1.25 ms slots) to SDU Interval (us) */
+ qos->ucast.in.interval = le16_to_cpu(ev->interval) * 1250;
+ qos->ucast.out.interval = qos->ucast.in.interval;
+
+ switch (conn->role) {
+ case HCI_ROLE_SLAVE:
+ /* Convert Transport Latency (us) to Latency (msec) */
+ qos->ucast.in.latency =
+ DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency),
+ 1000);
+ qos->ucast.out.latency =
+ DIV_ROUND_CLOSEST(get_unaligned_le24(ev->p_latency),
+ 1000);
+ qos->ucast.in.sdu = le16_to_cpu(ev->c_mtu);
+ qos->ucast.out.sdu = le16_to_cpu(ev->p_mtu);
+ qos->ucast.in.phy = ev->c_phy;
+ qos->ucast.out.phy = ev->p_phy;
+ break;
+ case HCI_ROLE_MASTER:
+ /* Convert Transport Latency (us) to Latency (msec) */
+ qos->ucast.out.latency =
+ DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency),
+ 1000);
+ qos->ucast.in.latency =
+ DIV_ROUND_CLOSEST(get_unaligned_le24(ev->p_latency),
+ 1000);
+ qos->ucast.out.sdu = le16_to_cpu(ev->c_mtu);
+ qos->ucast.in.sdu = le16_to_cpu(ev->p_mtu);
+ qos->ucast.out.phy = ev->c_phy;
+ qos->ucast.in.phy = ev->p_phy;
+ break;
}
if (!ev->status) {
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 804cde43b4e0..4d1e32bb6a9c 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -2160,15 +2160,23 @@ static int hci_le_del_accept_list_sync(struct hci_dev *hdev,
return 0;
}
+struct conn_params {
+ bdaddr_t addr;
+ u8 addr_type;
+ hci_conn_flags_t flags;
+ u8 privacy_mode;
+};
+
/* Adds connection to resolve list if needed.
* Setting params to NULL programs local hdev->irk
*/
static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
- struct hci_conn_params *params)
+ struct conn_params *params)
{
struct hci_cp_le_add_to_resolv_list cp;
struct smp_irk *irk;
struct bdaddr_list_with_irk *entry;
+ struct hci_conn_params *p;
if (!use_ll_privacy(hdev))
return 0;
@@ -2203,6 +2211,16 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
/* Default privacy mode is always Network */
params->privacy_mode = HCI_NETWORK_PRIVACY;
+ rcu_read_lock();
+ p = hci_pend_le_action_lookup(&hdev->pend_le_conns,
+ &params->addr, params->addr_type);
+ if (!p)
+ p = hci_pend_le_action_lookup(&hdev->pend_le_reports,
+ &params->addr, params->addr_type);
+ if (p)
+ WRITE_ONCE(p->privacy_mode, HCI_NETWORK_PRIVACY);
+ rcu_read_unlock();
+
done:
if (hci_dev_test_flag(hdev, HCI_PRIVACY))
memcpy(cp.local_irk, hdev->irk, 16);
@@ -2215,7 +2233,7 @@ done:
/* Set Device Privacy Mode. */
static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
- struct hci_conn_params *params)
+ struct conn_params *params)
{
struct hci_cp_le_set_privacy_mode cp;
struct smp_irk *irk;
@@ -2240,6 +2258,8 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
bacpy(&cp.bdaddr, &irk->bdaddr);
cp.mode = HCI_DEVICE_PRIVACY;
+ /* Note: params->privacy_mode is not updated since it is a copy */
+
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PRIVACY_MODE,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
@@ -2249,7 +2269,7 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
* properly set the privacy mode.
*/
static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
- struct hci_conn_params *params,
+ struct conn_params *params,
u8 *num_entries)
{
struct hci_cp_le_add_to_accept_list cp;
@@ -2447,6 +2467,52 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev,
return __hci_cmd_sync_sk(hdev, opcode, 0, NULL, 0, HCI_CMD_TIMEOUT, sk);
}
+static struct conn_params *conn_params_copy(struct list_head *list, size_t *n)
+{
+ struct hci_conn_params *params;
+ struct conn_params *p;
+ size_t i;
+
+ rcu_read_lock();
+
+ i = 0;
+ list_for_each_entry_rcu(params, list, action)
+ ++i;
+ *n = i;
+
+ rcu_read_unlock();
+
+ p = kvcalloc(*n, sizeof(struct conn_params), GFP_KERNEL);
+ if (!p)
+ return NULL;
+
+ rcu_read_lock();
+
+ i = 0;
+ list_for_each_entry_rcu(params, list, action) {
+ /* Racing adds are handled in next scan update */
+ if (i >= *n)
+ break;
+
+ /* No hdev->lock, but: addr, addr_type are immutable.
+ * privacy_mode is only written by us or in
+ * hci_cc_le_set_privacy_mode that we wait for.
+ * We should be idempotent so MGMT updating flags
+ * while we are processing is OK.
+ */
+ bacpy(&p[i].addr, &params->addr);
+ p[i].addr_type = params->addr_type;
+ p[i].flags = READ_ONCE(params->flags);
+ p[i].privacy_mode = READ_ONCE(params->privacy_mode);
+ ++i;
+ }
+
+ rcu_read_unlock();
+
+ *n = i;
+ return p;
+}
+
/* Device must not be scanning when updating the accept list.
*
* Update is done using the following sequence:
@@ -2466,11 +2532,12 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev,
*/
static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
{
- struct hci_conn_params *params;
+ struct conn_params *params;
struct bdaddr_list *b, *t;
u8 num_entries = 0;
bool pend_conn, pend_report;
u8 filter_policy;
+ size_t i, n;
int err;
/* Pause advertising if resolving list can be used as controllers
@@ -2504,6 +2571,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
if (hci_conn_hash_lookup_le(hdev, &b->bdaddr, b->bdaddr_type))
continue;
+ /* Pointers not dereferenced, no locks needed */
pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns,
&b->bdaddr,
b->bdaddr_type);
@@ -2532,23 +2600,50 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
* available accept list entries in the controller, then
* just abort and return filer policy value to not use the
* accept list.
+ *
+ * The list and params may be mutated while we wait for events,
+ * so make a copy and iterate it.
*/
- list_for_each_entry(params, &hdev->pend_le_conns, action) {
- err = hci_le_add_accept_list_sync(hdev, params, &num_entries);
- if (err)
+
+ params = conn_params_copy(&hdev->pend_le_conns, &n);
+ if (!params) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ for (i = 0; i < n; ++i) {
+ err = hci_le_add_accept_list_sync(hdev, &params[i],
+ &num_entries);
+ if (err) {
+ kvfree(params);
goto done;
+ }
}
+ kvfree(params);
+
/* After adding all new pending connections, walk through
* the list of pending reports and also add these to the
* accept list if there is still space. Abort if space runs out.
*/
- list_for_each_entry(params, &hdev->pend_le_reports, action) {
- err = hci_le_add_accept_list_sync(hdev, params, &num_entries);
- if (err)
+
+ params = conn_params_copy(&hdev->pend_le_reports, &n);
+ if (!params) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ for (i = 0; i < n; ++i) {
+ err = hci_le_add_accept_list_sync(hdev, &params[i],
+ &num_entries);
+ if (err) {
+ kvfree(params);
goto done;
+ }
}
+ kvfree(params);
+
/* Use the allowlist unless the following conditions are all true:
* - We are not currently suspending
* - There are 1 or more ADV monitors registered and it's not offloaded
@@ -4623,26 +4718,18 @@ static int hci_dev_setup_sync(struct hci_dev *hdev)
* BD_ADDR invalid before creating the HCI device or in
* its setup callback.
*/
- invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
-
+ invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) ||
+ test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
if (!ret) {
- if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) {
- if (!bacmp(&hdev->public_addr, BDADDR_ANY))
- hci_dev_get_bd_addr_from_property(hdev);
-
- if (bacmp(&hdev->public_addr, BDADDR_ANY) &&
- hdev->set_bdaddr) {
- ret = hdev->set_bdaddr(hdev,
- &hdev->public_addr);
-
- /* If setting of the BD_ADDR from the device
- * property succeeds, then treat the address
- * as valid even if the invalid BD_ADDR
- * quirk indicates otherwise.
- */
- if (!ret)
- invalid_bdaddr = false;
- }
+ if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks) &&
+ !bacmp(&hdev->public_addr, BDADDR_ANY))
+ hci_dev_get_bd_addr_from_property(hdev);
+
+ if (invalid_bdaddr && bacmp(&hdev->public_addr, BDADDR_ANY) &&
+ hdev->set_bdaddr) {
+ ret = hdev->set_bdaddr(hdev, &hdev->public_addr);
+ if (!ret)
+ invalid_bdaddr = false;
}
}
@@ -4845,12 +4932,12 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
struct hci_conn_params *p;
list_for_each_entry(p, &hdev->le_conn_params, list) {
+ hci_pend_le_list_del_init(p);
if (p->conn) {
hci_conn_drop(p->conn);
hci_conn_put(p->conn);
p->conn = NULL;
}
- list_del_init(&p->action);
}
BT_DBG("All LE pending actions cleared");
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 2934d7f4d564..15b33579007c 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -6,7 +6,9 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
-static struct class *bt_class;
+static const struct class bt_class = {
+ .name = "bluetooth",
+};
static void bt_link_release(struct device *dev)
{
@@ -36,7 +38,7 @@ void hci_conn_init_sysfs(struct hci_conn *conn)
BT_DBG("conn %p", conn);
conn->dev.type = &bt_link;
- conn->dev.class = bt_class;
+ conn->dev.class = &bt_class;
conn->dev.parent = &hdev->dev;
device_initialize(&conn->dev);
@@ -104,7 +106,7 @@ void hci_init_sysfs(struct hci_dev *hdev)
struct device *dev = &hdev->dev;
dev->type = &bt_host;
- dev->class = bt_class;
+ dev->class = &bt_class;
__module_get(THIS_MODULE);
device_initialize(dev);
@@ -112,12 +114,10 @@ void hci_init_sysfs(struct hci_dev *hdev)
int __init bt_sysfs_init(void)
{
- bt_class = class_create("bluetooth");
-
- return PTR_ERR_OR_ZERO(bt_class);
+ return class_register(&bt_class);
}
void bt_sysfs_cleanup(void)
{
- class_destroy(bt_class);
+ class_unregister(&bt_class);
}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 34d55a85d8f6..505d62247268 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -123,8 +123,11 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon)
{
struct iso_conn *conn = hcon->iso_data;
- if (conn)
+ if (conn) {
+ if (!conn->hcon)
+ conn->hcon = hcon;
return conn;
+ }
conn = kzalloc(sizeof(*conn), GFP_KERNEL);
if (!conn)
@@ -300,14 +303,13 @@ static int iso_connect_bis(struct sock *sk)
goto unlock;
}
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
+ lock_sock(sk);
err = iso_chan_add(conn, sk, NULL);
- if (err)
- return err;
-
- lock_sock(sk);
+ if (err) {
+ release_sock(sk);
+ goto unlock;
+ }
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
@@ -321,7 +323,6 @@ static int iso_connect_bis(struct sock *sk)
}
release_sock(sk);
- return err;
unlock:
hci_dev_unlock(hdev);
@@ -389,14 +390,13 @@ static int iso_connect_cis(struct sock *sk)
goto unlock;
}
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
+ lock_sock(sk);
err = iso_chan_add(conn, sk, NULL);
- if (err)
- return err;
-
- lock_sock(sk);
+ if (err) {
+ release_sock(sk);
+ goto unlock;
+ }
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
@@ -413,7 +413,6 @@ static int iso_connect_cis(struct sock *sk)
}
release_sock(sk);
- return err;
unlock:
hci_dev_unlock(hdev);
@@ -704,7 +703,7 @@ static struct bt_iso_qos default_qos = {
.bcast = {
.big = BT_ISO_QOS_BIG_UNSET,
.bis = BT_ISO_QOS_BIS_UNSET,
- .sync_interval = 0x00,
+ .sync_factor = 0x01,
.packing = 0x00,
.framing = 0x00,
.in = DEFAULT_IO_QOS,
@@ -1072,8 +1071,8 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
struct sock *sk = sock->sk;
- struct iso_conn *conn = iso_pi(sk)->conn;
struct sk_buff *skb, **frag;
+ size_t mtu;
int err;
BT_DBG("sock %p, sk %p", sock, sk);
@@ -1085,11 +1084,18 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
- if (sk->sk_state != BT_CONNECTED)
+ lock_sock(sk);
+
+ if (sk->sk_state != BT_CONNECTED) {
+ release_sock(sk);
return -ENOTCONN;
+ }
+
+ mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu;
+
+ release_sock(sk);
- skb = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
- HCI_ISO_DATA_HDR_SIZE, 0);
+ skb = bt_skb_sendmsg(sk, msg, len, mtu, HCI_ISO_DATA_HDR_SIZE, 0);
if (IS_ERR(skb))
return PTR_ERR(skb);
@@ -1102,8 +1108,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
while (len) {
struct sk_buff *tmp;
- tmp = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
- 0, 0);
+ tmp = bt_skb_sendmsg(sk, msg, len, mtu, 0, 0);
if (IS_ERR(tmp)) {
kfree_skb(skb);
return PTR_ERR(tmp);
@@ -1158,15 +1163,19 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
BT_DBG("sk %p", sk);
if (test_and_clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ lock_sock(sk);
switch (sk->sk_state) {
case BT_CONNECT2:
- lock_sock(sk);
iso_conn_defer_accept(pi->conn->hcon);
sk->sk_state = BT_CONFIG;
release_sock(sk);
return 0;
case BT_CONNECT:
+ release_sock(sk);
return iso_connect_cis(sk);
+ default:
+ release_sock(sk);
+ break;
}
}
@@ -1213,7 +1222,7 @@ static bool check_ucast_qos(struct bt_iso_qos *qos)
static bool check_bcast_qos(struct bt_iso_qos *qos)
{
- if (qos->bcast.sync_interval > 0x07)
+ if (qos->bcast.sync_factor == 0x00)
return false;
if (qos->bcast.packing > 0x01)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index c5e8798e297c..17ca13e8c044 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -6374,9 +6374,14 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn,
if (!chan)
goto done;
+ chan = l2cap_chan_hold_unless_zero(chan);
+ if (!chan)
+ goto done;
+
l2cap_chan_lock(chan);
l2cap_chan_del(chan, ECONNREFUSED);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
done:
mutex_unlock(&conn->chan_lock);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index eebe256104bc..947ca580bb9a 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -46,6 +46,7 @@ static const struct proto_ops l2cap_sock_ops;
static void l2cap_sock_init(struct sock *sk, struct sock *parent);
static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
int proto, gfp_t prio, int kern);
+static void l2cap_sock_cleanup_listen(struct sock *parent);
bool l2cap_is_socket(struct socket *sock)
{
@@ -1415,6 +1416,7 @@ static int l2cap_sock_release(struct socket *sock)
if (!sk)
return 0;
+ l2cap_sock_cleanup_listen(sk);
bt_sock_unlink(&l2cap_sk_list, sk);
err = l2cap_sock_shutdown(sock, SHUT_RDWR);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f7b2d0971f24..d4498037fadc 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1297,15 +1297,15 @@ static void restart_le_actions(struct hci_dev *hdev)
/* Needed for AUTO_OFF case where might not "really"
* have been powered off.
*/
- list_del_init(&p->action);
+ hci_pend_le_list_del_init(p);
switch (p->auto_connect) {
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_add(&p->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(p, &hdev->pend_le_conns);
break;
case HCI_AUTO_CONN_REPORT:
- list_add(&p->action, &hdev->pend_le_reports);
+ hci_pend_le_list_add(p, &hdev->pend_le_reports);
break;
default:
break;
@@ -5169,7 +5169,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
- params->flags = current_flags;
+ WRITE_ONCE(params->flags, current_flags);
status = MGMT_STATUS_SUCCESS;
/* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY
@@ -7285,7 +7285,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- memcpy(&rp.addr, &cp->addr.bdaddr, sizeof(rp.addr));
+ memcpy(&rp.addr, &cp->addr, sizeof(rp.addr));
status = mgmt_status(err);
if (status == MGMT_STATUS_SUCCESS) {
@@ -7580,7 +7580,7 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
if (params->auto_connect == auto_connect)
return 0;
- list_del_init(&params->action);
+ hci_pend_le_list_del_init(params);
switch (auto_connect) {
case HCI_AUTO_CONN_DISABLED:
@@ -7589,18 +7589,18 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
* connect to device, keep connecting.
*/
if (params->explicit_connect)
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
break;
case HCI_AUTO_CONN_REPORT:
if (params->explicit_connect)
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
else
- list_add(&params->action, &hdev->pend_le_reports);
+ hci_pend_le_list_add(params, &hdev->pend_le_reports);
break;
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
if (!is_connected(hdev, addr, addr_type))
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
break;
}
@@ -7823,9 +7823,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- list_del(&params->action);
- list_del(&params->list);
- kfree(params);
+ hci_conn_params_free(params);
device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type);
} else {
@@ -7856,9 +7854,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
p->auto_connect = HCI_AUTO_CONN_EXPLICIT;
continue;
}
- list_del(&p->action);
- list_del(&p->list);
- kfree(p);
+ hci_conn_params_free(p);
}
bt_dev_dbg(hdev, "All LE connection parameters were removed");
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index cd1a27ac555d..7762604ddfc0 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -126,8 +126,11 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
struct hci_dev *hdev = hcon->hdev;
struct sco_conn *conn = hcon->sco_data;
- if (conn)
+ if (conn) {
+ if (!conn->hcon)
+ conn->hcon = hcon;
return conn;
+ }
conn = kzalloc(sizeof(struct sco_conn), GFP_KERNEL);
if (!conn)
@@ -268,21 +271,21 @@ static int sco_connect(struct sock *sk)
goto unlock;
}
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
-
conn = sco_conn_add(hcon);
if (!conn) {
hci_conn_drop(hcon);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto unlock;
}
- err = sco_chan_add(conn, sk, NULL);
- if (err)
- return err;
-
lock_sock(sk);
+ err = sco_chan_add(conn, sk, NULL);
+ if (err) {
+ release_sock(sk);
+ goto unlock;
+ }
+
/* Update source addr of the socket */
bacpy(&sco_pi(sk)->src, &hcon->src);
@@ -296,8 +299,6 @@ static int sco_connect(struct sock *sk)
release_sock(sk);
- return err;
-
unlock:
hci_dev_unlock(hdev);
hci_dev_put(hdev);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 3f04b40f6056..2450690f98cf 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -166,8 +166,9 @@ void br_manage_promisc(struct net_bridge *br)
* This lets us disable promiscuous mode and write
* this config to hw.
*/
- if (br->auto_cnt == 0 ||
- (br->auto_cnt == 1 && br_auto_port(p)))
+ if ((p->dev->priv_flags & IFF_UNICAST_FLT) &&
+ (br->auto_cnt == 0 ||
+ (br->auto_cnt == 1 && br_auto_port(p))))
br_port_clear_promisc(p);
else
br_port_set_promisc(p);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 9ba35685b043..9168114fc87f 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1526,6 +1526,12 @@ static int bcm_release(struct socket *sock)
lock_sock(sk);
+#if IS_ENABLED(CONFIG_PROC_FS)
+ /* remove procfs entry */
+ if (net->can.bcmproc_dir && bo->bcm_proc_read)
+ remove_proc_entry(bo->procname, net->can.bcmproc_dir);
+#endif /* CONFIG_PROC_FS */
+
list_for_each_entry_safe(op, next, &bo->tx_ops, list)
bcm_remove_op(op);
@@ -1561,12 +1567,6 @@ static int bcm_release(struct socket *sock)
list_for_each_entry_safe(op, next, &bo->rx_ops, list)
bcm_remove_op(op);
-#if IS_ENABLED(CONFIG_PROC_FS)
- /* remove procfs entry */
- if (net->can.bcmproc_dir && bo->bcm_proc_read)
- remove_proc_entry(bo->procname, net->can.bcmproc_dir);
-#endif /* CONFIG_PROC_FS */
-
/* remove device reference */
if (bo->bound) {
bo->bound = 0;
diff --git a/net/can/raw.c b/net/can/raw.c
index 15c79b079184..e10f59375659 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -84,6 +84,7 @@ struct raw_sock {
struct sock sk;
int bound;
int ifindex;
+ struct net_device *dev;
struct list_head notifier;
int loopback;
int recv_own_msgs;
@@ -277,7 +278,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
if (!net_eq(dev_net(dev), sock_net(sk)))
return;
- if (ro->ifindex != dev->ifindex)
+ if (ro->dev != dev)
return;
switch (msg) {
@@ -292,6 +293,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
ro->ifindex = 0;
ro->bound = 0;
+ ro->dev = NULL;
ro->count = 0;
release_sock(sk);
@@ -337,6 +339,7 @@ static int raw_init(struct sock *sk)
ro->bound = 0;
ro->ifindex = 0;
+ ro->dev = NULL;
/* set default filter to single entry dfilter */
ro->dfilter.can_id = 0;
@@ -383,21 +386,15 @@ static int raw_release(struct socket *sock)
list_del(&ro->notifier);
spin_unlock(&raw_notifier_lock);
+ rtnl_lock();
lock_sock(sk);
/* remove current filters & unregister */
if (ro->bound) {
- if (ro->ifindex) {
- struct net_device *dev;
-
- dev = dev_get_by_index(sock_net(sk), ro->ifindex);
- if (dev) {
- raw_disable_allfilters(dev_net(dev), dev, sk);
- dev_put(dev);
- }
- } else {
+ if (ro->dev)
+ raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk);
+ else
raw_disable_allfilters(sock_net(sk), NULL, sk);
- }
}
if (ro->count > 1)
@@ -405,6 +402,7 @@ static int raw_release(struct socket *sock)
ro->ifindex = 0;
ro->bound = 0;
+ ro->dev = NULL;
ro->count = 0;
free_percpu(ro->uniq);
@@ -412,6 +410,8 @@ static int raw_release(struct socket *sock)
sock->sk = NULL;
release_sock(sk);
+ rtnl_unlock();
+
sock_put(sk);
return 0;
@@ -422,6 +422,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
struct raw_sock *ro = raw_sk(sk);
+ struct net_device *dev = NULL;
int ifindex;
int err = 0;
int notify_enetdown = 0;
@@ -431,14 +432,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (addr->can_family != AF_CAN)
return -EINVAL;
+ rtnl_lock();
lock_sock(sk);
if (ro->bound && addr->can_ifindex == ro->ifindex)
goto out;
if (addr->can_ifindex) {
- struct net_device *dev;
-
dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
if (!dev) {
err = -ENODEV;
@@ -467,26 +467,20 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (!err) {
if (ro->bound) {
/* unregister old filters */
- if (ro->ifindex) {
- struct net_device *dev;
-
- dev = dev_get_by_index(sock_net(sk),
- ro->ifindex);
- if (dev) {
- raw_disable_allfilters(dev_net(dev),
- dev, sk);
- dev_put(dev);
- }
- } else {
+ if (ro->dev)
+ raw_disable_allfilters(dev_net(ro->dev),
+ ro->dev, sk);
+ else
raw_disable_allfilters(sock_net(sk), NULL, sk);
- }
}
ro->ifindex = ifindex;
ro->bound = 1;
+ ro->dev = dev;
}
out:
release_sock(sk);
+ rtnl_unlock();
if (notify_enetdown) {
sk->sk_err = ENETDOWN;
@@ -553,9 +547,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex) {
- dev = dev_get_by_index(sock_net(sk), ro->ifindex);
- if (!dev) {
+ dev = ro->dev;
+ if (ro->bound && dev) {
+ if (dev->reg_state != NETREG_REGISTERED) {
if (count > 1)
kfree(filter);
err = -ENODEV;
@@ -596,7 +590,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->count = count;
out_fil:
- dev_put(dev);
release_sock(sk);
rtnl_unlock();
@@ -614,9 +607,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex) {
- dev = dev_get_by_index(sock_net(sk), ro->ifindex);
- if (!dev) {
+ dev = ro->dev;
+ if (ro->bound && dev) {
+ if (dev->reg_state != NETREG_REGISTERED) {
err = -ENODEV;
goto out_err;
}
@@ -640,7 +633,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->err_mask = err_mask;
out_err:
- dev_put(dev);
release_sock(sk);
rtnl_unlock();
@@ -873,7 +865,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
skb->dev = dev;
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;
skb_setup_tx_timestamp(skb, sockc.tsflags);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index cd7b0bf5369e..5eb4898cccd4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1123,6 +1123,7 @@ bool ceph_addr_is_blank(const struct ceph_entity_addr *addr)
return true;
}
}
+EXPORT_SYMBOL(ceph_addr_is_blank);
int ceph_addr_port(const struct ceph_entity_addr *addr)
{
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index 1a888b86a494..1df1d29dee92 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -390,6 +390,8 @@ static int head_onwire_len(int ctrl_len, bool secure)
int head_len;
int rem_len;
+ BUG_ON(ctrl_len < 0 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN);
+
if (secure) {
head_len = CEPH_PREAMBLE_SECURE_LEN;
if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
@@ -408,6 +410,10 @@ static int head_onwire_len(int ctrl_len, bool secure)
static int __tail_onwire_len(int front_len, int middle_len, int data_len,
bool secure)
{
+ BUG_ON(front_len < 0 || front_len > CEPH_MSG_MAX_FRONT_LEN ||
+ middle_len < 0 || middle_len > CEPH_MSG_MAX_MIDDLE_LEN ||
+ data_len < 0 || data_len > CEPH_MSG_MAX_DATA_LEN);
+
if (!front_len && !middle_len && !data_len)
return 0;
@@ -520,29 +526,34 @@ static int decode_preamble(void *p, struct ceph_frame_desc *desc)
desc->fd_aligns[i] = ceph_decode_16(&p);
}
- /*
- * This would fire for FRAME_TAG_WAIT (it has one empty
- * segment), but we should never get it as client.
- */
- if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
- pr_err("last segment empty\n");
+ if (desc->fd_lens[0] < 0 ||
+ desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
+ pr_err("bad control segment length %d\n", desc->fd_lens[0]);
return -EINVAL;
}
-
- if (desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
- pr_err("control segment too big %d\n", desc->fd_lens[0]);
+ if (desc->fd_lens[1] < 0 ||
+ desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
+ pr_err("bad front segment length %d\n", desc->fd_lens[1]);
return -EINVAL;
}
- if (desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
- pr_err("front segment too big %d\n", desc->fd_lens[1]);
+ if (desc->fd_lens[2] < 0 ||
+ desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
+ pr_err("bad middle segment length %d\n", desc->fd_lens[2]);
return -EINVAL;
}
- if (desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
- pr_err("middle segment too big %d\n", desc->fd_lens[2]);
+ if (desc->fd_lens[3] < 0 ||
+ desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
+ pr_err("bad data segment length %d\n", desc->fd_lens[3]);
return -EINVAL;
}
- if (desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
- pr_err("data segment too big %d\n", desc->fd_lens[3]);
+
+ /*
+ * This would fire for FRAME_TAG_WAIT (it has one empty
+ * segment), but we should never get it as client.
+ */
+ if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
+ pr_err("last segment empty, segment count %d\n",
+ desc->fd_seg_cnt);
return -EINVAL;
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 11c04e7d928e..658a6f2320cf 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -3334,17 +3334,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq)
int ret;
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- ret = wait_for_completion_interruptible(&lreq->reg_commit_wait);
+ ret = wait_for_completion_killable(&lreq->reg_commit_wait);
return ret ?: lreq->reg_commit_error;
}
-static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq)
+static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
+ unsigned long timeout)
{
- int ret;
+ long left;
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- ret = wait_for_completion_interruptible(&lreq->notify_finish_wait);
- return ret ?: lreq->notify_finish_error;
+ left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
+ ceph_timeout_jiffies(timeout));
+ if (left <= 0)
+ left = left ?: -ETIMEDOUT;
+ else
+ left = lreq->notify_finish_error; /* completed */
+
+ return left;
}
/*
@@ -4896,7 +4903,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (!ret)
- ret = linger_notify_finish_wait(lreq);
+ ret = linger_notify_finish_wait(lreq,
+ msecs_to_jiffies(2 * timeout * MSEC_PER_SEC));
else
dout("lreq %p failed to initiate notify %d\n", lreq, ret);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d4172534dfa8..cca7594be92e 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -496,8 +496,11 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
return ERR_PTR(-EPERM);
nla_for_each_nested(nla, nla_stgs, rem) {
- if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
+ if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
+ if (nla_len(nla) != sizeof(u32))
+ return ERR_PTR(-EINVAL);
nr_maps++;
+ }
}
diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
diff --git a/net/core/filter.c b/net/core/filter.c
index 06ba0e56e369..28a59596987a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4116,12 +4116,6 @@ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
if (unlikely(data_end > data_hard_end))
return -EINVAL;
- /* ALL drivers MUST init xdp->frame_sz, chicken check below */
- if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
- WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
- return -EINVAL;
- }
-
if (unlikely(data_end < xdp->data + ETH_HLEN))
return -EINVAL;
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 805b7385dd8d..6aef976bc1da 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -63,4 +63,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum);
+EXPORT_TRACEPOINT_SYMBOL_GPL(udp_fail_queue_rcv_skb);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(sk_data_ready);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3ad4e030846d..aef25aa5cf1d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -5140,13 +5140,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
- if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
+ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;
have_flags = true;
flags = nla_get_u16(attr);
- break;
+ }
+
+ if (nla_type(attr) == IFLA_BRIDGE_MODE) {
+ if (nla_len(attr) < sizeof(u16))
+ return -EINVAL;
}
}
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6c5915efbc17..a298992060e6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4261,6 +4261,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
skb_push(skb, -skb_network_offset(skb) + offset);
+ /* Ensure the head is writeable before touching the shared info */
+ err = skb_unclone(skb, GFP_ATOMIC);
+ if (err)
+ goto err_linearize;
+
skb_shinfo(skb)->frag_list = NULL;
while (list_skb) {
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index a29508e1ff35..ef1a2eb6520b 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1120,13 +1120,19 @@ static void sk_psock_strp_data_ready(struct sock *sk)
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
{
+ int ret;
+
static const struct strp_callbacks cb = {
.rcv_msg = sk_psock_strp_read,
.read_sock_done = sk_psock_strp_read_done,
.parse_msg = sk_psock_strp_parse,
};
- return strp_init(&psock->strp, sk, &cb);
+ ret = strp_init(&psock->strp, sk, &cb);
+ if (!ret)
+ sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
+
+ return ret;
}
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
@@ -1154,7 +1160,7 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
static void sk_psock_done_strp(struct sk_psock *psock)
{
/* Parser has been stopped */
- if (psock->progs.stream_parser)
+ if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED))
strp_done(&psock->strp);
}
#else
diff --git a/net/core/sock.c b/net/core/sock.c
index 9370fd50aa2c..732fc37a4771 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -429,6 +429,7 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
{
struct __kernel_sock_timeval tv;
int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
+ long val;
if (err)
return err;
@@ -439,7 +440,7 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
if (tv.tv_sec < 0) {
static int warned __read_mostly;
- *timeo_p = 0;
+ WRITE_ONCE(*timeo_p, 0);
if (warned < 10 && net_ratelimit()) {
warned++;
pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
@@ -447,11 +448,12 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
}
return 0;
}
- *timeo_p = MAX_SCHEDULE_TIMEOUT;
- if (tv.tv_sec == 0 && tv.tv_usec == 0)
- return 0;
- if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
- *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
+ val = MAX_SCHEDULE_TIMEOUT;
+ if ((tv.tv_sec || tv.tv_usec) &&
+ (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)))
+ val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
+ USEC_PER_SEC / HZ);
+ WRITE_ONCE(*timeo_p, val);
return 0;
}
@@ -804,7 +806,7 @@ EXPORT_SYMBOL(sock_no_linger);
void sock_set_priority(struct sock *sk, u32 priority)
{
lock_sock(sk);
- sk->sk_priority = priority;
+ WRITE_ONCE(sk->sk_priority, priority);
release_sock(sk);
}
EXPORT_SYMBOL(sock_set_priority);
@@ -813,9 +815,9 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs)
{
lock_sock(sk);
if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
- sk->sk_sndtimeo = secs * HZ;
+ WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
else
- sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+ WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
release_sock(sk);
}
EXPORT_SYMBOL(sock_set_sndtimeo);
@@ -988,7 +990,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf);
static void __sock_set_mark(struct sock *sk, u32 val)
{
if (val != sk->sk_mark) {
- sk->sk_mark = val;
+ WRITE_ONCE(sk->sk_mark, val);
sk_dst_reset(sk);
}
}
@@ -1007,7 +1009,7 @@ static void sock_release_reserved_memory(struct sock *sk, int bytes)
bytes = round_down(bytes, PAGE_SIZE);
WARN_ON(bytes > sk->sk_reserved_mem);
- sk->sk_reserved_mem -= bytes;
+ WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
sk_mem_reclaim(sk);
}
@@ -1044,7 +1046,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
}
sk->sk_forward_alloc += pages << PAGE_SHIFT;
- sk->sk_reserved_mem += pages << PAGE_SHIFT;
+ WRITE_ONCE(sk->sk_reserved_mem,
+ sk->sk_reserved_mem + (pages << PAGE_SHIFT));
return 0;
}
@@ -1213,7 +1216,7 @@ set_sndbuf:
if ((val >= 0 && val <= 6) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- sk->sk_priority = val;
+ WRITE_ONCE(sk->sk_priority, val);
else
ret = -EPERM;
break;
@@ -1438,7 +1441,8 @@ set_sndbuf:
cmpxchg(&sk->sk_pacing_status,
SK_PACING_NONE,
SK_PACING_NEEDED);
- sk->sk_max_pacing_rate = ulval;
+ /* Pairs with READ_ONCE() from sk_getsockopt() */
+ WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
break;
}
@@ -1533,7 +1537,9 @@ set_sndbuf:
}
if ((u8)val == SOCK_TXREHASH_DEFAULT)
val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
- /* Paired with READ_ONCE() in tcp_rtx_synack() */
+ /* Paired with READ_ONCE() in tcp_rtx_synack()
+ * and sk_getsockopt().
+ */
WRITE_ONCE(sk->sk_txrehash, (u8)val);
break;
@@ -1633,11 +1639,11 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_SNDBUF:
- v.val = sk->sk_sndbuf;
+ v.val = READ_ONCE(sk->sk_sndbuf);
break;
case SO_RCVBUF:
- v.val = sk->sk_rcvbuf;
+ v.val = READ_ONCE(sk->sk_rcvbuf);
break;
case SO_REUSEADDR:
@@ -1679,7 +1685,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_PRIORITY:
- v.val = sk->sk_priority;
+ v.val = READ_ONCE(sk->sk_priority);
break;
case SO_LINGER:
@@ -1717,16 +1723,18 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
- lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
+ lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v,
+ SO_RCVTIMEO_OLD == optname);
break;
case SO_SNDTIMEO_OLD:
case SO_SNDTIMEO_NEW:
- lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
+ lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v,
+ SO_SNDTIMEO_OLD == optname);
break;
case SO_RCVLOWAT:
- v.val = sk->sk_rcvlowat;
+ v.val = READ_ONCE(sk->sk_rcvlowat);
break;
case SO_SNDLOWAT:
@@ -1770,7 +1778,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
spin_unlock(&sk->sk_peer_lock);
if (!peer_pid)
- return -ESRCH;
+ return -ENODATA;
pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
put_pid(peer_pid);
@@ -1843,7 +1851,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
optval, optlen, len);
case SO_MARK:
- v.val = sk->sk_mark;
+ v.val = READ_ONCE(sk->sk_mark);
break;
case SO_RCVMARK:
@@ -1862,7 +1870,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
if (!sock->ops->set_peek_off)
return -EOPNOTSUPP;
- v.val = sk->sk_peek_off;
+ v.val = READ_ONCE(sk->sk_peek_off);
break;
case SO_NOFCS:
v.val = sock_flag(sk, SOCK_NOFCS);
@@ -1892,7 +1900,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_BUSY_POLL:
- v.val = sk->sk_ll_usec;
+ v.val = READ_ONCE(sk->sk_ll_usec);
break;
case SO_PREFER_BUSY_POLL:
v.val = READ_ONCE(sk->sk_prefer_busy_poll);
@@ -1900,12 +1908,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
#endif
case SO_MAX_PACING_RATE:
+ /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
lv = sizeof(v.ulval);
- v.ulval = sk->sk_max_pacing_rate;
+ v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
} else {
/* 32bit version */
- v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
+ v.val = min_t(unsigned long, ~0U,
+ READ_ONCE(sk->sk_max_pacing_rate));
}
break;
@@ -1973,11 +1983,12 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_RESERVE_MEM:
- v.val = sk->sk_reserved_mem;
+ v.val = READ_ONCE(sk->sk_reserved_mem);
break;
case SO_TXREHASH:
- v.val = sk->sk_txrehash;
+ /* Paired with WRITE_ONCE() in sk_setsockopt() */
+ v.val = READ_ONCE(sk->sk_txrehash);
break;
default:
@@ -3168,7 +3179,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
int sk_set_peek_off(struct sock *sk, int val)
{
- sk->sk_peek_off = val;
+ WRITE_ONCE(sk->sk_peek_off, val);
return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 19538d628714..8f07fea39d9e 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -115,7 +115,6 @@ static void sock_map_sk_acquire(struct sock *sk)
__acquires(&sk->sk_lock.slock)
{
lock_sock(sk);
- preempt_disable();
rcu_read_lock();
}
@@ -123,7 +122,6 @@ static void sock_map_sk_release(struct sock *sk)
__releases(&sk->sk_lock.slock)
{
rcu_read_unlock();
- preempt_enable();
release_sock(sk);
}
@@ -148,13 +146,13 @@ static void sock_map_del_link(struct sock *sk,
list_for_each_entry_safe(link, tmp, &psock->link, list) {
if (link->link_raw == link_raw) {
struct bpf_map *map = link->map;
- struct bpf_stab *stab = container_of(map, struct bpf_stab,
- map);
- if (psock->saved_data_ready && stab->progs.stream_parser)
+ struct sk_psock_progs *progs = sock_map_progs(map);
+
+ if (psock->saved_data_ready && progs->stream_parser)
strp_stop = true;
- if (psock->saved_data_ready && stab->progs.stream_verdict)
+ if (psock->saved_data_ready && progs->stream_verdict)
verdict_stop = true;
- if (psock->saved_data_ready && stab->progs.skb_verdict)
+ if (psock->saved_data_ready && progs->skb_verdict)
verdict_stop = true;
list_del(&link->list);
sk_psock_free_link(link);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 41e5ca8643ec..8362130bf085 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -741,7 +741,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
__diag_pop();
BTF_SET8_START(xdp_metadata_kfunc_ids)
-#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, 0)
+#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
BTF_SET8_END(xdp_metadata_kfunc_ids)
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index c0c438128575..2e6b8c8fd2de 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -980,7 +980,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX,
- tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest,
+ tb[DCB_ATTR_BCN], dcbnl_bcn_nest,
NULL);
if (ret)
return ret;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7249ef218178..d29d1163203d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -238,8 +238,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
- sk->sk_priority);
+ err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
+ np->tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
diff --git a/net/dccp/output.c b/net/dccp/output.c
index b8a24734385e..fd2eb148d24d 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -187,7 +187,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
/* And store cached results */
icsk->icsk_pmtu_cookie = pmtu;
- dp->dccps_mss_cache = cur_mps;
+ WRITE_ONCE(dp->dccps_mss_cache, cur_mps);
return cur_mps;
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index f331e5977a84..4e3266e4d7c3 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -630,7 +630,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
return dccp_getsockopt_service(sk, len,
(__be32 __user *)optval, optlen);
case DCCP_SOCKOPT_GET_CUR_MPS:
- val = dp->dccps_mss_cache;
+ val = READ_ONCE(dp->dccps_mss_cache);
break;
case DCCP_SOCKOPT_AVAILABLE_CCIDS:
return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
@@ -739,7 +739,7 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
trace_dccp_probe(sk, len);
- if (len > dp->dccps_mss_cache)
+ if (len > READ_ONCE(dp->dccps_mss_cache))
return -EMSGSIZE;
lock_sock(sk);
@@ -772,6 +772,12 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out_discard;
}
+ /* We need to check dccps_mss_cache after socket is locked. */
+ if (len > dp->dccps_mss_cache) {
+ rc = -EMSGSIZE;
+ goto out_discard;
+ }
+
skb_reserve(skb, sk->sk_prot->max_header);
rc = memcpy_from_msg(skb_put(skb, len), msg, len);
if (rc != 0)
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 0ce8fd311c78..2f6195d7b741 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1727,8 +1727,15 @@ int dsa_port_phylink_create(struct dsa_port *dp)
ds->ops->phylink_mac_an_restart)
dp->pl_config.legacy_pre_march2020 = true;
- if (ds->ops->phylink_get_caps)
+ if (ds->ops->phylink_get_caps) {
ds->ops->phylink_get_caps(ds, dp->index, &dp->pl_config);
+ } else {
+ /* For legacy drivers */
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+ dp->pl_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ dp->pl_config.supported_interfaces);
+ }
pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn),
mode, &dsa_port_phylink_mac_ops);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index a5f3b73da417..ade3eeb2f3e6 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -58,11 +58,8 @@
#define SJA1110_TX_TRAILER_LEN 4
#define SJA1110_MAX_PADDING_LEN 15
-#define SJA1105_HWTS_RX_EN 0
-
struct sja1105_tagger_private {
struct sja1105_tagger_data data; /* Must be first */
- unsigned long state;
/* Protects concurrent access to the meta state machine
* from taggers running on multiple ports on SMP systems
*/
@@ -118,8 +115,8 @@ static void sja1105_meta_unpack(const struct sk_buff *skb,
* a unified unpacking command for both device series.
*/
packing(buf, &meta->tstamp, 31, 0, 4, UNPACK, 0);
- packing(buf + 4, &meta->dmac_byte_4, 7, 0, 1, UNPACK, 0);
- packing(buf + 5, &meta->dmac_byte_3, 7, 0, 1, UNPACK, 0);
+ packing(buf + 4, &meta->dmac_byte_3, 7, 0, 1, UNPACK, 0);
+ packing(buf + 5, &meta->dmac_byte_4, 7, 0, 1, UNPACK, 0);
packing(buf + 6, &meta->source_port, 7, 0, 1, UNPACK, 0);
packing(buf + 7, &meta->switch_id, 7, 0, 1, UNPACK, 0);
}
@@ -392,10 +389,6 @@ static struct sk_buff
priv = sja1105_tagger_private(ds);
- if (!test_bit(SJA1105_HWTS_RX_EN, &priv->state))
- /* Do normal processing. */
- return skb;
-
spin_lock(&priv->meta_lock);
/* Was this a link-local frame instead of the meta
* that we were expecting?
@@ -431,12 +424,6 @@ static struct sk_buff
priv = sja1105_tagger_private(ds);
- /* Drop the meta frame if we're not in the right state
- * to process it.
- */
- if (!test_bit(SJA1105_HWTS_RX_EN, &priv->state))
- return NULL;
-
spin_lock(&priv->meta_lock);
stampable_skb = priv->stampable_skb;
@@ -472,30 +459,6 @@ static struct sk_buff
return skb;
}
-static bool sja1105_rxtstamp_get_state(struct dsa_switch *ds)
-{
- struct sja1105_tagger_private *priv = sja1105_tagger_private(ds);
-
- return test_bit(SJA1105_HWTS_RX_EN, &priv->state);
-}
-
-static void sja1105_rxtstamp_set_state(struct dsa_switch *ds, bool on)
-{
- struct sja1105_tagger_private *priv = sja1105_tagger_private(ds);
-
- if (on)
- set_bit(SJA1105_HWTS_RX_EN, &priv->state);
- else
- clear_bit(SJA1105_HWTS_RX_EN, &priv->state);
-
- /* Initialize the meta state machine to a known state */
- if (!priv->stampable_skb)
- return;
-
- kfree_skb(priv->stampable_skb);
- priv->stampable_skb = NULL;
-}
-
static bool sja1105_skb_has_tag_8021q(const struct sk_buff *skb)
{
u16 tpid = ntohs(eth_hdr(skb)->h_proto);
@@ -545,33 +508,53 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
is_link_local = sja1105_is_link_local(skb);
is_meta = sja1105_is_meta_frame(skb);
- if (sja1105_skb_has_tag_8021q(skb)) {
- /* Normal traffic path. */
- sja1105_vlan_rcv(skb, &source_port, &switch_id, &vbid, &vid);
- } else if (is_link_local) {
+ if (is_link_local) {
/* Management traffic path. Switch embeds the switch ID and
* port ID into bytes of the destination MAC, courtesy of
* the incl_srcpt options.
*/
source_port = hdr->h_dest[3];
switch_id = hdr->h_dest[4];
- /* Clear the DMAC bytes that were mangled by the switch */
- hdr->h_dest[3] = 0;
- hdr->h_dest[4] = 0;
} else if (is_meta) {
sja1105_meta_unpack(skb, &meta);
source_port = meta.source_port;
switch_id = meta.switch_id;
- } else {
+ }
+
+ /* Normal data plane traffic and link-local frames are tagged with
+ * a tag_8021q VLAN which we have to strip
+ */
+ if (sja1105_skb_has_tag_8021q(skb)) {
+ int tmp_source_port = -1, tmp_switch_id = -1;
+
+ sja1105_vlan_rcv(skb, &tmp_source_port, &tmp_switch_id, &vbid,
+ &vid);
+ /* Preserve the source information from the INCL_SRCPT option,
+ * if available. This allows us to not overwrite a valid source
+ * port and switch ID with zeroes when receiving link-local
+ * frames from a VLAN-unaware bridged port (non-zero vbid) or a
+ * VLAN-aware bridged port (non-zero vid). Furthermore, the
+ * tag_8021q source port information is only of trust when the
+ * vbid is 0 (precise port). Otherwise, tmp_source_port and
+ * tmp_switch_id will be zeroes.
+ */
+ if (vbid == 0 && source_port == -1)
+ source_port = tmp_source_port;
+ if (vbid == 0 && switch_id == -1)
+ switch_id = tmp_switch_id;
+ } else if (source_port == -1 && switch_id == -1) {
+ /* Packets with no source information have no chance of
+ * getting accepted, drop them straight away.
+ */
return NULL;
}
- if (vbid >= 1)
+ if (source_port != -1 && switch_id != -1)
+ skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
+ else if (vbid >= 1)
skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid);
- else if (source_port == -1 || switch_id == -1)
- skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
else
- skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
+ skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
if (!skb->dev) {
netdev_warn(netdev, "Couldn't decode source port\n");
return NULL;
@@ -762,7 +745,6 @@ static void sja1105_disconnect(struct dsa_switch *ds)
static int sja1105_connect(struct dsa_switch *ds)
{
- struct sja1105_tagger_data *tagger_data;
struct sja1105_tagger_private *priv;
struct kthread_worker *xmit_worker;
int err;
@@ -782,10 +764,6 @@ static int sja1105_connect(struct dsa_switch *ds)
}
priv->xmit_worker = xmit_worker;
- /* Export functions for switch driver use */
- tagger_data = &priv->data;
- tagger_data->rxtstamp_get_state = sja1105_rxtstamp_get_state;
- tagger_data->rxtstamp_set_state = sja1105_rxtstamp_set_state;
ds->tagger_data = priv;
return 0;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index ba06ed42e428..2be2d4922557 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1132,7 +1132,7 @@ static int esp_init_authenc(struct xfrm_state *x,
err = crypto_aead_setkey(aead, key, keylen);
free_key:
- kfree(key);
+ kfree_sensitive(key);
error:
return err;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0cc19cfbb673..aeebe8816689 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1019,7 +1019,7 @@ static void reqsk_timer_handler(struct timer_list *t)
icsk = inet_csk(sk_listener);
net = sock_net(sk_listener);
- max_syn_ack_retries = icsk->icsk_syn_retries ? :
+ max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index b812eb36f0e3..f7426926a104 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -150,7 +150,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
}
#endif
- if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
+ if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark)))
goto errout;
if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
@@ -799,7 +799,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry.ifindex = sk->sk_bound_dev_if;
entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
if (sk_fullsock(sk))
- entry.mark = sk->sk_mark;
+ entry.mark = READ_ONCE(sk->sk_mark);
else if (sk->sk_state == TCP_NEW_SYN_RECV)
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
else if (sk->sk_state == TCP_TIME_WAIT)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e7391bf310a7..0819d6001b9a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -650,20 +650,8 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
spin_lock(lock);
if (osk) {
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
- ret = sk_hashed(osk);
- if (ret) {
- /* Before deleting the node, we insert a new one to make
- * sure that the look-up-sk process would not miss either
- * of them and that at least one node would exist in ehash
- * table all the time. Otherwise there's a tiny chance
- * that lookup process could find nothing in ehash table.
- */
- __sk_nulls_add_node_tail_rcu(sk, list);
- sk_nulls_del_node_init_rcu(osk);
- }
- goto unlock;
- }
- if (found_dup_sk) {
+ ret = sk_nulls_del_node_init_rcu(osk);
+ } else if (found_dup_sk) {
*found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
if (*found_dup_sk)
ret = false;
@@ -672,7 +660,6 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
if (ret)
__sk_nulls_add_node_rcu(sk, list);
-unlock:
spin_unlock(lock);
return ret;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 40052414c7c7..2c1b245dba8e 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -88,10 +88,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
}
EXPORT_SYMBOL_GPL(inet_twsk_put);
-static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw,
- struct hlist_nulls_head *list)
+static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
+ struct hlist_nulls_head *list)
{
- hlist_nulls_add_tail_rcu(&tw->tw_node, list);
+ hlist_nulls_add_head_rcu(&tw->tw_node, list);
}
static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
@@ -144,7 +144,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
spin_lock(lock);
- inet_twsk_add_node_tail_rcu(tw, &ehead->chain);
+ inet_twsk_add_node_rcu(tw, &ehead->chain);
/* Step 3: Remove SK from hash chain */
if (__sk_nulls_del_node_init_rcu(sk))
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 81a1cce1a7d1..22a26d1d29a0 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -548,7 +548,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+ goto err_free_skb;
truncate = true;
}
@@ -689,7 +690,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
goto free_skb;
if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+ goto free_skb;
truncate = true;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6e70839257f7..6ba1a0fafbaa 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -184,9 +184,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
ip_options_build(skb, &opt->opt, daddr, rt);
}
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
if (!skb->mark)
- skb->mark = sk->sk_mark;
+ skb->mark = READ_ONCE(sk->sk_mark);
/* Send it out. */
return ip_local_out(net, skb->sk, skb);
@@ -528,8 +528,8 @@ packet_routed:
skb_shinfo(skb)->gso_segs ?: 1);
/* TODO : should we use skb->sk here instead of sk ? */
- skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->priority = READ_ONCE(sk->sk_priority);
+ skb->mark = READ_ONCE(sk->sk_mark);
res = ip_local_out(net, sk, skb);
rcu_read_unlock();
@@ -1158,10 +1158,15 @@ alloc_new_skb:
}
copy = datalen - transhdrlen - fraggap - pagedlen;
+ /* [!] NOTE: copy will be negative if pagedlen>0
+ * because then the equation reduces to -fraggap.
+ */
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
+ } else if (flags & MSG_SPLICE_PAGES) {
+ copy = 0;
}
offset += copy;
@@ -1209,6 +1214,10 @@ alloc_new_skb:
} else if (flags & MSG_SPLICE_PAGES) {
struct msghdr *msg = from;
+ err = -EIO;
+ if (WARN_ON_ONCE(copy > msg->msg_iter.count))
+ goto error;
+
err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
sk->sk_allocation);
if (err < 0)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8e97d8d4cc9d..d41bce8927b2 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -592,7 +592,7 @@ void __ip_sock_set_tos(struct sock *sk, int val)
}
if (inet_sk(sk)->tos != val) {
inet_sk(sk)->tos = val;
- sk->sk_priority = rt_tos2priority(val);
+ WRITE_ONCE(sk->sk_priority, rt_tos2priority(val));
sk_dst_reset(sk);
}
}
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 92c02c886fe7..586b1b3e35b8 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -224,7 +224,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
.un.frag.__unused = 0,
.un.frag.mtu = htons(mtu),
};
- icmph->checksum = ip_compute_csum(icmph, len);
+ icmph->checksum = csum_fold(skb_checksum(skb, 0, len, 0));
skb_reset_transport_header(skb);
niph = skb_push(skb, sizeof(*niph));
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index f95142e56da0..be5498f5dd31 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3221,13 +3221,9 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
&rtm_dump_nexthop_cb, &filter);
if (err < 0) {
if (likely(skb->len))
- goto out;
- goto out_err;
+ err = skb->len;
}
-out:
- err = skb->len;
-out_err:
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
@@ -3367,25 +3363,19 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
dd->filter.res_bucket_nh_id != nhge->nh->id)
continue;
+ dd->ctx->bucket_index = bucket_index;
err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
RTM_NEWNEXTHOPBUCKET, portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
cb->extack);
- if (err < 0) {
- if (likely(skb->len))
- goto out;
- goto out_err;
- }
+ if (err)
+ return err;
}
dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
- bucket_index = 0;
+ dd->ctx->bucket_index = 0;
-out:
- err = skb->len;
-out_err:
- dd->ctx->bucket_index = bucket_index;
- return err;
+ return 0;
}
static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
@@ -3434,13 +3424,9 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
if (err < 0) {
if (likely(skb->len))
- goto out;
- goto out_err;
+ err = skb->len;
}
-out:
- err = skb->len;
-out_err:
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 7782ff5e6539..cb381f5aa464 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -348,7 +348,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
goto error;
skb_reserve(skb, hlen);
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
skb_dst_set(skb, &rt->dst);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98d7e6ba7493..92fede388d52 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -518,7 +518,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
const struct inet_sock *inet = inet_sk(sk);
oif = sk->sk_bound_dev_if;
- mark = sk->sk_mark;
+ mark = READ_ONCE(sk->sk_mark);
tos = ip_sock_rt_tos(sk);
scope = ip_sock_rt_scope(sk);
prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
@@ -552,7 +552,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
inet_opt = rcu_dereference(inet->inet_opt);
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
- flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
ip_sock_rt_scope(sk),
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e03e08745308..8ed52e1e3c99 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3291,7 +3291,7 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
return -EINVAL;
lock_sock(sk);
- inet_csk(sk)->icsk_syn_retries = val;
+ WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
release_sock(sk);
return 0;
}
@@ -3300,7 +3300,7 @@ EXPORT_SYMBOL(tcp_sock_set_syncnt);
void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
{
lock_sock(sk);
- inet_csk(sk)->icsk_user_timeout = val;
+ WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
release_sock(sk);
}
EXPORT_SYMBOL(tcp_sock_set_user_timeout);
@@ -3312,7 +3312,8 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
if (val < 1 || val > MAX_TCP_KEEPIDLE)
return -EINVAL;
- tp->keepalive_time = val * HZ;
+ /* Paired with WRITE_ONCE() in keepalive_time_when() */
+ WRITE_ONCE(tp->keepalive_time, val * HZ);
if (sock_flag(sk, SOCK_KEEPOPEN) &&
!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
u32 elapsed = keepalive_time_elapsed(tp);
@@ -3344,7 +3345,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val)
return -EINVAL;
lock_sock(sk);
- tcp_sk(sk)->keepalive_intvl = val * HZ;
+ WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
release_sock(sk);
return 0;
}
@@ -3356,7 +3357,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val)
return -EINVAL;
lock_sock(sk);
- tcp_sk(sk)->keepalive_probes = val;
+ /* Paired with READ_ONCE() in keepalive_probes() */
+ WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
release_sock(sk);
return 0;
}
@@ -3558,19 +3560,19 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
if (val < 1 || val > MAX_TCP_KEEPINTVL)
err = -EINVAL;
else
- tp->keepalive_intvl = val * HZ;
+ WRITE_ONCE(tp->keepalive_intvl, val * HZ);
break;
case TCP_KEEPCNT:
if (val < 1 || val > MAX_TCP_KEEPCNT)
err = -EINVAL;
else
- tp->keepalive_probes = val;
+ WRITE_ONCE(tp->keepalive_probes, val);
break;
case TCP_SYNCNT:
if (val < 1 || val > MAX_TCP_SYNCNT)
err = -EINVAL;
else
- icsk->icsk_syn_retries = val;
+ WRITE_ONCE(icsk->icsk_syn_retries, val);
break;
case TCP_SAVE_SYN:
@@ -3583,18 +3585,18 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_LINGER2:
if (val < 0)
- tp->linger2 = -1;
+ WRITE_ONCE(tp->linger2, -1);
else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
- tp->linger2 = TCP_FIN_TIMEOUT_MAX;
+ WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
else
- tp->linger2 = val * HZ;
+ WRITE_ONCE(tp->linger2, val * HZ);
break;
case TCP_DEFER_ACCEPT:
/* Translate value in seconds to number of retransmits */
- icsk->icsk_accept_queue.rskq_defer_accept =
- secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
- TCP_RTO_MAX / HZ);
+ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
+ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+ TCP_RTO_MAX / HZ));
break;
case TCP_WINDOW_CLAMP:
@@ -3618,7 +3620,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
if (val < 0)
err = -EINVAL;
else
- icsk->icsk_user_timeout = val;
+ WRITE_ONCE(icsk->icsk_user_timeout, val);
break;
case TCP_FASTOPEN:
@@ -3656,13 +3658,13 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
if (!tp->repair)
err = -EPERM;
else
- tp->tsoffset = val - tcp_time_stamp_raw();
+ WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw());
break;
case TCP_REPAIR_WINDOW:
err = tcp_repair_set_window(tp, optval, optlen);
break;
case TCP_NOTSENT_LOWAT:
- tp->notsent_lowat = val;
+ WRITE_ONCE(tp->notsent_lowat, val);
sk->sk_write_space(sk);
break;
case TCP_INQ:
@@ -3674,7 +3676,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_TX_DELAY:
if (val)
tcp_enable_tx_delay();
- tp->tcp_tx_delay = val;
+ WRITE_ONCE(tp->tcp_tx_delay, val);
break;
default:
err = -ENOPROTOOPT;
@@ -3991,17 +3993,18 @@ int do_tcp_getsockopt(struct sock *sk, int level,
val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
- val = icsk->icsk_syn_retries ? :
+ val = READ_ONCE(icsk->icsk_syn_retries) ? :
READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
break;
case TCP_LINGER2:
- val = tp->linger2;
+ val = READ_ONCE(tp->linger2);
if (val >= 0)
val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
break;
case TCP_DEFER_ACCEPT:
- val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
- TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
+ val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
+ val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
+ TCP_RTO_MAX / HZ);
break;
case TCP_WINDOW_CLAMP:
val = tp->window_clamp;
@@ -4138,11 +4141,11 @@ int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_USER_TIMEOUT:
- val = icsk->icsk_user_timeout;
+ val = READ_ONCE(icsk->icsk_user_timeout);
break;
case TCP_FASTOPEN:
- val = icsk->icsk_accept_queue.fastopenq.max_qlen;
+ val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
break;
case TCP_FASTOPEN_CONNECT:
@@ -4154,14 +4157,14 @@ int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_TX_DELAY:
- val = tp->tcp_tx_delay;
+ val = READ_ONCE(tp->tcp_tx_delay);
break;
case TCP_TIMESTAMP:
- val = tcp_time_stamp_raw() + tp->tsoffset;
+ val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
break;
case TCP_NOTSENT_LOWAT:
- val = tp->notsent_lowat;
+ val = READ_ONCE(tp->notsent_lowat);
break;
case TCP_INQ:
val = tp->recvmsg_inq;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 45cc7f1ca296..85e4953f1182 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -296,6 +296,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
static bool tcp_fastopen_queue_check(struct sock *sk)
{
struct fastopen_queue *fastopenq;
+ int max_qlen;
/* Make sure the listener has enabled fastopen, and we don't
* exceed the max # of pending TFO requests allowed before trying
@@ -308,10 +309,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
* temporarily vs a server not supporting Fast Open at all.
*/
fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
- if (fastopenq->max_qlen == 0)
+ max_qlen = READ_ONCE(fastopenq->max_qlen);
+ if (max_qlen == 0)
return false;
- if (fastopenq->qlen >= fastopenq->max_qlen) {
+ if (fastopenq->qlen >= max_qlen) {
struct request_sock *req1;
spin_lock(&fastopenq->lock);
req1 = fastopenq->rskq_rst_head;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6f072095211e..57c8af1859c1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3590,8 +3590,11 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
u32 *last_oow_ack_time)
{
- if (*last_oow_ack_time) {
- s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
+ /* Paired with the WRITE_ONCE() in this function. */
+ u32 val = READ_ONCE(*last_oow_ack_time);
+
+ if (val) {
+ s32 elapsed = (s32)(tcp_jiffies32 - val);
if (0 <= elapsed &&
elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
@@ -3600,7 +3603,10 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
}
}
- *last_oow_ack_time = tcp_jiffies32;
+ /* Paired with the prior READ_ONCE() and with itself,
+ * as we might be lockless.
+ */
+ WRITE_ONCE(*last_oow_ack_time, tcp_jiffies32);
return false; /* not rate-limited: go ahead, send dupack now! */
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fd365de4d5ff..a59cc4b83861 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -307,8 +307,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr,
inet->inet_sport,
usin->sin_port));
- tp->tsoffset = secure_tcp_ts_off(net, inet->inet_saddr,
- inet->inet_daddr);
+ WRITE_ONCE(tp->tsoffset,
+ secure_tcp_ts_off(net, inet->inet_saddr,
+ inet->inet_daddr));
}
inet->inet_id = get_random_u16();
@@ -930,9 +931,9 @@ static void tcp_v4_send_ack(const struct sock *sk,
ctl_sk = this_cpu_read(ipv4_tcp_sk);
sock_net_set(ctl_sk, net);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_mark : sk->sk_mark;
+ inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark);
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_priority : sk->sk_priority;
+ inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
transmit_time = tcp_transmit_time(sk);
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -988,11 +989,12 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
- req->ts_recent,
+ READ_ONCE(req->ts_recent),
0,
tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
- ip_hdr(skb)->tos, tcp_rsk(req)->txhash);
+ ip_hdr(skb)->tos,
+ READ_ONCE(tcp_rsk(req)->txhash));
}
/*
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 82f4575f9cd9..99ac5efe244d 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {
struct tcp_metrics_block {
struct tcp_metrics_block __rcu *tcpm_next;
- possible_net_t tcpm_net;
+ struct net *tcpm_net;
struct inetpeer_addr tcpm_saddr;
struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp;
@@ -51,34 +51,38 @@ struct tcp_metrics_block {
struct rcu_head rcu_head;
};
-static inline struct net *tm_net(struct tcp_metrics_block *tm)
+static inline struct net *tm_net(const struct tcp_metrics_block *tm)
{
- return read_pnet(&tm->tcpm_net);
+ /* Paired with the WRITE_ONCE() in tcpm_new() */
+ return READ_ONCE(tm->tcpm_net);
}
static bool tcp_metric_locked(struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
- return tm->tcpm_lock & (1 << idx);
+ /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
+ return READ_ONCE(tm->tcpm_lock) & (1 << idx);
}
-static u32 tcp_metric_get(struct tcp_metrics_block *tm,
+static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
- return tm->tcpm_vals[idx];
+ /* Paired with WRITE_ONCE() in tcp_metric_set() */
+ return READ_ONCE(tm->tcpm_vals[idx]);
}
static void tcp_metric_set(struct tcp_metrics_block *tm,
enum tcp_metric_index idx,
u32 val)
{
- tm->tcpm_vals[idx] = val;
+ /* Paired with READ_ONCE() in tcp_metric_get() */
+ WRITE_ONCE(tm->tcpm_vals[idx], val);
}
static bool addr_same(const struct inetpeer_addr *a,
const struct inetpeer_addr *b)
{
- return inetpeer_addr_cmp(a, b) == 0;
+ return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
}
struct tcpm_hash_bucket {
@@ -89,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
static unsigned int tcp_metrics_hash_log __read_mostly;
static DEFINE_SPINLOCK(tcp_metrics_lock);
+static DEFINE_SEQLOCK(fastopen_seqlock);
static void tcpm_suck_dst(struct tcp_metrics_block *tm,
const struct dst_entry *dst,
@@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
u32 msval;
u32 val;
- tm->tcpm_stamp = jiffies;
+ WRITE_ONCE(tm->tcpm_stamp, jiffies);
val = 0;
if (dst_metric_locked(dst, RTAX_RTT))
@@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
val |= 1 << TCP_METRIC_CWND;
if (dst_metric_locked(dst, RTAX_REORDERING))
val |= 1 << TCP_METRIC_REORDERING;
- tm->tcpm_lock = val;
+ /* Paired with READ_ONCE() in tcp_metric_locked() */
+ WRITE_ONCE(tm->tcpm_lock, val);
msval = dst_metric_raw(dst, RTAX_RTT);
- tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+ tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);
msval = dst_metric_raw(dst, RTAX_RTTVAR);
- tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
- tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
- tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
- tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
+ tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
+ tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+ dst_metric_raw(dst, RTAX_SSTHRESH));
+ tcp_metric_set(tm, TCP_METRIC_CWND,
+ dst_metric_raw(dst, RTAX_CWND));
+ tcp_metric_set(tm, TCP_METRIC_REORDERING,
+ dst_metric_raw(dst, RTAX_REORDERING));
if (fastopen_clear) {
+ write_seqlock(&fastopen_seqlock);
tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0;
tm->tcpm_fastopen.try_exp = 0;
tm->tcpm_fastopen.cookie.exp = false;
tm->tcpm_fastopen.cookie.len = 0;
+ write_sequnlock(&fastopen_seqlock);
}
}
#define TCP_METRICS_TIMEOUT (60 * 60 * HZ)
-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
+static void tcpm_check_stamp(struct tcp_metrics_block *tm,
+ const struct dst_entry *dst)
{
- if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
+ unsigned long limit;
+
+ if (!tm)
+ return;
+ limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
+ if (unlikely(time_after(jiffies, limit)))
tcpm_suck_dst(tm, dst, false);
}
@@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
oldest = deref_locked(tcp_metrics_hash[hash].chain);
for (tm = deref_locked(oldest->tcpm_next); tm;
tm = deref_locked(tm->tcpm_next)) {
- if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
+ if (time_before(READ_ONCE(tm->tcpm_stamp),
+ READ_ONCE(oldest->tcpm_stamp)))
oldest = tm;
}
tm = oldest;
} else {
- tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
+ tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
if (!tm)
goto out_unlock;
}
- write_pnet(&tm->tcpm_net, net);
+ /* Paired with the READ_ONCE() in tm_net() */
+ WRITE_ONCE(tm->tcpm_net, net);
+
tm->tcpm_saddr = *saddr;
tm->tcpm_daddr = *daddr;
- tcpm_suck_dst(tm, dst, true);
+ tcpm_suck_dst(tm, dst, reclaim);
if (likely(!reclaim)) {
tm->tcpm_next = tcp_metrics_hash[hash].chain;
@@ -434,7 +454,7 @@ void tcp_update_metrics(struct sock *sk)
tp->reordering);
}
}
- tm->tcpm_stamp = jiffies;
+ WRITE_ONCE(tm->tcpm_stamp, jiffies);
out_unlock:
rcu_read_unlock();
}
@@ -539,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
return ret;
}
-static DEFINE_SEQLOCK(fastopen_seqlock);
-
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie)
{
@@ -647,7 +665,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
}
if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
- jiffies - tm->tcpm_stamp,
+ jiffies - READ_ONCE(tm->tcpm_stamp),
TCP_METRICS_ATTR_PAD) < 0)
goto nla_put_failure;
@@ -658,7 +676,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
if (!nest)
goto nla_put_failure;
for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
- u32 val = tm->tcpm_vals[i];
+ u32 val = tcp_metric_get(tm, i);
if (!val)
continue;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 04fc328727e6..c8f2aa003387 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -528,7 +528,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
newtp->lsndtime = tcp_jiffies32;
- newsk->sk_txhash = treq->txhash;
+ newsk->sk_txhash = READ_ONCE(treq->txhash);
newtp->total_retrans = req->num_retrans;
tcp_init_xmit_timers(newsk);
@@ -555,7 +555,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->max_window = newtp->snd_wnd;
if (newtp->rx_opt.tstamp_ok) {
- newtp->rx_opt.ts_recent = req->ts_recent;
+ newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent);
newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
} else {
@@ -619,7 +619,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
if (tmp_opt.saw_tstamp) {
- tmp_opt.ts_recent = req->ts_recent;
+ tmp_opt.ts_recent = READ_ONCE(req->ts_recent);
if (tmp_opt.rcv_tsecr)
tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
/* We do not store true stamp, but it is not required,
@@ -758,8 +758,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* In sequence, PAWS is OK. */
+ /* TODO: We probably should defer ts_recent change once
+ * we take ownership of @req.
+ */
if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
- req->ts_recent = tmp_opt.rcv_tsval;
+ WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval);
if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
/* Truncate SYN, it is out of window starting
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2cb39b6dad02..51d8638d4b4c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -878,7 +878,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
if (likely(ireq->tstamp_ok)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
- opts->tsecr = req->ts_recent;
+ opts->tsecr = READ_ONCE(req->ts_recent);
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
if (likely(ireq->sack_ok)) {
@@ -3660,7 +3660,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
rcu_read_lock();
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
- skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
+ skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4);
/* bpf program will be interested in the tcp_flags */
TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK;
tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
@@ -4210,7 +4210,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
/* Paired with WRITE_ONCE() in sock_setsockopt() */
if (READ_ONCE(sk->sk_txrehash) == SOCK_TXREHASH_ENABLED)
- tcp_rsk(req)->txhash = net_tx_rndhash();
+ WRITE_ONCE(tcp_rsk(req)->txhash, net_tx_rndhash());
res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
NULL);
if (!res) {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 470f581eedd4..206418b6d7c4 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -591,7 +591,9 @@ out_reset_timer:
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
- icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+ icsk->icsk_rto = clamp(__tcp_set_rto(tp),
+ tcp_rto_min(sk),
+ TCP_RTO_MAX);
} else if (sk->sk_state != TCP_SYN_SENT ||
icsk->icsk_backoff >
READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 42a96b3547c9..abfa860367aa 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,6 +114,7 @@
#include <net/sock_reuseport.h>
#include <net/addrconf.h>
#include <net/udp_tunnel.h>
+#include <net/gro.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
@@ -555,10 +556,13 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
{
const struct iphdr *iph = ip_hdr(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
return __udp4_lib_lookup(net, iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb),
- inet_sdif(skb), net->ipv4.udp_table, NULL);
+ iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
/* Must be called under rcu_read_lock().
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 75aa4de5b731..0f46b3c2e4ac 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -274,13 +274,20 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
__sum16 check;
__be16 newlen;
- if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
- return __udp_gso_segment_list(gso_skb, features, is_ipv6);
-
mss = skb_shinfo(gso_skb)->gso_size;
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
+ if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),
+ mss);
+ return NULL;
+ }
+
+ if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+
skb_pull(gso_skb, sizeof(*uh));
/* clear destructor to avoid skb_segment assigning it to tail */
@@ -388,8 +395,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
- !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
return __udp_gso_segment(skb, features, false);
mss = skb_shinfo(skb)->gso_size;
@@ -603,10 +609,13 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
{
const struct iphdr *iph = skb_gro_network_header(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
return __udp4_lib_lookup(net, iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb),
- inet_sdif(skb), net->ipv4.udp_table, NULL);
+ iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
INDIRECT_CALLABLE_SCOPE
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5479da08ef40..94cec2075eee 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -318,9 +318,8 @@ static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
static void addrconf_mod_rs_timer(struct inet6_dev *idev,
unsigned long when)
{
- if (!timer_pending(&idev->rs_timer))
+ if (!mod_timer(&idev->rs_timer, jiffies + when))
in6_dev_hold(idev);
- mod_timer(&idev->rs_timer, jiffies + when);
}
static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
@@ -2562,12 +2561,18 @@ static void manage_tempaddrs(struct inet6_dev *idev,
ipv6_ifa_notify(0, ift);
}
- if ((create || list_empty(&idev->tempaddr_list)) &&
- idev->cnf.use_tempaddr > 0) {
+ /* Also create a temporary address if it's enabled but no temporary
+ * address currently exists.
+ * However, we get called with valid_lft == 0, prefered_lft == 0, create == false
+ * as part of cleanup (ie. deleting the mngtmpaddr).
+ * We don't want that to result in creating a new temporary ip address.
+ */
+ if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft))
+ create = true;
+
+ if (create && idev->cnf.use_tempaddr > 0) {
/* When a new public address is created as described
* in [ADDRCONF], also create a new temporary address.
- * Also create a temporary address if it's enabled but
- * no temporary address currently exists.
*/
read_unlock_bh(&idev->lock);
ipv6_create_tempaddr(ifp, false);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 9edf1f45b1ed..65fa5014bc85 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -424,7 +424,10 @@ static struct net_device *icmp6_dev(const struct sk_buff *skb)
if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
const struct rt6_info *rt6 = skb_rt6_info(skb);
- if (rt6)
+ /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
+ * and ip6_null_entry could be set to skb if no route is found.
+ */
+ if (rt6 && rt6->rt6i_idev)
dev = rt6->rt6i_idev->dev;
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index da80974ad23a..070d87abf7c0 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -955,7 +955,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
goto tx_err;
if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+ goto tx_err;
truncate = true;
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index cc3d5ad17257..67a3b8f6e72b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1073,7 +1073,7 @@ static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
And all this only to mangle msg->im6_msgtype and
to set msg->im6_mbz to "mbz" :-)
*/
- skb_push(skb, -skb_network_offset(pkt));
+ __skb_pull(skb, skb_network_offset(pkt));
skb_push(skb, sizeof(*msg));
skb_reset_transport_header(skb);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 18634ebd20a4..a42be96ae209 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -197,7 +197,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
static inline int ndisc_is_useropt(const struct net_device *dev,
struct nd_opt_hdr *opt)
{
- return opt->nd_opt_type == ND_OPT_RDNSS ||
+ return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
+ opt->nd_opt_type == ND_OPT_RDNSS ||
opt->nd_opt_type == ND_OPT_DNSSL ||
opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
opt->nd_opt_type == ND_OPT_PREF64 ||
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index f804c11e2146..c2c291827a2c 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -120,7 +120,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init_sk(&ipc6, np);
ipc6.sockc.tsflags = sk->sk_tsflags;
- ipc6.sockc.mark = sk->sk_mark;
+ ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_oif = oif;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ac1cef094c5f..49381f35b623 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -614,7 +614,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb_reserve(skb, hlen);
skb->protocol = htons(ETH_P_IPV6);
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
@@ -774,12 +774,12 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
*/
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_uid = sk->sk_uid;
ipcm6_init(&ipc6);
ipc6.sockc.tsflags = sk->sk_tsflags;
- ipc6.sockc.mark = sk->sk_mark;
+ ipc6.sockc.mark = fl6.flowi6_mark;
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 64e873f5895f..56a55585eb79 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2951,7 +2951,8 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
if (!oif && skb->dev)
oif = l3mdev_master_ifindex(skb->dev);
- ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
+ ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
+ sk->sk_uid);
dst = __sk_dst_get(sk);
if (!dst || !dst->obsolete ||
@@ -3172,8 +3173,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
- ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
- sk->sk_uid);
+ ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
+ READ_ONCE(sk->sk_mark), sk->sk_uid);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 40dd92a2f480..6e86721e1cdb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -564,8 +564,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
- tclass, sk->sk_priority);
+ err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
+ opt, tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -939,7 +939,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (sk->sk_state == TCP_TIME_WAIT)
mark = inet_twsk(sk)->tw_mark;
else
- mark = sk->sk_mark;
+ mark = READ_ONCE(sk->sk_mark);
skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
}
if (txhash) {
@@ -1126,10 +1126,11 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
- req->ts_recent, sk->sk_bound_dev_if,
+ READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
- ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
- tcp_rsk(req)->txhash);
+ ipv6_get_dsfield(ipv6_hdr(skb)), 0,
+ READ_ONCE(sk->sk_priority),
+ READ_ONCE(tcp_rsk(req)->txhash));
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 317b01c9bc39..f787e6b8424c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -45,11 +45,13 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
+#include <trace/events/udp.h>
#include <net/xfrm.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/busy_poll.h>
#include <net/sock_reuseport.h>
+#include <net/gro.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -90,7 +92,7 @@ static u32 udp6_ehashfn(const struct net *net,
fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
return __inet6_ehashfn(lhash, lport, fhash, fport,
- udp_ipv6_hash_secret + net_hash_mix(net));
+ udp6_ehash_secret + net_hash_mix(net));
}
int udp_v6_get_port(struct sock *sk, unsigned short snum)
@@ -299,10 +301,13 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet6_get_iif_sdif(skb, &iif, &sdif);
return __udp6_lib_lookup(net, &iph->saddr, sport,
- &iph->daddr, dport, inet6_iif(skb),
- inet6_sdif(skb), net->ipv4.udp_table, NULL);
+ &iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
/* Must be called under rcu_read_lock().
@@ -623,7 +628,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type == NDISC_REDIRECT) {
if (tunnel) {
ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
- sk->sk_mark, sk->sk_uid);
+ READ_ONCE(sk->sk_mark), sk->sk_uid);
} else {
ip6_sk_redirect(skb, sk);
}
@@ -680,6 +685,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb_reason(skb, drop_reason);
+ trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
}
@@ -1354,7 +1360,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.gso_size = READ_ONCE(up->gso_size);
ipc6.sockc.tsflags = sk->sk_tsflags;
- ipc6.sockc.mark = sk->sk_mark;
+ ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
/* destination address check */
if (sin6) {
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index ad3b8726873e..6b95ba241ebe 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -43,8 +43,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
- !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
return __udp_gso_segment(skb, features, true);
mss = skb_shinfo(skb)->gso_size;
@@ -119,10 +118,13 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet6_get_iif_sdif(skb, &iif, &sdif);
return __udp6_lib_lookup(net, &iph->saddr, sport,
- &iph->daddr, dport, inet6_iif(skb),
- inet6_sdif(skb), net->ipv4.udp_table, NULL);
+ &iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
INDIRECT_CALLABLE_SCOPE
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index b1623f9c4f92..ff78217f0cb1 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -519,7 +519,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* Get and verify the address */
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_uid = sk->sk_uid;
ipcm6_init(&ipc6);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 57c35c960b2c..9b06c380866b 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -402,7 +402,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
memcpy(laddr.mac, addr->sllc_mac, IFHWADDRLEN);
laddr.lsap = addr->sllc_sap;
rc = -EADDRINUSE; /* mac + sap clash. */
- ask = llc_lookup_established(sap, &daddr, &laddr);
+ ask = llc_lookup_established(sap, &daddr, &laddr, &init_net);
if (ask) {
sock_put(ask);
goto out_put;
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 912aa9bd5e29..d037009ee10f 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -453,11 +453,13 @@ static int llc_exec_conn_trans_actions(struct sock *sk,
static inline bool llc_estab_match(const struct llc_sap *sap,
const struct llc_addr *daddr,
const struct llc_addr *laddr,
- const struct sock *sk)
+ const struct sock *sk,
+ const struct net *net)
{
struct llc_sock *llc = llc_sk(sk);
- return llc->laddr.lsap == laddr->lsap &&
+ return net_eq(sock_net(sk), net) &&
+ llc->laddr.lsap == laddr->lsap &&
llc->daddr.lsap == daddr->lsap &&
ether_addr_equal(llc->laddr.mac, laddr->mac) &&
ether_addr_equal(llc->daddr.mac, daddr->mac);
@@ -468,6 +470,7 @@ static inline bool llc_estab_match(const struct llc_sap *sap,
* @sap: SAP
* @daddr: address of remote LLC (MAC + SAP)
* @laddr: address of local LLC (MAC + SAP)
+ * @net: netns to look up a socket in
*
* Search connection list of the SAP and finds connection using the remote
* mac, remote sap, local mac, and local sap. Returns pointer for
@@ -476,7 +479,8 @@ static inline bool llc_estab_match(const struct llc_sap *sap,
*/
static struct sock *__llc_lookup_established(struct llc_sap *sap,
struct llc_addr *daddr,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
struct sock *rc;
struct hlist_nulls_node *node;
@@ -486,12 +490,12 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap,
rcu_read_lock();
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
- if (llc_estab_match(sap, daddr, laddr, rc)) {
+ if (llc_estab_match(sap, daddr, laddr, rc, net)) {
/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
- !llc_estab_match(sap, daddr, laddr, rc))) {
+ !llc_estab_match(sap, daddr, laddr, rc, net))) {
sock_put(rc);
continue;
}
@@ -513,29 +517,33 @@ found:
struct sock *llc_lookup_established(struct llc_sap *sap,
struct llc_addr *daddr,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
struct sock *sk;
local_bh_disable();
- sk = __llc_lookup_established(sap, daddr, laddr);
+ sk = __llc_lookup_established(sap, daddr, laddr, net);
local_bh_enable();
return sk;
}
static inline bool llc_listener_match(const struct llc_sap *sap,
const struct llc_addr *laddr,
- const struct sock *sk)
+ const struct sock *sk,
+ const struct net *net)
{
struct llc_sock *llc = llc_sk(sk);
- return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN &&
+ return net_eq(sock_net(sk), net) &&
+ sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN &&
llc->laddr.lsap == laddr->lsap &&
ether_addr_equal(llc->laddr.mac, laddr->mac);
}
static struct sock *__llc_lookup_listener(struct llc_sap *sap,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
struct sock *rc;
struct hlist_nulls_node *node;
@@ -545,12 +553,12 @@ static struct sock *__llc_lookup_listener(struct llc_sap *sap,
rcu_read_lock();
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
- if (llc_listener_match(sap, laddr, rc)) {
+ if (llc_listener_match(sap, laddr, rc, net)) {
/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
- !llc_listener_match(sap, laddr, rc))) {
+ !llc_listener_match(sap, laddr, rc, net))) {
sock_put(rc);
continue;
}
@@ -574,6 +582,7 @@ found:
* llc_lookup_listener - Finds listener for local MAC + SAP
* @sap: SAP
* @laddr: address of local LLC (MAC + SAP)
+ * @net: netns to look up a socket in
*
* Search connection list of the SAP and finds connection listening on
* local mac, and local sap. Returns pointer for parent socket found,
@@ -581,24 +590,26 @@ found:
* Caller has to make sure local_bh is disabled.
*/
static struct sock *llc_lookup_listener(struct llc_sap *sap,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
+ struct sock *rc = __llc_lookup_listener(sap, laddr, net);
static struct llc_addr null_addr;
- struct sock *rc = __llc_lookup_listener(sap, laddr);
if (!rc)
- rc = __llc_lookup_listener(sap, &null_addr);
+ rc = __llc_lookup_listener(sap, &null_addr, net);
return rc;
}
static struct sock *__llc_lookup(struct llc_sap *sap,
struct llc_addr *daddr,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
- struct sock *sk = __llc_lookup_established(sap, daddr, laddr);
+ struct sock *sk = __llc_lookup_established(sap, daddr, laddr, net);
- return sk ? : llc_lookup_listener(sap, laddr);
+ return sk ? : llc_lookup_listener(sap, laddr, net);
}
/**
@@ -776,7 +787,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
llc_pdu_decode_da(skb, daddr.mac);
llc_pdu_decode_dsap(skb, &daddr.lsap);
- sk = __llc_lookup(sap, &saddr, &daddr);
+ sk = __llc_lookup(sap, &saddr, &daddr, dev_net(skb->dev));
if (!sk)
goto drop;
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
index dde9bf08a593..58a5f419adc6 100644
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -92,7 +92,7 @@ int llc_establish_connection(struct sock *sk, const u8 *lmac, u8 *dmac, u8 dsap)
daddr.lsap = dsap;
memcpy(daddr.mac, dmac, sizeof(daddr.mac));
memcpy(laddr.mac, lmac, sizeof(laddr.mac));
- existing = llc_lookup_established(llc->sap, &daddr, &laddr);
+ existing = llc_lookup_established(llc->sap, &daddr, &laddr, sock_net(sk));
if (existing) {
if (existing->sk_state == TCP_ESTABLISHED) {
sk = existing;
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index c309b72a5877..7cac441862e2 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -163,9 +163,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
void (*sta_handler)(struct sk_buff *skb);
void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
- if (!net_eq(dev_net(dev), &init_net))
- goto drop;
-
/*
* When the interface is in promisc. mode, drop all the crap that it
* receives, do not try to analyse it.
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 6805ce43a055..116c0e479183 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -294,25 +294,29 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb,
static inline bool llc_dgram_match(const struct llc_sap *sap,
const struct llc_addr *laddr,
- const struct sock *sk)
+ const struct sock *sk,
+ const struct net *net)
{
struct llc_sock *llc = llc_sk(sk);
return sk->sk_type == SOCK_DGRAM &&
- llc->laddr.lsap == laddr->lsap &&
- ether_addr_equal(llc->laddr.mac, laddr->mac);
+ net_eq(sock_net(sk), net) &&
+ llc->laddr.lsap == laddr->lsap &&
+ ether_addr_equal(llc->laddr.mac, laddr->mac);
}
/**
* llc_lookup_dgram - Finds dgram socket for the local sap/mac
* @sap: SAP
* @laddr: address of local LLC (MAC + SAP)
+ * @net: netns to look up a socket in
*
* Search socket list of the SAP and finds connection using the local
* mac, and local sap. Returns pointer for socket found, %NULL otherwise.
*/
static struct sock *llc_lookup_dgram(struct llc_sap *sap,
- const struct llc_addr *laddr)
+ const struct llc_addr *laddr,
+ const struct net *net)
{
struct sock *rc;
struct hlist_nulls_node *node;
@@ -322,12 +326,12 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap,
rcu_read_lock_bh();
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
- if (llc_dgram_match(sap, laddr, rc)) {
+ if (llc_dgram_match(sap, laddr, rc, net)) {
/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
- !llc_dgram_match(sap, laddr, rc))) {
+ !llc_dgram_match(sap, laddr, rc, net))) {
sock_put(rc);
continue;
}
@@ -429,7 +433,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb)
llc_sap_mcast(sap, &laddr, skb);
kfree_skb(skb);
} else {
- struct sock *sk = llc_lookup_dgram(sap, &laddr);
+ struct sock *sk = llc_lookup_dgram(sap, &laddr, dev_net(skb->dev));
if (sk) {
llc_sap_rcv(sap, skb, sk);
sock_put(sk);
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index 6de8d0ad5497..2dc732147e85 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -282,7 +282,7 @@ static void tpt_trig_timer(struct timer_list *t)
}
}
- led_trigger_blink(&local->tpt_led, &on, &off);
+ led_trigger_blink(&local->tpt_led, on, off);
}
const char *
diff --git a/net/mac80211/led.h b/net/mac80211/led.h
index b71a1428d883..d25f13346b82 100644
--- a/net/mac80211/led.h
+++ b/net/mac80211/led.h
@@ -13,22 +13,18 @@
static inline void ieee80211_led_rx(struct ieee80211_local *local)
{
#ifdef CONFIG_MAC80211_LEDS
- unsigned long led_delay = MAC80211_BLINK_DELAY;
-
if (!atomic_read(&local->rx_led_active))
return;
- led_trigger_blink_oneshot(&local->rx_led, &led_delay, &led_delay, 0);
+ led_trigger_blink_oneshot(&local->rx_led, MAC80211_BLINK_DELAY, MAC80211_BLINK_DELAY, 0);
#endif
}
static inline void ieee80211_led_tx(struct ieee80211_local *local)
{
#ifdef CONFIG_MAC80211_LEDS
- unsigned long led_delay = MAC80211_BLINK_DELAY;
-
if (!atomic_read(&local->tx_led_active))
return;
- led_trigger_blink_oneshot(&local->tx_led, &led_delay, &led_delay, 0);
+ led_trigger_blink_oneshot(&local->tx_led, MAC80211_BLINK_DELAY, MAC80211_BLINK_DELAY, 0);
#endif
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e892673deb73..d80658547836 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2335,7 +2335,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
- if (flags & MPTCP_CF_FASTCLOSE) {
+ if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
/* be sure to force the tcp_disconnect() path,
* to generate the egress reset
*/
@@ -2909,10 +2909,10 @@ static void mptcp_check_listen_stop(struct sock *sk)
return;
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ tcp_set_state(ssk, TCP_CLOSE);
mptcp_subflow_queue_clean(sk, ssk);
inet_csk_listen_stop(ssk);
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
- tcp_set_state(ssk, TCP_CLOSE);
release_sock(ssk);
}
@@ -3328,7 +3328,7 @@ static void mptcp_release_cb(struct sock *sk)
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
- if (unlikely(&msk->cb_flags)) {
+ if (unlikely(msk->cb_flags)) {
/* be sure to set the current sk state before tacking actions
* depending on sk_state, that is processing MPTCP_ERROR_REPORT
*/
@@ -3703,6 +3703,11 @@ static int mptcp_listen(struct socket *sock, int backlog)
pr_debug("msk=%p", msk);
lock_sock(sk);
+
+ err = -EINVAL;
+ if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
+ goto unlock;
+
ssock = __mptcp_nmpc_socket(msk);
if (IS_ERR(ssock)) {
err = PTR_ERR(ssock);
@@ -3718,10 +3723,9 @@ static int mptcp_listen(struct socket *sock, int backlog)
if (!err) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
mptcp_copy_inaddrs(sk, ssock->sk);
+ mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
}
- mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
-
unlock:
release_sock(sk);
return err;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 37fbe22e2433..ba2a873a4d2e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -325,7 +325,6 @@ struct mptcp_sock {
u32 subflow_id;
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
- struct mptcp_sock *dl_next;
};
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 63f7a09335c5..a3f1fe810cc9 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -103,7 +103,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
break;
case SO_MARK:
if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
- ssk->sk_mark = sk->sk_mark;
+ WRITE_ONCE(ssk->sk_mark, sk->sk_mark);
sk_dst_reset(ssk);
}
break;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9ee3b7abbaf6..94ae7dd01c65 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1793,16 +1793,31 @@ static void subflow_state_change(struct sock *sk)
void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
{
struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
- struct mptcp_sock *msk, *next, *head = NULL;
- struct request_sock *req;
- struct sock *sk;
+ struct request_sock *req, *head, *tail;
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk, *ssk;
- /* build a list of all unaccepted mptcp sockets */
+ /* Due to lock dependencies no relevant lock can be acquired under rskq_lock.
+ * Splice the req list, so that accept() can not reach the pending ssk after
+ * the listener socket is released below.
+ */
spin_lock_bh(&queue->rskq_lock);
- for (req = queue->rskq_accept_head; req; req = req->dl_next) {
- struct mptcp_subflow_context *subflow;
- struct sock *ssk = req->sk;
+ head = queue->rskq_accept_head;
+ tail = queue->rskq_accept_tail;
+ queue->rskq_accept_head = NULL;
+ queue->rskq_accept_tail = NULL;
+ spin_unlock_bh(&queue->rskq_lock);
+
+ if (!head)
+ return;
+ /* can't acquire the msk socket lock under the subflow one,
+ * or will cause ABBA deadlock
+ */
+ release_sock(listener_ssk);
+
+ for (req = head; req; req = req->dl_next) {
+ ssk = req->sk;
if (!sk_is_mptcp(ssk))
continue;
@@ -1810,32 +1825,10 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
if (!subflow || !subflow->conn)
continue;
- /* skip if already in list */
sk = subflow->conn;
- msk = mptcp_sk(sk);
- if (msk->dl_next || msk == head)
- continue;
-
sock_hold(sk);
- msk->dl_next = head;
- head = msk;
- }
- spin_unlock_bh(&queue->rskq_lock);
- if (!head)
- return;
-
- /* can't acquire the msk socket lock under the subflow one,
- * or will cause ABBA deadlock
- */
- release_sock(listener_ssk);
-
- for (msk = head; msk; msk = next) {
- sk = (struct sock *)msk;
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
- next = msk->dl_next;
- msk->dl_next = NULL;
-
__mptcp_unaccepted_force_close(sk);
release_sock(sk);
@@ -1859,6 +1852,13 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
/* we are still under the listener msk socket lock */
lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+
+ /* restore the listener queue, to let the TCP code clean it up */
+ spin_lock_bh(&queue->rskq_lock);
+ WARN_ON_ONCE(queue->rskq_accept_head);
+ queue->rskq_accept_head = head;
+ queue->rskq_accept_tail = tail;
+ spin_unlock_bh(&queue->rskq_lock);
}
static int subflow_ulp_init(struct sock *sk)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d119f1d4c2fc..992393102d5f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -211,24 +211,18 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
unsigned int zoneid,
const struct net *net)
{
- u64 a, b, c, d;
+ siphash_key_t key;
get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
- /* The direction must be ignored, handle usable tuplehash members manually */
- a = (u64)tuple->src.u3.all[0] << 32 | tuple->src.u3.all[3];
- b = (u64)tuple->dst.u3.all[0] << 32 | tuple->dst.u3.all[3];
+ key = nf_conntrack_hash_rnd;
- c = (__force u64)tuple->src.u.all << 32 | (__force u64)tuple->dst.u.all << 16;
- c |= tuple->dst.protonum;
+ key.key[0] ^= zoneid;
+ key.key[1] ^= net_hash_mix(net);
- d = (u64)zoneid << 32 | net_hash_mix(net);
-
- /* IPv4: u3.all[1,2,3] == 0 */
- c ^= (u64)tuple->src.u3.all[1] << 32 | tuple->src.u3.all[2];
- d += (u64)tuple->dst.u3.all[1] << 32 | tuple->dst.u3.all[2];
-
- return (u32)siphash_4u64(a, b, c, d, &nf_conntrack_hash_rnd);
+ return siphash((void *)tuple,
+ offsetofend(struct nf_conntrack_tuple, dst.__nfct_hash_offsetend),
+ &key);
}
static u32 scale_hash(u32 hash)
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 0c4db2f2ac43..f22691f83853 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -360,6 +360,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
+ if (!nf_ct_helper_hash)
+ return -ENOENT;
+
if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
return -EINVAL;
@@ -515,4 +518,5 @@ int nf_conntrack_helper_init(void)
void nf_conntrack_helper_fini(void)
{
kvfree(nf_ct_helper_hash);
+ nf_ct_helper_hash = NULL;
}
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index ad6f0ca40cd2..af369e686fc5 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -205,6 +205,8 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
+ unsigned long status;
+
if (!nf_ct_is_confirmed(ct)) {
unsigned int *timeouts = nf_ct_timeout_lookup(ct);
@@ -217,11 +219,17 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
}
+ status = READ_ONCE(ct->status);
/* If we've seen traffic both ways, this is a GRE connection.
* Extend timeout. */
- if (ct->status & IPS_SEEN_REPLY) {
+ if (status & IPS_SEEN_REPLY) {
nf_ct_refresh_acct(ct, ctinfo, skb,
ct->proto.gre.stream_timeout);
+
+ /* never set ASSURED for IPS_NAT_CLASH, they time out soon */
+ if (unlikely((status & IPS_NAT_CLASH)))
+ return NF_ACCEPT;
+
/* Also, more likely to be important, and not a probe. */
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9573a8fcad79..c62227ae7746 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -31,7 +31,9 @@ static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
+static LIST_HEAD(nf_tables_gc_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
+static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
enum {
NFT_VALIDATE_SKIP = 0,
@@ -120,6 +122,9 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
static void nf_tables_trans_destroy_work(struct work_struct *w);
static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
+static void nft_trans_gc_work(struct work_struct *work);
+static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
+
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
const struct sk_buff *skb,
@@ -253,8 +258,10 @@ int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
if (chain->bound)
return -EBUSY;
+ if (!nft_use_inc(&chain->use))
+ return -EMFILE;
+
chain->bound = true;
- chain->use++;
nft_chain_trans_bind(ctx, chain);
return 0;
@@ -437,7 +444,7 @@ static int nft_delchain(struct nft_ctx *ctx)
if (IS_ERR(trans))
return PTR_ERR(trans);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
nft_deactivate_next(ctx->net, ctx->chain);
return 0;
@@ -476,7 +483,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
/* You cannot delete the same rule twice */
if (nft_is_active_next(ctx->net, rule)) {
nft_deactivate_next(ctx->net, rule);
- ctx->chain->use--;
+ nft_use_dec(&ctx->chain->use);
return 0;
}
return -ENOENT;
@@ -580,10 +587,6 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
return __nft_trans_set_add(ctx, msg_type, set, NULL);
}
-static void nft_setelem_data_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem);
-
static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
@@ -644,7 +647,7 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
nft_map_deactivate(ctx, set);
nft_deactivate_next(ctx->net, set);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -676,7 +679,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
return err;
nft_deactivate_next(ctx->net, obj);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -711,7 +714,7 @@ static int nft_delflowtable(struct nft_ctx *ctx,
return err;
nft_deactivate_next(ctx->net, flowtable);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -2396,9 +2399,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_chain *chain;
int err;
- if (table->use == UINT_MAX)
- return -EOVERFLOW;
-
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_stats __percpu *stats = NULL;
struct nft_chain_hook hook = {};
@@ -2494,6 +2494,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (err < 0)
goto err_destroy_chain;
+ if (!nft_use_inc(&table->use)) {
+ err = -EMFILE;
+ goto err_use;
+ }
+
trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
@@ -2510,10 +2515,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
goto err_unregister_hook;
}
- table->use++;
-
return 0;
+
err_unregister_hook:
+ nft_use_dec_restore(&table->use);
+err_use:
nf_tables_unregister_hook(net, table, chain);
err_destroy_chain:
nf_tables_chain_destroy(ctx);
@@ -2694,7 +2700,7 @@ err_hooks:
static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
const struct nft_table *table,
- const struct nlattr *nla)
+ const struct nlattr *nla, u8 genmask)
{
struct nftables_pernet *nft_net = nft_pernet(net);
u32 id = ntohl(nla_get_be32(nla));
@@ -2705,7 +2711,8 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
if (trans->msg_type == NFT_MSG_NEWCHAIN &&
chain->table == table &&
- id == nft_trans_chain_id(trans))
+ id == nft_trans_chain_id(trans) &&
+ nft_active_genmask(chain, genmask))
return chain;
}
return ERR_PTR(-ENOENT);
@@ -3679,8 +3686,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (err < 0)
return err;
}
-
- cond_resched();
}
return 0;
@@ -3704,6 +3709,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
err = nft_chain_validate(&ctx, chain);
if (err < 0)
return err;
+
+ cond_resched();
}
return 0;
@@ -3805,11 +3812,10 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
}
- if (nft_chain_is_bound(chain))
- return -EOPNOTSUPP;
} else if (nla[NFTA_RULE_CHAIN_ID]) {
- chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID]);
+ chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID],
+ genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]);
return PTR_ERR(chain);
@@ -3818,6 +3824,9 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
}
+ if (nft_chain_is_bound(chain))
+ return -EOPNOTSUPP;
+
if (nla[NFTA_RULE_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
rule = __nft_rule_lookup(chain, handle);
@@ -3840,9 +3849,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
handle = nf_tables_alloc_handle(table);
- if (chain->use == UINT_MAX)
- return -EOVERFLOW;
-
if (nla[NFTA_RULE_POSITION]) {
pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
old_rule = __nft_rule_lookup(chain, pos_handle);
@@ -3936,6 +3942,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
}
+ if (!nft_use_inc(&chain->use)) {
+ err = -EMFILE;
+ goto err_release_rule;
+ }
+
if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
err = nft_delrule(&ctx, old_rule);
if (err < 0)
@@ -3967,7 +3978,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
}
kvfree(expr_info);
- chain->use++;
if (flow)
nft_trans_flow_rule(trans) = flow;
@@ -3978,6 +3988,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return 0;
err_destroy_flow_rule:
+ nft_use_dec_restore(&chain->use);
if (flow)
nft_flow_rule_destroy(flow);
err_release_rule:
@@ -4078,6 +4089,8 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
+ if (nft_chain_is_bound(chain))
+ continue;
ctx.chain = chain;
err = nft_delrule_by_chain(&ctx);
@@ -5014,9 +5027,15 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
alloc_size = sizeof(*set) + size + udlen;
if (alloc_size < size || alloc_size > INT_MAX)
return -ENOMEM;
+
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT);
- if (!set)
- return -ENOMEM;
+ if (!set) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT);
if (!name) {
@@ -5037,6 +5056,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
INIT_LIST_HEAD(&set->bindings);
INIT_LIST_HEAD(&set->catchall_list);
+ refcount_set(&set->refs, 1);
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
@@ -5074,7 +5094,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
goto err_set_expr_alloc;
list_add_tail_rcu(&set->list, &table->sets);
- table->use++;
+
return 0;
err_set_expr_alloc:
@@ -5086,6 +5106,9 @@ err_set_init:
kfree(set->name);
err_set_name:
kvfree(set);
+err_alloc:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -5101,6 +5124,14 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
}
}
+static void nft_set_put(struct nft_set *set)
+{
+ if (refcount_dec_and_test(&set->refs)) {
+ kfree(set->name);
+ kvfree(set);
+ }
+}
+
static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
int i;
@@ -5113,8 +5144,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
set->ops->destroy(ctx, set);
nft_set_catchall_destroy(ctx, set);
- kfree(set->name);
- kvfree(set);
+ nft_set_put(set);
}
static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
@@ -5224,9 +5254,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *i;
struct nft_set_iter iter;
- if (set->use == UINT_MAX)
- return -EOVERFLOW;
-
if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;
@@ -5254,10 +5281,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
return iter.err;
}
bind:
+ if (!nft_use_inc(&set->use))
+ return -EMFILE;
+
binding->chain = ctx->chain;
list_add_tail_rcu(&binding->list, &set->bindings);
nft_set_trans_bind(ctx, set);
- set->use++;
return 0;
}
@@ -5331,7 +5360,7 @@ void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
nft_clear(ctx->net, set);
}
- set->use++;
+ nft_use_inc_restore(&set->use);
}
EXPORT_SYMBOL_GPL(nf_tables_activate_set);
@@ -5347,7 +5376,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
else
list_del_rcu(&binding->list);
- set->use--;
+ nft_use_dec(&set->use);
break;
case NFT_TRANS_PREPARE:
if (nft_set_is_anonymous(set)) {
@@ -5356,7 +5385,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
nft_deactivate_next(ctx->net, set);
}
- set->use--;
+ nft_use_dec(&set->use);
return;
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
@@ -5364,7 +5393,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
nft_map_deactivate(ctx, set);
- set->use--;
+ nft_use_dec(&set->use);
fallthrough;
default:
nf_tables_unbind_set(ctx, set, binding,
@@ -5582,8 +5611,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_set_elem *elem)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
struct nft_set_dump_args *args;
+ if (nft_set_elem_expired(ext))
+ return 0;
+
args = container_of(iter, struct nft_set_dump_args, iter);
return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
}
@@ -6155,7 +6188,7 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use--;
+ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
kfree(elem);
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
@@ -6254,7 +6287,8 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (nft_set_elem_active(ext, genmask) &&
- !nft_set_elem_expired(ext))
+ !nft_set_elem_expired(ext) &&
+ !nft_set_elem_is_dead(ext))
return ext;
}
@@ -6262,29 +6296,6 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
}
EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
-void *nft_set_catchall_gc(const struct nft_set *set)
-{
- struct nft_set_elem_catchall *catchall, *next;
- struct nft_set_ext *ext;
- void *elem = NULL;
-
- list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
- ext = nft_set_elem_ext(set, catchall->elem);
-
- if (!nft_set_elem_expired(ext) ||
- nft_set_elem_mark_busy(ext))
- continue;
-
- elem = catchall->elem;
- list_del_rcu(&catchall->list);
- kfree_rcu(catchall, rcu);
- break;
- }
-
- return elem;
-}
-EXPORT_SYMBOL_GPL(nft_set_catchall_gc);
-
static int nft_setelem_catchall_insert(const struct net *net,
struct nft_set *set,
const struct nft_set_elem *elem,
@@ -6346,7 +6357,6 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
if (nft_setelem_is_catchall(set, elem)) {
nft_set_elem_change_active(net, set, ext);
- nft_set_elem_clear_busy(ext);
} else {
set->ops->activate(net, set, elem);
}
@@ -6361,8 +6371,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,
list_for_each_entry(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_is_active(net, ext) ||
- nft_set_elem_mark_busy(ext))
+ if (!nft_is_active(net, ext))
continue;
kfree(elem->priv);
@@ -6657,8 +6666,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
set->objtype, genmask);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
+ obj = NULL;
+ goto err_parse_key_end;
+ }
+
+ if (!nft_use_inc(&obj->use)) {
+ err = -EMFILE;
+ obj = NULL;
goto err_parse_key_end;
}
+
err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
if (err < 0)
goto err_parse_key_end;
@@ -6727,10 +6744,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (flags)
*nft_set_ext_flags(ext) = flags;
- if (obj) {
+ if (obj)
*nft_set_ext_obj(ext) = obj;
- obj->use++;
- }
+
if (ulen > 0) {
if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) {
err = -EINVAL;
@@ -6750,7 +6766,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_elem_free;
}
- ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
+ ext->genmask = nft_genmask_cur(ctx->net);
err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
if (err) {
@@ -6798,12 +6814,13 @@ err_element_clash:
kfree(trans);
err_elem_free:
nf_tables_set_elem_destroy(ctx, set, elem.priv);
- if (obj)
- obj->use--;
err_parse_data:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_release(&elem.data.val, desc.type);
err_parse_key_end:
+ if (obj)
+ nft_use_dec_restore(&obj->use);
+
nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
err_parse_key:
nft_data_release(&elem.key.val, NFT_DATA_VALUE);
@@ -6883,7 +6900,7 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
case NFT_JUMP:
case NFT_GOTO:
chain = data->verdict.chain;
- chain->use++;
+ nft_use_inc_restore(&chain->use);
break;
}
}
@@ -6898,19 +6915,19 @@ static void nft_setelem_data_activate(const struct net *net,
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_hold(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use++;
+ nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
}
-static void nft_setelem_data_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem)
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use--;
+ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
}
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
@@ -7067,8 +7084,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask) ||
- nft_set_elem_mark_busy(ext))
+ if (!nft_set_elem_active(ext, genmask))
continue;
elem.priv = catchall->elem;
@@ -7142,29 +7158,6 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
return err;
}
-void nft_set_gc_batch_release(struct rcu_head *rcu)
-{
- struct nft_set_gc_batch *gcb;
- unsigned int i;
-
- gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
- for (i = 0; i < gcb->head.cnt; i++)
- nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
- kfree(gcb);
-}
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
- gfp_t gfp)
-{
- struct nft_set_gc_batch *gcb;
-
- gcb = kzalloc(sizeof(*gcb), gfp);
- if (gcb == NULL)
- return gcb;
- gcb->head.set = set;
- return gcb;
-}
-
/*
* Stateful objects
*/
@@ -7453,9 +7446,14 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
type = nft_obj_type_get(net, objtype);
- if (IS_ERR(type))
- return PTR_ERR(type);
+ if (IS_ERR(type)) {
+ err = PTR_ERR(type);
+ goto err_type;
+ }
obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
if (IS_ERR(obj)) {
@@ -7489,7 +7487,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
goto err_obj_ht;
list_add_tail_rcu(&obj->list, &table->objects);
- table->use++;
+
return 0;
err_obj_ht:
/* queued in transaction log */
@@ -7505,6 +7503,9 @@ err_strdup:
kfree(obj);
err_init:
module_put(type->owner);
+err_type:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -7906,7 +7907,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
case NFT_TRANS_PREPARE:
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
- flowtable->use--;
+ nft_use_dec(&flowtable->use);
fallthrough;
default:
return;
@@ -8260,9 +8261,14 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT);
- if (!flowtable)
- return -ENOMEM;
+ if (!flowtable) {
+ err = -ENOMEM;
+ goto flowtable_alloc;
+ }
flowtable->table = table;
flowtable->handle = nf_tables_alloc_handle(table);
@@ -8317,7 +8323,6 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
goto err5;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
- table->use++;
return 0;
err5:
@@ -8334,6 +8339,9 @@ err2:
kfree(flowtable->name);
err1:
kfree(flowtable);
+flowtable_alloc:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -9371,6 +9379,207 @@ void nft_chain_del(struct nft_chain *chain)
list_del_rcu(&chain->list);
}
+static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
+ struct nft_trans_gc *trans)
+{
+ void **priv = trans->priv;
+ unsigned int i;
+
+ for (i = 0; i < trans->count; i++) {
+ struct nft_set_elem elem = {
+ .priv = priv[i],
+ };
+
+ nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
+ nft_setelem_remove(ctx->net, trans->set, &elem);
+ }
+}
+
+void nft_trans_gc_destroy(struct nft_trans_gc *trans)
+{
+ nft_set_put(trans->set);
+ put_net(trans->net);
+ kfree(trans);
+}
+
+static void nft_trans_gc_trans_free(struct rcu_head *rcu)
+{
+ struct nft_set_elem elem = {};
+ struct nft_trans_gc *trans;
+ struct nft_ctx ctx = {};
+ unsigned int i;
+
+ trans = container_of(rcu, struct nft_trans_gc, rcu);
+ ctx.net = read_pnet(&trans->set->net);
+
+ for (i = 0; i < trans->count; i++) {
+ elem.priv = trans->priv[i];
+ if (!nft_setelem_is_catchall(trans->set, &elem))
+ atomic_dec(&trans->set->nelems);
+
+ nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
+ }
+
+ nft_trans_gc_destroy(trans);
+}
+
+static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
+{
+ struct nftables_pernet *nft_net;
+ struct nft_ctx ctx = {};
+
+ nft_net = nft_pernet(trans->net);
+
+ mutex_lock(&nft_net->commit_mutex);
+
+ /* Check for race with transaction, otherwise this batch refers to
+ * stale objects that might not be there anymore. Skip transaction if
+ * set has been destroyed from control plane transaction in case gc
+ * worker loses race.
+ */
+ if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) {
+ mutex_unlock(&nft_net->commit_mutex);
+ return false;
+ }
+
+ ctx.net = trans->net;
+ ctx.table = trans->set->table;
+
+ nft_trans_gc_setelem_remove(&ctx, trans);
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return true;
+}
+
+static void nft_trans_gc_work(struct work_struct *work)
+{
+ struct nft_trans_gc *trans, *next;
+ LIST_HEAD(trans_gc_list);
+
+ spin_lock(&nf_tables_destroy_list_lock);
+ list_splice_init(&nf_tables_gc_list, &trans_gc_list);
+ spin_unlock(&nf_tables_destroy_list_lock);
+
+ list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
+ list_del(&trans->list);
+ if (!nft_trans_gc_work_done(trans)) {
+ nft_trans_gc_destroy(trans);
+ continue;
+ }
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+ }
+}
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ unsigned int gc_seq, gfp_t gfp)
+{
+ struct net *net = read_pnet(&set->net);
+ struct nft_trans_gc *trans;
+
+ trans = kzalloc(sizeof(*trans), gfp);
+ if (!trans)
+ return NULL;
+
+ refcount_inc(&set->refs);
+ trans->set = set;
+ trans->net = get_net(net);
+ trans->seq = gc_seq;
+
+ return trans;
+}
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
+{
+ trans->priv[trans->count++] = priv;
+}
+
+static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
+{
+ spin_lock(&nf_tables_gc_list_lock);
+ list_add_tail(&trans->list, &nf_tables_gc_list);
+ spin_unlock(&nf_tables_gc_list_lock);
+
+ schedule_work(&trans_gc_work);
+}
+
+static int nft_trans_gc_space(struct nft_trans_gc *trans)
+{
+ return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp)
+{
+ if (nft_trans_gc_space(gc))
+ return gc;
+
+ nft_trans_gc_queue_work(gc);
+
+ return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+}
+
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
+{
+ if (trans->count == 0) {
+ nft_trans_gc_destroy(trans);
+ return;
+ }
+
+ nft_trans_gc_queue_work(trans);
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
+{
+ if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
+ return NULL;
+
+ if (nft_trans_gc_space(gc))
+ return gc;
+
+ call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+
+ return nft_trans_gc_alloc(gc->set, 0, gfp);
+}
+
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
+{
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));
+
+ if (trans->count == 0) {
+ nft_trans_gc_destroy(trans);
+ return;
+ }
+
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+ unsigned int gc_seq)
+{
+ struct nft_set_elem_catchall *catchall;
+ const struct nft_set *set = gc->set;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+
+ if (!nft_set_elem_expired(ext))
+ continue;
+ if (nft_set_elem_is_dead(ext))
+ goto dead_elem;
+
+ nft_set_elem_dead(ext);
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ return NULL;
+
+ nft_trans_gc_elem_add(gc, catchall->elem);
+ }
+
+ return gc;
+}
+
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -9533,11 +9742,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ unsigned int base_seq, gc_seq;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
- unsigned int base_seq;
LIST_HEAD(adl);
int err;
@@ -9614,6 +9823,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
WRITE_ONCE(nft_net->base_seq, base_seq);
+ /* Bump gc counter, it becomes odd, this is the busy mark. */
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+
/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
@@ -9713,7 +9926,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
*/
if (nft_set_is_anonymous(nft_trans_set(trans)) &&
!list_empty(&nft_trans_set(trans)->bindings))
- trans->ctx.table->use--;
+ nft_use_dec(&trans->ctx.table->use);
}
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_NEWSET, GFP_KERNEL);
@@ -9721,6 +9934,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
+ nft_trans_set(trans)->dead = 1;
list_del_rcu(&nft_trans_set(trans)->list);
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
trans->msg_type, GFP_KERNEL);
@@ -9823,6 +10037,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
nf_tables_commit_release(net);
return 0;
@@ -9943,7 +10159,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
nft_chain_del(trans->ctx.chain);
nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table,
@@ -9956,7 +10172,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_splice(&nft_trans_chain_hooks(trans),
&nft_trans_basechain(trans)->hook_list);
} else {
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, trans->ctx.chain);
}
nft_trans_destroy(trans);
@@ -9966,7 +10182,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- trans->ctx.chain->use--;
+ nft_use_dec_restore(&trans->ctx.chain->use);
list_del_rcu(&nft_trans_rule(trans)->list);
nft_rule_expr_deactivate(&trans->ctx,
nft_trans_rule(trans),
@@ -9976,7 +10192,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELRULE:
case NFT_MSG_DESTROYRULE:
- trans->ctx.chain->use++;
+ nft_use_inc_restore(&trans->ctx.chain->use);
nft_clear(trans->ctx.net, nft_trans_rule(trans));
nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
@@ -9989,7 +10205,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
break;
@@ -9998,7 +10214,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_set(trans));
if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
nft_map_activate(&trans->ctx, nft_trans_set(trans));
@@ -10042,13 +10258,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
nft_trans_destroy(trans);
} else {
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
nft_obj_del(nft_trans_obj(trans));
}
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_obj(trans));
nft_trans_destroy(trans);
break;
@@ -10057,7 +10273,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable_hooks(trans));
} else {
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
list_del_rcu(&nft_trans_flowtable(trans)->list);
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable(trans)->hook_list);
@@ -10069,7 +10285,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_splice(&nft_trans_flowtable_hooks(trans),
&nft_trans_flowtable(trans)->hook_list);
} else {
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
}
nft_trans_destroy(trans);
@@ -10477,6 +10693,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (!tb[NFTA_VERDICT_CODE])
return -EINVAL;
+
+ /* zero padding hole for memcmp */
+ memset(data, 0, sizeof(*data));
data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict.code) {
@@ -10502,7 +10721,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
genmask);
} else if (tb[NFTA_VERDICT_CHAIN_ID]) {
chain = nft_chain_lookup_byid(ctx->net, ctx->table,
- tb[NFTA_VERDICT_CHAIN_ID]);
+ tb[NFTA_VERDICT_CHAIN_ID],
+ genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
} else {
@@ -10518,8 +10738,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (desc->flags & NFT_DATA_DESC_SETELEM &&
chain->flags & NFT_CHAIN_BINDING)
return -EINVAL;
+ if (!nft_use_inc(&chain->use))
+ return -EMFILE;
- chain->use++;
data->verdict.chain = chain;
break;
}
@@ -10537,7 +10758,7 @@ static void nft_verdict_uninit(const struct nft_data *data)
case NFT_JUMP:
case NFT_GOTO:
chain = data->verdict.chain;
- chain->use--;
+ nft_use_dec(&chain->use);
break;
}
}
@@ -10706,11 +10927,11 @@ int __nft_release_basechain(struct nft_ctx *ctx)
nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
list_del(&rule->list);
- ctx->chain->use--;
+ nft_use_dec(&ctx->chain->use);
nf_tables_rule_release(ctx, rule);
}
nft_chain_del(ctx->chain);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
nf_tables_chain_destroy(ctx);
return 0;
@@ -10757,21 +10978,24 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
ctx.family = table->family;
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
+ if (nft_chain_is_bound(chain))
+ continue;
+
ctx.chain = chain;
list_for_each_entry_safe(rule, nr, &chain->rules, list) {
list_del(&rule->list);
- chain->use--;
+ nft_use_dec(&chain->use);
nf_tables_rule_release(&ctx, rule);
}
}
list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
list_del(&flowtable->list);
- table->use--;
+ nft_use_dec(&table->use);
nf_tables_flowtable_destroy(flowtable);
}
list_for_each_entry_safe(set, ns, &table->sets, list) {
list_del(&set->list);
- table->use--;
+ nft_use_dec(&table->use);
if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
nft_map_deactivate(&ctx, set);
@@ -10779,13 +11003,13 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
}
list_for_each_entry_safe(obj, ne, &table->objects, list) {
nft_obj_del(obj);
- table->use--;
+ nft_use_dec(&table->use);
nft_obj_destroy(&ctx, obj);
}
list_for_each_entry_safe(chain, nc, &table->chains, list) {
ctx.chain = chain;
nft_chain_del(chain);
- table->use--;
+ nft_use_dec(&table->use);
nf_tables_chain_destroy(&ctx);
}
nf_tables_table_destroy(&ctx);
@@ -10864,6 +11088,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->notify_list);
mutex_init(&nft_net->commit_mutex);
nft_net->base_seq = 1;
+ nft_net->gc_seq = 0;
return 0;
}
@@ -10892,10 +11117,16 @@ static void __net_exit nf_tables_exit_net(struct net *net)
WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
}
+static void nf_tables_exit_batch(struct list_head *net_exit_list)
+{
+ flush_work(&trans_gc_work);
+}
+
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
.pre_exit = nf_tables_pre_exit_net,
.exit = nf_tables_exit_net,
+ .exit_batch = nf_tables_exit_batch,
.id = &nf_tables_net_id,
.size = sizeof(struct nftables_pernet),
};
@@ -10967,6 +11198,7 @@ static void __exit nf_tables_module_exit(void)
nft_chain_filter_fini();
nft_chain_route_fini();
unregister_pernet_subsys(&nf_tables_net_ops);
+ cancel_work_sync(&trans_gc_work);
cancel_work_sync(&trans_destroy_work);
rcu_barrier();
rhltable_destroy(&nft_objname_ht);
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 9a85e797ed58..e596d1a842f7 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -30,11 +30,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
const struct nft_byteorder *priv = nft_expr_priv(expr);
u32 *src = &regs->data[priv->sreg];
u32 *dst = &regs->data[priv->dreg];
- union { u32 u32; u16 u16; } *s, *d;
+ u16 *s16, *d16;
unsigned int i;
- s = (void *)src;
- d = (void *)dst;
+ s16 = (void *)src;
+ d16 = (void *)dst;
switch (priv->size) {
case 8: {
@@ -62,11 +62,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 4; i++)
- d[i].u32 = ntohl((__force __be32)s[i].u32);
+ dst[i] = ntohl((__force __be32)src[i]);
break;
case NFT_BYTEORDER_HTON:
for (i = 0; i < priv->len / 4; i++)
- d[i].u32 = (__force __u32)htonl(s[i].u32);
+ dst[i] = (__force __u32)htonl(src[i]);
break;
}
break;
@@ -74,11 +74,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 2; i++)
- d[i].u16 = ntohs((__force __be16)s[i].u16);
+ d16[i] = ntohs((__force __be16)s16[i]);
break;
case NFT_BYTEORDER_HTON:
for (i = 0; i < priv->len / 2; i++)
- d[i].u16 = (__force __u16)htons(s[i].u16);
+ d16[i] = (__force __u16)htons(s16[i]);
break;
}
break;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 5ef9146e74ad..ab3362c483b4 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -408,8 +408,10 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
+ if (!nft_use_inc(&flowtable->use))
+ return -EMFILE;
+
priv->flowtable = flowtable;
- flowtable->use++;
return nf_ct_netns_get(ctx->net, ctx->family);
}
@@ -428,7 +430,7 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx,
{
struct nft_flow_offload *priv = nft_expr_priv(expr);
- priv->flowtable->use++;
+ nft_use_inc_restore(&priv->flowtable->use);
}
static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 3d76ebfe8939..fccb3cf7749c 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -125,15 +125,27 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
}
+static void nft_immediate_chain_deactivate(const struct nft_ctx *ctx,
+ struct nft_chain *chain,
+ enum nft_trans_phase phase)
+{
+ struct nft_ctx chain_ctx;
+ struct nft_rule *rule;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_deactivate(&chain_ctx, rule, phase);
+}
+
static void nft_immediate_deactivate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
enum nft_trans_phase phase)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
const struct nft_data *data = &priv->data;
- struct nft_ctx chain_ctx;
struct nft_chain *chain;
- struct nft_rule *rule;
if (priv->dreg == NFT_REG_VERDICT) {
switch (data->verdict.code) {
@@ -143,23 +155,20 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
if (!nft_chain_binding(chain))
break;
- chain_ctx = *ctx;
- chain_ctx.chain = chain;
-
- list_for_each_entry(rule, &chain->rules, list)
- nft_rule_expr_deactivate(&chain_ctx, rule, phase);
-
switch (phase) {
case NFT_TRANS_PREPARE_ERROR:
nf_tables_unbind_chain(ctx, chain);
- fallthrough;
+ nft_deactivate_next(ctx->net, chain);
+ break;
case NFT_TRANS_PREPARE:
+ nft_immediate_chain_deactivate(ctx, chain, phase);
nft_deactivate_next(ctx->net, chain);
break;
default:
+ nft_immediate_chain_deactivate(ctx, chain, phase);
nft_chain_del(chain);
chain->bound = false;
- chain->table->use--;
+ nft_use_dec(&chain->table->use);
break;
}
break;
@@ -198,7 +207,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
* let the transaction records release this chain and its rules.
*/
if (chain->bound) {
- chain->use--;
+ nft_use_dec(&chain->use);
break;
}
@@ -206,9 +215,9 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
chain_ctx = *ctx;
chain_ctx.chain = chain;
- chain->use--;
+ nft_use_dec(&chain->use);
list_for_each_entry_safe(rule, n, &chain->rules, list) {
- chain->use--;
+ nft_use_dec(&chain->use);
list_del(&rule->list);
nf_tables_rule_destroy(&chain_ctx, rule);
}
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index a48dd5b5d45b..509011b1ef59 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -41,8 +41,10 @@ static int nft_objref_init(const struct nft_ctx *ctx,
if (IS_ERR(obj))
return -ENOENT;
+ if (!nft_use_inc(&obj->use))
+ return -EMFILE;
+
nft_objref_priv(expr) = obj;
- obj->use++;
return 0;
}
@@ -72,7 +74,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx,
if (phase == NFT_TRANS_COMMIT)
return;
- obj->use--;
+ nft_use_dec(&obj->use);
}
static void nft_objref_activate(const struct nft_ctx *ctx,
@@ -80,7 +82,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx,
{
struct nft_object *obj = nft_objref_priv(expr);
- obj->use++;
+ nft_use_inc_restore(&obj->use);
}
static const struct nft_expr_ops nft_objref_ops = {
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 0b73cb0e752f..cef5df846000 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -59,6 +59,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
return 1;
+ if (nft_set_elem_is_dead(&he->ext))
+ return 1;
if (nft_set_elem_expired(&he->ext))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
@@ -188,7 +190,6 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
struct nft_rhash_elem *he = elem->priv;
nft_set_elem_change_active(net, set, &he->ext);
- nft_set_elem_clear_busy(&he->ext);
}
static bool nft_rhash_flush(const struct net *net,
@@ -196,12 +197,9 @@ static bool nft_rhash_flush(const struct net *net,
{
struct nft_rhash_elem *he = priv;
- if (!nft_set_elem_mark_busy(&he->ext) ||
- !nft_is_active(net, &he->ext)) {
- nft_set_elem_change_active(net, set, &he->ext);
- return true;
- }
- return false;
+ nft_set_elem_change_active(net, set, &he->ext);
+
+ return true;
}
static void *nft_rhash_deactivate(const struct net *net,
@@ -218,9 +216,8 @@ static void *nft_rhash_deactivate(const struct net *net,
rcu_read_lock();
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
- if (he != NULL &&
- !nft_rhash_flush(net, set, he))
- he = NULL;
+ if (he)
+ nft_set_elem_change_active(net, set, &he->ext);
rcu_read_unlock();
@@ -252,7 +249,9 @@ static bool nft_rhash_delete(const struct nft_set *set,
if (he == NULL)
return false;
- return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
+ nft_set_elem_dead(&he->ext);
+
+ return true;
}
static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
@@ -278,8 +277,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (iter->count < iter->skip)
goto cont;
- if (nft_set_elem_expired(&he->ext))
- goto cont;
if (!nft_set_elem_active(&he->ext, iter->genmask))
goto cont;
@@ -314,25 +311,48 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
static void nft_rhash_gc(struct work_struct *work)
{
+ struct nftables_pernet *nft_net;
struct nft_set *set;
struct nft_rhash_elem *he;
struct nft_rhash *priv;
- struct nft_set_gc_batch *gcb = NULL;
struct rhashtable_iter hti;
+ struct nft_trans_gc *gc;
+ struct net *net;
+ u32 gc_seq;
priv = container_of(work, struct nft_rhash, gc_work.work);
set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN)
- break;
+ if (PTR_ERR(he) != -EAGAIN) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
continue;
}
+ /* Ruleset has been updated, try later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
+ if (nft_set_elem_is_dead(&he->ext))
+ goto dead_elem;
+
if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
nft_rhash_expr_needs_gc_run(set, &he->ext))
goto needs_gc_run;
@@ -340,26 +360,26 @@ static void nft_rhash_gc(struct work_struct *work)
if (!nft_set_elem_expired(&he->ext))
continue;
needs_gc_run:
- if (nft_set_elem_mark_busy(&he->ext))
- continue;
+ nft_set_elem_dead(&he->ext);
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb == NULL)
- break;
- rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, he);
+ nft_trans_gc_elem_add(gc, he);
}
+
+ gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
+ /* catchall list iteration requires rcu read side lock. */
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
- he = nft_set_catchall_gc(set);
- if (he) {
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb)
- nft_set_gc_batch_add(gcb, he);
- }
- nft_set_gc_batch_complete(gcb);
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+
+done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
@@ -394,7 +414,7 @@ static int nft_rhash_init(const struct nft_set *set,
return err;
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
- if (set->flags & NFT_SET_TIMEOUT)
+ if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL))
nft_rhash_gc_init(set);
return 0;
@@ -422,7 +442,6 @@ static void nft_rhash_destroy(const struct nft_ctx *ctx,
};
cancel_delayed_work_sync(&priv->gc_work);
- rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
(void *)&rhash_ctx);
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index db526cb7a485..a5b8301afe4a 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -566,8 +566,7 @@ next_match:
goto out;
if (last) {
- if (nft_set_elem_expired(&f->mt[b].e->ext) ||
- (genmask &&
+ if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
goto next_match;
@@ -601,8 +600,17 @@ out:
static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
- return pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net));
+ struct nft_pipapo_elem *ret;
+
+ ret = pipapo_get(net, set, (const u8 *)elem->key.val.data,
+ nft_genmask_cur(net));
+ if (IS_ERR(ret))
+ return ret;
+
+ if (nft_set_elem_expired(&ret->ext))
+ return ERR_PTR(-ENOENT);
+
+ return ret;
}
/**
@@ -1528,16 +1536,34 @@ static void pipapo_drop(struct nft_pipapo_match *m,
}
}
+static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
+ struct nft_pipapo_elem *e)
+
+{
+ struct nft_set_elem elem = {
+ .priv = e,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+}
+
/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
* @set: nftables API set representation
* @m: Matching data
*/
-static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
{
+ struct nft_set *set = (struct nft_set *) _set;
struct nft_pipapo *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
+ struct nft_trans_gc *gc;
+
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+ if (!gc)
+ return;
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
@@ -1561,13 +1587,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
f--;
i--;
e = f->mt[rulemap[i].to].e;
- if (nft_set_elem_expired(&e->ext) &&
- !nft_set_elem_mark_busy(&e->ext)) {
+
+ /* synchronous gc never fails, there is no need to set on
+ * NFT_SET_ELEM_DEAD_BIT.
+ */
+ if (nft_set_elem_expired(&e->ext)) {
priv->dirty = true;
- pipapo_drop(m, rulemap);
- rcu_barrier();
- nft_set_elem_destroy(set, e, true);
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (!gc)
+ break;
+
+ nft_pipapo_gc_deactivate(net, set, e);
+ pipapo_drop(m, rulemap);
+ nft_trans_gc_elem_add(gc, e);
/* And check again current first rule, which is now the
* first we haven't checked.
@@ -1577,11 +1610,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
}
}
- e = nft_set_catchall_gc(set);
- if (e)
- nft_set_elem_destroy(set, e, true);
-
- priv->last_gc = jiffies;
+ gc = nft_trans_gc_catchall(gc, 0);
+ if (gc) {
+ nft_trans_gc_queue_sync_done(gc);
+ priv->last_gc = jiffies;
+ }
}
/**
@@ -1706,7 +1739,6 @@ static void nft_pipapo_activate(const struct net *net,
return;
nft_set_elem_change_active(net, set, &e->ext);
- nft_set_elem_clear_busy(&e->ext);
}
/**
@@ -1929,7 +1961,11 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
int i, start, rules_fx;
match_start = data;
- match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
+
+ if (nft_set_ext_exists(&e->ext, NFT_SET_EXT_KEY_END))
+ match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
+ else
+ match_end = data;
start = first_rule;
rules_fx = rules_f0;
@@ -2001,8 +2037,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
goto cont;
e = f->mt[r].e;
- if (nft_set_elem_expired(&e->ext))
- goto cont;
elem.priv = e;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 5c05c9b990fb..f9d4c8fcbbf8 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -46,6 +46,12 @@ static int nft_rbtree_cmp(const struct nft_set *set,
set->klen);
}
+static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
+{
+ return nft_set_elem_expired(&rbe->ext) ||
+ nft_set_elem_is_dead(&rbe->ext);
+}
+
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext,
unsigned int seq)
@@ -80,7 +86,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
continue;
}
- if (nft_set_elem_expired(&rbe->ext))
+ if (nft_rbtree_elem_expired(rbe))
return false;
if (nft_rbtree_interval_end(rbe)) {
@@ -98,7 +104,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
- !nft_set_elem_expired(&interval->ext) &&
+ !nft_rbtree_elem_expired(interval) &&
nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
@@ -215,38 +221,70 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
return rbe;
}
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ struct nft_set_elem elem = {
+ .priv = rbe,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+ rb_erase(&rbe->node, &priv->root);
+}
+
static int nft_rbtree_gc_elem(const struct nft_set *__set,
struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe)
+ struct nft_rbtree_elem *rbe,
+ u8 genmask)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
- struct nft_rbtree_elem *rbe_prev = NULL;
- struct nft_set_gc_batch *gcb;
+ struct net *net = read_pnet(&set->net);
+ struct nft_rbtree_elem *rbe_prev;
+ struct nft_trans_gc *gc;
- gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
- if (!gcb)
+ gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
+ if (!gc)
return -ENOMEM;
- /* search for expired end interval coming before this element. */
+ /* search for end interval coming before this element.
+ * end intervals don't carry a timeout extension, they
+ * are coupled with the interval start element.
+ */
while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
- if (nft_rbtree_interval_end(rbe_prev))
+ if (nft_rbtree_interval_end(rbe_prev) &&
+ nft_set_elem_active(&rbe_prev->ext, genmask))
break;
prev = rb_prev(prev);
}
- if (rbe_prev) {
- rb_erase(&rbe_prev->node, &priv->root);
- atomic_dec(&set->nelems);
+ if (prev) {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ nft_rbtree_gc_remove(net, set, priv, rbe_prev);
+
+ /* There is always room in this trans gc for this element,
+ * memory allocation never actually happens, hence, the warning
+ * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT,
+ * this is synchronous gc which never fails.
+ */
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return -ENOMEM;
+
+ nft_trans_gc_elem_add(gc, rbe_prev);
}
- rb_erase(&rbe->node, &priv->root);
- atomic_dec(&set->nelems);
+ nft_rbtree_gc_remove(net, set, priv, rbe);
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return -ENOMEM;
+
+ nft_trans_gc_elem_add(gc, rbe);
- nft_set_gc_batch_add(gcb, rbe);
- nft_set_gc_batch_complete(gcb);
+ nft_trans_gc_queue_sync_done(gc);
return 0;
}
@@ -321,7 +359,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
/* perform garbage collection to avoid bogus overlap reports. */
if (nft_set_elem_expired(&rbe->ext)) {
- err = nft_rbtree_gc_elem(set, priv, rbe);
+ err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
if (err < 0)
return err;
@@ -474,7 +512,6 @@ static void nft_rbtree_activate(const struct net *net,
struct nft_rbtree_elem *rbe = elem->priv;
nft_set_elem_change_active(net, set, &rbe->ext);
- nft_set_elem_clear_busy(&rbe->ext);
}
static bool nft_rbtree_flush(const struct net *net,
@@ -482,12 +519,9 @@ static bool nft_rbtree_flush(const struct net *net,
{
struct nft_rbtree_elem *rbe = priv;
- if (!nft_set_elem_mark_busy(&rbe->ext) ||
- !nft_is_active(net, &rbe->ext)) {
- nft_set_elem_change_active(net, set, &rbe->ext);
- return true;
- }
- return false;
+ nft_set_elem_change_active(net, set, &rbe->ext);
+
+ return true;
}
static void *nft_rbtree_deactivate(const struct net *net,
@@ -544,8 +578,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (iter->count < iter->skip)
goto cont;
- if (nft_set_elem_expired(&rbe->ext))
- goto cont;
if (!nft_set_elem_active(&rbe->ext, iter->genmask))
goto cont;
@@ -564,26 +596,40 @@ cont:
static void nft_rbtree_gc(struct work_struct *work)
{
- struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL;
- struct nft_set_gc_batch *gcb = NULL;
+ struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+ struct nftables_pernet *nft_net;
struct nft_rbtree *priv;
+ struct nft_trans_gc *gc;
struct rb_node *node;
struct nft_set *set;
+ unsigned int gc_seq;
struct net *net;
- u8 genmask;
priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
- genmask = nft_genmask_cur(net);
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+
+ /* Ruleset has been updated, try later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (!nft_set_elem_active(&rbe->ext, genmask))
- continue;
+ if (nft_set_elem_is_dead(&rbe->ext))
+ goto dead_elem;
/* elements are reversed in the rbtree for historical reasons,
* from highest to lowest value, that is why end element is
@@ -596,46 +642,36 @@ static void nft_rbtree_gc(struct work_struct *work)
if (!nft_set_elem_expired(&rbe->ext))
continue;
- if (nft_set_elem_mark_busy(&rbe->ext)) {
- rbe_end = NULL;
+ nft_set_elem_dead(&rbe->ext);
+
+ if (!rbe_end)
continue;
- }
- if (rbe_prev) {
- rb_erase(&rbe_prev->node, &priv->root);
- rbe_prev = NULL;
- }
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (!gcb)
- break;
+ nft_set_elem_dead(&rbe_end->ext);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe);
- rbe_prev = rbe;
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
- if (rbe_end) {
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe_end);
- rb_erase(&rbe_end->node, &priv->root);
- rbe_end = NULL;
- }
- node = rb_next(node);
- if (!node)
- break;
+ nft_trans_gc_elem_add(gc, rbe_end);
+ rbe_end = NULL;
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
+
+ nft_trans_gc_elem_add(gc, rbe);
}
- if (rbe_prev)
- rb_erase(&rbe_prev->node, &priv->root);
+
+ gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
- rbe = nft_set_catchall_gc(set);
- if (rbe) {
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb)
- nft_set_gc_batch_add(gcb, rbe);
- }
- nft_set_gc_batch_complete(gcb);
-
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 84def74698b7..9ed85be79452 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -107,7 +107,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
break;
case NFT_SOCKET_MARK:
if (sk_fullsock(sk)) {
- *dest = sk->sk_mark;
+ *dest = READ_ONCE(sk->sk_mark);
} else {
regs->verdict.code = NFT_BREAK;
return;
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 66b0f941d8fb..36c9720ad8d6 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -43,7 +43,6 @@ led_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_led_info *ledinfo = par->targinfo;
struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
- unsigned long led_delay = XT_LED_BLINK_DELAY;
/*
* If "always blink" is enabled, and there's still some time until the
@@ -52,7 +51,7 @@ led_tg(struct sk_buff *skb, const struct xt_action_param *par)
if ((ledinfo->delay > 0) && ledinfo->always_blink &&
timer_pending(&ledinternal->timer))
led_trigger_blink_oneshot(&ledinternal->netfilter_led_trigger,
- &led_delay, &led_delay, 1);
+ XT_LED_BLINK_DELAY, XT_LED_BLINK_DELAY, 1);
else
led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL);
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 7013f55f05d1..76e01f292aaf 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -77,7 +77,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
- pskb->mark = sk->sk_mark;
+ pskb->mark = READ_ONCE(sk->sk_mark);
if (sk != skb->sk)
sock_gen_put(sk);
@@ -138,7 +138,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
- pskb->mark = sk->sk_mark;
+ pskb->mark = READ_ONCE(sk->sk_mark);
if (sk != skb->sk)
sock_gen_put(sk);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a6d2a0b1aa21..3d7a91e64c88 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1829,7 +1829,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
- ? nla_get_u32(a[OVS_DP_ATTR_IFINDEX]) : 0;
+ ? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0;
/* So far only local changes have been made, now need the lock. */
ovs_lock();
@@ -2049,7 +2049,7 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
- [OVS_DP_ATTR_IFINDEX] = {.type = NLA_U32 },
+ [OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
};
static const struct genl_small_ops dp_datapath_genl_ops[] = {
@@ -2302,7 +2302,7 @@ restart:
parms.port_no = port_no;
parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
- ? nla_get_u32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
+ ? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
vport = new_vport(&parms);
err = PTR_ERR(vport);
@@ -2539,7 +2539,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
- [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
[OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
};
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 85ff90a03b0c..a2935bd18ed9 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -401,18 +401,20 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
union tpacket_uhdr h;
+ /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */
+
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
- h.h1->tp_status = status;
+ WRITE_ONCE(h.h1->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
break;
case TPACKET_V2:
- h.h2->tp_status = status;
+ WRITE_ONCE(h.h2->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
break;
case TPACKET_V3:
- h.h3->tp_status = status;
+ WRITE_ONCE(h.h3->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
break;
default:
@@ -429,17 +431,19 @@ static int __packet_get_status(const struct packet_sock *po, void *frame)
smp_rmb();
+ /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */
+
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
- return h.h1->tp_status;
+ return READ_ONCE(h.h1->tp_status);
case TPACKET_V2:
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
- return h.h2->tp_status;
+ return READ_ONCE(h.h2->tp_status);
case TPACKET_V3:
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
- return h.h3->tp_status;
+ return READ_ONCE(h.h3->tp_status);
default:
WARN(1, "TPACKET version not supported.\n");
BUG();
@@ -2050,8 +2054,8 @@ retry:
skb->protocol = proto;
skb->dev = dev;
- skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->priority = READ_ONCE(sk->sk_priority);
+ skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;
skb_setup_tx_timestamp(skb, sockc.tsflags);
@@ -2585,8 +2589,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->protocol = proto;
skb->dev = dev;
- skb->priority = po->sk.sk_priority;
- skb->mark = po->sk.sk_mark;
+ skb->priority = READ_ONCE(po->sk.sk_priority);
+ skb->mark = READ_ONCE(po->sk.sk_mark);
skb->tstamp = sockc->transmit_time;
skb_setup_tx_timestamp(skb, sockc->tsflags);
skb_zcopy_set_nouarg(skb, ph.raw);
@@ -2988,7 +2992,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
goto out_unlock;
sockcm_init(&sockc, sk);
- sockc.mark = sk->sk_mark;
+ sockc.mark = READ_ONCE(sk->sk_mark);
if (msg->msg_controllen) {
err = sock_cmsg_send(sk, msg, &sockc);
if (unlikely(err))
@@ -3061,7 +3065,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->protocol = proto;
skb->dev = dev;
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc.mark;
skb->tstamp = sockc.transmit_time;
@@ -3601,7 +3605,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
if (dev) {
sll->sll_hatype = dev->type;
sll->sll_halen = dev->addr_len;
- memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
+ memcpy(sll->sll_addr_flex, dev->dev_addr, dev->addr_len);
} else {
sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
sll->sll_halen = 0;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f7887f42d542..9d3f26bf0440 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1320,7 +1320,7 @@ struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
return ERR_PTR(err);
}
} else {
- if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) {
+ if (strscpy(act_name, "police", IFNAMSIZ) < 0) {
NL_SET_ERR_MSG(extack, "TC action name too long");
return ERR_PTR(-EINVAL);
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 5d96ffebd40f..598d6e299152 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -21,6 +21,7 @@
#include <linux/tc_act/tc_ipt.h>
#include <net/tc_act/tc_ipt.h>
#include <net/tc_wrapper.h>
+#include <net/ip.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -48,7 +49,7 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t,
par.entryinfo = &e;
par.target = target;
par.targinfo = t->data;
- par.hook_mask = hook;
+ par.hook_mask = 1 << hook;
par.family = NFPROTO_IPV4;
ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
@@ -85,7 +86,8 @@ static void tcf_ipt_release(struct tc_action *a)
static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
[TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
- [TCA_IPT_HOOK] = { .type = NLA_U32 },
+ [TCA_IPT_HOOK] = NLA_POLICY_RANGE(NLA_U32, NF_INET_PRE_ROUTING,
+ NF_INET_NUMHOOKS),
[TCA_IPT_INDEX] = { .type = NLA_U32 },
[TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
};
@@ -158,15 +160,27 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
return -EEXIST;
}
}
+
+ err = -EINVAL;
hook = nla_get_u32(tb[TCA_IPT_HOOK]);
+ switch (hook) {
+ case NF_INET_PRE_ROUTING:
+ break;
+ case NF_INET_POST_ROUTING:
+ break;
+ default:
+ goto err1;
+ }
+
+ if (tb[TCA_IPT_TABLE]) {
+ /* mangle only for now */
+ if (nla_strcmp(tb[TCA_IPT_TABLE], "mangle"))
+ goto err1;
+ }
- err = -ENOMEM;
- tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
+ tname = kstrdup("mangle", GFP_KERNEL);
if (unlikely(!tname))
goto err1;
- if (tb[TCA_IPT_TABLE] == NULL ||
- nla_strscpy(tname, tb[TCA_IPT_TABLE], IFNAMSIZ) >= IFNAMSIZ)
- strcpy(tname, "mangle");
t = kmemdup(td, td->u.target_size, GFP_KERNEL);
if (unlikely(!t))
@@ -217,10 +231,31 @@ static int tcf_xt_init(struct net *net, struct nlattr *nla,
a, &act_xt_ops, tp, flags);
}
+static bool tcf_ipt_act_check(struct sk_buff *skb)
+{
+ const struct iphdr *iph;
+ unsigned int nhoff, len;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ return false;
+
+ nhoff = skb_network_offset(skb);
+ iph = ip_hdr(skb);
+ if (iph->ihl < 5 || iph->version != 4)
+ return false;
+
+ len = skb_ip_totlen(skb);
+ if (skb->len < nhoff + len || len < (iph->ihl * 4u))
+ return false;
+
+ return pskb_may_pull(skb, iph->ihl * 4u);
+}
+
TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb,
const struct tc_action *a,
struct tcf_result *res)
{
+ char saved_cb[sizeof_field(struct sk_buff, cb)];
int ret = 0, result = 0;
struct tcf_ipt *ipt = to_ipt(a);
struct xt_action_param par;
@@ -231,9 +266,24 @@ TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb,
.pf = NFPROTO_IPV4,
};
+ if (skb_protocol(skb, false) != htons(ETH_P_IP))
+ return TC_ACT_UNSPEC;
+
if (skb_unclone(skb, GFP_ATOMIC))
return TC_ACT_UNSPEC;
+ if (!tcf_ipt_act_check(skb))
+ return TC_ACT_UNSPEC;
+
+ if (state.hook == NF_INET_POST_ROUTING) {
+ if (!skb_dst(skb))
+ return TC_ACT_UNSPEC;
+
+ state.out = skb->dev;
+ }
+
+ memcpy(saved_cb, skb->cb, sizeof(saved_cb));
+
spin_lock(&ipt->tcf_lock);
tcf_lastuse_update(&ipt->tcf_tm);
@@ -246,6 +296,9 @@ TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb,
par.state = &state;
par.target = ipt->tcfi_t->u.kernel.target;
par.targinfo = ipt->tcfi_t->data;
+
+ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+
ret = par.target->target(skb, &par);
switch (ret) {
@@ -266,6 +319,9 @@ TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb,
break;
}
spin_unlock(&ipt->tcf_lock);
+
+ memcpy(skb->cb, saved_cb, sizeof(skb->cb));
+
return result;
}
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b562fc2bb5b1..1ef8fcfa9997 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -29,6 +29,7 @@ static struct tc_action_ops act_pedit_ops;
static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
[TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) },
+ [TCA_PEDIT_PARMS_EX] = { .len = sizeof(struct tc_pedit) },
[TCA_PEDIT_KEYS_EX] = { .type = NLA_NESTED },
};
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 466c26df853a..382c7a71f81f 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -406,56 +406,6 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
return 0;
}
-static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
- struct cls_bpf_prog *prog, unsigned long base,
- struct nlattr **tb, struct nlattr *est, u32 flags,
- struct netlink_ext_ack *extack)
-{
- bool is_bpf, is_ebpf, have_exts = false;
- u32 gen_flags = 0;
- int ret;
-
- is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
- is_ebpf = tb[TCA_BPF_FD];
- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
- return -EINVAL;
-
- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
- extack);
- if (ret < 0)
- return ret;
-
- if (tb[TCA_BPF_FLAGS]) {
- u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
-
- if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT)
- return -EINVAL;
-
- have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
- }
- if (tb[TCA_BPF_FLAGS_GEN]) {
- gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
- if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
- !tc_flags_valid(gen_flags))
- return -EINVAL;
- }
-
- prog->exts_integrated = have_exts;
- prog->gen_flags = gen_flags;
-
- ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
- cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
- if (ret < 0)
- return ret;
-
- if (tb[TCA_BPF_CLASSID]) {
- prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
- tcf_bind_filter(tp, &prog->res, base);
- }
-
- return 0;
-}
-
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
@@ -463,9 +413,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
+ bool is_bpf, is_ebpf, have_exts = false;
struct cls_bpf_prog *oldprog = *arg;
struct nlattr *tb[TCA_BPF_MAX + 1];
+ bool bound_to_filter = false;
struct cls_bpf_prog *prog;
+ u32 gen_flags = 0;
int ret;
if (tca[TCA_OPTIONS] == NULL)
@@ -504,11 +457,51 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
goto errout;
prog->handle = handle;
- ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
- extack);
+ is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
+ is_ebpf = tb[TCA_BPF_FD];
+ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
+ ret = -EINVAL;
+ goto errout_idr;
+ }
+
+ ret = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &prog->exts,
+ flags, extack);
+ if (ret < 0)
+ goto errout_idr;
+
+ if (tb[TCA_BPF_FLAGS]) {
+ u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
+
+ if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
+ ret = -EINVAL;
+ goto errout_idr;
+ }
+
+ have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
+ }
+ if (tb[TCA_BPF_FLAGS_GEN]) {
+ gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
+ if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
+ !tc_flags_valid(gen_flags)) {
+ ret = -EINVAL;
+ goto errout_idr;
+ }
+ }
+
+ prog->exts_integrated = have_exts;
+ prog->gen_flags = gen_flags;
+
+ ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
+ cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
if (ret < 0)
goto errout_idr;
+ if (tb[TCA_BPF_CLASSID]) {
+ prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+ tcf_bind_filter(tp, &prog->res, base);
+ bound_to_filter = true;
+ }
+
ret = cls_bpf_offload(tp, prog, oldprog, extack);
if (ret)
goto errout_parms;
@@ -530,6 +523,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
return 0;
errout_parms:
+ if (bound_to_filter)
+ tcf_unbind_filter(tp, &prog->res);
cls_bpf_free_parms(prog);
errout_idr:
if (!oldprog)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 56065cc5a661..9f0711da9c95 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -776,7 +776,8 @@ mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL] = { .type = NLA_U32 },
};
-static const struct nla_policy cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX] = {
+static const struct nla_policy
+cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX + 1] = {
[TCA_FLOWER_KEY_CFM_MD_LEVEL] = NLA_POLICY_MAX(NLA_U8,
FLOW_DIS_CFM_MDL_MAX),
[TCA_FLOWER_KEY_CFM_OPCODE] = { .type = NLA_U8 },
@@ -812,6 +813,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src,
TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
+ if (mask->tp_range.tp_min.dst != mask->tp_range.tp_max.dst) {
+ NL_SET_ERR_MSG(extack,
+ "Both min and max destination ports must be specified");
+ return -EINVAL;
+ }
+ if (mask->tp_range.tp_min.src != mask->tp_range.tp_max.src) {
+ NL_SET_ERR_MSG(extack,
+ "Both min and max source ports must be specified");
+ return -EINVAL;
+ }
if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
ntohs(key->tp_range.tp_max.dst) <=
ntohs(key->tp_range.tp_min.dst)) {
@@ -1699,7 +1710,7 @@ static int fl_set_key_cfm(struct nlattr **tb,
struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
{
- struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX];
+ struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX + 1];
int err;
if (!tb[TCA_FLOWER_KEY_CFM])
@@ -2163,53 +2174,6 @@ static bool fl_needs_tc_skb_ext(const struct fl_flow_key *mask)
return mask->meta.l2_miss;
}
-static int fl_set_parms(struct net *net, struct tcf_proto *tp,
- struct cls_fl_filter *f, struct fl_flow_mask *mask,
- unsigned long base, struct nlattr **tb,
- struct nlattr *est,
- struct fl_flow_tmplt *tmplt,
- u32 flags, u32 fl_flags,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- err = tcf_exts_validate_ex(net, tp, tb, est, &f->exts, flags,
- fl_flags, extack);
- if (err < 0)
- return err;
-
- if (tb[TCA_FLOWER_CLASSID]) {
- f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
- if (flags & TCA_ACT_FLAGS_NO_RTNL)
- rtnl_lock();
- tcf_bind_filter(tp, &f->res, base);
- if (flags & TCA_ACT_FLAGS_NO_RTNL)
- rtnl_unlock();
- }
-
- err = fl_set_key(net, tb, &f->key, &mask->key, extack);
- if (err)
- return err;
-
- fl_mask_update_range(mask);
- fl_set_masked_key(&f->mkey, &f->key, mask);
-
- if (!fl_mask_fits_tmplt(tmplt, mask)) {
- NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template");
- return -EINVAL;
- }
-
- /* Enable tc skb extension if filter matches on data extracted from
- * this extension.
- */
- if (fl_needs_tc_skb_ext(&mask->key)) {
- f->needs_tc_skb_ext = 1;
- tc_skb_ext_tc_enable();
- }
-
- return 0;
-}
-
static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
struct cls_fl_filter *fold,
bool *in_ht)
@@ -2241,6 +2205,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
struct cls_fl_head *head = fl_head_dereference(tp);
bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
struct cls_fl_filter *fold = *arg;
+ bool bound_to_filter = false;
struct cls_fl_filter *fnew;
struct fl_flow_mask *mask;
struct nlattr **tb;
@@ -2325,15 +2290,46 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout_idr;
- err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE],
- tp->chain->tmplt_priv, flags, fnew->flags,
- extack);
- if (err)
+ err = tcf_exts_validate_ex(net, tp, tb, tca[TCA_RATE],
+ &fnew->exts, flags, fnew->flags,
+ extack);
+ if (err < 0)
goto errout_idr;
+ if (tb[TCA_FLOWER_CLASSID]) {
+ fnew->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
+ rtnl_lock();
+ tcf_bind_filter(tp, &fnew->res, base);
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
+ rtnl_unlock();
+ bound_to_filter = true;
+ }
+
+ err = fl_set_key(net, tb, &fnew->key, &mask->key, extack);
+ if (err)
+ goto unbind_filter;
+
+ fl_mask_update_range(mask);
+ fl_set_masked_key(&fnew->mkey, &fnew->key, mask);
+
+ if (!fl_mask_fits_tmplt(tp->chain->tmplt_priv, mask)) {
+ NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template");
+ err = -EINVAL;
+ goto unbind_filter;
+ }
+
+ /* Enable tc skb extension if filter matches on data extracted from
+ * this extension.
+ */
+ if (fl_needs_tc_skb_ext(&mask->key)) {
+ fnew->needs_tc_skb_ext = 1;
+ tc_skb_ext_tc_enable();
+ }
+
err = fl_check_assign_mask(head, fnew, fold, mask);
if (err)
- goto errout_idr;
+ goto unbind_filter;
err = fl_ht_insert_unique(fnew, fold, &in_ht);
if (err)
@@ -2424,6 +2420,16 @@ errout_hw:
fnew->mask->filter_ht_params);
errout_mask:
fl_mask_put(head, fnew->mask);
+
+unbind_filter:
+ if (bound_to_filter) {
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
+ rtnl_lock();
+ tcf_unbind_filter(tp, &fnew->res);
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
+ rtnl_unlock();
+ }
+
errout_idr:
if (!fold)
idr_remove(&head->handle_idr, fnew->handle);
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index ae9439a6c56c..c49d6af0e048 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -212,11 +212,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
if (err < 0)
return err;
- if (tb[TCA_FW_CLASSID]) {
- f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
- tcf_bind_filter(tp, &f->res, base);
- }
-
if (tb[TCA_FW_INDEV]) {
int ret;
ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack);
@@ -233,6 +228,11 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
} else if (head->mask != 0xFFFFFFFF)
return err;
+ if (tb[TCA_FW_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
+ tcf_bind_filter(tp, &f->res, base);
+ }
+
return 0;
}
@@ -267,7 +267,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;
fnew->id = f->id;
- fnew->res = f->res;
fnew->ifindex = f->ifindex;
fnew->tp = f->tp;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index fa3bbd187eb9..c4ed11df6254 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -159,26 +159,6 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
[TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
};
-static int mall_set_parms(struct net *net, struct tcf_proto *tp,
- struct cls_mall_head *head,
- unsigned long base, struct nlattr **tb,
- struct nlattr *est, u32 flags, u32 fl_flags,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- err = tcf_exts_validate_ex(net, tp, tb, est, &head->exts, flags,
- fl_flags, extack);
- if (err < 0)
- return err;
-
- if (tb[TCA_MATCHALL_CLASSID]) {
- head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
- tcf_bind_filter(tp, &head->res, base);
- }
- return 0;
-}
-
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
@@ -187,6 +167,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
+ bool bound_to_filter = false;
struct cls_mall_head *new;
u32 userflags = 0;
int err;
@@ -226,11 +207,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
goto err_alloc_percpu;
}
- err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE],
- flags, new->flags, extack);
- if (err)
+ err = tcf_exts_validate_ex(net, tp, tb, tca[TCA_RATE],
+ &new->exts, flags, new->flags, extack);
+ if (err < 0)
goto err_set_parms;
+ if (tb[TCA_MATCHALL_CLASSID]) {
+ new->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
+ tcf_bind_filter(tp, &new->res, base);
+ bound_to_filter = true;
+ }
+
if (!tc_skip_hw(new->flags)) {
err = mall_replace_hw_filter(tp, new, (unsigned long)new,
extack);
@@ -246,6 +233,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
return 0;
err_replace_hw_filter:
+ if (bound_to_filter)
+ tcf_unbind_filter(tp, &new->res);
err_set_parms:
free_percpu(new->pf);
err_alloc_percpu:
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index d0c53724d3e8..1e20bbd687f1 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -513,7 +513,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (fold) {
f->id = fold->id;
f->iif = fold->iif;
- f->res = fold->res;
f->handle = fold->handle;
f->tp = fold->tp;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d15d50de7980..da4c179a4d41 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -712,8 +712,23 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
[TCA_U32_FLAGS] = { .type = NLA_U32 },
};
+static void u32_unbind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
+ struct nlattr **tb)
+{
+ if (tb[TCA_U32_CLASSID])
+ tcf_unbind_filter(tp, &n->res);
+}
+
+static void u32_bind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
+ unsigned long base, struct nlattr **tb)
+{
+ if (tb[TCA_U32_CLASSID]) {
+ n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
+ tcf_bind_filter(tp, &n->res, base);
+ }
+}
+
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
- unsigned long base,
struct tc_u_knode *n, struct nlattr **tb,
struct nlattr *est, u32 flags, u32 fl_flags,
struct netlink_ext_ack *extack)
@@ -760,10 +775,6 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
if (ht_old)
ht_old->refcnt--;
}
- if (tb[TCA_U32_CLASSID]) {
- n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
- tcf_bind_filter(tp, &n->res, base);
- }
if (ifindex >= 0)
n->ifindex = ifindex;
@@ -815,7 +826,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
new->ifindex = n->ifindex;
new->fshift = n->fshift;
- new->res = n->res;
new->flags = n->flags;
RCU_INIT_POINTER(new->ht_down, ht);
@@ -903,17 +913,27 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (!new)
return -ENOMEM;
- err = u32_set_parms(net, tp, base, new, tb,
- tca[TCA_RATE], flags, new->flags,
- extack);
+ err = u32_set_parms(net, tp, new, tb, tca[TCA_RATE],
+ flags, new->flags, extack);
if (err) {
__u32_destroy_key(new);
return err;
}
+ u32_bind_filter(tp, new, base, tb);
+
err = u32_replace_hw_knode(tp, new, flags, extack);
if (err) {
+ u32_unbind_filter(tp, new, tb);
+
+ if (tb[TCA_U32_LINK]) {
+ struct tc_u_hnode *ht_old;
+
+ ht_old = rtnl_dereference(n->ht_down);
+ if (ht_old)
+ ht_old->refcnt++;
+ }
__u32_destroy_key(new);
return err;
}
@@ -1003,18 +1023,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
+ /* At this point, we need to derive the new handle that will be used to
+ * uniquely map the identity of this table match entry. The
+ * identity of the entry that we need to construct is 32 bits made of:
+ * htid(12b):bucketid(8b):node/entryid(12b)
+ *
+ * At this point _we have the table(ht)_ in which we will insert this
+ * entry. We carry the table's id in variable "htid".
+ * Note that earlier code picked the ht selection either by a) the user
+ * providing the htid specified via TCA_U32_HASH attribute or b) when
+ * no such attribute is passed then the root ht, is default to at ID
+ * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
+ * If OTOH the user passed us the htid, they may also pass a bucketid of
+ * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is
+ * indicating hash bucketid of 1. Rule: the entry/node ID _cannot_ be
+ * passed via the htid, so even if it was non-zero it will be ignored.
+ *
+ * We may also have a handle, if the user passed one. The handle also
+ * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b).
+ * Rule: the bucketid on the handle is ignored even if one was passed;
+ * rather the value on "htid" is always assumed to be the bucketid.
+ */
if (handle) {
+ /* Rule: The htid from handle and tableid from htid must match */
if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
return -EINVAL;
}
- handle = htid | TC_U32_NODE(handle);
- err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
- GFP_KERNEL);
- if (err)
- return err;
- } else
+ /* Ok, so far we have a valid htid(12b):bucketid(8b) but we
+ * need to finalize the table entry identification with the last
+ * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for
+ * entries. Rule: nodeid of 0 is reserved only for tables(see
+ * earlier code which processes TC_U32_DIVISOR attribute).
+ * Rule: The nodeid can only be derived from the handle (and not
+ * htid).
+ * Rule: if the handle specified zero for the node id example
+ * 0x60000000, then pick a new nodeid from the pool of IDs
+ * this hash table has been allocating from.
+ * If OTOH it is specified (i.e for example the user passed a
+ * handle such as 0x60000123), then we use it generate our final
+ * handle which is used to uniquely identify the match entry.
+ */
+ if (!TC_U32_NODE(handle)) {
+ handle = gen_new_kid(ht, htid);
+ } else {
+ handle = htid | TC_U32_NODE(handle);
+ err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
+ handle, GFP_KERNEL);
+ if (err)
+ return err;
+ }
+ } else {
+ /* The user did not give us a handle; lets just generate one
+ * from the table's pool of nodeids.
+ */
handle = gen_new_kid(ht, htid);
+ }
if (tb[TCA_U32_SEL] == NULL) {
NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
@@ -1074,15 +1138,18 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
#endif
- err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE],
+ err = u32_set_parms(net, tp, n, tb, tca[TCA_RATE],
flags, n->flags, extack);
+
+ u32_bind_filter(tp, n, base, tb);
+
if (err == 0) {
struct tc_u_knode __rcu **ins;
struct tc_u_knode *pins;
err = u32_replace_hw_knode(tp, n, flags, extack);
if (err)
- goto errhw;
+ goto errunbind;
if (!tc_in_hw(n->flags))
n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
@@ -1100,7 +1167,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return 0;
}
-errhw:
+errunbind:
+ u32_unbind_filter(tp, n, tb);
+
#ifdef CONFIG_CLS_U32_MARK
free_percpu(n->pcpu_success);
#endif
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index af85a73c4c54..6fdba069f6bf 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -568,7 +568,7 @@ META_COLLECTOR(int_sk_rcvtimeo)
*err = -1;
return;
}
- dst->value = sk->sk_rcvtimeo / HZ;
+ dst->value = READ_ONCE(sk->sk_rcvtimeo) / HZ;
}
META_COLLECTOR(int_sk_sndtimeo)
@@ -579,7 +579,7 @@ META_COLLECTOR(int_sk_sndtimeo)
*err = -1;
return;
}
- dst->value = sk->sk_sndtimeo / HZ;
+ dst->value = READ_ONCE(sk->sk_sndtimeo) / HZ;
}
META_COLLECTOR(int_sk_sendmsg_off)
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index ab69ff7577fc..793009f445c0 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -290,6 +290,13 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
"Attribute type expected to be TCA_MQPRIO_MIN_RATE64");
return -EINVAL;
}
+
+ if (nla_len(attr) != sizeof(u64)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Attribute TCA_MQPRIO_MIN_RATE64 expected to have 8 bytes length");
+ return -EINVAL;
+ }
+
if (i >= qopt->num_tc)
break;
priv->min_rate[i] = nla_get_u64(attr);
@@ -312,6 +319,13 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
"Attribute type expected to be TCA_MQPRIO_MAX_RATE64");
return -EINVAL;
}
+
+ if (nla_len(attr) != sizeof(u64)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Attribute TCA_MQPRIO_MAX_RATE64 expected to have 8 bytes length");
+ return -EINVAL;
+ }
+
if (i >= qopt->num_tc)
break;
priv->max_rate[i] = nla_get_u64(attr);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index dfd9a99e6257..befaf74b33ca 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -381,8 +381,13 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
u32 lmax)
{
struct qfq_sched *q = qdisc_priv(sch);
- struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
+ struct qfq_aggregate *new_agg;
+ /* 'lmax' can range from [QFQ_MIN_LMAX, pktlen + stab overhead] */
+ if (lmax > QFQ_MAX_LMAX)
+ return -EINVAL;
+
+ new_agg = qfq_find_agg(q, lmax, weight);
if (new_agg == NULL) { /* create new aggregate */
new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
if (new_agg == NULL)
@@ -423,10 +428,17 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
else
weight = 1;
- if (tb[TCA_QFQ_LMAX])
+ if (tb[TCA_QFQ_LMAX]) {
lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
- else
+ } else {
+ /* MTU size is user controlled */
lmax = psched_mtu(qdisc_dev(sch));
+ if (lmax < QFQ_MIN_LMAX || lmax > QFQ_MAX_LMAX) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "MTU size out of bounds for qfq");
+ return -EINVAL;
+ }
+ }
inv_w = ONE_FP / weight;
weight = ONE_FP / inv_w;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 717ae51d94a0..8c9cfff7fd05 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1015,6 +1015,11 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
TC_FP_PREEMPTIBLE),
};
+static struct netlink_range_validation_signed taprio_cycle_time_range = {
+ .min = 0,
+ .max = INT_MAX,
+};
+
static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_PRIOMAP] = {
.len = sizeof(struct tc_mqprio_qopt)
@@ -1023,7 +1028,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
[TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
- [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] =
+ NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
[TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
@@ -1159,6 +1165,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
return -EINVAL;
}
+ if (cycle < 0 || cycle > INT_MAX) {
+ NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
+ return -EINVAL;
+ }
+
new->cycle_time = cycle;
}
@@ -1347,7 +1358,7 @@ static void setup_txtime(struct taprio_sched *q,
struct sched_gate_list *sched, ktime_t base)
{
struct sched_entry *entry;
- u32 interval = 0;
+ u64 interval = 0;
list_for_each_entry(entry, &sched->entries, list) {
entry->next_txtime = ktime_add_ns(base, interval);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6554a357fe33..9388d98aebc0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -364,9 +364,9 @@ static void sctp_auto_asconf_init(struct sctp_sock *sp)
struct net *net = sock_net(&sp->inet.sk);
if (net->sctp.default_auto_asconf) {
- spin_lock(&net->sctp.addr_wq_lock);
+ spin_lock_bh(&net->sctp.addr_wq_lock);
list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist);
- spin_unlock(&net->sctp.addr_wq_lock);
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
sp->do_auto_asconf = 1;
}
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index a7f887d91d89..f5834af5fad5 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -378,8 +378,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
sk->sk_protocol = protocol;
- WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
- WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
+ WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
+ WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_WORK(&smc->connect_work, smc_connect_work);
@@ -436,24 +436,9 @@ out:
return rc;
}
-static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
- unsigned long mask)
-{
- /* options we don't get control via setsockopt for */
- nsk->sk_type = osk->sk_type;
- nsk->sk_sndbuf = osk->sk_sndbuf;
- nsk->sk_rcvbuf = osk->sk_rcvbuf;
- nsk->sk_sndtimeo = osk->sk_sndtimeo;
- nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
- nsk->sk_mark = osk->sk_mark;
- nsk->sk_priority = osk->sk_priority;
- nsk->sk_rcvlowat = osk->sk_rcvlowat;
- nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
- nsk->sk_err = osk->sk_err;
-
- nsk->sk_flags &= ~mask;
- nsk->sk_flags |= osk->sk_flags & mask;
-}
+/* copy only relevant settings and flags of SOL_SOCKET level from smc to
+ * clc socket (since smc is not called for these options from net/core)
+ */
#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
(1UL << SOCK_KEEPOPEN) | \
@@ -470,9 +455,55 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
(1UL << SOCK_NOFCS) | \
(1UL << SOCK_FILTER_LOCKED) | \
(1UL << SOCK_TSTAMP_NEW))
-/* copy only relevant settings and flags of SOL_SOCKET level from smc to
- * clc socket (since smc is not called for these options from net/core)
- */
+
+/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
+static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
+ unsigned long mask)
+{
+ struct net *nnet = sock_net(nsk);
+
+ nsk->sk_userlocks = osk->sk_userlocks;
+ if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+ nsk->sk_sndbuf = osk->sk_sndbuf;
+ } else {
+ if (mask == SK_FLAGS_SMC_TO_CLC)
+ WRITE_ONCE(nsk->sk_sndbuf,
+ READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
+ else
+ WRITE_ONCE(nsk->sk_sndbuf,
+ 2 * READ_ONCE(nnet->smc.sysctl_wmem));
+ }
+ if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+ nsk->sk_rcvbuf = osk->sk_rcvbuf;
+ } else {
+ if (mask == SK_FLAGS_SMC_TO_CLC)
+ WRITE_ONCE(nsk->sk_rcvbuf,
+ READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
+ else
+ WRITE_ONCE(nsk->sk_rcvbuf,
+ 2 * READ_ONCE(nnet->smc.sysctl_rmem));
+ }
+}
+
+static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
+ unsigned long mask)
+{
+ /* options we don't get control via setsockopt for */
+ nsk->sk_type = osk->sk_type;
+ nsk->sk_sndtimeo = osk->sk_sndtimeo;
+ nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
+ nsk->sk_mark = READ_ONCE(osk->sk_mark);
+ nsk->sk_priority = osk->sk_priority;
+ nsk->sk_rcvlowat = osk->sk_rcvlowat;
+ nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
+ nsk->sk_err = osk->sk_err;
+
+ nsk->sk_flags &= ~mask;
+ nsk->sk_flags |= osk->sk_flags & mask;
+
+ smc_adjust_sock_bufsizes(nsk, osk, mask);
+}
+
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
@@ -2479,8 +2510,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
- new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
sock_hold(&new_smc->sk); /* sock_put in passive closing */
if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
sock_put(&new_smc->sk);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 2eeea4cdc718..1f2b912c43d1 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -161,7 +161,7 @@ struct smc_connection {
struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */
struct smc_buf_desc *rmb_desc; /* RMBE descriptor */
- int rmbe_size_short;/* compressed notation */
+ int rmbe_size_comp; /* compressed notation */
int rmbe_update_limit;
/* lower limit for consumer
* cursor update
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index b9b8b07aa702..c90d9e5dda54 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -1007,7 +1007,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->d0.gid =
conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd);
clc->d0.token = conn->rmb_desc->token;
- clc->d0.dmbe_size = conn->rmbe_size_short;
+ clc->d0.dmbe_size = conn->rmbe_size_comp;
clc->d0.dmbe_idx = 0;
memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
if (version == SMC_V1) {
@@ -1050,7 +1050,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
break;
}
- clc->r0.rmbe_size = conn->rmbe_size_short;
+ clc->r0.rmbe_size = conn->rmbe_size_comp;
clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
cpu_to_be64((u64)sg_dma_address
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3f465faf2b68..6b78075404d7 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -2309,31 +2309,30 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_connection *conn = &smc->conn;
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
- int bufsize, bufsize_short;
+ int bufsize, bufsize_comp;
struct rw_semaphore *lock; /* lock buffer list */
bool is_dgraded = false;
- int sk_buf_size;
if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */
- sk_buf_size = smc->sk.sk_rcvbuf;
+ bufsize = smc->sk.sk_rcvbuf / 2;
else
/* use socket send buffer size (w/o overhead) as start value */
- sk_buf_size = smc->sk.sk_sndbuf;
+ bufsize = smc->sk.sk_sndbuf / 2;
- for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
- bufsize_short >= 0; bufsize_short--) {
+ for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
+ bufsize_comp >= 0; bufsize_comp--) {
if (is_rmb) {
lock = &lgr->rmbs_lock;
- buf_list = &lgr->rmbs[bufsize_short];
+ buf_list = &lgr->rmbs[bufsize_comp];
} else {
lock = &lgr->sndbufs_lock;
- buf_list = &lgr->sndbufs[bufsize_short];
+ buf_list = &lgr->sndbufs[bufsize_comp];
}
- bufsize = smc_uncompress_bufsize(bufsize_short);
+ bufsize = smc_uncompress_bufsize(bufsize_comp);
/* check for reusable slot in the link group */
- buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
+ buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
if (buf_desc) {
buf_desc->is_dma_need_sync = 0;
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
@@ -2377,8 +2376,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb) {
conn->rmb_desc = buf_desc;
- conn->rmbe_size_short = bufsize_short;
- smc->sk.sk_rcvbuf = bufsize;
+ conn->rmbe_size_comp = bufsize_comp;
+ smc->sk.sk_rcvbuf = bufsize * 2;
atomic_set(&conn->bytes_to_rcv, 0);
conn->rmbe_update_limit =
smc_rmb_wnd_update_limit(buf_desc->len);
@@ -2386,7 +2385,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
} else {
conn->sndbuf_desc = buf_desc;
- smc->sk.sk_sndbuf = bufsize;
+ smc->sk.sk_sndbuf = bufsize * 2;
atomic_set(&conn->sndbuf_space, bufsize);
}
return 0;
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index b6f79fabb9d3..0b2a957ca5f5 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -21,6 +21,10 @@
static int min_sndbuf = SMC_BUF_MIN_SIZE;
static int min_rcvbuf = SMC_BUF_MIN_SIZE;
+static int max_sndbuf = INT_MAX / 2;
+static int max_rcvbuf = INT_MAX / 2;
+static const int net_smc_wmem_init = (64 * 1024);
+static const int net_smc_rmem_init = (64 * 1024);
static struct ctl_table smc_table[] = {
{
@@ -53,6 +57,7 @@ static struct ctl_table smc_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_sndbuf,
+ .extra2 = &max_sndbuf,
},
{
.procname = "rmem",
@@ -61,6 +66,7 @@ static struct ctl_table smc_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_rcvbuf,
+ .extra2 = &max_rcvbuf,
},
{ }
};
@@ -88,8 +94,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
- WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
- WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
+ WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init);
+ WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
return 0;
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 1c8de397d6ad..f89c10fe7e6a 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
- auth.o auth_null.o auth_unix.o \
+ auth.o auth_null.o auth_tls.o auth_unix.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \
addr.o rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o sysfs.o \
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index fb75a883503f..2f16f9d17966 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -32,7 +32,7 @@ static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS;
static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
[RPC_AUTH_NULL] = (const struct rpc_authops __force __rcu *)&authnull_ops,
[RPC_AUTH_UNIX] = (const struct rpc_authops __force __rcu *)&authunix_ops,
- NULL, /* others can be loadable modules */
+ [RPC_AUTH_TLS] = (const struct rpc_authops __force __rcu *)&authtls_ops,
};
static LIST_HEAD(cred_unused);
diff --git a/net/sunrpc/auth_tls.c b/net/sunrpc/auth_tls.c
new file mode 100644
index 000000000000..de7678f8a23d
--- /dev/null
+++ b/net/sunrpc/auth_tls.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, 2022 Oracle. All rights reserved.
+ *
+ * The AUTH_TLS credential is used only to probe a remote peer
+ * for RPC-over-TLS support.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/sunrpc/clnt.h>
+
+static const char *starttls_token = "STARTTLS";
+static const size_t starttls_len = 8;
+
+static struct rpc_auth tls_auth;
+static struct rpc_cred tls_cred;
+
+static void tls_encode_probe(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ const void *obj)
+{
+}
+
+static int tls_decode_probe(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ void *obj)
+{
+ return 0;
+}
+
+static const struct rpc_procinfo rpcproc_tls_probe = {
+ .p_encode = tls_encode_probe,
+ .p_decode = tls_decode_probe,
+};
+
+static void rpc_tls_probe_call_prepare(struct rpc_task *task, void *data)
+{
+ task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
+ rpc_call_start(task);
+}
+
+static void rpc_tls_probe_call_done(struct rpc_task *task, void *data)
+{
+}
+
+static const struct rpc_call_ops rpc_tls_probe_ops = {
+ .rpc_call_prepare = rpc_tls_probe_call_prepare,
+ .rpc_call_done = rpc_tls_probe_call_done,
+};
+
+static int tls_probe(struct rpc_clnt *clnt)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &rpcproc_tls_probe,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = &msg,
+ .rpc_op_cred = &tls_cred,
+ .callback_ops = &rpc_tls_probe_ops,
+ .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
+ };
+ struct rpc_task *task;
+ int status;
+
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ status = task->tk_status;
+ rpc_put_task(task);
+ return status;
+}
+
+static struct rpc_auth *tls_create(const struct rpc_auth_create_args *args,
+ struct rpc_clnt *clnt)
+{
+ refcount_inc(&tls_auth.au_count);
+ return &tls_auth;
+}
+
+static void tls_destroy(struct rpc_auth *auth)
+{
+}
+
+static struct rpc_cred *tls_lookup_cred(struct rpc_auth *auth,
+ struct auth_cred *acred, int flags)
+{
+ return get_rpccred(&tls_cred);
+}
+
+static void tls_destroy_cred(struct rpc_cred *cred)
+{
+}
+
+static int tls_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
+{
+ return 1;
+}
+
+static int tls_marshal(struct rpc_task *task, struct xdr_stream *xdr)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4 * XDR_UNIT);
+ if (!p)
+ return -EMSGSIZE;
+ /* Credential */
+ *p++ = rpc_auth_tls;
+ *p++ = xdr_zero;
+ /* Verifier */
+ *p++ = rpc_auth_null;
+ *p = xdr_zero;
+ return 0;
+}
+
+static int tls_refresh(struct rpc_task *task)
+{
+ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
+ return 0;
+}
+
+static int tls_validate(struct rpc_task *task, struct xdr_stream *xdr)
+{
+ __be32 *p;
+ void *str;
+
+ p = xdr_inline_decode(xdr, XDR_UNIT);
+ if (!p)
+ return -EIO;
+ if (*p != rpc_auth_null)
+ return -EIO;
+ if (xdr_stream_decode_opaque_inline(xdr, &str, starttls_len) != starttls_len)
+ return -EIO;
+ if (memcmp(str, starttls_token, starttls_len))
+ return -EIO;
+ return 0;
+}
+
+const struct rpc_authops authtls_ops = {
+ .owner = THIS_MODULE,
+ .au_flavor = RPC_AUTH_TLS,
+ .au_name = "NULL",
+ .create = tls_create,
+ .destroy = tls_destroy,
+ .lookup_cred = tls_lookup_cred,
+ .ping = tls_probe,
+};
+
+static struct rpc_auth tls_auth = {
+ .au_cslack = NUL_CALLSLACK,
+ .au_rslack = NUL_REPLYSLACK,
+ .au_verfsize = NUL_REPLYSLACK,
+ .au_ralign = NUL_REPLYSLACK,
+ .au_ops = &authtls_ops,
+ .au_flavor = RPC_AUTH_TLS,
+ .au_count = REFCOUNT_INIT(1),
+};
+
+static const struct rpc_credops tls_credops = {
+ .cr_name = "AUTH_TLS",
+ .crdestroy = tls_destroy_cred,
+ .crmatch = tls_match,
+ .crmarshal = tls_marshal,
+ .crwrap_req = rpcauth_wrap_req_encode,
+ .crrefresh = tls_refresh,
+ .crvalidate = tls_validate,
+ .crunwrap_resp = rpcauth_unwrap_resp_decode,
+};
+
+static struct rpc_cred tls_cred = {
+ .cr_lru = LIST_HEAD_INIT(tls_cred.cr_lru),
+ .cr_auth = &tls_auth,
+ .cr_ops = &tls_credops,
+ .cr_count = REFCOUNT_INIT(2),
+ .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
+};
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d2ee56634308..d7c697af3762 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -385,6 +385,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
if (!clnt)
goto out_err;
clnt->cl_parent = parent ? : clnt;
+ clnt->cl_xprtsec = args->xprtsec;
err = rpc_alloc_clid(clnt);
if (err)
@@ -434,7 +435,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
if (parent)
refcount_inc(&parent->cl_count);
- trace_rpc_clnt_new(clnt, xprt, program->name, args->servername);
+ trace_rpc_clnt_new(clnt, xprt, args);
return clnt;
out_no_path:
@@ -532,6 +533,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
.addrlen = args->addrsize,
.servername = args->servername,
.bc_xprt = args->bc_xprt,
+ .xprtsec = args->xprtsec,
};
char servername[48];
struct rpc_clnt *clnt;
@@ -565,8 +567,12 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
servername[0] = '\0';
switch (args->address->sa_family) {
case AF_LOCAL:
- snprintf(servername, sizeof(servername), "%s",
- sun->sun_path);
+ if (sun->sun_path[0])
+ snprintf(servername, sizeof(servername), "%s",
+ sun->sun_path);
+ else
+ snprintf(servername, sizeof(servername), "@%s",
+ sun->sun_path+1);
break;
case AF_INET:
snprintf(servername, sizeof(servername), "%pI4",
@@ -727,6 +733,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
struct rpc_clnt *parent;
int err;
+ args->xprtsec = clnt->cl_xprtsec;
xprt = xprt_create_transport(args);
if (IS_ERR(xprt))
return PTR_ERR(xprt);
@@ -1717,6 +1724,11 @@ call_start(struct rpc_task *task)
trace_rpc_request(task);
+ if (task->tk_client->cl_shutdown) {
+ rpc_call_rpcerror(task, -EIO);
+ return;
+ }
+
/* Increment call count (version might not be valid for ping) */
if (clnt->cl_program->version[clnt->cl_vers])
clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
@@ -2826,6 +2838,9 @@ static int rpc_ping(struct rpc_clnt *clnt)
struct rpc_task *task;
int status;
+ if (clnt->cl_auth->au_ops->ping)
+ return clnt->cl_auth->au_ops->ping(clnt);
+
task = rpc_call_null_helper(clnt, NULL, NULL, 0, NULL, NULL);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -3046,6 +3061,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
if (!xprtargs->ident)
xprtargs->ident = ident;
+ xprtargs->xprtsec = clnt->cl_xprtsec;
xprt = xprt_create_transport(xprtargs);
if (IS_ERR(xprt)) {
ret = PTR_ERR(xprt);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 5a8e6d46809a..5988a5c5ff3f 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -36,6 +36,7 @@
#include "netns.h"
#define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock"
+#define RPCBIND_SOCK_ABSTRACT_NAME "\0/run/rpcbind.sock"
#define RPCBIND_PROGRAM (100000u)
#define RPCBIND_PORT (111u)
@@ -216,21 +217,22 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
sn->rpcb_users = 1;
}
+/* Evaluate to actual length of the `sockaddr_un' structure. */
+# define SUN_LEN(ptr) (offsetof(struct sockaddr_un, sun_path) \
+ + 1 + strlen((ptr)->sun_path + 1))
+
/*
* Returns zero on success, otherwise a negative errno value
* is returned.
*/
-static int rpcb_create_local_unix(struct net *net)
+static int rpcb_create_af_local(struct net *net,
+ const struct sockaddr_un *addr)
{
- static const struct sockaddr_un rpcb_localaddr_rpcbind = {
- .sun_family = AF_LOCAL,
- .sun_path = RPCBIND_SOCK_PATHNAME,
- };
struct rpc_create_args args = {
.net = net,
.protocol = XPRT_TRANSPORT_LOCAL,
- .address = (struct sockaddr *)&rpcb_localaddr_rpcbind,
- .addrsize = sizeof(rpcb_localaddr_rpcbind),
+ .address = (struct sockaddr *)addr,
+ .addrsize = SUN_LEN(addr),
.servername = "localhost",
.program = &rpcb_program,
.version = RPCBVERS_2,
@@ -269,6 +271,26 @@ out:
return result;
}
+static int rpcb_create_local_abstract(struct net *net)
+{
+ static const struct sockaddr_un rpcb_localaddr_abstract = {
+ .sun_family = AF_LOCAL,
+ .sun_path = RPCBIND_SOCK_ABSTRACT_NAME,
+ };
+
+ return rpcb_create_af_local(net, &rpcb_localaddr_abstract);
+}
+
+static int rpcb_create_local_unix(struct net *net)
+{
+ static const struct sockaddr_un rpcb_localaddr_unix = {
+ .sun_family = AF_LOCAL,
+ .sun_path = RPCBIND_SOCK_PATHNAME,
+ };
+
+ return rpcb_create_af_local(net, &rpcb_localaddr_unix);
+}
+
/*
* Returns zero on success, otherwise a negative errno value
* is returned.
@@ -332,7 +354,8 @@ int rpcb_create_local(struct net *net)
if (rpcb_get_local(net))
goto out;
- if (rpcb_create_local_unix(net) != 0)
+ if (rpcb_create_local_abstract(net) != 0 &&
+ rpcb_create_local_unix(net) != 0)
result = rpcb_create_local_net(net);
out:
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index 0d0db4e1064e..5c8ecdaaa985 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -239,6 +239,7 @@ static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
if (!xprt)
return 0;
if (!(xprt->xprt_class->ident == XPRT_TRANSPORT_TCP ||
+ xprt->xprt_class->ident == XPRT_TRANSPORT_TCP_TLS ||
xprt->xprt_class->ident == XPRT_TRANSPORT_RDMA)) {
xprt_put(xprt);
return -EOPNOTSUPP;
diff --git a/net/sunrpc/sysfs.h b/net/sunrpc/sysfs.h
index 6620cebd1037..d2dd77a0a0e9 100644
--- a/net/sunrpc/sysfs.h
+++ b/net/sunrpc/sysfs.h
@@ -5,13 +5,6 @@
#ifndef __SUNRPC_SYSFS_H
#define __SUNRPC_SYSFS_H
-struct rpc_sysfs_client {
- struct kobject kobject;
- struct net *net;
- struct rpc_clnt *clnt;
- struct rpc_xprt_switch *xprt_switch;
-};
-
struct rpc_sysfs_xprt_switch {
struct kobject kobject;
struct net *net;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 5f9030b81c9e..9f010369100a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,6 +47,9 @@
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>
+#include <net/tls.h>
+#include <net/handshake.h>
+
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/uio.h>
@@ -96,6 +99,7 @@ static struct ctl_table_header *sunrpc_table_header;
static struct xprt_class xs_local_transport;
static struct xprt_class xs_udp_transport;
static struct xprt_class xs_tcp_transport;
+static struct xprt_class xs_tcp_tls_transport;
static struct xprt_class xs_bc_tcp_transport;
/*
@@ -187,6 +191,11 @@ static struct ctl_table xs_tunables_table[] = {
*/
#define XS_IDLE_DISC_TO (5U * 60 * HZ)
+/*
+ * TLS handshake timeout.
+ */
+#define XS_TLS_HANDSHAKE_TO (10U * HZ)
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# undef RPC_DEBUG_DATA
# define RPCDBG_FACILITY RPCDBG_TRANS
@@ -253,7 +262,12 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt)
switch (sap->sa_family) {
case AF_LOCAL:
sun = xs_addr_un(xprt);
- strscpy(buf, sun->sun_path, sizeof(buf));
+ if (sun->sun_path[0]) {
+ strscpy(buf, sun->sun_path, sizeof(buf));
+ } else {
+ buf[0] = '@';
+ strscpy(buf+1, sun->sun_path+1, sizeof(buf)-1);
+ }
xprt->address_strings[RPC_DISPLAY_ADDR] =
kstrdup(buf, GFP_KERNEL);
break;
@@ -342,13 +356,56 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
return want;
}
+static int
+xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
+ struct cmsghdr *cmsg, int ret)
+{
+ if (cmsg->cmsg_level == SOL_TLS &&
+ cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
+ u8 content_type = *((u8 *)CMSG_DATA(cmsg));
+
+ switch (content_type) {
+ case TLS_RECORD_TYPE_DATA:
+ /* TLS sets EOR at the end of each application data
+ * record, even though there might be more frames
+ * waiting to be decrypted.
+ */
+ msg->msg_flags &= ~MSG_EOR;
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ ret = -ENOTCONN;
+ break;
+ default:
+ ret = -EAGAIN;
+ }
+ }
+ return ret;
+}
+
+static int
+xs_sock_recv_cmsg(struct socket *sock, struct msghdr *msg, int flags)
+{
+ union {
+ struct cmsghdr cmsg;
+ u8 buf[CMSG_SPACE(sizeof(u8))];
+ } u;
+ int ret;
+
+ msg->msg_control = &u;
+ msg->msg_controllen = sizeof(u);
+ ret = sock_recvmsg(sock, msg, flags);
+ if (msg->msg_controllen != sizeof(u))
+ ret = xs_sock_process_cmsg(sock, msg, &u.cmsg, ret);
+ return ret;
+}
+
static ssize_t
xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
{
ssize_t ret;
if (seek != 0)
iov_iter_advance(&msg->msg_iter, seek);
- ret = sock_recvmsg(sock, msg, flags);
+ ret = xs_sock_recv_cmsg(sock, msg, flags);
return ret > 0 ? ret + seek : ret;
}
@@ -374,7 +431,7 @@ xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
size_t count)
{
iov_iter_discard(&msg->msg_iter, ITER_DEST, count);
- return sock_recvmsg(sock, msg, flags);
+ return xs_sock_recv_cmsg(sock, msg, flags);
}
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
@@ -695,6 +752,8 @@ static void xs_poll_check_readable(struct sock_xprt *transport)
{
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+ if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state))
+ return;
if (!xs_poll_socket_readable(transport))
return;
if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
@@ -1191,6 +1250,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
if (atomic_read(&transport->xprt.swapper))
sk_clear_memalloc(sk);
+ tls_handshake_cancel(sk);
+
kernel_sock_shutdown(sock, SHUT_RDWR);
mutex_lock(&transport->recv_mutex);
@@ -1380,6 +1441,10 @@ static void xs_data_ready(struct sock *sk)
trace_xs_data_ready(xprt);
transport->old_data_ready(sk);
+
+ if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state))
+ return;
+
/* Any data means we had a useful conversation, so
* then we don't need to delay the next reconnect
*/
@@ -2360,6 +2425,267 @@ out_unlock:
current_restore_flags(pflags, PF_MEMALLOC);
}
+/*
+ * Transfer the connected socket to @upper_transport, then mark that
+ * xprt CONNECTED.
+ */
+static int xs_tcp_tls_finish_connecting(struct rpc_xprt *lower_xprt,
+ struct sock_xprt *upper_transport)
+{
+ struct sock_xprt *lower_transport =
+ container_of(lower_xprt, struct sock_xprt, xprt);
+ struct rpc_xprt *upper_xprt = &upper_transport->xprt;
+
+ if (!upper_transport->inet) {
+ struct socket *sock = lower_transport->sock;
+ struct sock *sk = sock->sk;
+
+ /* Avoid temporary address, they are bad for long-lived
+ * connections such as NFS mounts.
+ * RFC4941, section 3.6 suggests that:
+ * Individual applications, which have specific
+ * knowledge about the normal duration of connections,
+ * MAY override this as appropriate.
+ */
+ if (xs_addr(upper_xprt)->sa_family == PF_INET6)
+ ip6_sock_set_addr_preferences(sk, IPV6_PREFER_SRC_PUBLIC);
+
+ xs_tcp_set_socket_timeouts(upper_xprt, sock);
+ tcp_sock_set_nodelay(sk);
+
+ lock_sock(sk);
+
+ /* @sk is already connected, so it now has the RPC callbacks.
+ * Reach into @lower_transport to save the original ones.
+ */
+ upper_transport->old_data_ready = lower_transport->old_data_ready;
+ upper_transport->old_state_change = lower_transport->old_state_change;
+ upper_transport->old_write_space = lower_transport->old_write_space;
+ upper_transport->old_error_report = lower_transport->old_error_report;
+ sk->sk_user_data = upper_xprt;
+
+ /* socket options */
+ sock_reset_flag(sk, SOCK_LINGER);
+
+ xprt_clear_connected(upper_xprt);
+
+ upper_transport->sock = sock;
+ upper_transport->inet = sk;
+ upper_transport->file = lower_transport->file;
+
+ release_sock(sk);
+
+ /* Reset lower_transport before shutting down its clnt */
+ mutex_lock(&lower_transport->recv_mutex);
+ lower_transport->inet = NULL;
+ lower_transport->sock = NULL;
+ lower_transport->file = NULL;
+
+ xprt_clear_connected(lower_xprt);
+ xs_sock_reset_connection_flags(lower_xprt);
+ xs_stream_reset_connect(lower_transport);
+ mutex_unlock(&lower_transport->recv_mutex);
+ }
+
+ if (!xprt_bound(upper_xprt))
+ return -ENOTCONN;
+
+ xs_set_memalloc(upper_xprt);
+
+ if (!xprt_test_and_set_connected(upper_xprt)) {
+ upper_xprt->connect_cookie++;
+ clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state);
+ xprt_clear_connecting(upper_xprt);
+
+ upper_xprt->stat.connect_count++;
+ upper_xprt->stat.connect_time += (long)jiffies -
+ upper_xprt->stat.connect_start;
+ xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING);
+ }
+ return 0;
+}
+
+/**
+ * xs_tls_handshake_done - TLS handshake completion handler
+ * @data: address of xprt to wake
+ * @status: status of handshake
+ * @peerid: serial number of key containing the remote's identity
+ *
+ */
+static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid)
+{
+ struct rpc_xprt *lower_xprt = data;
+ struct sock_xprt *lower_transport =
+ container_of(lower_xprt, struct sock_xprt, xprt);
+
+ lower_transport->xprt_err = status ? -EACCES : 0;
+ complete(&lower_transport->handshake_done);
+ xprt_put(lower_xprt);
+}
+
+static int xs_tls_handshake_sync(struct rpc_xprt *lower_xprt, struct xprtsec_parms *xprtsec)
+{
+ struct sock_xprt *lower_transport =
+ container_of(lower_xprt, struct sock_xprt, xprt);
+ struct tls_handshake_args args = {
+ .ta_sock = lower_transport->sock,
+ .ta_done = xs_tls_handshake_done,
+ .ta_data = xprt_get(lower_xprt),
+ .ta_peername = lower_xprt->servername,
+ };
+ struct sock *sk = lower_transport->inet;
+ int rc;
+
+ init_completion(&lower_transport->handshake_done);
+ set_bit(XPRT_SOCK_IGNORE_RECV, &lower_transport->sock_state);
+ lower_transport->xprt_err = -ETIMEDOUT;
+ switch (xprtsec->policy) {
+ case RPC_XPRTSEC_TLS_ANON:
+ rc = tls_client_hello_anon(&args, GFP_KERNEL);
+ if (rc)
+ goto out_put_xprt;
+ break;
+ case RPC_XPRTSEC_TLS_X509:
+ args.ta_my_cert = xprtsec->cert_serial;
+ args.ta_my_privkey = xprtsec->privkey_serial;
+ rc = tls_client_hello_x509(&args, GFP_KERNEL);
+ if (rc)
+ goto out_put_xprt;
+ break;
+ default:
+ rc = -EACCES;
+ goto out_put_xprt;
+ }
+
+ rc = wait_for_completion_interruptible_timeout(&lower_transport->handshake_done,
+ XS_TLS_HANDSHAKE_TO);
+ if (rc <= 0) {
+ if (!tls_handshake_cancel(sk)) {
+ if (rc == 0)
+ rc = -ETIMEDOUT;
+ goto out_put_xprt;
+ }
+ }
+
+ rc = lower_transport->xprt_err;
+
+out:
+ xs_stream_reset_connect(lower_transport);
+ clear_bit(XPRT_SOCK_IGNORE_RECV, &lower_transport->sock_state);
+ return rc;
+
+out_put_xprt:
+ xprt_put(lower_xprt);
+ goto out;
+}
+
+/**
+ * xs_tcp_tls_setup_socket - establish a TLS session on a TCP socket
+ * @work: queued work item
+ *
+ * Invoked by a work queue tasklet.
+ *
+ * For RPC-with-TLS, there is a two-stage connection process.
+ *
+ * The "upper-layer xprt" is visible to the RPC consumer. Once it has
+ * been marked connected, the consumer knows that a TCP connection and
+ * a TLS session have been established.
+ *
+ * A "lower-layer xprt", created in this function, handles the mechanics
+ * of connecting the TCP socket, performing the RPC_AUTH_TLS probe, and
+ * then driving the TLS handshake. Once all that is complete, the upper
+ * layer xprt is marked connected.
+ */
+static void xs_tcp_tls_setup_socket(struct work_struct *work)
+{
+ struct sock_xprt *upper_transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_clnt *upper_clnt = upper_transport->clnt;
+ struct rpc_xprt *upper_xprt = &upper_transport->xprt;
+ struct rpc_create_args args = {
+ .net = upper_xprt->xprt_net,
+ .protocol = upper_xprt->prot,
+ .address = (struct sockaddr *)&upper_xprt->addr,
+ .addrsize = upper_xprt->addrlen,
+ .timeout = upper_clnt->cl_timeout,
+ .servername = upper_xprt->servername,
+ .program = upper_clnt->cl_program,
+ .prognumber = upper_clnt->cl_prog,
+ .version = upper_clnt->cl_vers,
+ .authflavor = RPC_AUTH_TLS,
+ .cred = upper_clnt->cl_cred,
+ .xprtsec = {
+ .policy = RPC_XPRTSEC_NONE,
+ },
+ };
+ unsigned int pflags = current->flags;
+ struct rpc_clnt *lower_clnt;
+ struct rpc_xprt *lower_xprt;
+ int status;
+
+ if (atomic_read(&upper_xprt->swapper))
+ current->flags |= PF_MEMALLOC;
+
+ xs_stream_start_connect(upper_transport);
+
+ /* This implicitly sends an RPC_AUTH_TLS probe */
+ lower_clnt = rpc_create(&args);
+ if (IS_ERR(lower_clnt)) {
+ trace_rpc_tls_unavailable(upper_clnt, upper_xprt);
+ clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state);
+ xprt_clear_connecting(upper_xprt);
+ xprt_wake_pending_tasks(upper_xprt, PTR_ERR(lower_clnt));
+ xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING);
+ goto out_unlock;
+ }
+
+ /* RPC_AUTH_TLS probe was successful. Try a TLS handshake on
+ * the lower xprt.
+ */
+ rcu_read_lock();
+ lower_xprt = rcu_dereference(lower_clnt->cl_xprt);
+ rcu_read_unlock();
+ status = xs_tls_handshake_sync(lower_xprt, &upper_xprt->xprtsec);
+ if (status) {
+ trace_rpc_tls_not_started(upper_clnt, upper_xprt);
+ goto out_close;
+ }
+
+ status = xs_tcp_tls_finish_connecting(lower_xprt, upper_transport);
+ if (status)
+ goto out_close;
+
+ trace_rpc_socket_connect(upper_xprt, upper_transport->sock, 0);
+ if (!xprt_test_and_set_connected(upper_xprt)) {
+ upper_xprt->connect_cookie++;
+ clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state);
+ xprt_clear_connecting(upper_xprt);
+
+ upper_xprt->stat.connect_count++;
+ upper_xprt->stat.connect_time += (long)jiffies -
+ upper_xprt->stat.connect_start;
+ xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING);
+ }
+ rpc_shutdown_client(lower_clnt);
+
+out_unlock:
+ current_restore_flags(pflags, PF_MEMALLOC);
+ upper_transport->clnt = NULL;
+ xprt_unlock_connect(upper_xprt, upper_transport);
+ return;
+
+out_close:
+ rpc_shutdown_client(lower_clnt);
+
+ /* xprt_force_disconnect() wakes tasks with a fixed tk_status code.
+ * Wake them first here to ensure they get our tk_status code.
+ */
+ xprt_wake_pending_tasks(upper_xprt, status);
+ xs_tcp_force_close(upper_xprt);
+ xprt_clear_connecting(upper_xprt);
+ goto out_unlock;
+}
+
/**
* xs_connect - connect a socket to a remote endpoint
* @xprt: pointer to transport structure
@@ -2391,6 +2717,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
} else
dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
+ transport->clnt = task->tk_client;
queue_delayed_work(xprtiod_workqueue,
&transport->connect_worker,
delay);
@@ -2858,7 +3185,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
switch (sun->sun_family) {
case AF_LOCAL:
- if (sun->sun_path[0] != '/') {
+ if (sun->sun_path[0] != '/' && sun->sun_path[0] != '\0') {
dprintk("RPC: bad AF_LOCAL address: %s\n",
sun->sun_path);
ret = ERR_PTR(-EINVAL);
@@ -3045,6 +3372,94 @@ out_err:
}
/**
+ * xs_setup_tcp_tls - Set up transport to use a TCP with TLS
+ * @args: rpc transport creation arguments
+ *
+ */
+static struct rpc_xprt *xs_setup_tcp_tls(struct xprt_create *args)
+{
+ struct sockaddr *addr = args->dstaddr;
+ struct rpc_xprt *xprt;
+ struct sock_xprt *transport;
+ struct rpc_xprt *ret;
+ unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries;
+
+ if (args->flags & XPRT_CREATE_INFINITE_SLOTS)
+ max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT;
+
+ xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+ max_slot_table_size);
+ if (IS_ERR(xprt))
+ return xprt;
+ transport = container_of(xprt, struct sock_xprt, xprt);
+
+ xprt->prot = IPPROTO_TCP;
+ xprt->xprt_class = &xs_tcp_transport;
+ xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
+
+ xprt->bind_timeout = XS_BIND_TO;
+ xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+ xprt->idle_timeout = XS_IDLE_DISC_TO;
+
+ xprt->ops = &xs_tcp_ops;
+ xprt->timeout = &xs_tcp_default_timeout;
+
+ xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+ xprt->connect_timeout = xprt->timeout->to_initval *
+ (xprt->timeout->to_retries + 1);
+
+ INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
+ INIT_WORK(&transport->error_worker, xs_error_handle);
+
+ switch (args->xprtsec.policy) {
+ case RPC_XPRTSEC_TLS_ANON:
+ case RPC_XPRTSEC_TLS_X509:
+ xprt->xprtsec = args->xprtsec;
+ INIT_DELAYED_WORK(&transport->connect_worker,
+ xs_tcp_tls_setup_socket);
+ break;
+ default:
+ ret = ERR_PTR(-EACCES);
+ goto out_err;
+ }
+
+ switch (addr->sa_family) {
+ case AF_INET:
+ if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+ xprt_set_bound(xprt);
+
+ xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
+ break;
+ case AF_INET6:
+ if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+ xprt_set_bound(xprt);
+
+ xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
+ break;
+ default:
+ ret = ERR_PTR(-EAFNOSUPPORT);
+ goto out_err;
+ }
+
+ if (xprt_bound(xprt))
+ dprintk("RPC: set up xprt to %s (port %s) via %s\n",
+ xprt->address_strings[RPC_DISPLAY_ADDR],
+ xprt->address_strings[RPC_DISPLAY_PORT],
+ xprt->address_strings[RPC_DISPLAY_PROTO]);
+ else
+ dprintk("RPC: set up xprt to %s (autobind) via %s\n",
+ xprt->address_strings[RPC_DISPLAY_ADDR],
+ xprt->address_strings[RPC_DISPLAY_PROTO]);
+
+ if (try_module_get(THIS_MODULE))
+ return xprt;
+ ret = ERR_PTR(-EINVAL);
+out_err:
+ xs_xprt_free(xprt);
+ return ret;
+}
+
+/**
* xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket
* @args: rpc transport creation arguments
*
@@ -3153,6 +3568,15 @@ static struct xprt_class xs_tcp_transport = {
.netid = { "tcp", "tcp6", "" },
};
+static struct xprt_class xs_tcp_tls_transport = {
+ .list = LIST_HEAD_INIT(xs_tcp_tls_transport.list),
+ .name = "tcp-with-tls",
+ .owner = THIS_MODULE,
+ .ident = XPRT_TRANSPORT_TCP_TLS,
+ .setup = xs_setup_tcp_tls,
+ .netid = { "tcp", "tcp6", "" },
+};
+
static struct xprt_class xs_bc_tcp_transport = {
.list = LIST_HEAD_INIT(xs_bc_tcp_transport.list),
.name = "tcp NFSv4.1 backchannel",
@@ -3174,6 +3598,7 @@ int init_socket_xprt(void)
xprt_register_transport(&xs_local_transport);
xprt_register_transport(&xs_udp_transport);
xprt_register_transport(&xs_tcp_transport);
+ xprt_register_transport(&xs_tcp_tls_transport);
xprt_register_transport(&xs_bc_tcp_transport);
return 0;
@@ -3193,6 +3618,7 @@ void cleanup_socket_xprt(void)
xprt_unregister_transport(&xs_local_transport);
xprt_unregister_transport(&xs_udp_transport);
xprt_unregister_transport(&xs_tcp_transport);
+ xprt_unregister_transport(&xs_tcp_tls_transport);
xprt_unregister_transport(&xs_bc_tcp_transport);
}
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 577fa5af33ec..302fd749c424 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1960,7 +1960,8 @@ rcv:
skb_reset_network_header(*skb);
skb_pull(*skb, tipc_ehdr_size(ehdr));
- pskb_trim(*skb, (*skb)->len - aead->authsize);
+ if (pskb_trim(*skb, (*skb)->len - aead->authsize))
+ goto free_skb;
/* Validate TIPCv2 message */
if (unlikely(!tipc_msg_validate(skb))) {
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 5e000fde8067..a9c5b6594889 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -583,7 +583,7 @@ update:
n->capabilities, &n->bc_entry.inputq1,
&n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
pr_warn("Broadcast rcv link creation failed, no memory\n");
- kfree(n);
+ tipc_node_put(n);
n = NULL;
goto exit;
}
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 2021fe557e50..529101eb20bd 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -52,6 +52,8 @@ static LIST_HEAD(tls_device_list);
static LIST_HEAD(tls_device_down_list);
static DEFINE_SPINLOCK(tls_device_lock);
+static struct page *dummy_page;
+
static void tls_device_free_ctx(struct tls_context *ctx)
{
if (ctx->tx_conf == TLS_HW) {
@@ -312,36 +314,33 @@ static int tls_push_record(struct sock *sk,
return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
}
-static int tls_device_record_close(struct sock *sk,
- struct tls_context *ctx,
- struct tls_record_info *record,
- struct page_frag *pfrag,
- unsigned char record_type)
+static void tls_device_record_close(struct sock *sk,
+ struct tls_context *ctx,
+ struct tls_record_info *record,
+ struct page_frag *pfrag,
+ unsigned char record_type)
{
struct tls_prot_info *prot = &ctx->prot_info;
- int ret;
+ struct page_frag dummy_tag_frag;
/* append tag
* device will fill in the tag, we just need to append a placeholder
* use socket memory to improve coalescing (re-using a single buffer
* increases frag count)
- * if we can't allocate memory now, steal some back from data
+ * if we can't allocate memory now use the dummy page
*/
- if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
- sk->sk_allocation))) {
- ret = 0;
- tls_append_frag(record, pfrag, prot->tag_size);
- } else {
- ret = prot->tag_size;
- if (record->len <= prot->overhead_size)
- return -ENOMEM;
+ if (unlikely(pfrag->size - pfrag->offset < prot->tag_size) &&
+ !skb_page_frag_refill(prot->tag_size, pfrag, sk->sk_allocation)) {
+ dummy_tag_frag.page = dummy_page;
+ dummy_tag_frag.offset = 0;
+ pfrag = &dummy_tag_frag;
}
+ tls_append_frag(record, pfrag, prot->tag_size);
/* fill prepend */
tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
record->len - prot->overhead_size,
record_type);
- return ret;
}
static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
@@ -541,18 +540,8 @@ last_record:
if (done || record->len >= max_open_record_len ||
(record->num_frags >= MAX_SKB_FRAGS - 1)) {
- rc = tls_device_record_close(sk, tls_ctx, record,
- pfrag, record_type);
- if (rc) {
- if (rc > 0) {
- size += rc;
- } else {
- size = orig_size;
- destroy_record(record);
- ctx->open_record = NULL;
- break;
- }
- }
+ tls_device_record_close(sk, tls_ctx, record,
+ pfrag, record_type);
rc = tls_push_record(sk,
tls_ctx,
@@ -1450,14 +1439,26 @@ int __init tls_device_init(void)
{
int err;
- destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
- if (!destruct_wq)
+ dummy_page = alloc_page(GFP_KERNEL);
+ if (!dummy_page)
return -ENOMEM;
+ destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
+ if (!destruct_wq) {
+ err = -ENOMEM;
+ goto err_free_dummy;
+ }
+
err = register_netdevice_notifier(&tls_dev_notifier);
if (err)
- destroy_workqueue(destruct_wq);
+ goto err_destroy_wq;
+ return 0;
+
+err_destroy_wq:
+ destroy_workqueue(destruct_wq);
+err_free_dummy:
+ put_page(dummy_page);
return err;
}
@@ -1466,4 +1467,5 @@ void __exit tls_device_cleanup(void)
unregister_netdevice_notifier(&tls_dev_notifier);
destroy_workqueue(destruct_wq);
clean_acked_data_flush();
+ put_page(dummy_page);
}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b6896126bb92..4a8ee2f6badb 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -139,9 +139,6 @@ int tls_push_sg(struct sock *sk,
ctx->splicing_pages = true;
while (1) {
- if (sg_is_last(sg))
- msg.msg_flags = flags;
-
/* is sending application-limited? */
tcp_rate_check_app_limited(sk);
p = sg_page(sg);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 123b35ddfd71..86930a8ed012 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -289,17 +289,29 @@ static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
return 0;
}
-static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
+static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
+ struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
+ short offset = offsetof(struct sockaddr_storage, __data);
+
+ BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
+
/* This may look like an off by one error but it is a bit more
* subtle. 108 is the longest valid AF_UNIX path for a binding.
* sun_path[108] doesn't as such exist. However in kernel space
* we are guaranteed that it is a valid memory location in our
* kernel address buffer because syscall functions always pass
* a pointer of struct sockaddr_storage which has a bigger buffer
- * than 108.
+ * than 108. Also, we must terminate sun_path for strlen() in
+ * getname_kernel().
+ */
+ addr->__data[addr_len - offset] = 0;
+
+ /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will
+ * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen()
+ * know the actual buffer.
*/
- ((char *)sunaddr)[addr_len] = 0;
+ return strlen(addr->__data) + offset + 1;
}
static void __unix_remove_socket(struct sock *sk)
@@ -778,7 +790,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
if (mutex_lock_interruptible(&u->iolock))
return -EINTR;
- sk->sk_peek_off = val;
+ WRITE_ONCE(sk->sk_peek_off, val);
mutex_unlock(&u->iolock);
return 0;
@@ -1208,10 +1220,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
struct path parent;
int err;
- unix_mkname_bsd(sunaddr, addr_len);
- addr_len = strlen(sunaddr->sun_path) +
- offsetof(struct sockaddr_un, sun_path) + 1;
-
+ addr_len = unix_mkname_bsd(sunaddr, addr_len);
addr = unix_create_addr(sunaddr, addr_len);
if (!addr)
return -ENOMEM;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0da2e6a2a7ea..8bcf8e293308 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5430,8 +5430,11 @@ nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs)
if (!wiphy->mbssid_max_interfaces)
return ERR_PTR(-EINVAL);
- nla_for_each_nested(nl_elems, attrs, rem_elems)
+ nla_for_each_nested(nl_elems, attrs, rem_elems) {
+ if (num_elems >= 255)
+ return ERR_PTR(-EINVAL);
num_elems++;
+ }
elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL);
if (!elems)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 8bf00caf5d29..0cf1ce7b6934 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -657,7 +657,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp);
if (ret)
- return ret;
+ return 0;
for_each_element_id(elem, WLAN_EID_REDUCED_NEIGHBOR_REPORT,
ies->data, ies->len) {
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 89c9ad6c886e..1783ab9d57a3 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -580,6 +580,8 @@ int ieee80211_strip_8023_mesh_hdr(struct sk_buff *skb)
hdrlen += ETH_ALEN + 2;
else if (!pskb_may_pull(skb, hdrlen))
return -EINVAL;
+ else
+ payload.eth.h_proto = htons(skb->len - hdrlen);
mesh_addr = skb->data + sizeof(payload.eth) + ETH_ALEN;
switch (payload.flags & MESH_FLAGS_AE) {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 5a8c0dd250af..10ea85c03147 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -505,7 +505,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb->dev = dev;
skb->priority = xs->sk.sk_priority;
- skb->mark = xs->sk.sk_mark;
+ skb->mark = READ_ONCE(xs->sk.sk_mark);
skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
skb->destructor = xsk_destruct_skb;
@@ -886,6 +886,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
struct net_device *dev;
+ int bound_dev_if;
u32 flags, qid;
int err = 0;
@@ -899,6 +900,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
XDP_USE_NEED_WAKEUP))
return -EINVAL;
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if && bound_dev_if != sxdp->sxdp_ifindex)
+ return -EINVAL;
+
rtnl_lock();
mutex_lock(&xs->mutex);
if (xs->state != XSK_READY) {
@@ -989,6 +994,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
err = xp_alloc_tx_descs(xs->pool, xs);
if (err) {
xp_put_pool(xs->pool);
+ xs->pool = NULL;
sockfd_put(sock);
goto out_unlock;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e7617c9959c3..d6b405782b63 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2250,7 +2250,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
match = xfrm_selector_match(&pol->selector, fl, family);
if (match) {
- if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
+ if ((READ_ONCE(sk->sk_mark) & pol->mark.m) != pol->mark.v ||
pol->if_id != if_id) {
pol = NULL;
goto out;