summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/filter.c9
-rw-r--r--net/core/lwtunnel.c26
-rw-r--r--net/core/netpoll.c7
-rw-r--r--net/core/pktgen.c13
-rw-r--r--net/core/scm.c17
-rw-r--r--net/core/selftests.c23
-rw-r--r--net/core/skmsg.c63
-rw-r--r--net/core/sock.c4
-rw-r--r--net/core/utils.c4
9 files changed, 121 insertions, 45 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index d713696d0832..cd0c28e94979 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1958,10 +1958,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
bool do_mforce = flags & BPF_F_MARK_ENFORCE;
+ bool is_ipv6 = flags & BPF_F_IPV6;
__sum16 *ptr;
if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
- BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
+ BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK | BPF_F_IPV6)))
return -EINVAL;
if (unlikely(offset > 0xffff || offset & 1))
return -EFAULT;
@@ -1977,7 +1978,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
if (unlikely(from != 0))
return -EINVAL;
- inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
+ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, is_ipv6);
break;
case 2:
inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
@@ -2500,6 +2501,7 @@ int skb_do_redirect(struct sk_buff *skb)
goto out_drop;
skb->dev = dev;
dev_sw_netstats_rx_add(dev, skb->len);
+ skb_scrub_packet(skb, false);
return -EAGAIN;
}
return flags & BPF_F_NEIGH ?
@@ -9268,6 +9270,9 @@ static bool flow_dissector_is_valid_access(int off, int size,
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
+ if (off % size != 0)
+ return false;
+
if (type == BPF_WRITE)
return false;
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 4417a18b3e95..f63586c9ce02 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -332,6 +332,8 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
struct dst_entry *dst;
int ret;
+ local_bh_disable();
+
if (dev_xmit_recursion()) {
net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
__func__);
@@ -347,8 +349,10 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
lwtstate = dst->lwtstate;
if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
- lwtstate->type > LWTUNNEL_ENCAP_MAX)
- return 0;
+ lwtstate->type > LWTUNNEL_ENCAP_MAX) {
+ ret = 0;
+ goto out;
+ }
ret = -EOPNOTSUPP;
rcu_read_lock();
@@ -363,11 +367,13 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (ret == -EOPNOTSUPP)
goto drop;
- return ret;
+ goto out;
drop:
kfree_skb(skb);
+out:
+ local_bh_enable();
return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_output);
@@ -379,6 +385,8 @@ int lwtunnel_xmit(struct sk_buff *skb)
struct dst_entry *dst;
int ret;
+ local_bh_disable();
+
if (dev_xmit_recursion()) {
net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
__func__);
@@ -395,8 +403,10 @@ int lwtunnel_xmit(struct sk_buff *skb)
lwtstate = dst->lwtstate;
if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
- lwtstate->type > LWTUNNEL_ENCAP_MAX)
- return 0;
+ lwtstate->type > LWTUNNEL_ENCAP_MAX) {
+ ret = 0;
+ goto out;
+ }
ret = -EOPNOTSUPP;
rcu_read_lock();
@@ -411,11 +421,13 @@ int lwtunnel_xmit(struct sk_buff *skb)
if (ret == -EOPNOTSUPP)
goto drop;
- return ret;
+ goto out;
drop:
kfree_skb(skb);
+out:
+ local_bh_enable();
return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_xmit);
@@ -427,6 +439,8 @@ int lwtunnel_input(struct sk_buff *skb)
struct dst_entry *dst;
int ret;
+ DEBUG_NET_WARN_ON_ONCE(!in_softirq());
+
if (dev_xmit_recursion()) {
net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
__func__);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 657abbb7d0d7..89f5358d7a1b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -800,6 +800,13 @@ put_noaddr:
goto put;
netdev_tracker_alloc(ndev, &np->dev_tracker, GFP_KERNEL);
rtnl_unlock();
+
+ /* Make sure all NAPI polls which started before dev->npinfo
+ * was visible have exited before we start calling NAPI poll.
+ * NAPI skips locking if dev->npinfo is NULL.
+ */
+ synchronize_rcu();
+
return 0;
put:
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a2fb951996b8..a2838c15aa9d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -897,6 +897,10 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
pkt_dev->nr_labels = 0;
do {
__u32 tmp;
+
+ if (n >= MAX_MPLS_LABELS)
+ return -E2BIG;
+
len = hex32_arg(&buffer[i], 8, &tmp);
if (len <= 0)
return len;
@@ -908,8 +912,6 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
return -EFAULT;
i++;
n++;
- if (n >= MAX_MPLS_LABELS)
- return -E2BIG;
} while (c == ',');
pkt_dev->nr_labels = n;
@@ -1875,8 +1877,8 @@ static ssize_t pktgen_thread_write(struct file *file,
i = len;
/* Read variable name */
-
- len = strn_len(&user_buffer[i], sizeof(name) - 1);
+ max = min(sizeof(name) - 1, count - i);
+ len = strn_len(&user_buffer[i], max);
if (len < 0)
return len;
@@ -1906,7 +1908,8 @@ static ssize_t pktgen_thread_write(struct file *file,
if (!strcmp(name, "add_device")) {
char f[32];
memset(f, 0, 32);
- len = strn_len(&user_buffer[i], sizeof(f) - 1);
+ max = min(sizeof(f) - 1, count - i);
+ len = strn_len(&user_buffer[i], max);
if (len < 0) {
ret = len;
goto out;
diff --git a/net/core/scm.c b/net/core/scm.c
index a877c4ef4c25..cdd4e5befb14 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -36,6 +36,7 @@
#include <net/compat.h>
#include <net/scm.h>
#include <net/cls_cgroup.h>
+#include <net/af_unix.h>
/*
@@ -85,8 +86,15 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
return -ENOMEM;
*fplp = fpl;
fpl->count = 0;
+ fpl->count_unix = 0;
fpl->max = SCM_MAX_FD;
fpl->user = NULL;
+#if IS_ENABLED(CONFIG_UNIX)
+ fpl->inflight = false;
+ fpl->dead = false;
+ fpl->edges = NULL;
+ INIT_LIST_HEAD(&fpl->vertices);
+#endif
}
fpp = &fpl->fp[fpl->count];
@@ -109,6 +117,9 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
fput(file);
return -EINVAL;
}
+ if (unix_get_socket(file))
+ fpl->count_unix++;
+
*fpp++ = file;
fpl->count++;
}
@@ -367,8 +378,14 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
if (new_fpl) {
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
+
new_fpl->max = new_fpl->count;
new_fpl->user = get_uid(fpl->user);
+#if IS_ENABLED(CONFIG_UNIX)
+ new_fpl->inflight = false;
+ new_fpl->edges = NULL;
+ INIT_LIST_HEAD(&new_fpl->vertices);
+#endif
}
return new_fpl;
}
diff --git a/net/core/selftests.c b/net/core/selftests.c
index acb1ee97bbd3..946e92cca211 100644
--- a/net/core/selftests.c
+++ b/net/core/selftests.c
@@ -100,10 +100,10 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev,
ehdr->h_proto = htons(ETH_P_IP);
if (attr->tcp) {
+ memset(thdr, 0, sizeof(*thdr));
thdr->source = htons(attr->sport);
thdr->dest = htons(attr->dport);
thdr->doff = sizeof(struct tcphdr) / 4;
- thdr->check = 0;
} else {
uhdr->source = htons(attr->sport);
uhdr->dest = htons(attr->dport);
@@ -144,16 +144,25 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev,
attr->id = net_test_next_id;
shdr->id = net_test_next_id++;
- if (attr->size)
- skb_put(skb, attr->size);
- if (attr->max_size && attr->max_size > skb->len)
- skb_put(skb, attr->max_size - skb->len);
+ if (attr->size) {
+ void *payload = skb_put(skb, attr->size);
+
+ memset(payload, 0, attr->size);
+ }
+
+ if (attr->max_size && attr->max_size > skb->len) {
+ size_t pad_len = attr->max_size - skb->len;
+ void *pad = skb_put(skb, pad_len);
+
+ memset(pad, 0, pad_len);
+ }
skb->csum = 0;
skb->ip_summed = CHECKSUM_PARTIAL;
if (attr->tcp) {
- thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr,
- ihdr->daddr, 0);
+ int l4len = skb->len - skb_transport_offset(skb);
+
+ thdr->check = ~tcp_v4_check(l4len, ihdr->saddr, ihdr->daddr, 0);
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
} else {
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 5a790cd1121b..01ca497fe2cd 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -528,16 +528,22 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
u32 off, u32 len,
struct sk_psock *psock,
struct sock *sk,
- struct sk_msg *msg)
+ struct sk_msg *msg,
+ bool take_ref)
{
int num_sge, copied;
+ /* skb_to_sgvec will fail when the total number of fragments in
+ * frag_list and frags exceeds MAX_MSG_FRAGS. For example, the
+ * caller may aggregate multiple skbs.
+ */
num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
if (num_sge < 0) {
/* skb linearize may fail with ENOMEM, but lets simply try again
* later if this happens. Under memory pressure we don't want to
* drop the skb. We need to linearize the skb so that the mapping
* in skb_to_sgvec can not error.
+ * Note that skb_linearize requires the skb not to be shared.
*/
if (skb_linearize(skb))
return -EAGAIN;
@@ -554,7 +560,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
msg->sg.start = 0;
msg->sg.size = copied;
msg->sg.end = num_sge;
- msg->skb = skb;
+ msg->skb = take_ref ? skb_get(skb) : skb;
sk_psock_queue_msg(psock, msg);
sk_psock_data_ready(sk, psock);
@@ -562,7 +568,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
}
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
- u32 off, u32 len);
+ u32 off, u32 len, bool take_ref);
static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len)
@@ -576,7 +582,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* correctly.
*/
if (unlikely(skb->sk == sk))
- return sk_psock_skb_ingress_self(psock, skb, off, len);
+ return sk_psock_skb_ingress_self(psock, skb, off, len, true);
msg = sk_psock_create_ingress_msg(sk, skb);
if (!msg)
return -EAGAIN;
@@ -588,7 +594,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* into user buffers.
*/
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, true);
if (err < 0)
kfree(msg);
return err;
@@ -599,7 +605,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* because the skb is already accounted for here.
*/
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
- u32 off, u32 len)
+ u32 off, u32 len, bool take_ref)
{
struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC);
struct sock *sk = psock->sk;
@@ -608,7 +614,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
if (unlikely(!msg))
return -EAGAIN;
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref);
if (err < 0)
kfree(msg);
return err;
@@ -617,18 +623,13 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
- int err = 0;
-
if (!ingress) {
if (!sock_writeable(psock->sk))
return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len);
}
- skb_get(skb);
- err = sk_psock_skb_ingress(psock, skb, off, len);
- if (err < 0)
- kfree_skb(skb);
- return err;
+
+ return sk_psock_skb_ingress(psock, skb, off, len);
}
static void sk_psock_skb_state(struct sk_psock *psock,
@@ -653,12 +654,21 @@ static void sk_psock_backlog(struct work_struct *work)
bool ingress;
int ret;
- mutex_lock(&psock->work_mutex);
- if (unlikely(state->len)) {
- len = state->len;
- off = state->off;
- }
+ /* If sk is quickly removed from the map and then added back, the old
+ * psock should not be scheduled, because there are now two psocks
+ * pointing to the same sk.
+ */
+ if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+ return;
+ /* Increment the psock refcnt to synchronize with close(fd) path in
+ * sock_map_close(), ensuring we wait for backlog thread completion
+ * before sk_socket freed. If refcnt increment fails, it indicates
+ * sock_map_close() completed with sk_socket potentially already freed.
+ */
+ if (!sk_psock_get(psock->sk))
+ return;
+ mutex_lock(&psock->work_mutex);
while ((skb = skb_peek(&psock->ingress_skb))) {
len = skb->len;
off = 0;
@@ -668,6 +678,13 @@ static void sk_psock_backlog(struct work_struct *work)
off = stm->offset;
len = stm->full_len;
}
+
+ /* Resume processing from previous partial state */
+ if (unlikely(state->len)) {
+ len = state->len;
+ off = state->off;
+ }
+
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
do {
@@ -678,7 +695,8 @@ static void sk_psock_backlog(struct work_struct *work)
if (ret <= 0) {
if (ret == -EAGAIN) {
sk_psock_skb_state(psock, state, len, off);
-
+ /* Restore redir info we cleared before */
+ skb_bpf_set_redir(skb, psock->sk, ingress);
/* Delay slightly to prioritize any
* other work that might be here.
*/
@@ -695,11 +713,14 @@ static void sk_psock_backlog(struct work_struct *work)
len -= ret;
} while (len);
+ /* The entire skb sent, clear state */
+ sk_psock_skb_state(psock, state, 0, 0);
skb = skb_dequeue(&psock->ingress_skb);
kfree_skb(skb);
}
end:
mutex_unlock(&psock->work_mutex);
+ sk_psock_put(psock->sk, psock);
}
struct sk_psock *sk_psock_init(struct sock *sk, int node)
@@ -1012,7 +1033,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
off = stm->offset;
len = stm->full_len;
}
- err = sk_psock_skb_ingress_self(psock, skb, off, len);
+ err = sk_psock_skb_ingress_self(psock, skb, off, len, false);
}
if (err < 0) {
spin_lock_bh(&psock->ingress_lock);
diff --git a/net/core/sock.c b/net/core/sock.c
index 168e7f42c054..d8c0650322ea 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3797,7 +3797,7 @@ static int assign_proto_idx(struct proto *prot)
{
prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
- if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
+ if (unlikely(prot->inuse_idx == PROTO_INUSE_NR)) {
pr_err("PROTO_INUSE_NR exhausted\n");
return -ENOSPC;
}
@@ -3808,7 +3808,7 @@ static int assign_proto_idx(struct proto *prot)
static void release_proto_idx(struct proto *prot)
{
- if (prot->inuse_idx != PROTO_INUSE_NR - 1)
+ if (prot->inuse_idx != PROTO_INUSE_NR)
clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
diff --git a/net/core/utils.c b/net/core/utils.c
index 938495bc1d34..1eeb9131e2cf 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
EXPORT_SYMBOL(inet_proto_csum_replace16);
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
- __wsum diff, bool pseudohdr)
+ __wsum diff, bool pseudohdr, bool ipv6)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_replace_by_diff(sum, diff);
- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6)
skb->csum = ~csum_sub(diff, skb->csum);
} else if (pseudohdr) {
*sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));