From f7418bc10d8402798ee3add5ef0ed5f33266a2bb Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 24 Sep 2015 14:59:49 +0200 Subject: mac80211: fix handling of PS filtering with fast-xmit Fixes dropped packets in the tx path in case a non-PS station triggers the tx filter. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/status.c | 1 + net/mac80211/tx.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 8ba583243509..3ed7ddfbf8e8 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -101,6 +101,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, * when it wakes up for the next time. */ set_sta_flag(sta, WLAN_STA_CLEAR_PS_FILT); + ieee80211_clear_fast_xmit(sta); /* * This code races in the following way: diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 84e0e8c7fb23..7892eb8ed4c8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1218,8 +1218,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, if (!tx->sta) info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; - else if (test_and_clear_sta_flag(tx->sta, WLAN_STA_CLEAR_PS_FILT)) + else if (test_and_clear_sta_flag(tx->sta, WLAN_STA_CLEAR_PS_FILT)) { info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; + ieee80211_check_fast_xmit(tx->sta); + } info->flags |= IEEE80211_TX_CTL_FIRST_FRAGMENT; @@ -2451,7 +2453,8 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) if (test_sta_flag(sta, WLAN_STA_PS_STA) || test_sta_flag(sta, WLAN_STA_PS_DRIVER) || - test_sta_flag(sta, WLAN_STA_PS_DELIVER)) + test_sta_flag(sta, WLAN_STA_PS_DELIVER) || + test_sta_flag(sta, WLAN_STA_CLEAR_PS_FILT)) goto out; if (sdata->noack_map) -- cgit v1.2.3 From 2306c704ce280c97a60d1f45333b822b40281dea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Oct 2015 05:39:26 -0700 Subject: inet: fix race in reqsk_queue_unlink() reqsk_timer_handler() tests if icsk_accept_queue.listen_opt is NULL at its beginning. By the time it calls inet_csk_reqsk_queue_drop() and reqsk_queue_unlink(), listener might have been closed and inet_csk_listen_stop() had called reqsk_queue_yank_acceptq() which sets icsk_accept_queue.listen_opt to NULL We therefore need to correctly check listen_opt being NULL after holding syn_wait_lock for proper synchronization. Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer") Fixes: b357a364c57c ("inet: fix possible panic in reqsk_queue_unlink()") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7bb9c39e0a4d..61b45a17fc73 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -577,21 +577,22 @@ EXPORT_SYMBOL(inet_rtx_syn_ack); static bool reqsk_queue_unlink(struct request_sock_queue *queue, struct request_sock *req) { - struct listen_sock *lopt = queue->listen_opt; struct request_sock **prev; + struct listen_sock *lopt; bool found = false; spin_lock(&queue->syn_wait_lock); - - for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; - prev = &(*prev)->dl_next) { - if (*prev == req) { - *prev = req->dl_next; - found = true; - break; + lopt = queue->listen_opt; + if (lopt) { + for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; + prev = &(*prev)->dl_next) { + if (*prev == req) { + *prev = req->dl_next; + found = true; + break; + } } } - spin_unlock(&queue->syn_wait_lock); if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) reqsk_put(req); -- cgit v1.2.3 From 181a4224acdfb993a21f987f8617b5c8d7bc654e Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 1 Oct 2015 16:25:43 +0200 Subject: ipv4: fix reply_dst leakage on arp reply There are cases when the created metadata reply is not used. Ensure the allocated memory is freed also in such cases. Fixes: 63d008a4e9ee ("ipv4: send arp replies to the correct tunnel") Reported-by: Hannes Frederic Sowa Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/ipv4/arp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f03db8b7abee..0c9c3482e419 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -312,7 +312,7 @@ static void arp_send_dst(int type, int ptype, __be32 dest_ip, if (!skb) return; - skb_dst_set(skb, dst); + skb_dst_set(skb, dst_clone(dst)); arp_xmit(skb); } @@ -384,7 +384,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) } if (skb && !(dev->priv_flags & IFF_XMIT_DST_RELEASE)) - dst = dst_clone(skb_dst(skb)); + dst = skb_dst(skb); arp_send_dst(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, dst_hw, dev->dev_addr, NULL, dst); } @@ -811,7 +811,7 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) } else { pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); - return 0; + goto out_free_dst; } goto out; } @@ -865,6 +865,8 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) out: consume_skb(skb); +out_free_dst: + dst_release(reply_dst); return 0; } -- cgit v1.2.3 From 6bd00b850635abb0044e06101761533c8beba79c Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 1 Oct 2015 11:37:42 -0700 Subject: act_mirred: fix a race condition on mirred_list After commit 1ce87720d456 ("net: sched: make cls_u32 lockless") we began to release tc actions in a RCU callback. However, mirred action relies on RTNL lock to protect the global mirred_list, therefore we could have a race condition between RCU callback and netdevice event, which caused a list corruption as reported by Vinson. Instead of relying on RTNL lock, introduce a spinlock to protect this list. Note, in non-bind case, it is still called with RTNL lock, therefore should disable BH too. Reported-by: Vinson Lee Cc: John Fastabend Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 2d1be4a760fd..3e7c51a8ca38 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -31,13 +31,17 @@ #define MIRRED_TAB_MASK 7 static LIST_HEAD(mirred_list); +static DEFINE_SPINLOCK(mirred_list_lock); static void tcf_mirred_release(struct tc_action *a, int bind) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev = rcu_dereference_protected(m->tcfm_dev, 1); + /* We could be called either in a RCU callback or with RTNL lock held. */ + spin_lock_bh(&mirred_list_lock); list_del(&m->tcfm_list); + spin_unlock_bh(&mirred_list_lock); if (dev) dev_put(dev); } @@ -123,7 +127,9 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) { + spin_lock_bh(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); + spin_unlock_bh(&mirred_list_lock); tcf_hash_insert(a); } @@ -221,7 +227,8 @@ static int mirred_device_event(struct notifier_block *unused, struct tcf_mirred *m; ASSERT_RTNL(); - if (event == NETDEV_UNREGISTER) + if (event == NETDEV_UNREGISTER) { + spin_lock_bh(&mirred_list_lock); list_for_each_entry(m, &mirred_list, tcfm_list) { if (rcu_access_pointer(m->tcfm_dev) == dev) { dev_put(dev); @@ -231,6 +238,8 @@ static int mirred_device_event(struct notifier_block *unused, RCU_INIT_POINTER(m->tcfm_dev, NULL); } } + spin_unlock_bh(&mirred_list_lock); + } return NOTIFY_DONE; } -- cgit v1.2.3 From 215c90afb9ea633026273d81ac9c9ece2b1acd58 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 1 Oct 2015 11:37:43 -0700 Subject: act_mirred: always release tcf hash Align with other tc actions. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 3e7c51a8ca38..2efaf4ee6040 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -107,10 +107,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } else { if (bind) return 0; - if (!ovr) { - tcf_hash_release(a, bind); + + tcf_hash_release(a, bind); + if (!ovr) return -EEXIST; - } } m = to_mirred(a); -- cgit v1.2.3 From e9193d60d363e4dff75ff6d43a48f22be26d59c7 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Fri, 2 Oct 2015 00:05:36 +0300 Subject: net/unix: fix logic about sk_peek_offset Now send with MSG_PEEK can return data from multiple SKBs. Unfortunately we take into account the peek offset for each skb, that is wrong. We need to apply the peek offset only once. In addition, the peek offset should be used only if MSG_PEEK is set. Cc: "David S. Miller" (maintainer:NETWORKING Cc: Eric Dumazet (commit_signer:1/14=7%) Cc: Aaron Conole Fixes: 9f389e35674f ("af_unix: return data from multiple SKBs on recv() with MSG_PEEK flag") Signed-off-by: Andrey Vagin Tested-by: Aaron Conole Signed-off-by: David S. Miller --- net/unix/af_unix.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ef31b40ad550..94f658235fb4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2064,6 +2064,11 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) goto out; } + if (flags & MSG_PEEK) + skip = sk_peek_offset(sk, flags); + else + skip = 0; + do { int chunk; struct sk_buff *skb, *last; @@ -2112,7 +2117,6 @@ unlock: break; } - skip = sk_peek_offset(sk, flags); while (skip >= unix_skb_len(skb)) { skip -= unix_skb_len(skb); last = skb; @@ -2179,14 +2183,12 @@ unlock: if (UNIXCB(skb).fp) scm.fp = scm_fp_dup(UNIXCB(skb).fp); - if (skip) { - sk_peek_offset_fwd(sk, chunk); - skip -= chunk; - } + sk_peek_offset_fwd(sk, chunk); if (UNIXCB(skb).fp) break; + skip = 0; last = skb; last_len = skb->len; unix_state_lock(sk); -- cgit v1.2.3 From 33db4125ec745426c3483d6817d8f7ea5324cd05 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 1 Oct 2015 15:00:37 -0700 Subject: openvswitch: Rename LABEL->LABELS Conntrack LABELS (plural) are exposed by conntrack; rename the OVS name for these to be consistent with conntrack. Fixes: c2ac667 "openvswitch: Allow matching on conntrack label" Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 12 +++---- net/openvswitch/actions.c | 2 +- net/openvswitch/conntrack.c | 74 ++++++++++++++++++++-------------------- net/openvswitch/conntrack.h | 2 +- net/openvswitch/flow.h | 2 +- net/openvswitch/flow_netlink.c | 18 +++++----- 6 files changed, 55 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 32e07d8cbaf4..c736344afed4 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -326,7 +326,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */ OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */ - OVS_KEY_ATTR_CT_LABEL, /* 16-octet connection tracking label */ + OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -439,9 +439,9 @@ struct ovs_key_nd { __u8 nd_tll[ETH_ALEN]; }; -#define OVS_CT_LABEL_LEN 16 -struct ovs_key_ct_label { - __u8 ct_label[OVS_CT_LABEL_LEN]; +#define OVS_CT_LABELS_LEN 16 +struct ovs_key_ct_labels { + __u8 ct_labels[OVS_CT_LABELS_LEN]; }; /* OVS_KEY_ATTR_CT_STATE flags */ @@ -623,7 +623,7 @@ struct ovs_action_hash { * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the * mask, the corresponding bit in the value is copied to the connection * tracking mark field in the connection. - * @OVS_CT_ATTR_LABEL: %OVS_CT_LABEL_LEN value followed by %OVS_CT_LABEL_LEN + * @OVS_CT_ATTR_LABEL: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN * mask. For each bit set in the mask, the corresponding bit in the value is * copied to the connection tracking label field in the connection. * @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG. @@ -633,7 +633,7 @@ enum ovs_ct_attr { OVS_CT_ATTR_FLAGS, /* u8 bitmask of OVS_CT_F_*. */ OVS_CT_ATTR_ZONE, /* u16 zone id. */ OVS_CT_ATTR_MARK, /* mark to associate with this connection. */ - OVS_CT_ATTR_LABEL, /* label to associate with this connection. */ + OVS_CT_ATTR_LABELS, /* labels to associate with this connection. */ OVS_CT_ATTR_HELPER, /* netlink helper to assist detection of related connections. */ __OVS_CT_ATTR_MAX diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 315f5330b6e5..e23a61cc3d5c 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -968,7 +968,7 @@ static int execute_masked_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_CT_STATE: case OVS_KEY_ATTR_CT_ZONE: case OVS_KEY_ATTR_CT_MARK: - case OVS_KEY_ATTR_CT_LABEL: + case OVS_KEY_ATTR_CT_LABELS: err = -EINVAL; break; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 002a755fa07e..7d80acfb80d0 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -37,9 +37,9 @@ struct md_mark { }; /* Metadata label for masked write to conntrack label. */ -struct md_label { - struct ovs_key_ct_label value; - struct ovs_key_ct_label mask; +struct md_labels { + struct ovs_key_ct_labels value; + struct ovs_key_ct_labels mask; }; /* Conntrack action context for execution. */ @@ -50,7 +50,7 @@ struct ovs_conntrack_info { u32 flags; u16 family; struct md_mark mark; - struct md_label label; + struct md_labels labels; }; static u16 key_to_nfproto(const struct sw_flow_key *key) @@ -109,21 +109,21 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct) #endif } -static void ovs_ct_get_label(const struct nf_conn *ct, - struct ovs_key_ct_label *label) +static void ovs_ct_get_labels(const struct nf_conn *ct, + struct ovs_key_ct_labels *labels) { struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; if (cl) { size_t len = cl->words * sizeof(long); - if (len > OVS_CT_LABEL_LEN) - len = OVS_CT_LABEL_LEN; - else if (len < OVS_CT_LABEL_LEN) - memset(label, 0, OVS_CT_LABEL_LEN); - memcpy(label, cl->bits, len); + if (len > OVS_CT_LABELS_LEN) + len = OVS_CT_LABELS_LEN; + else if (len < OVS_CT_LABELS_LEN) + memset(labels, 0, OVS_CT_LABELS_LEN); + memcpy(labels, cl->bits, len); } else { - memset(label, 0, OVS_CT_LABEL_LEN); + memset(labels, 0, OVS_CT_LABELS_LEN); } } @@ -134,7 +134,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, key->ct.state = state; key->ct.zone = zone->id; key->ct.mark = ovs_ct_get_mark(ct); - ovs_ct_get_label(ct, &key->ct.label); + ovs_ct_get_labels(ct, &key->ct.labels); } /* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has @@ -179,8 +179,8 @@ int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - nla_put(skb, OVS_KEY_ATTR_CT_LABEL, sizeof(key->ct.label), - &key->ct.label)) + nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels), + &key->ct.labels)) return -EMSGSIZE; return 0; @@ -213,9 +213,9 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, #endif } -static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ct_label *label, - const struct ovs_key_ct_label *mask) +static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_key_ct_labels *labels, + const struct ovs_key_ct_labels *mask) { enum ip_conntrack_info ctinfo; struct nf_conn_labels *cl; @@ -235,15 +235,15 @@ static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key, nf_ct_labels_ext_add(ct); cl = nf_ct_labels_find(ct); } - if (!cl || cl->words * sizeof(long) < OVS_CT_LABEL_LEN) + if (!cl || cl->words * sizeof(long) < OVS_CT_LABELS_LEN) return -ENOSPC; - err = nf_connlabels_replace(ct, (u32 *)label, (u32 *)mask, - OVS_CT_LABEL_LEN / sizeof(u32)); + err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask, + OVS_CT_LABELS_LEN / sizeof(u32)); if (err) return err; - ovs_ct_get_label(ct, &key->ct.label); + ovs_ct_get_labels(ct, &key->ct.labels); return 0; } @@ -465,12 +465,12 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, return 0; } -static bool label_nonzero(const struct ovs_key_ct_label *label) +static bool labels_nonzero(const struct ovs_key_ct_labels *labels) { size_t i; - for (i = 0; i < sizeof(*label); i++) - if (label->ct_label[i]) + for (i = 0; i < sizeof(*labels); i++) + if (labels->ct_labels[i]) return true; return false; @@ -506,9 +506,9 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, if (err) goto err; } - if (label_nonzero(&info->label.mask)) - err = ovs_ct_set_label(skb, key, &info->label.value, - &info->label.mask); + if (labels_nonzero(&info->labels.mask)) + err = ovs_ct_set_labels(skb, key, &info->labels.value, + &info->labels.mask); err: skb_push(skb, nh_ofs); return err; @@ -545,8 +545,8 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { .maxlen = sizeof(u16) }, [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), .maxlen = sizeof(struct md_mark) }, - [OVS_CT_ATTR_LABEL] = { .minlen = sizeof(struct md_label), - .maxlen = sizeof(struct md_label) }, + [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), + .maxlen = sizeof(struct md_labels) }, [OVS_CT_ATTR_HELPER] = { .minlen = 1, .maxlen = NF_CT_HELPER_NAME_LEN } }; @@ -593,10 +593,10 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, } #endif #ifdef CONFIG_NF_CONNTRACK_LABELS - case OVS_CT_ATTR_LABEL: { - struct md_label *label = nla_data(a); + case OVS_CT_ATTR_LABELS: { + struct md_labels *labels = nla_data(a); - info->label = *label; + info->labels = *labels; break; } #endif @@ -633,7 +633,7 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) attr == OVS_KEY_ATTR_CT_MARK) return true; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - attr == OVS_KEY_ATTR_CT_LABEL) { + attr == OVS_KEY_ATTR_CT_LABELS) { struct ovs_net *ovs_net = net_generic(net, ovs_net_id); return ovs_net->xt_label; @@ -711,8 +711,8 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, &ct_info->mark)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - nla_put(skb, OVS_CT_ATTR_LABEL, sizeof(ct_info->label), - &ct_info->label)) + nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), + &ct_info->labels)) return -EMSGSIZE; if (ct_info->helper) { if (nla_put_string(skb, OVS_CT_ATTR_HELPER, @@ -737,7 +737,7 @@ void ovs_ct_free_action(const struct nlattr *a) void ovs_ct_init(struct net *net) { - unsigned int n_bits = sizeof(struct ovs_key_ct_label) * BITS_PER_BYTE; + unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); if (nf_connlabels_get(net, n_bits)) { diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 43f5dd7a5577..6bd603c6a031 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -72,7 +72,7 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb, key->ct.state = 0; key->ct.zone = 0; key->ct.mark = 0; - memset(&key->ct.label, 0, sizeof(key->ct.label)); + memset(&key->ct.labels, 0, sizeof(key->ct.labels)); } static inline int ovs_ct_put_key(const struct sw_flow_key *key, diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index fe527d2dd4b7..8cfa15a08668 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -116,7 +116,7 @@ struct sw_flow_key { u16 zone; u32 mark; u8 state; - struct ovs_key_ct_label label; + struct ovs_key_ct_labels labels; } ct; } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 5c030a4d7338..a60e3b7684bc 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -294,7 +294,7 @@ size_t ovs_key_attr_size(void) + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */ + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ - + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABEL */ + + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -352,7 +352,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) }, [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, - [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) }, + [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, }; static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) @@ -833,14 +833,14 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK); } - if (*attrs & (1 << OVS_KEY_ATTR_CT_LABEL) && - ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABEL)) { - const struct ovs_key_ct_label *cl; + if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) && + ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) { + const struct ovs_key_ct_labels *cl; - cl = nla_data(a[OVS_KEY_ATTR_CT_LABEL]); - SW_FLOW_KEY_MEMCPY(match, ct.label, cl->ct_label, + cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]); + SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels, sizeof(*cl), is_mask); - *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABEL); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); } return 0; } @@ -1973,7 +1973,7 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_CT_MARK: - case OVS_KEY_ATTR_CT_LABEL: + case OVS_KEY_ATTR_CT_LABELS: case OVS_KEY_ATTR_ETHERNET: break; -- cgit v1.2.3 From 93d08b6966cf730ea669d4d98f43627597077153 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Oct 2015 12:06:03 +0200 Subject: bpf: fix panic in SO_GET_FILTER with native ebpf programs When sockets have a native eBPF program attached through setsockopt(sk, SOL_SOCKET, SO_ATTACH_BPF, ...), and then try to dump these over getsockopt(sk, SOL_SOCKET, SO_GET_FILTER, ...), the following panic appears: [49904.178642] BUG: unable to handle kernel NULL pointer dereference at (null) [49904.178762] IP: [] sk_get_filter+0x39/0x90 [49904.182000] PGD 86fc9067 PUD 531a1067 PMD 0 [49904.185196] Oops: 0000 [#1] SMP [...] [49904.224677] Call Trace: [49904.226090] [] sock_getsockopt+0x319/0x740 [49904.227535] [] ? sock_has_perm+0x63/0x70 [49904.228953] [] ? release_sock+0x108/0x150 [49904.230380] [] ? selinux_socket_getsockopt+0x23/0x30 [49904.231788] [] SyS_getsockopt+0xa6/0xc0 [49904.233267] [] entry_SYSCALL_64_fastpath+0x12/0x71 The underlying issue is the very same as in commit b382c0865600 ("sock, diag: fix panic in sock_diag_put_filterinfo"), that is, native eBPF programs don't store an original program since this is only needed in cBPF ones. However, sk_get_filter() wasn't updated to test for this at the time when eBPF could be attached. Just throw an error to the user to indicate that eBPF cannot be dumped over this interface. That way, it can also be known that a program _is_ attached (as opposed to just return 0), and a different (future) method needs to be consulted for a dump. Fixes: 89aa075832b0 ("net: sock: allow eBPF programs to be attached to sockets") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 05a04ea87172..87b78ef0c3d4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1854,9 +1854,13 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, goto out; /* We're copying the filter that has been originally attached, - * so no conversion/decode needed anymore. + * so no conversion/decode needed anymore. eBPF programs that + * have no original program cannot be dumped through this. */ + ret = -EACCES; fprog = filter->prog->orig_prog; + if (!fprog) + goto out; ret = fprog->len; if (!len) -- cgit v1.2.3 From 598c12d0ba6de9060f04999746eb1e015774044b Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 2 Oct 2015 13:18:22 +0300 Subject: ovs: do not allocate memory from offline numa node When openvswitch tries allocate memory from offline numa node 0: stats = kmem_cache_alloc_node(flow_stats_cache, GFP_KERNEL | __GFP_ZERO, 0) It catches VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid)) [ replaced with VM_WARN_ON(!node_online(nid)) recently ] in linux/gfp.h This patch disables numa affinity in this case. Signed-off-by: Konstantin Khlebnikov Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index f2ea83ba4763..c7f74aab34b9 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -93,7 +93,8 @@ struct sw_flow *ovs_flow_alloc(void) /* Initialize the default stat node. */ stats = kmem_cache_alloc_node(flow_stats_cache, - GFP_KERNEL | __GFP_ZERO, 0); + GFP_KERNEL | __GFP_ZERO, + node_online(0) ? 0 : NUMA_NO_NODE); if (!stats) goto err; -- cgit v1.2.3 From 83ffe99f52b8f269b31b21524adcd13b165f7703 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 2 Oct 2015 14:56:34 -0700 Subject: openvswitch: Fix ovs_vport_get_stats() Not every device has dev->tstats set. So when OVS tries to calculate vport stats it causes kernel panic. Following patch fixes it by using standard API to get net-device stats. ---8<--- Unable to handle kernel paging request at virtual address 766b4008 Internal error: Oops: 96000005 [#1] PREEMPT SMP Modules linked in: vport_vxlan vxlan ip6_udp_tunnel udp_tunnel tun bridge stp llc openvswitch ipv6 CPU: 7 PID: 1108 Comm: ovs-vswitchd Not tainted 4.3.0-rc3+ #82 PC is at ovs_vport_get_stats+0x150/0x1f8 [openvswitch] Call trace: [] ovs_vport_get_stats+0x150/0x1f8 [openvswitch] [] ovs_vport_cmd_fill_info+0x140/0x1e0 [openvswitch] [] ovs_vport_cmd_dump+0xbc/0x138 [openvswitch] [] netlink_dump+0xb8/0x258 [] __netlink_dump_start+0x120/0x178 [] genl_family_rcv_msg+0x2d4/0x308 [] genl_rcv_msg+0x88/0xc4 [] netlink_rcv_skb+0xd4/0x100 [] genl_rcv+0x30/0x48 [] netlink_unicast+0x154/0x200 [] netlink_sendmsg+0x308/0x364 [] sock_sendmsg+0x14/0x2c [] SyS_sendto+0xbc/0xf0 Code: aa1603e1 f94037a4 aa1303e2 aa1703e0 (f9400465) Reported-by: Tomasz Sawicki Fixes: 8c876639c98 ("openvswitch: Remove vport stats.") Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/vport.c | 42 +++++++++++++----------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index dc81dc619aa2..fc5c0b9ccfe9 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -280,35 +280,19 @@ void ovs_vport_del(struct vport *vport) */ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) { - struct net_device *dev = vport->dev; - int i; - - memset(stats, 0, sizeof(*stats)); - stats->rx_errors = dev->stats.rx_errors; - stats->tx_errors = dev->stats.tx_errors; - stats->tx_dropped = dev->stats.tx_dropped; - stats->rx_dropped = dev->stats.rx_dropped; - - stats->rx_dropped += atomic_long_read(&dev->rx_dropped); - stats->tx_dropped += atomic_long_read(&dev->tx_dropped); - - for_each_possible_cpu(i) { - const struct pcpu_sw_netstats *percpu_stats; - struct pcpu_sw_netstats local_stats; - unsigned int start; - - percpu_stats = per_cpu_ptr(dev->tstats, i); - - do { - start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); - local_stats = *percpu_stats; - } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); - - stats->rx_bytes += local_stats.rx_bytes; - stats->rx_packets += local_stats.rx_packets; - stats->tx_bytes += local_stats.tx_bytes; - stats->tx_packets += local_stats.tx_packets; - } + const struct rtnl_link_stats64 *dev_stats; + struct rtnl_link_stats64 temp; + + dev_stats = dev_get_stats(vport->dev, &temp); + stats->rx_errors = dev_stats->rx_errors; + stats->tx_errors = dev_stats->tx_errors; + stats->tx_dropped = dev_stats->tx_dropped; + stats->rx_dropped = dev_stats->rx_dropped; + + stats->rx_bytes = dev_stats->rx_bytes; + stats->rx_packets = dev_stats->rx_packets; + stats->tx_bytes = dev_stats->tx_bytes; + stats->tx_packets = dev_stats->tx_packets; } /** -- cgit v1.2.3 From 6e28b000825d959cb0c0b8fea8c2f132ddc516dc Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 5 Oct 2015 08:32:51 -0600 Subject: net: Fix vti use case with oif in dst lookups for IPv6 It occurred to me yesterday that 741a11d9e4103 ("net: ipv6: Add RT6_LOOKUP_F_IFACE flag if oif is set") means that xfrm6_dst_lookup needs the FLOWI_FLAG_SKIP_NH_OIF flag set. This latest commit causes the oif to be considered in lookups which is known to break vti. This explains why 58189ca7b274 did not the IPv6 change at the time it was submitted. Fixes: 42a7b32b73d6 ("xfrm: Add oif to dst lookups") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 30caa289c5db..5cedfda4b241 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -37,6 +37,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; + fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); -- cgit v1.2.3 From 1023d2ec1e8bd63ede9ed1d93ebb797f650859b7 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 6 Oct 2015 15:39:53 +0100 Subject: net: dsa: add missing kfree on remove To prevent memory leakage on unbinding, add missing kfree calls. Includes minor cosmetic change to make patch clean. Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- net/dsa/dsa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index c59fa5d9c22c..ed9d43fd1fec 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -914,8 +914,10 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; - if (ds != NULL) + if (ds) { dsa_switch_destroy(ds); + kfree(ds); + } } } @@ -924,6 +926,7 @@ static int dsa_remove(struct platform_device *pdev) struct dsa_switch_tree *dst = platform_get_drvdata(pdev); dsa_remove_dst(dst); + kfree(dst); dsa_of_remove(&pdev->dev); return 0; -- cgit v1.2.3 From e410ddb89ee8e68103ea58938b4972da594e3d2d Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 6 Oct 2015 15:40:25 +0100 Subject: net: dsa: add missing dsa_switch mdiobus remove To prevent memory leakage on unbinding, add missing mdiobus unregister and unallocation calls. Reviewed-by: Florian Fainelli Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- net/dsa/dsa.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index ed9d43fd1fec..14fac4ed9569 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -424,6 +424,8 @@ static void dsa_switch_destroy(struct dsa_switch *ds) if (ds->hwmon_dev) hwmon_device_unregister(ds->hwmon_dev); #endif + mdiobus_unregister(ds->slave_mii_bus); + mdiobus_free(ds->slave_mii_bus); } #ifdef CONFIG_PM_SLEEP -- cgit v1.2.3 From cbc5d90b378cd255ffedeb12f5affe243230d47e Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 6 Oct 2015 15:40:32 +0100 Subject: net: dsa: complete dsa_switch_destroy When unbinding dsa, complete the dsa_switch_destroy to unregister the fixed link phy then cleanly unregister and destroy the net devices. Reviewed-by: Florian Fainelli Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- net/dsa/dsa.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 14fac4ed9569..61559232861b 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "dsa_priv.h" char dsa_driver_version[] = "0.1"; @@ -420,10 +421,46 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, static void dsa_switch_destroy(struct dsa_switch *ds) { + struct device_node *port_dn; + struct phy_device *phydev; + struct dsa_chip_data *cd = ds->pd; + int port; + #ifdef CONFIG_NET_DSA_HWMON if (ds->hwmon_dev) hwmon_device_unregister(ds->hwmon_dev); #endif + + /* Disable configuration of the CPU and DSA ports */ + for (port = 0; port < DSA_MAX_PORTS; port++) { + if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) + continue; + + port_dn = cd->port_dn[port]; + if (of_phy_is_fixed_link(port_dn)) { + phydev = of_phy_find_device(port_dn); + if (phydev) { + int addr = phydev->addr; + + phy_device_free(phydev); + of_node_put(port_dn); + fixed_phy_del(addr); + } + } + } + + /* Destroy network devices for physical switch ports. */ + for (port = 0; port < DSA_MAX_PORTS; port++) { + if (!(ds->phys_port_mask & (1 << port))) + continue; + + if (!ds->ports[port]) + continue; + + unregister_netdev(ds->ports[port]); + free_netdev(ds->ports[port]); + } + mdiobus_unregister(ds->slave_mii_bus); mdiobus_free(ds->slave_mii_bus); } -- cgit v1.2.3 From d4ac35d6ed82e6c96ed5c016ea46fad31294fa7a Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 6 Oct 2015 15:40:37 +0100 Subject: net: dsa: switch to devm_ calls and remove kfree calls Now the kfree calls exists in the the remove functions, remove them in all places except the of_probe functions and replace allocation calls with their devm_ counterparts. Reviewed-by: Florian Fainelli Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- net/dsa/dsa.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 61559232861b..d5a162cda087 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -306,7 +306,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) if (ret < 0) goto out; - ds->slave_mii_bus = mdiobus_alloc(); + ds->slave_mii_bus = devm_mdiobus_alloc(parent); if (ds->slave_mii_bus == NULL) { ret = -ENOMEM; goto out; @@ -315,7 +315,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) ret = mdiobus_register(ds->slave_mii_bus); if (ret < 0) - goto out_free; + goto out; /* @@ -368,10 +368,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) return ret; -out_free: - mdiobus_free(ds->slave_mii_bus); out: - kfree(ds); return ret; } @@ -401,7 +398,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, /* * Allocate and initialise switch state. */ - ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL); + ds = devm_kzalloc(parent, sizeof(*ds) + drv->priv_size, GFP_KERNEL); if (ds == NULL) return ERR_PTR(-ENOMEM); @@ -462,7 +459,6 @@ static void dsa_switch_destroy(struct dsa_switch *ds) } mdiobus_unregister(ds->slave_mii_bus); - mdiobus_free(ds->slave_mii_bus); } #ifdef CONFIG_PM_SLEEP @@ -922,7 +918,7 @@ static int dsa_probe(struct platform_device *pdev) goto out; } - dst = kzalloc(sizeof(*dst), GFP_KERNEL); + dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL); if (dst == NULL) { dev_put(dev); ret = -ENOMEM; @@ -953,10 +949,8 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; - if (ds) { + if (ds) dsa_switch_destroy(ds); - kfree(ds); - } } } @@ -965,7 +959,6 @@ static int dsa_remove(struct platform_device *pdev) struct dsa_switch_tree *dst = platform_get_drvdata(pdev); dsa_remove_dst(dst); - kfree(dst); dsa_of_remove(&pdev->dev); return 0; -- cgit v1.2.3 From 4d7f3e757c15051b4521a59791de87ce748c0eb2 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 6 Oct 2015 15:40:43 +0100 Subject: net: dsa: exit probe if no switch were found If no switch were found in dsa_setup_dst, return -ENODEV and exit the dsa_probe cleanly. Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- net/dsa/dsa.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index d5a162cda087..adb5325f4934 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -837,10 +837,11 @@ static inline void dsa_of_remove(struct device *dev) } #endif -static void dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, - struct device *parent, struct dsa_platform_data *pd) +static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, + struct device *parent, struct dsa_platform_data *pd) { int i; + unsigned configured = 0; dst->pd = pd; dst->master_netdev = dev; @@ -860,8 +861,16 @@ static void dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, dst->ds[i] = ds; if (ds->drv->poll_link != NULL) dst->link_poll_needed = 1; + + ++configured; } + /* + * If no switch was found, exit cleanly + */ + if (!configured) + return -EPROBE_DEFER; + /* * If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get @@ -878,6 +887,8 @@ static void dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, dst->link_poll_timer.expires = round_jiffies(jiffies + HZ); add_timer(&dst->link_poll_timer); } + + return 0; } static int dsa_probe(struct platform_device *pdev) @@ -927,7 +938,9 @@ static int dsa_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dst); - dsa_setup_dst(dst, dev, &pdev->dev, pd); + ret = dsa_setup_dst(dst, dev, &pdev->dev, pd); + if (ret) + goto out; return 0; -- cgit v1.2.3 From b8f2257069f179c7bdedc9501c1623070c4c37bb Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 6 Oct 2015 10:59:57 -0700 Subject: openvswitch: Fix skb leak in ovs_fragment() If ovs_fragment() was unable to fragment the skb due to an L2 header that exceeds the supported length, skbs would be leaked. Fix the bug. Fixes: 7f8a436eaa2c "openvswitch: Add conntrack action" Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e23a61cc3d5c..4cb93f92d6be 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -684,7 +684,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, { if (skb_network_offset(skb) > MAX_L2_LEN) { OVS_NLERR(1, "L2 header too long to fragment"); - return; + goto err; } if (ethertype == htons(ETH_P_IP)) { @@ -708,8 +708,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, struct rt6_info ovs_rt; if (!v6ops) { - kfree_skb(skb); - return; + goto err; } prepare_frag(vport, skb); @@ -728,8 +727,12 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", ovs_vport_name(vport), ntohs(ethertype), mru, vport->dev->mtu); - kfree_skb(skb); + goto err; } + + return; +err: + kfree_skb(skb); } static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, -- cgit v1.2.3 From ec0d043d05e6e3c0c2fac5de922c800c027c6386 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 6 Oct 2015 10:59:58 -0700 Subject: openvswitch: Ensure flow is valid before executing ct The ct action uses parts of the flow key, so we need to ensure that it is valid before executing that action. Fixes: 7f8a436eaa2c "openvswitch: Add conntrack action" Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 4cb93f92d6be..c6a39bf2c3b9 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1102,6 +1102,12 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; case OVS_ACTION_ATTR_CT: + if (!is_flow_key_valid(key)) { + err = ovs_flow_key_update(skb, key); + if (err) + return err; + } + err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key, nla_data(a)); -- cgit v1.2.3 From 6f225952461b5e9b5520d0dc6e2ff0af57874fbb Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 6 Oct 2015 10:59:59 -0700 Subject: openvswitch: Reject ct_state unsupported bits Previously, if userspace specified ct_state bits in the flow key which are currently undefined (and therefore unsupported), then they would be ignored. This could cause unexpected behaviour in future if userspace is extended to support additional bits but attempts to communicate with the current version of the kernel. This patch rectifies the situation by rejecting such ct_state bits. Fixes: 7f8a436eaa2c "openvswitch: Add conntrack action" Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/conntrack.h | 12 ++++++++++++ net/openvswitch/flow_netlink.c | 6 ++++++ 2 files changed, 18 insertions(+) (limited to 'net') diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 6bd603c6a031..d6eca8394254 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -34,6 +34,13 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); void ovs_ct_free_action(const struct nlattr *a); + +static inline bool ovs_ct_state_supported(u8 state) +{ + return !(state & ~(OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | + OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | + OVS_CS_F_INVALID | OVS_CS_F_TRACKED)); +} #else #include @@ -46,6 +53,11 @@ static inline bool ovs_ct_verify(struct net *net, int attr) return false; } +static inline bool ovs_ct_state_supported(u8 state) +{ + return false; +} + static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla, const struct sw_flow_key *key, struct sw_flow_actions **acts, bool log) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index a60e3b7684bc..d47b5c5c640e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -816,6 +816,12 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]); + if (!is_mask && !ovs_ct_state_supported(ct_state)) { + OVS_NLERR(log, "ct_state flags %02x unsupported", + ct_state); + return -EINVAL; + } + SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); } -- cgit v1.2.3 From fbccce5965a58d56aaed9e9acd1bec75d8a66e87 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 6 Oct 2015 11:00:00 -0700 Subject: openvswitch: Extend ct_state match field to 32 bits The ct_state field was initially added as an 8-bit field, however six of the bits are already being used and use cases are already starting to appear that may push the limits of this field. This patch extends the field to 32 bits while retaining the internal representation of 8 bits. This should cover forward compatibility of the ABI for the foreseeable future. This patch also reorders the OVS_CS_F_* bits to be sequential. Suggested-by: Jarno Rajahalme Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 8 ++++---- net/openvswitch/conntrack.c | 2 +- net/openvswitch/conntrack.h | 4 ++-- net/openvswitch/flow_netlink.c | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index a9a4a59912e9..c861a4cf5fec 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -323,7 +323,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls. * The implementation may restrict * the accepted length of the array. */ - OVS_KEY_ATTR_CT_STATE, /* u8 bitmask of OVS_CS_F_* */ + OVS_KEY_ATTR_CT_STATE, /* u32 bitmask of OVS_CS_F_* */ OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */ OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */ OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */ @@ -449,9 +449,9 @@ struct ovs_key_ct_labels { #define OVS_CS_F_ESTABLISHED 0x02 /* Part of an existing connection. */ #define OVS_CS_F_RELATED 0x04 /* Related to an established * connection. */ -#define OVS_CS_F_INVALID 0x20 /* Could not track connection. */ -#define OVS_CS_F_REPLY_DIR 0x40 /* Flow is in the reply direction. */ -#define OVS_CS_F_TRACKED 0x80 /* Conntrack has occurred. */ +#define OVS_CS_F_REPLY_DIR 0x08 /* Flow is in the reply direction. */ +#define OVS_CS_F_INVALID 0x10 /* Could not track connection. */ +#define OVS_CS_F_TRACKED 0x20 /* Conntrack has occurred. */ /** * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 7d80acfb80d0..466d5576fe3f 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -167,7 +167,7 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) { - if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) + if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index d6eca8394254..da8714942c95 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -35,7 +35,7 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); void ovs_ct_free_action(const struct nlattr *a); -static inline bool ovs_ct_state_supported(u8 state) +static inline bool ovs_ct_state_supported(u32 state) { return !(state & ~(OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | @@ -53,7 +53,7 @@ static inline bool ovs_ct_verify(struct net *net, int attr) return false; } -static inline bool ovs_ct_state_supported(u8 state) +static inline bool ovs_ct_state_supported(u32 state) { return false; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d47b5c5c640e..171a691f1c32 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -291,7 +291,7 @@ size_t ovs_key_attr_size(void) + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ - + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */ + + nla_total_size(4) /* OVS_KEY_ATTR_CT_STATE */ + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ @@ -349,7 +349,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, .next = ovs_tunnel_key_lens, }, [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, - [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) }, + [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, @@ -814,10 +814,10 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) && ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { - u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]); + u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]); if (!is_mask && !ovs_ct_state_supported(ct_state)) { - OVS_NLERR(log, "ct_state flags %02x unsupported", + OVS_NLERR(log, "ct_state flags %08x unsupported", ct_state); return -EINVAL; } -- cgit v1.2.3 From ab38a7b5a4493a3658d891a8e91f9ffcb3d2defb Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 6 Oct 2015 11:00:01 -0700 Subject: openvswitch: Change CT_ATTR_FLAGS to CT_ATTR_COMMIT Previously, the CT_ATTR_FLAGS attribute, when nested under the OVS_ACTION_ATTR_CT, encoded a 32-bit bitmask of flags that modify the semantics of the ct action. It's more extensible to just represent each flag as a nested attribute, and this requires no additional error checking to reject flags that aren't currently supported. Suggested-by: Ben Pfaff Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 14 ++++---------- net/openvswitch/conntrack.c | 13 ++++++------- 2 files changed, 10 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index c861a4cf5fec..036f73bc54cd 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -618,7 +618,9 @@ struct ovs_action_hash { /** * enum ovs_ct_attr - Attributes for %OVS_ACTION_ATTR_CT action. - * @OVS_CT_ATTR_FLAGS: u32 connection tracking flags. + * @OVS_CT_ATTR_COMMIT: If present, commits the connection to the conntrack + * table. This allows future packets for the same connection to be identified + * as 'established' or 'related'. * @OVS_CT_ATTR_ZONE: u16 connection tracking zone. * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the * mask, the corresponding bit in the value is copied to the connection @@ -630,7 +632,7 @@ struct ovs_action_hash { */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, - OVS_CT_ATTR_FLAGS, /* u32 bitmask of OVS_CT_F_*. */ + OVS_CT_ATTR_COMMIT, /* No argument, commits connection. */ OVS_CT_ATTR_ZONE, /* u16 zone id. */ OVS_CT_ATTR_MARK, /* mark to associate with this connection. */ OVS_CT_ATTR_LABELS, /* labels to associate with this connection. */ @@ -641,14 +643,6 @@ enum ovs_ct_attr { #define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1) -/* - * OVS_CT_ATTR_FLAGS flags - bitmask of %OVS_CT_F_* - * @OVS_CT_F_COMMIT: Commits the flow to the conntrack table. This allows - * future packets for the same connection to be identified as 'established' - * or 'related'. - */ -#define OVS_CT_F_COMMIT 0x01 - /** * enum ovs_action_attr - Action types. * diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 466d5576fe3f..80bf702715bb 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -47,7 +47,7 @@ struct ovs_conntrack_info { struct nf_conntrack_helper *helper; struct nf_conntrack_zone zone; struct nf_conn *ct; - u32 flags; + u8 commit : 1; u16 family; struct md_mark mark; struct md_labels labels; @@ -493,7 +493,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, return err; } - if (info->flags & OVS_CT_F_COMMIT) + if (info->commit) err = ovs_ct_commit(net, key, info, skb); else err = ovs_ct_lookup(net, key, info, skb); @@ -539,8 +539,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, } static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { - [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32), - .maxlen = sizeof(u32) }, + [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), .maxlen = sizeof(u16) }, [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), @@ -576,8 +575,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, } switch (type) { - case OVS_CT_ATTR_FLAGS: - info->flags = nla_get_u32(a); + case OVS_CT_ATTR_COMMIT: + info->commit = true; break; #ifdef CONFIG_NF_CONNTRACK_ZONES case OVS_CT_ATTR_ZONE: @@ -701,7 +700,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, if (!start) return -EMSGSIZE; - if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags)) + if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) -- cgit v1.2.3 From d40496a56430eac0d330378816954619899fe303 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 6 Oct 2015 17:23:47 -0700 Subject: act_mirred: clear sender cpu before sending to tx Similar to commit c29390c6dfee ("xps: must clear sender_cpu before forwarding") the skb->sender_cpu needs to be cleared when moving from Rx Tx, otherwise kernel could crash. Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices") Cc: Eric Dumazet Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 2efaf4ee6040..32fcdecdb9e2 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -179,6 +179,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; + skb_sender_cpu_clear(skb2); err = dev_queue_xmit(skb2); if (err) { -- cgit v1.2.3 From 6bf0577374cfb6c2301dbf4934a4f23ad3d72763 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 6 Oct 2015 20:46:07 -0700 Subject: bpf: clear sender_cpu before xmit Similar to commit c29390c6dfee ("xps: must clear sender_cpu before forwarding") the skb->sender_cpu needs to be cleared before xmit. Fixes: 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 87b78ef0c3d4..bb18c3680001 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1415,6 +1415,7 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) return dev_forward_skb(dev, skb2); skb2->dev = dev; + skb_sender_cpu_clear(skb2); return dev_queue_xmit(skb2); } -- cgit v1.2.3 From 6ac644a8ae2dabf884a1b01e82e32d96ffe6eee5 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 7 Oct 2015 16:47:32 -0700 Subject: sch_hhf: fix return value of hhf_drop() Similar to commit c0afd9ce4d6a ("fq_codel: fix return value of fq_codel_drop()") ->drop() is supposed to return the number of bytes it dropped, but hhf_drop () returns the id of the bucket where it drops a packet from. Cc: Jamal Hadi Salim Cc: Terry Lam Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_hhf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 9d15cb6b8cb1..86b04e31e60b 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -368,6 +368,15 @@ static unsigned int hhf_drop(struct Qdisc *sch) return bucket - q->buckets; } +static unsigned int hhf_qdisc_drop(struct Qdisc *sch) +{ + unsigned int prev_backlog; + + prev_backlog = sch->qstats.backlog; + hhf_drop(sch); + return prev_backlog - sch->qstats.backlog; +} + static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct hhf_sched_data *q = qdisc_priv(sch); @@ -696,7 +705,7 @@ static struct Qdisc_ops hhf_qdisc_ops __read_mostly = { .enqueue = hhf_enqueue, .dequeue = hhf_dequeue, .peek = qdisc_peek_dequeued, - .drop = hhf_drop, + .drop = hhf_qdisc_drop, .init = hhf_init, .reset = hhf_reset, .destroy = hhf_destroy, -- cgit v1.2.3 From d9e4ce65b27694b0b70ff4d1cbbb740195fd916b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 8 Oct 2015 18:19:39 +0200 Subject: ipv6: gre: setup default multicast routes over PtP links GRE point-to-point interfaces should also support ipv6 multicast. Setting up default multicast routes on interface creation was forgotten. Add it. Bugzilla: Cc: Julien Muchembled Cc: Eric Dumazet Cc: Nicolas Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 900113376d4e..36b85bd05ac8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3119,6 +3119,8 @@ static void addrconf_gre_config(struct net_device *dev) } addrconf_addr_gen(idev, true); + if (dev->flags & IFF_POINTOPOINT) + addrconf_add_mroute(dev); } #endif -- cgit v1.2.3 From 9ef2e965e55481a52d6d91ce61977a27836268d3 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 8 Oct 2015 18:19:53 +0200 Subject: ipv6: drop frames with attached skb->sk in forwarding This is a clone of commit 2ab957492d13b ("ip_forward: Drop frames with attached skb->sk") for ipv6. This commit has exactly the same reasons as the above mentioned commit, namely to prevent panics during netfilter reload or a misconfigured stack. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 92b1aa38f121..61d403ee1031 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -376,6 +376,9 @@ int ip6_forward(struct sk_buff *skb) if (skb->pkt_type != PACKET_HOST) goto drop; + if (unlikely(skb->sk)) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; -- cgit v1.2.3 From 4633dfc32c0019bed2996de9bbdbe7f3b518a44e Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Thu, 8 Oct 2015 19:20:14 +0530 Subject: mac80211: Fix hwflags debugfs file format Commit 30686bf7f5b3 ("mac80211: convert HW flags to unsigned long bitmap") accidentally removed the newline delimiter from the hwflags debugfs file. Fix this by adding back the newline between the HW flags. Cc: stable@vger.kernel.org [4.2] Signed-off-by: Mohammed Shafi Shajakhan [fix commit log] Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index ced6bf3be8d6..1560c8482bcb 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -149,7 +149,7 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) { if (test_bit(i, local->hw.flags)) - pos += scnprintf(pos, end - pos, "%s", + pos += scnprintf(pos, end - pos, "%s\n", hw_flag_names[i]); } -- cgit v1.2.3 From 87aaf2caed8496404d3809edc30d38d4a4a5d273 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 12 Oct 2015 14:31:01 +0200 Subject: switchdev: check if the vlan id is in the proper vlan range VLANs 0 and 4095 are reserved and shouldn't be used, add checks to switchdev similar to the bridge. Also make sure ids above 4095 cannot be passed either. Fixes: 47f8328bb1a4 ("switchdev: add new switchdev bridge setlink") Signed-off-by: Nikolay Aleksandrov Acked-by: Scott Feldman Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index fda38f830a10..77f5d17e2612 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -634,6 +635,8 @@ static int switchdev_port_br_afspec(struct net_device *dev, if (nla_len(attr) != sizeof(struct bridge_vlan_info)) return -EINVAL; vinfo = nla_data(attr); + if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) + return -EINVAL; vlan->flags = vinfo->flags; if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { if (vlan->vid_begin) -- cgit v1.2.3 From e332bc67cf5e5e5b71a1aec9750d0791aac65183 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 12 Oct 2015 11:02:08 -0500 Subject: ipv6: Don't call with rt6_uncached_list_flush_dev As originally written rt6_uncached_list_flush_dev makes no sense when called with dev == NULL as it attempts to flush all uncached routes regardless of network namespace when dev == NULL. Which is simply incorrect behavior. Furthermore at the point rt6_ifdown is called with dev == NULL no more network devices exist in the network namespace so even if the code in rt6_uncached_list_flush_dev were to attempt something sensible it would be meaningless. Therefore remove support in rt6_uncached_list_flush_dev for handling network devices where dev == NULL, and only call rt6_uncached_list_flush_dev when rt6_ifdown is called with a network device. Fixes: 8d0b94afdca8 ("ipv6: Keep track of DST_NOCACHE routes in case of iface down/unregister") Signed-off-by: "Eric W. Biederman" Reviewed-by: Martin KaFai Lau Tested-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/route.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cb32ce250db0..ed04e29a6aa3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -142,6 +142,9 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) struct net_device *loopback_dev = net->loopback_dev; int cpu; + if (dev == loopback_dev) + return; + for_each_possible_cpu(cpu) { struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); struct rt6_info *rt; @@ -151,14 +154,12 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) struct inet6_dev *rt_idev = rt->rt6i_idev; struct net_device *rt_dev = rt->dst.dev; - if (rt_idev && (rt_idev->dev == dev || !dev) && - rt_idev->dev != loopback_dev) { + if (rt_idev->dev == dev) { rt->rt6i_idev = in6_dev_get(loopback_dev); in6_dev_put(rt_idev); } - if (rt_dev && (rt_dev == dev || !dev) && - rt_dev != loopback_dev) { + if (rt_dev == dev) { rt->dst.dev = loopback_dev; dev_hold(rt->dst.dev); dev_put(rt_dev); @@ -2622,7 +2623,8 @@ void rt6_ifdown(struct net *net, struct net_device *dev) fib6_clean_all(net, fib6_ifdown, &adn); icmp6_clean_all(fib6_ifdown, &adn); - rt6_uncached_list_flush_dev(net, dev); + if (dev) + rt6_uncached_list_flush_dev(net, dev); } struct rt6_mtu_change_arg { -- cgit v1.2.3 From 0f8b8e28fb3241f9fd82ce13bac2b40c35e987e0 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 13 Oct 2015 12:41:51 -0400 Subject: tipc: eliminate risk of stalled link synchronization In commit 6e498158a827 ("tipc: move link synch and failover to link aggregation level") we introduced a new mechanism for performing link failover and synchronization. We have now detected a bug in this mechanism. During link synchronization we use the arrival of any packet on the tunnel link to trig a check for whether it has reached the synchronization point or not. This has turned out to be too permissive, since it may cause an arriving non-last SYNCH packet to end the synch state, just to see the next SYNCH packet initiate a new synch state with a new, higher synch point. This is not fatal, but should be avoided, because it may significantly extend the synchronization period, while at the same time we are not allowed to send NACKs if packets are lost. In the worst case, a low-traffic user may see its traffic stall until a LINK_PROTOCOL state message trigs the link to leave synchronization state. At the same time, LINK_PROTOCOL packets which happen to have a (non- valid) sequence number lower than the tunnel link's rcv_nxt value will be consistently dropped, and will never be able to resolve the situation described above. We fix this by exempting LINK_PROTOCOL packets from the sequence number check, as they should be. We also reduce (but don't completely eliminate) the risk of entering multiple synchronization states by only allowing the (logically) first SYNCH packet to initiate a synchronization state. This works independently of actual packet arrival order. Fixes: commit 6e498158a827 ("tipc: move link synch and failover to link aggregation level") Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/node.c b/net/tipc/node.c index 703875fd6cde..2c32a83037a3 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1116,7 +1116,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, } /* Ignore duplicate packets */ - if (less(oseqno, rcv_nxt)) + if ((usr != LINK_PROTOCOL) && less(oseqno, rcv_nxt)) return true; /* Initiate or update failover mode if applicable */ @@ -1146,8 +1146,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, if (!pl || !tipc_link_is_up(pl)) return true; - /* Initiate or update synch mode if applicable */ - if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) { + /* Initiate synch mode if applicable */ + if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) { syncpt = iseqno + exp_pkts - 1; if (!tipc_link_is_up(l)) { tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); -- cgit v1.2.3 From 077cb37fcf6f00a45f375161200b5ee0cd4e937b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 14 Oct 2015 01:09:40 -0700 Subject: ethtool: Use kcalloc instead of kmalloc for ethtool_get_strings It seems that kernel memory can leak into userspace by a kmalloc, ethtool_get_strings, then copy_to_user sequence. Avoid this by using kcalloc to zero fill the copied buffer. Signed-off-by: Joe Perches Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b495ab1797fa..29edf74846fc 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1284,7 +1284,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) gstrings.len = ret; - data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); + data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER); if (!data) return -ENOMEM; -- cgit v1.2.3 From dde4b5ae65de659b9ec64bafdde0430459fcb495 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 14 Oct 2015 09:23:18 -0400 Subject: tipc: move fragment importance field to new header position In commit e3eea1eb47a ("tipc: clean up handling of message priorities") we introduced a field in the packet header for keeping track of the priority of fragments, since this value is not present in the specified protocol header. Since the value so far only is used at the transmitting end of the link, we have not yet officially defined it as part of the protocol. Unfortunately, the field we use for keeping this value, bits 13-15 in in word 5, has turned out to be a poor choice; it is already used by the broadcast protocol for carrying the 'network id' field of the sending node. Since packet fragments also need to be transported across the broadcast protocol, the risk of conflict is obvious, and we see this happen when we use network identities larger than 2^13-1. This has escaped our testing because we have so far only been using small network id values. We now move this field to bits 0-2 in word 9, a field that is guaranteed to be unused by all involved protocols. Fixes: e3eea1eb47a ("tipc: clean up handling of message priorities") Signed-off-by: Jon Maloy Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/msg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/msg.h b/net/tipc/msg.h index a82c5848d4bc..5351a3f97e8e 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -357,7 +357,7 @@ static inline u32 msg_importance(struct tipc_msg *m) if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m))) return usr; if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)) - return msg_bits(m, 5, 13, 0x7); + return msg_bits(m, 9, 0, 0x7); return TIPC_SYSTEM_IMPORTANCE; } @@ -366,7 +366,7 @@ static inline void msg_set_importance(struct tipc_msg *m, u32 i) int usr = msg_user(m); if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))) - msg_set_bits(m, 5, 13, 0x7, i); + msg_set_bits(m, 9, 0, 0x7, i); else if (i < TIPC_SYSTEM_IMPORTANCE) msg_set_user(m, i); else -- cgit v1.2.3 From 168b8a25c0ac30f427bfe6ad547779c4c363d042 Mon Sep 17 00:00:00 2001 From: Jakub Pawlowski Date: Fri, 16 Oct 2015 10:07:49 +0300 Subject: Bluetooth: Fix double scan updates When disable/enable scan command is issued twice, some controllers will return an error for the second request, i.e. requests with this command will fail on some controllers, and succeed on others. This patch makes sure that unnecessary scan disable/enable commands are not issued. When adding device to the auto connect whitelist when there is pending connect attempt, there is no need to update scan. hci_connect_le_scan_cleanup is conditionally executing hci_conn_params_del, that is calling hci_update_background_scan. Make the other case also update scan, and remove reduntand call from hci_connect_le_scan_remove. When stopping interleaved discovery the state should be set to stopped only when both LE scanning and discovery has stopped. Signed-off-by: Jakub Pawlowski Acked-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 7 ++++--- net/bluetooth/hci_event.c | 7 ++++++- net/bluetooth/mgmt.c | 6 +++++- 3 files changed, 15 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b4548c739a64..2ebcaaa6b855 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -91,10 +91,12 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) * autoconnect action, remove them completely. If they are, just unmark * them as waiting for connection, by clearing explicit_connect field. */ - if (params->auto_connect == HCI_AUTO_CONN_EXPLICIT) + if (params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { hci_conn_params_del(conn->hdev, bdaddr, bdaddr_type); - else + } else { params->explicit_connect = false; + hci_update_background_scan(conn->hdev); + } } /* This function requires the caller holds hdev->lock */ @@ -103,7 +105,6 @@ static void hci_connect_le_scan_remove(struct hci_conn *conn) hci_connect_le_scan_cleanup(conn); hci_conn_hash_del(conn->hdev, conn); - hci_update_background_scan(conn->hdev); } static void hci_acl_create_connection(struct hci_conn *conn) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 186041866315..509e41575633 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -55,7 +55,12 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) wake_up_bit(&hdev->flags, HCI_INQUIRY); hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + /* Set discovery state to stopped if we're not doing LE active + * scanning. + */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) || + hdev->le_scan_type != LE_SCAN_ACTIVE) + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); hci_dev_unlock(hdev); hci_conn_check_pending(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ccaf5a436d8f..9a9bbc990d4f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6124,7 +6124,11 @@ static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, case HCI_AUTO_CONN_ALWAYS: if (!is_connected(hdev, addr, addr_type)) { list_add(¶ms->action, &hdev->pend_le_conns); - __hci_update_background_scan(req); + /* If we are in scan phase of connecting, we were + * already added to pend_le_conns and scanning. + */ + if (params->auto_connect != HCI_AUTO_CONN_EXPLICIT) + __hci_update_background_scan(req); } break; } -- cgit v1.2.3 From b958f9a3e87766a88036616389eaaf3ad3bd5fc8 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 16 Oct 2015 10:07:50 +0300 Subject: Bluetooth: Fix reference counting for LE-scan based connections The code should never directly call hci_conn_hash_del since many cleanup & reference counting updates would be lost. Normally hci_conn_del is the right thing to do, but in the case of a connection doing LE scanning this could cause a deadlock due to doing a cancel_delayed_work_sync() on the same work callback that we were called from. Connections in the LE scanning state actually need very little cleanup - just a small subset of hci_conn_del. To solve the issue, refactor out these essential pieces into a new hci_conn_cleanup() function and call that from the two necessary places. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 53 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2ebcaaa6b855..4c240c1cb2cb 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -99,12 +99,41 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) } } +static void hci_conn_cleanup(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + + if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags)) + hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type); + + hci_chan_list_flush(conn); + + hci_conn_hash_del(hdev, conn); + + if (hdev->notify) + hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); + + hci_conn_del_sysfs(conn); + + debugfs_remove_recursive(conn->debugfs); + + hci_dev_put(hdev); + + hci_conn_put(conn); +} + /* This function requires the caller holds hdev->lock */ static void hci_connect_le_scan_remove(struct hci_conn *conn) { hci_connect_le_scan_cleanup(conn); - hci_conn_hash_del(conn->hdev, conn); + /* We can't call hci_conn_del here since that would deadlock + * with trying to call cancel_delayed_work_sync(&conn->disc_work). + * Instead, call just hci_conn_cleanup() which contains the bare + * minimum cleanup operations needed for a connection in this + * state. + */ + hci_conn_cleanup(conn); } static void hci_acl_create_connection(struct hci_conn *conn) @@ -582,27 +611,17 @@ int hci_conn_del(struct hci_conn *conn) } } - hci_chan_list_flush(conn); - if (conn->amp_mgr) amp_mgr_put(conn->amp_mgr); - hci_conn_hash_del(hdev, conn); - if (hdev->notify) - hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); - skb_queue_purge(&conn->data_q); - hci_conn_del_sysfs(conn); - - debugfs_remove_recursive(conn->debugfs); - - if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags)) - hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type); - - hci_dev_put(hdev); - - hci_conn_put(conn); + /* Remove the connection from the list and cleanup its remaining + * state. This is a separate function since for some cases like + * BT_CONNECT_SCAN we *only* want the cleanup part without the + * rest of hci_conn_del. + */ + hci_conn_cleanup(conn); return 0; } -- cgit v1.2.3 From 49c509220db990ad003060db2267b9bbb597cd94 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 16 Oct 2015 10:07:51 +0300 Subject: Bluetooth: Fix LE reconnection logic We can't use hci_explicit_connect_lookup() since that would only cover explicit connections, leaving normal reconnections completely untouched. Not using it in turn means leaving out entries in pend_le_reports. To fix this and simplify the logic move conn params from the reports list to the pend_le_conns list for the duration of an explicit connect. Once the connect is complete move the params back to the pend_le_reports list. This also means that the explicit connect lookup function only needs to look into the pend_le_conns list. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 4 ++-- net/bluetooth/hci_core.c | 7 ------- net/bluetooth/hci_event.c | 4 ++-- net/bluetooth/mgmt.c | 5 ++++- 4 files changed, 8 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4c240c1cb2cb..d5c06eeab4a3 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1000,8 +1000,8 @@ static int hci_explicit_conn_params_set(struct hci_request *req, /* If we created new params, or existing params were marked as disabled, * mark them to be used just once to connect. */ - if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { - params->auto_connect = HCI_AUTO_CONN_EXPLICIT; + if (params->auto_connect == HCI_AUTO_CONN_DISABLED || + params->auto_connect == HCI_AUTO_CONN_REPORT) { list_del_init(¶ms->action); list_add(¶ms->action, &hdev->pend_le_conns); } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index adcbc74c2432..e837539452fb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2861,13 +2861,6 @@ struct hci_conn_params *hci_explicit_connect_lookup(struct hci_dev *hdev, return param; } - list_for_each_entry(param, &hdev->pend_le_reports, action) { - if (bacmp(¶m->addr, addr) == 0 && - param->addr_type == addr_type && - param->explicit_connect) - return param; - } - return NULL; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 509e41575633..bc31099d3b5b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4653,8 +4653,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, /* If we're not connectable only connect devices that we have in * our pend_le_conns list. */ - params = hci_explicit_connect_lookup(hdev, addr, addr_type); - + params = hci_pend_le_action_lookup(&hdev->pend_le_conns, addr, + addr_type); if (!params) return NULL; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9a9bbc990d4f..4dbfe01546b3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6117,7 +6117,10 @@ static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, __hci_update_background_scan(req); break; case HCI_AUTO_CONN_REPORT: - list_add(¶ms->action, &hdev->pend_le_reports); + if (params->explicit_connect) + list_add(¶ms->action, &hdev->pend_le_conns); + else + list_add(¶ms->action, &hdev->pend_le_reports); __hci_update_background_scan(req); break; case HCI_AUTO_CONN_DIRECT: -- cgit v1.2.3 From 679d2b6f9d742b3f091868bd9a0634647ce7e782 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 16 Oct 2015 10:07:52 +0300 Subject: Bluetooth: Fix remove_device behavior for explicit connects Devices undergoing an explicit connect should not have their conn_params struct removed by the mgmt Remove Device command. This patch fixes the necessary checks in the command handler to correct the behavior. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4dbfe01546b3..0ed94e6f4de9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6386,7 +6386,8 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (params->auto_connect == HCI_AUTO_CONN_DISABLED) { + if (params->auto_connect == HCI_AUTO_CONN_DISABLED || + params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { err = cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); mgmt_pending_remove(cmd); @@ -6422,6 +6423,10 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, if (p->auto_connect == HCI_AUTO_CONN_DISABLED) continue; device_removed(sk, hdev, &p->addr, p->addr_type); + if (p->explicit_connect) { + p->auto_connect = HCI_AUTO_CONN_EXPLICIT; + continue; + } list_del(&p->action); list_del(&p->list); kfree(p); -- cgit v1.2.3 From 9ad3e6ffe189a988389d88ce33101668cb2d54c6 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 16 Oct 2015 10:07:53 +0300 Subject: Bluetooth: Fix conn_params list update in hci_connect_le_scan_cleanup After clearing the params->explicit_connect variable the parameters may need to be either added back to the right list or potentially left absent from both the le_reports and the le_conns lists. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index d5c06eeab4a3..fe99025fb649 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -91,12 +91,27 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) * autoconnect action, remove them completely. If they are, just unmark * them as waiting for connection, by clearing explicit_connect field. */ - if (params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { + params->explicit_connect = false; + + list_del_init(¶ms->action); + + switch (params->auto_connect) { + case HCI_AUTO_CONN_EXPLICIT: hci_conn_params_del(conn->hdev, bdaddr, bdaddr_type); - } else { - params->explicit_connect = false; - hci_update_background_scan(conn->hdev); + /* return instead of break to avoid duplicate scan update */ + return; + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + list_add(¶ms->action, &conn->hdev->pend_le_conns); + break; + case HCI_AUTO_CONN_REPORT: + list_add(¶ms->action, &conn->hdev->pend_le_reports); + break; + default: + break; } + + hci_update_background_scan(conn->hdev); } static void hci_conn_cleanup(struct hci_conn *conn) -- cgit v1.2.3 From 5157b8a503fa834e8569c7fed06981e3d3d53db0 Mon Sep 17 00:00:00 2001 From: Jakub Pawlowski Date: Fri, 16 Oct 2015 10:07:54 +0300 Subject: Bluetooth: Fix initializing conn_params in scan phase This patch makes sure that conn_params that were created just for explicit_connect, will get properly deleted during cleanup. Signed-off-by: Jakub Pawlowski Acked-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 22 +++++++++++++++------- net/bluetooth/mgmt.c | 6 +++++- 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index fe99025fb649..2dda439c8cb8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1008,15 +1008,23 @@ static int hci_explicit_conn_params_set(struct hci_request *req, if (is_connected(hdev, addr, addr_type)) return -EISCONN; - params = hci_conn_params_add(hdev, addr, addr_type); - if (!params) - return -EIO; + params = hci_conn_params_lookup(hdev, addr, addr_type); + if (!params) { + params = hci_conn_params_add(hdev, addr, addr_type); + if (!params) + return -ENOMEM; + + /* If we created new params, mark them to be deleted in + * hci_connect_le_scan_cleanup. It's different case than + * existing disabled params, those will stay after cleanup. + */ + params->auto_connect = HCI_AUTO_CONN_EXPLICIT; + } - /* If we created new params, or existing params were marked as disabled, - * mark them to be used just once to connect. - */ + /* We're trying to connect, so make sure params are at pend_le_conns */ if (params->auto_connect == HCI_AUTO_CONN_DISABLED || - params->auto_connect == HCI_AUTO_CONN_REPORT) { + params->auto_connect == HCI_AUTO_CONN_REPORT || + params->auto_connect == HCI_AUTO_CONN_EXPLICIT) { list_del_init(¶ms->action); list_add(¶ms->action, &hdev->pend_le_conns); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0ed94e6f4de9..c4fe2fee753f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3545,6 +3545,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, auth_type); } else { u8 addr_type; + struct hci_conn_params *p; /* Convert from L2CAP channel address type to HCI address type */ @@ -3562,7 +3563,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, * If connection parameters already exist, then they * will be kept and this function does nothing. */ - hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type); + p = hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type); + + if (p->auto_connect == HCI_AUTO_CONN_EXPLICIT) + p->auto_connect = HCI_AUTO_CONN_DISABLED; conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr, addr_type, sec_level, -- cgit v1.2.3 From ebfa45f0d952e5e7bb30a7f9daaad681de138728 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 15 Oct 2015 16:39:57 -0700 Subject: ipv6: Move common init code for rt6_info to a new function rt6_info_init() Introduce rt6_info_init() to do the common init work for 'struct rt6_info' (after calling dst_alloc). It is a prep work to fix the rt6_info init logic in the ip6_blackhole_route(). Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Julian Anastasov Cc: Phil Sutter Cc: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/route.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ed04e29a6aa3..4198017a5aa7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -319,6 +319,15 @@ static const struct rt6_info ip6_blk_hole_entry_template = { #endif +static void rt6_info_init(struct rt6_info *rt) +{ + struct dst_entry *dst = &rt->dst; + + memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); + INIT_LIST_HEAD(&rt->rt6i_siblings); + INIT_LIST_HEAD(&rt->rt6i_uncached); +} + /* allocate dst with ip6_dst_ops */ static struct rt6_info *__ip6_dst_alloc(struct net *net, struct net_device *dev, @@ -327,13 +336,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0, DST_OBSOLETE_FORCE_CHK, flags); - if (rt) { - struct dst_entry *dst = &rt->dst; + if (rt) + rt6_info_init(rt); - memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); - INIT_LIST_HEAD(&rt->rt6i_siblings); - INIT_LIST_HEAD(&rt->rt6i_uncached); - } return rt; } -- cgit v1.2.3 From 0a1f59620068fb82a2e2aded202e62f4bb856d52 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 15 Oct 2015 16:39:58 -0700 Subject: ipv6: Initialize rt6_info properly in ip6_blackhole_route() ip6_blackhole_route() does not initialize the newly allocated rt6_info properly. This patch: 1. Call rt6_info_init() to initialize rt6i_siblings and rt6i_uncached 2. The current rt->dst._metrics init code is incorrect: - 'rt->dst._metrics = ort->dst._metris' is not always safe - Not sure what dst_copy_metrics() is trying to do here considering ip6_rt_blackhole_cow_metrics() always returns NULL Fix: - Always do dst_copy_metrics() - Replace ip6_rt_blackhole_cow_metrics() with dst_cow_metrics_generic() 3. Mask out the RTF_PCPU bit from the newly allocated blackhole route. This bug triggers an oops (reported by Phil Sutter) in rt6_get_cookie(). It is because RTF_PCPU is set while rt->dst.from is NULL. Fixes: d52d3997f843 ("ipv6: Create percpu rt6_info") Signed-off-by: Martin KaFai Lau Reported-by: Phil Sutter Tested-by: Phil Sutter Cc: Hannes Frederic Sowa Cc: Julian Anastasov Cc: Phil Sutter Cc: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/route.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4198017a5aa7..968f31c01f89 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -248,12 +248,6 @@ static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, { } -static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, - unsigned long old) -{ - return NULL; -} - static struct dst_ops ip6_dst_blackhole_ops = { .family = AF_INET6, .destroy = ip6_dst_destroy, @@ -262,7 +256,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .default_advmss = ip6_default_advmss, .update_pmtu = ip6_rt_blackhole_update_pmtu, .redirect = ip6_rt_blackhole_redirect, - .cow_metrics = ip6_rt_blackhole_cow_metrics, + .cow_metrics = dst_cow_metrics_generic, .neigh_lookup = ip6_neigh_lookup, }; @@ -1219,24 +1213,20 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0); if (rt) { - new = &rt->dst; - - memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); + rt6_info_init(rt); + new = &rt->dst; new->__use = 1; new->input = dst_discard; new->output = dst_discard_sk; - if (dst_metrics_read_only(&ort->dst)) - new->_metrics = ort->dst._metrics; - else - dst_copy_metrics(new, &ort->dst); + dst_copy_metrics(new, &ort->dst); rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); rt->rt6i_gateway = ort->rt6i_gateway; - rt->rt6i_flags = ort->rt6i_flags; + rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); -- cgit v1.2.3 From db65a3aaf29ecce2e34271d52e8d2336b97bd9fe Mon Sep 17 00:00:00 2001 From: "Arad, Ronen" Date: Thu, 15 Oct 2015 01:55:17 -0700 Subject: netlink: Trim skb to alloc size to avoid MSG_TRUNC netlink_dump() allocates skb based on the calculated min_dump_alloc or a per socket max_recvmsg_len. min_alloc_size is maximum space required for any single netdev attributes as calculated by rtnl_calcit(). max_recvmsg_len tracks the user provided buffer to netlink_recvmsg. It is capped at 16KiB. The intention is to avoid small allocations and to minimize the number of calls required to obtain dump information for all net devices. netlink_dump packs as many small messages as could fit within an skb that was sized for the largest single netdev information. The actual space available within an skb is larger than what is requested. It could be much larger and up to near 2x with align to next power of 2 approach. Allowing netlink_dump to use all the space available within the allocated skb increases the buffer size a user has to provide to avoid truncaion (i.e. MSG_TRUNG flag set). It was observed that with many VLANs configured on at least one netdev, a larger buffer of near 64KiB was necessary to avoid "Message truncated" error in "ip link" or "bridge [-c[ompressvlans]] vlan show" when min_alloc_size was only little over 32KiB. This patch trims skb to allocated size in order to allow the user to avoid truncation with more reasonable buffer size. Signed-off-by: Ronen Arad Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8f060d7f9a0e..0a49a8c7c564 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2785,6 +2785,7 @@ static int netlink_dump(struct sock *sk) struct sk_buff *skb = NULL; struct nlmsghdr *nlh; int len, err = -ENOBUFS; + int alloc_min_size; int alloc_size; mutex_lock(nlk->cb_mutex); @@ -2793,9 +2794,6 @@ static int netlink_dump(struct sock *sk) goto errout_skb; } - cb = &nlk->cb; - alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - if (!netlink_rx_is_mmaped(sk) && atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) goto errout_skb; @@ -2805,23 +2803,35 @@ static int netlink_dump(struct sock *sk) * to reduce number of system calls on dump operations, if user * ever provided a big enough buffer. */ - if (alloc_size < nlk->max_recvmsg_len) { - skb = netlink_alloc_skb(sk, - nlk->max_recvmsg_len, - nlk->portid, + cb = &nlk->cb; + alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); + + if (alloc_min_size < nlk->max_recvmsg_len) { + alloc_size = nlk->max_recvmsg_len; + skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); - /* available room should be exact amount to avoid MSG_TRUNC */ - if (skb) - skb_reserve(skb, skb_tailroom(skb) - - nlk->max_recvmsg_len); } - if (!skb) + if (!skb) { + alloc_size = alloc_min_size; skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL); + } if (!skb) goto errout_skb; + + /* Trim skb to allocated size. User is expected to provide buffer as + * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at + * netlink_recvmsg())). dump will pack as many smaller messages as + * could fit within the allocated skb. skb is typically allocated + * with larger space than required (could be as much as near 2x the + * requested size with align to next power of 2 approach). Allowing + * dump to use the excess space makes it difficult for a user to have a + * reasonable static buffer based on the expected largest dump of a + * single netdev. The outcome is MSG_TRUNC error. + */ + skb_reserve(skb, skb_tailroom(skb) - alloc_size); netlink_skb_set_owner_r(skb, sk); len = cb->dump(skb, cb); -- cgit v1.2.3 From 740dbc289155fdeed32438396370e70b684cd45e Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 16 Oct 2015 11:08:18 -0700 Subject: openvswitch: Scrub skb between namespaces If OVS receives a packet from another namespace, then the packet should be scrubbed. However, people have already begun to rely on the behaviour that skb->mark is preserved across namespaces, so retain this one field. This is mainly to address information leakage between namespaces when using OVS internal ports, but by placing it in ovs_vport_receive() it is more generally applicable, meaning it should not be overlooked if other port types are allowed to be moved into namespaces in future. Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/openvswitch/vport.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index fc5c0b9ccfe9..12a36ac21eda 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -444,6 +444,15 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->mru = 0; + if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { + u32 mark; + + mark = skb->mark; + skb_scrub_packet(skb, true); + skb->mark = mark; + tun_info = NULL; + } + /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_key_extract(tun_info, skb, &key); if (unlikely(error)) { -- cgit v1.2.3