summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c52
-rw-r--r--net/atm/common.c1
-rw-r--r--net/atm/pvc.c1
-rw-r--r--net/batman-adv/gateway_client.c6
-rw-r--r--net/batman-adv/translation-table.c1
-rw-r--r--net/bluetooth/a2mp.c16
-rw-r--r--net/bluetooth/af_bluetooth.c141
-rw-r--r--net/bluetooth/bnep/sock.c22
-rw-r--r--net/bluetooth/cmtp/sock.c23
-rw-r--r--net/bluetooth/hci_core.c6
-rw-r--r--net/bluetooth/hci_event.c121
-rw-r--r--net/bluetooth/hci_sock.c15
-rw-r--r--net/bluetooth/hidp/sock.c22
-rw-r--r--net/bluetooth/l2cap_core.c33
-rw-r--r--net/bluetooth/l2cap_sock.c25
-rw-r--r--net/bluetooth/mgmt.c34
-rw-r--r--net/bluetooth/rfcomm/sock.c16
-rw-r--r--net/bluetooth/rfcomm/tty.c2
-rw-r--r--net/bluetooth/sco.c35
-rw-r--r--net/bluetooth/smp.c5
-rw-r--r--net/bridge/br_device.c30
-rw-r--r--net/bridge/br_fdb.c2
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_if.c6
-rw-r--r--net/bridge/br_private.h4
-rw-r--r--net/bridge/br_stp_timer.c2
-rw-r--r--net/bridge/br_sysfs_if.c6
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/caif/chnl_net.c4
-rw-r--r--net/ceph/ceph_common.c25
-rw-r--r--net/ceph/crush/mapper.c13
-rw-r--r--net/ceph/crypto.c1
-rw-r--r--net/ceph/crypto.h3
-rw-r--r--net/ceph/messenger.c925
-rw-r--r--net/ceph/mon_client.c76
-rw-r--r--net/ceph/msgpool.c7
-rw-r--r--net/ceph/osd_client.c77
-rw-r--r--net/ceph/osdmap.c59
-rw-r--r--net/core/dev.c113
-rw-r--r--net/core/dst.c10
-rw-r--r--net/core/filter.c8
-rw-r--r--net/core/netpoll.c109
-rw-r--r--net/core/netprio_cgroup.c30
-rw-r--r--net/core/rtnetlink.c31
-rw-r--r--net/core/scm.c4
-rw-r--r--net/core/skbuff.c124
-rw-r--r--net/core/sock.c60
-rw-r--r--net/dccp/ccid.h4
-rw-r--r--net/dccp/ccids/ccid3.c1
-rw-r--r--net/decnet/dn_route.c6
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c4
-rw-r--r--net/ipv4/devinet.c39
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_semantics.c42
-rw-r--r--net/ipv4/fib_trie.c68
-rw-r--r--net/ipv4/igmp.c7
-rw-r--r--net/ipv4/inet_connection_sock.c7
-rw-r--r--net/ipv4/ip_input.c2
-rw-r--r--net/ipv4/ip_output.c10
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c9
-rw-r--r--net/ipv4/route.c205
-rw-r--r--net/ipv4/sysctl_net_ipv4.c15
-rw-r--r--net/ipv4/tcp.c9
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_input.c38
-rw-r--r--net/ipv4/tcp_ipv4.c54
-rw-r--r--net/ipv4/tcp_metrics.c12
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c49
-rw-r--r--net/ipv4/tcp_timer.c6
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv6/Kconfig16
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/ipv6/ip6_gre.c1790
-rw-r--r--net/ipv6/ip6_input.c2
-rw-r--r--net/ipv6/ip6_tunnel.c91
-rw-r--r--net/ipv6/proc.c4
-rw-r--r--net/ipv6/route.c10
-rw-r--r--net/ipv6/tcp_ipv6.c48
-rw-r--r--net/ipv6/xfrm6_policy.c8
-rw-r--r--net/key/af_key.c4
-rw-r--r--net/l2tp/l2tp_ip6.c1
-rw-r--r--net/llc/af_llc.c8
-rw-r--r--net/llc/llc_input.c21
-rw-r--r--net/llc/llc_station.c29
-rw-r--r--net/mac80211/mesh.c24
-rw-r--r--net/mac80211/mesh.h2
-rw-r--r--net/mac80211/mesh_plink.c44
-rw-r--r--net/mac80211/mlme.c2
-rw-r--r--net/mac80211/scan.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c1
-rw-r--r--net/netfilter/nf_conntrack_expect.c29
-rw-r--r--net/netfilter/nf_conntrack_netlink.c7
-rw-r--r--net/netfilter/nf_conntrack_sip.c92
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/openvswitch/actions.c3
-rw-r--r--net/packet/Kconfig8
-rw-r--r--net/packet/Makefile2
-rw-r--r--net/packet/af_packet.c134
-rw-r--r--net/packet/diag.c242
-rw-r--r--net/packet/internal.h121
-rw-r--r--net/sched/act_gact.c14
-rw-r--r--net/sched/act_ipt.c7
-rw-r--r--net/sched/act_mirred.c11
-rw-r--r--net/sched/act_pedit.c5
-rw-r--r--net/sched/act_simple.c5
-rw-r--r--net/sched/sch_generic.c18
-rw-r--r--net/sched/sch_qfq.c95
-rw-r--r--net/sctp/associola.c25
-rw-r--r--net/sctp/auth.c20
-rw-r--r--net/sctp/bind_addr.c20
-rw-r--r--net/sctp/chunk.c2
-rw-r--r--net/sctp/endpointola.c25
-rw-r--r--net/sctp/input.c115
-rw-r--r--net/sctp/ipv6.c36
-rw-r--r--net/sctp/objcnt.c8
-rw-r--r--net/sctp/output.c2
-rw-r--r--net/sctp/outqueue.c18
-rw-r--r--net/sctp/primitive.c4
-rw-r--r--net/sctp/proc.c55
-rw-r--r--net/sctp/protocol.c454
-rw-r--r--net/sctp/sm_make_chunk.c61
-rw-r--r--net/sctp/sm_sideeffect.c26
-rw-r--r--net/sctp/sm_statefuns.c725
-rw-r--r--net/sctp/sm_statetable.c17
-rw-r--r--net/sctp/socket.c119
-rw-r--r--net/sctp/sysctl.c198
-rw-r--r--net/sctp/transport.c23
-rw-r--r--net/sctp/ulpevent.c3
-rw-r--r--net/sctp/ulpqueue.c18
-rw-r--r--net/socket.c1
-rw-r--r--net/sunrpc/Kconfig5
-rw-r--r--net/sunrpc/auth.c54
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c1
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c20
-rw-r--r--net/sunrpc/cache.c5
-rw-r--r--net/sunrpc/clnt.c12
-rw-r--r--net/sunrpc/rpcb_clnt.c4
-rw-r--r--net/sunrpc/sched.c14
-rw-r--r--net/sunrpc/xdr.c127
-rw-r--r--net/sunrpc/xprtrdma/transport.c3
-rw-r--r--net/sunrpc/xprtsock.c53
-rw-r--r--net/tipc/bearer.c21
-rw-r--r--net/tipc/config.c48
-rw-r--r--net/tipc/core.c22
-rw-r--r--net/tipc/core.h18
-rw-r--r--net/tipc/eth_media.c29
-rw-r--r--net/tipc/handler.c2
-rw-r--r--net/tipc/link.c4
-rw-r--r--net/tipc/name_table.c14
-rw-r--r--net/tipc/net.c3
-rw-r--r--net/tipc/net.h2
-rw-r--r--net/tipc/subscr.c4
-rw-r--r--net/unix/af_unix.c97
-rw-r--r--net/wireless/core.c5
-rw-r--r--net/wireless/core.h1
-rw-r--r--net/wireless/reg.c19
-rw-r--r--net/wireless/util.c2
-rw-r--r--net/xfrm/xfrm_policy.c41
-rw-r--r--net/xfrm/xfrm_state.c23
-rw-r--r--net/xfrm/xfrm_user.c9
166 files changed, 6056 insertions, 2403 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 73a2a83ee2da..402442402af7 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -137,9 +137,21 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
return rc;
}
+static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (vlan->netpoll)
+ netpoll_send_skb(vlan->netpoll, skb);
+#else
+ BUG();
+#endif
+ return NETDEV_TX_OK;
+}
+
static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
unsigned int len;
int ret;
@@ -150,29 +162,30 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
* OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
*/
if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
- vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) {
+ vlan->flags & VLAN_FLAG_REORDER_HDR) {
u16 vlan_tci;
- vlan_tci = vlan_dev_priv(dev)->vlan_id;
+ vlan_tci = vlan->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
}
- skb->dev = vlan_dev_priv(dev)->real_dev;
+ skb->dev = vlan->real_dev;
len = skb->len;
- if (netpoll_tx_running(dev))
- return skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev);
+ if (unlikely(netpoll_tx_running(dev)))
+ return vlan_netpoll_send_skb(vlan, skb);
+
ret = dev_queue_xmit(skb);
if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
struct vlan_pcpu_stats *stats;
- stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats);
+ stats = this_cpu_ptr(vlan->vlan_pcpu_stats);
u64_stats_update_begin(&stats->syncp);
stats->tx_packets++;
stats->tx_bytes += len;
u64_stats_update_end(&stats->syncp);
} else {
- this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped);
+ this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped);
}
return ret;
@@ -669,25 +682,26 @@ static void vlan_dev_poll_controller(struct net_device *dev)
return;
}
-static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
+static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo,
+ gfp_t gfp)
{
- struct vlan_dev_priv *info = vlan_dev_priv(dev);
- struct net_device *real_dev = info->real_dev;
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+ struct net_device *real_dev = vlan->real_dev;
struct netpoll *netpoll;
int err = 0;
- netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL);
+ netpoll = kzalloc(sizeof(*netpoll), gfp);
err = -ENOMEM;
if (!netpoll)
goto out;
- err = __netpoll_setup(netpoll, real_dev);
+ err = __netpoll_setup(netpoll, real_dev, gfp);
if (err) {
kfree(netpoll);
goto out;
}
- info->netpoll = netpoll;
+ vlan->netpoll = netpoll;
out:
return err;
@@ -695,19 +709,15 @@ out:
static void vlan_dev_netpoll_cleanup(struct net_device *dev)
{
- struct vlan_dev_priv *info = vlan_dev_priv(dev);
- struct netpoll *netpoll = info->netpoll;
+ struct vlan_dev_priv *vlan= vlan_dev_priv(dev);
+ struct netpoll *netpoll = vlan->netpoll;
if (!netpoll)
return;
- info->netpoll = NULL;
-
- /* Wait for transmitting packets to finish before freeing. */
- synchronize_rcu_bh();
+ vlan->netpoll = NULL;
- __netpoll_cleanup(netpoll);
- kfree(netpoll);
+ __netpoll_free_rcu(netpoll);
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
diff --git a/net/atm/common.c b/net/atm/common.c
index b4b44dbed645..0c0ad930a632 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -812,6 +812,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
return -ENOTCONN;
+ memset(&pvc, 0, sizeof(pvc));
pvc.sap_family = AF_ATMPVC;
pvc.sap_addr.itf = vcc->dev->number;
pvc.sap_addr.vpi = vcc->vpi;
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 3a734919c36c..ae0324021407 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -95,6 +95,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
return -ENOTCONN;
*sockaddr_len = sizeof(struct sockaddr_atmpvc);
addr = (struct sockaddr_atmpvc *)sockaddr;
+ memset(addr, 0, sizeof(*addr));
addr->sap_family = AF_ATMPVC;
addr->sap_addr.itf = vcc->dev->number;
addr->sap_addr.vpi = vcc->vpi;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index b421cc49d2cd..fc866f2e4528 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -200,11 +200,11 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT)
goto out;
- if (!batadv_atomic_dec_not_zero(&bat_priv->gw_reselect))
- goto out;
-
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+ if (!batadv_atomic_dec_not_zero(&bat_priv->gw_reselect) && curr_gw)
+ goto out;
+
next_gw = batadv_gw_get_best_gw_node(bat_priv);
if (curr_gw == next_gw)
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index a438f4b582fc..99dd8f75b3ff 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -197,6 +197,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
del:
list_del(&entry->list);
kfree(entry);
+ kfree(tt_change_node);
event_removed = true;
goto unlock;
}
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 4ff0bf3ba9a5..0760d1fed6f0 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -316,7 +316,7 @@ send_rsp:
static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
struct a2mp_cmd *hdr)
{
- BT_DBG("ident %d code %d", hdr->ident, hdr->code);
+ BT_DBG("ident %d code 0x%2.2x", hdr->ident, hdr->code);
skb_pull(skb, le16_to_cpu(hdr->len));
return 0;
@@ -325,17 +325,19 @@ static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
/* Handle A2MP signalling */
static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
{
- struct a2mp_cmd *hdr = (void *) skb->data;
+ struct a2mp_cmd *hdr;
struct amp_mgr *mgr = chan->data;
int err = 0;
amp_mgr_get(mgr);
while (skb->len >= sizeof(*hdr)) {
- struct a2mp_cmd *hdr = (void *) skb->data;
- u16 len = le16_to_cpu(hdr->len);
+ u16 len;
- BT_DBG("code 0x%02x id %d len %d", hdr->code, hdr->ident, len);
+ hdr = (void *) skb->data;
+ len = le16_to_cpu(hdr->len);
+
+ BT_DBG("code 0x%2.2x id %d len %u", hdr->code, hdr->ident, len);
skb_pull(skb, sizeof(*hdr));
@@ -393,7 +395,9 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
if (err) {
struct a2mp_cmd_rej rej;
+
rej.reason = __constant_cpu_to_le16(0);
+ hdr = (void *) skb->data;
BT_DBG("Send A2MP Rej: cmd 0x%2.2x err %d", hdr->code, err);
@@ -412,7 +416,7 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
static void a2mp_chan_close_cb(struct l2cap_chan *chan)
{
- l2cap_chan_destroy(chan);
+ l2cap_chan_put(chan);
}
static void a2mp_chan_state_change_cb(struct l2cap_chan *chan, int state)
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index f7db5792ec64..58f9762b339a 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -28,6 +28,7 @@
#include <asm/ioctls.h>
#include <net/bluetooth/bluetooth.h>
+#include <linux/proc_fs.h>
#define VERSION "2.16"
@@ -532,6 +533,146 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
}
EXPORT_SYMBOL(bt_sock_wait_state);
+#ifdef CONFIG_PROC_FS
+struct bt_seq_state {
+ struct bt_sock_list *l;
+};
+
+static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(seq->private->l->lock)
+{
+ struct bt_seq_state *s = seq->private;
+ struct bt_sock_list *l = s->l;
+
+ read_lock(&l->lock);
+ return seq_hlist_start_head(&l->head, *pos);
+}
+
+static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bt_seq_state *s = seq->private;
+ struct bt_sock_list *l = s->l;
+
+ return seq_hlist_next(v, &l->head, pos);
+}
+
+static void bt_seq_stop(struct seq_file *seq, void *v)
+ __releases(seq->private->l->lock)
+{
+ struct bt_seq_state *s = seq->private;
+ struct bt_sock_list *l = s->l;
+
+ read_unlock(&l->lock);
+}
+
+static int bt_seq_show(struct seq_file *seq, void *v)
+{
+ struct sock *sk;
+ struct bt_sock *bt;
+ struct bt_seq_state *s = seq->private;
+ struct bt_sock_list *l = s->l;
+ bdaddr_t src_baswapped, dst_baswapped;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq ,"sk RefCnt Rmem Wmem User Inode Src Dst Parent");
+
+ if (l->custom_seq_show) {
+ seq_putc(seq, ' ');
+ l->custom_seq_show(seq, v);
+ }
+
+ seq_putc(seq, '\n');
+ } else {
+ sk = sk_entry(v);
+ bt = bt_sk(sk);
+ baswap(&src_baswapped, &bt->src);
+ baswap(&dst_baswapped, &bt->dst);
+
+ seq_printf(seq, "%pK %-6d %-6u %-6u %-6u %-6lu %pM %pM %-6lu",
+ sk,
+ atomic_read(&sk->sk_refcnt),
+ sk_rmem_alloc_get(sk),
+ sk_wmem_alloc_get(sk),
+ sock_i_uid(sk),
+ sock_i_ino(sk),
+ &src_baswapped,
+ &dst_baswapped,
+ bt->parent? sock_i_ino(bt->parent): 0LU);
+
+ if (l->custom_seq_show) {
+ seq_putc(seq, ' ');
+ l->custom_seq_show(seq, v);
+ }
+
+ seq_putc(seq, '\n');
+ }
+ return 0;
+}
+
+static struct seq_operations bt_seq_ops = {
+ .start = bt_seq_start,
+ .next = bt_seq_next,
+ .stop = bt_seq_stop,
+ .show = bt_seq_show,
+};
+
+static int bt_seq_open(struct inode *inode, struct file *file)
+{
+ struct bt_sock_list *sk_list;
+ struct bt_seq_state *s;
+
+ sk_list = PDE(inode)->data;
+ s = __seq_open_private(file, &bt_seq_ops,
+ sizeof(struct bt_seq_state));
+ if (s == NULL)
+ return -ENOMEM;
+
+ s->l = sk_list;
+ return 0;
+}
+
+int bt_procfs_init(struct module* module, struct net *net, const char *name,
+ struct bt_sock_list* sk_list,
+ int (* seq_show)(struct seq_file *, void *))
+{
+ struct proc_dir_entry * pde;
+
+ sk_list->custom_seq_show = seq_show;
+
+ sk_list->fops.owner = module;
+ sk_list->fops.open = bt_seq_open;
+ sk_list->fops.read = seq_read;
+ sk_list->fops.llseek = seq_lseek;
+ sk_list->fops.release = seq_release_private;
+
+ pde = proc_net_fops_create(net, name, 0, &sk_list->fops);
+ if (pde == NULL)
+ return -ENOMEM;
+
+ pde->data = sk_list;
+
+ return 0;
+}
+
+void bt_procfs_cleanup(struct net *net, const char *name)
+{
+ proc_net_remove(net, name);
+}
+#else
+int bt_procfs_init(struct module* module, struct net *net, const char *name,
+ struct bt_sock_list* sk_list,
+ int (* seq_show)(struct seq_file *, void *))
+{
+ return 0;
+}
+
+void bt_procfs_cleanup(struct net *net, const char *name)
+{
+}
+#endif
+EXPORT_SYMBOL(bt_procfs_init);
+EXPORT_SYMBOL(bt_procfs_cleanup);
+
static struct net_proto_family bt_sock_family_ops = {
.owner = THIS_MODULE,
.family = PF_BLUETOOTH,
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 5e5f5b410e0b..5b6cc0bf4dec 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -29,6 +29,10 @@
#include "bnep.h"
+static struct bt_sock_list bnep_sk_list = {
+ .lock = __RW_LOCK_UNLOCKED(bnep_sk_list.lock)
+};
+
static int bnep_sock_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -38,6 +42,8 @@ static int bnep_sock_release(struct socket *sock)
if (!sk)
return 0;
+ bt_sock_unlink(&bnep_sk_list, sk);
+
sock_orphan(sk);
sock_put(sk);
return 0;
@@ -204,6 +210,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol,
sk->sk_protocol = protocol;
sk->sk_state = BT_OPEN;
+ bt_sock_link(&bnep_sk_list, sk);
return 0;
}
@@ -222,19 +229,30 @@ int __init bnep_sock_init(void)
return err;
err = bt_sock_register(BTPROTO_BNEP, &bnep_sock_family_ops);
- if (err < 0)
+ if (err < 0) {
+ BT_ERR("Can't register BNEP socket");
goto error;
+ }
+
+ err = bt_procfs_init(THIS_MODULE, &init_net, "bnep", &bnep_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create BNEP proc file");
+ bt_sock_unregister(BTPROTO_BNEP);
+ goto error;
+ }
+
+ BT_INFO("BNEP socket layer initialized");
return 0;
error:
- BT_ERR("Can't register BNEP socket");
proto_unregister(&bnep_proto);
return err;
}
void __exit bnep_sock_cleanup(void)
{
+ bt_procfs_cleanup(&init_net, "bnep");
if (bt_sock_unregister(BTPROTO_BNEP) < 0)
BT_ERR("Can't unregister BNEP socket");
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index 311668d14571..d5cacef52748 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -42,6 +42,10 @@
#include "cmtp.h"
+static struct bt_sock_list cmtp_sk_list = {
+ .lock = __RW_LOCK_UNLOCKED(cmtp_sk_list.lock)
+};
+
static int cmtp_sock_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -51,6 +55,8 @@ static int cmtp_sock_release(struct socket *sock)
if (!sk)
return 0;
+ bt_sock_unlink(&cmtp_sk_list, sk);
+
sock_orphan(sk);
sock_put(sk);
@@ -214,6 +220,8 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol,
sk->sk_protocol = protocol;
sk->sk_state = BT_OPEN;
+ bt_sock_link(&cmtp_sk_list, sk);
+
return 0;
}
@@ -232,19 +240,30 @@ int cmtp_init_sockets(void)
return err;
err = bt_sock_register(BTPROTO_CMTP, &cmtp_sock_family_ops);
- if (err < 0)
+ if (err < 0) {
+ BT_ERR("Can't register CMTP socket");
goto error;
+ }
+
+ err = bt_procfs_init(THIS_MODULE, &init_net, "cmtp", &cmtp_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create CMTP proc file");
+ bt_sock_unregister(BTPROTO_HIDP);
+ goto error;
+ }
+
+ BT_INFO("CMTP socket layer initialized");
return 0;
error:
- BT_ERR("Can't register CMTP socket");
proto_unregister(&cmtp_proto);
return err;
}
void cmtp_cleanup_sockets(void)
{
+ bt_procfs_cleanup(&init_net, "cmtp");
if (bt_sock_unregister(BTPROTO_CMTP) < 0)
BT_ERR("Can't unregister CMTP socket");
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index d4de5db18d5a..fa974a19d365 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -696,7 +696,8 @@ int hci_dev_open(__u16 dev)
hci_dev_hold(hdev);
set_bit(HCI_UP, &hdev->flags);
hci_notify(hdev, HCI_DEV_UP);
- if (!test_bit(HCI_SETUP, &hdev->dev_flags)) {
+ if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
+ mgmt_valid_hdev(hdev)) {
hci_dev_lock(hdev);
mgmt_powered(hdev, 1);
hci_dev_unlock(hdev);
@@ -797,7 +798,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
* and no tasks are scheduled. */
hdev->close(hdev);
- if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
+ if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) &&
+ mgmt_valid_hdev(hdev)) {
hci_dev_lock(hdev);
mgmt_powered(hdev, 0);
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 41ff978a33f9..4fd2cf3bcd05 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -513,7 +513,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev)
if (hdev->features[3] & LMP_RSSI_INQ)
events[4] |= 0x02; /* Inquiry Result with RSSI */
- if (hdev->features[5] & LMP_SNIFF_SUBR)
+ if (lmp_sniffsubr_capable(hdev))
events[5] |= 0x20; /* Sniff Subrating */
if (hdev->features[5] & LMP_PAUSE_ENC)
@@ -522,13 +522,13 @@ static void hci_setup_event_mask(struct hci_dev *hdev)
if (hdev->features[6] & LMP_EXT_INQ)
events[5] |= 0x40; /* Extended Inquiry Result */
- if (hdev->features[6] & LMP_NO_FLUSH)
+ if (lmp_no_flush_capable(hdev))
events[7] |= 0x01; /* Enhanced Flush Complete */
if (hdev->features[7] & LMP_LSTO)
events[6] |= 0x80; /* Link Supervision Timeout Changed */
- if (hdev->features[6] & LMP_SIMPLE_PAIR) {
+ if (lmp_ssp_capable(hdev)) {
events[6] |= 0x01; /* IO Capability Request */
events[6] |= 0x02; /* IO Capability Response */
events[6] |= 0x04; /* User Confirmation Request */
@@ -541,7 +541,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev)
* Features Notification */
}
- if (hdev->features[4] & LMP_LE)
+ if (lmp_le_capable(hdev))
events[7] |= 0x20; /* LE Meta-Event */
hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
@@ -623,11 +623,11 @@ static void hci_setup_link_policy(struct hci_dev *hdev)
struct hci_cp_write_def_link_policy cp;
u16 link_policy = 0;
- if (hdev->features[0] & LMP_RSWITCH)
+ if (lmp_rswitch_capable(hdev))
link_policy |= HCI_LP_RSWITCH;
if (hdev->features[0] & LMP_HOLD)
link_policy |= HCI_LP_HOLD;
- if (hdev->features[0] & LMP_SNIFF)
+ if (lmp_sniff_capable(hdev))
link_policy |= HCI_LP_SNIFF;
if (hdev->features[1] & LMP_PARK)
link_policy |= HCI_LP_PARK;
@@ -686,7 +686,7 @@ static void hci_cc_read_local_features(struct hci_dev *hdev,
hdev->esco_type |= (ESCO_HV3);
}
- if (hdev->features[3] & LMP_ESCO)
+ if (lmp_esco_capable(hdev))
hdev->esco_type |= (ESCO_EV3);
if (hdev->features[4] & LMP_EV4)
@@ -746,7 +746,7 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev,
break;
}
- if (test_bit(HCI_INIT, &hdev->flags) && hdev->features[4] & LMP_LE)
+ if (test_bit(HCI_INIT, &hdev->flags) && lmp_le_capable(hdev))
hci_set_le_support(hdev);
done:
@@ -1365,6 +1365,9 @@ static bool hci_resolve_next_name(struct hci_dev *hdev)
return false;
e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_NEEDED);
+ if (!e)
+ return false;
+
if (hci_resolve_name(hdev, e) == 0) {
e->name_state = NAME_PENDING;
return true;
@@ -1393,12 +1396,20 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn,
return;
e = hci_inquiry_cache_lookup_resolve(hdev, bdaddr, NAME_PENDING);
- if (e) {
+ /* If the device was not found in a list of found devices names of which
+ * are pending. there is no need to continue resolving a next name as it
+ * will be done upon receiving another Remote Name Request Complete
+ * Event */
+ if (!e)
+ return;
+
+ list_del(&e->list);
+ if (name) {
e->name_state = NAME_KNOWN;
- list_del(&e->list);
- if (name)
- mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00,
- e->data.rssi, name, name_len);
+ mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00,
+ e->data.rssi, name, name_len);
+ } else {
+ e->name_state = NAME_NOT_KNOWN;
}
if (hci_resolve_next_name(hdev))
@@ -1614,43 +1625,30 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status)
{
- struct hci_cp_le_create_conn *cp;
struct hci_conn *conn;
BT_DBG("%s status 0x%2.2x", hdev->name, status);
- cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CONN);
- if (!cp)
- return;
+ if (status) {
+ hci_dev_lock(hdev);
- hci_dev_lock(hdev);
+ conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+ if (!conn) {
+ hci_dev_unlock(hdev);
+ return;
+ }
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr);
+ BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&conn->dst),
+ conn);
- BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr),
- conn);
+ conn->state = BT_CLOSED;
+ mgmt_connect_failed(hdev, &conn->dst, conn->type,
+ conn->dst_type, status);
+ hci_proto_connect_cfm(conn, status);
+ hci_conn_del(conn);
- if (status) {
- if (conn && conn->state == BT_CONNECT) {
- conn->state = BT_CLOSED;
- mgmt_connect_failed(hdev, &cp->peer_addr, conn->type,
- conn->dst_type, status);
- hci_proto_connect_cfm(conn, status);
- hci_conn_del(conn);
- }
- } else {
- if (!conn) {
- conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr);
- if (conn) {
- conn->dst_type = cp->peer_addr_type;
- conn->out = true;
- } else {
- BT_ERR("No memory for new connection");
- }
- }
+ hci_dev_unlock(hdev);
}
-
- hci_dev_unlock(hdev);
}
static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status)
@@ -1762,7 +1760,12 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (conn->type == ACL_LINK) {
conn->state = BT_CONFIG;
hci_conn_hold(conn);
- conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+
+ if (!conn->out && !hci_conn_ssp_enabled(conn) &&
+ !hci_find_link_key(hdev, &ev->bdaddr))
+ conn->disc_timeout = HCI_PAIRING_TIMEOUT;
+ else
+ conn->disc_timeout = HCI_DISCONN_TIMEOUT;
} else
conn->state = BT_CONNECTED;
@@ -3252,12 +3255,8 @@ static void hci_user_passkey_request_evt(struct hci_dev *hdev,
BT_DBG("%s", hdev->name);
- hci_dev_lock(hdev);
-
if (test_bit(HCI_MGMT, &hdev->dev_flags))
mgmt_user_passkey_request(hdev, &ev->bdaddr, ACL_LINK, 0);
-
- hci_dev_unlock(hdev);
}
static void hci_simple_pair_complete_evt(struct hci_dev *hdev,
@@ -3350,11 +3349,23 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_lock(hdev);
- if (ev->status) {
- conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
- if (!conn)
+ conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+ if (!conn) {
+ conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr);
+ if (!conn) {
+ BT_ERR("No memory for new connection");
goto unlock;
+ }
+
+ conn->dst_type = ev->bdaddr_type;
+ if (ev->role == LE_CONN_ROLE_MASTER) {
+ conn->out = true;
+ conn->link_mode |= HCI_LM_MASTER;
+ }
+ }
+
+ if (ev->status) {
mgmt_connect_failed(hdev, &conn->dst, conn->type,
conn->dst_type, ev->status);
hci_proto_connect_cfm(conn, ev->status);
@@ -3363,18 +3374,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
goto unlock;
}
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr);
- if (!conn) {
- conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr);
- if (!conn) {
- BT_ERR("No memory for new connection");
- hci_dev_unlock(hdev);
- return;
- }
-
- conn->dst_type = ev->bdaddr_type;
- }
-
if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
mgmt_device_connected(hdev, &ev->bdaddr, conn->type,
conn->dst_type, 0, NULL, 0, NULL);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index a7f04de03d79..bb64331db3b7 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -694,6 +694,7 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
*addr_len = sizeof(*haddr);
haddr->hci_family = AF_BLUETOOTH;
haddr->hci_dev = hdev->id;
+ haddr->hci_channel= 0;
release_sock(sk);
return 0;
@@ -1009,6 +1010,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname,
{
struct hci_filter *f = &hci_pi(sk)->filter;
+ memset(&uf, 0, sizeof(uf));
uf.type_mask = f->type_mask;
uf.opcode = f->opcode;
uf.event_mask[0] = *((u32 *) f->event_mask + 0);
@@ -1100,21 +1102,30 @@ int __init hci_sock_init(void)
return err;
err = bt_sock_register(BTPROTO_HCI, &hci_sock_family_ops);
- if (err < 0)
+ if (err < 0) {
+ BT_ERR("HCI socket registration failed");
goto error;
+ }
+
+ err = bt_procfs_init(THIS_MODULE, &init_net, "hci", &hci_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create HCI proc file");
+ bt_sock_unregister(BTPROTO_HCI);
+ goto error;
+ }
BT_INFO("HCI socket layer initialized");
return 0;
error:
- BT_ERR("HCI socket registration failed");
proto_unregister(&hci_sk_proto);
return err;
}
void hci_sock_cleanup(void)
{
+ bt_procfs_cleanup(&init_net, "hci");
if (bt_sock_unregister(BTPROTO_HCI) < 0)
BT_ERR("HCI socket unregistration failed");
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 18b3f6892a36..eca3889371c4 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -25,6 +25,10 @@
#include "hidp.h"
+static struct bt_sock_list hidp_sk_list = {
+ .lock = __RW_LOCK_UNLOCKED(hidp_sk_list.lock)
+};
+
static int hidp_sock_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -34,6 +38,8 @@ static int hidp_sock_release(struct socket *sock)
if (!sk)
return 0;
+ bt_sock_unlink(&hidp_sk_list, sk);
+
sock_orphan(sk);
sock_put(sk);
@@ -253,6 +259,8 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol,
sk->sk_protocol = protocol;
sk->sk_state = BT_OPEN;
+ bt_sock_link(&hidp_sk_list, sk);
+
return 0;
}
@@ -271,8 +279,19 @@ int __init hidp_init_sockets(void)
return err;
err = bt_sock_register(BTPROTO_HIDP, &hidp_sock_family_ops);
- if (err < 0)
+ if (err < 0) {
+ BT_ERR("Can't register HIDP socket");
goto error;
+ }
+
+ err = bt_procfs_init(THIS_MODULE, &init_net, "hidp", &hidp_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create HIDP proc file");
+ bt_sock_unregister(BTPROTO_HIDP);
+ goto error;
+ }
+
+ BT_INFO("HIDP socket layer initialized");
return 0;
@@ -284,6 +303,7 @@ error:
void __exit hidp_cleanup_sockets(void)
{
+ bt_procfs_cleanup(&init_net, "hidp");
if (bt_sock_unregister(BTPROTO_HIDP) < 0)
BT_ERR("Can't unregister HIDP socket");
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a8964db04bfb..f0a3ab156ec6 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -416,13 +416,30 @@ struct l2cap_chan *l2cap_chan_create(void)
return chan;
}
-void l2cap_chan_destroy(struct l2cap_chan *chan)
+static void l2cap_chan_destroy(struct l2cap_chan *chan)
{
+ BT_DBG("chan %p", chan);
+
write_lock(&chan_list_lock);
list_del(&chan->global_l);
write_unlock(&chan_list_lock);
- l2cap_chan_put(chan);
+ kfree(chan);
+}
+
+void l2cap_chan_hold(struct l2cap_chan *c)
+{
+ BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt));
+
+ atomic_inc(&c->refcnt);
+}
+
+void l2cap_chan_put(struct l2cap_chan *c)
+{
+ BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt));
+
+ if (atomic_dec_and_test(&c->refcnt))
+ l2cap_chan_destroy(c);
}
void l2cap_chan_set_defaults(struct l2cap_chan *chan)
@@ -1181,6 +1198,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn)
sk = chan->sk;
hci_conn_hold(conn->hcon);
+ conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
bacpy(&bt_sk(sk)->src, conn->src);
bacpy(&bt_sk(sk)->dst, conn->dst);
@@ -5329,7 +5347,7 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr)
return exact ? lm1 : lm2;
}
-int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
+void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
{
struct l2cap_conn *conn;
@@ -5342,7 +5360,6 @@ int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
} else
l2cap_conn_del(hcon, bt_to_errno(status));
- return 0;
}
int l2cap_disconn_ind(struct hci_conn *hcon)
@@ -5356,12 +5373,11 @@ int l2cap_disconn_ind(struct hci_conn *hcon)
return conn->disc_reason;
}
-int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
+void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
{
BT_DBG("hcon %p reason %d", hcon, reason);
l2cap_conn_del(hcon, bt_to_errno(reason));
- return 0;
}
static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt)
@@ -5404,6 +5420,11 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
BT_DBG("chan %p scid 0x%4.4x state %s", chan, chan->scid,
state_to_string(chan->state));
+ if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP) {
+ l2cap_chan_unlock(chan);
+ continue;
+ }
+
if (chan->scid == L2CAP_CID_LE_DATA) {
if (!status && encrypt) {
chan->sec_level = hcon->sec_level;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a4bb27e8427e..3a6ce73541d9 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -34,6 +34,10 @@
#include <net/bluetooth/l2cap.h>
#include <net/bluetooth/smp.h>
+static struct bt_sock_list l2cap_sk_list = {
+ .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock)
+};
+
static const struct proto_ops l2cap_sock_ops;
static void l2cap_sock_init(struct sock *sk, struct sock *parent);
static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio);
@@ -245,6 +249,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l
BT_DBG("sock %p, sk %p", sock, sk);
+ memset(la, 0, sizeof(struct sockaddr_l2));
addr->sa_family = AF_BLUETOOTH;
*len = sizeof(struct sockaddr_l2);
@@ -823,7 +828,7 @@ static void l2cap_sock_kill(struct sock *sk)
/* Kill poor orphan */
- l2cap_chan_destroy(l2cap_pi(sk)->chan);
+ l2cap_chan_put(l2cap_pi(sk)->chan);
sock_set_flag(sk, SOCK_DEAD);
sock_put(sk);
}
@@ -886,6 +891,8 @@ static int l2cap_sock_release(struct socket *sock)
if (!sk)
return 0;
+ bt_sock_unlink(&l2cap_sk_list, sk);
+
err = l2cap_sock_shutdown(sock, 2);
sock_orphan(sk);
@@ -1174,7 +1181,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p
chan = l2cap_chan_create();
if (!chan) {
- l2cap_sock_kill(sk);
+ sk_free(sk);
return NULL;
}
@@ -1210,6 +1217,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
return -ENOMEM;
l2cap_sock_init(sk, NULL);
+ bt_sock_link(&l2cap_sk_list, sk);
return 0;
}
@@ -1248,21 +1256,30 @@ int __init l2cap_init_sockets(void)
return err;
err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops);
- if (err < 0)
+ if (err < 0) {
+ BT_ERR("L2CAP socket registration failed");
goto error;
+ }
+
+ err = bt_procfs_init(THIS_MODULE, &init_net, "l2cap", &l2cap_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create L2CAP proc file");
+ bt_sock_unregister(BTPROTO_L2CAP);
+ goto error;
+ }
BT_INFO("L2CAP socket layer initialized");
return 0;
error:
- BT_ERR("L2CAP socket registration failed");
proto_unregister(&l2cap_proto);
return err;
}
void l2cap_cleanup_sockets(void)
{
+ bt_procfs_cleanup(&init_net, "l2cap");
if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
BT_ERR("L2CAP socket unregistration failed");
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index ad6613d17ca6..a3329cbd3e4d 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -193,6 +193,11 @@ static u8 mgmt_status_table[] = {
MGMT_STATUS_CONNECT_FAILED, /* MAC Connection Failed */
};
+bool mgmt_valid_hdev(struct hci_dev *hdev)
+{
+ return hdev->dev_type == HCI_BREDR;
+}
+
static u8 mgmt_status(u8 hci_status)
{
if (hci_status < ARRAY_SIZE(mgmt_status_table))
@@ -317,7 +322,6 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
u16 data_len)
{
struct mgmt_rp_read_index_list *rp;
- struct list_head *p;
struct hci_dev *d;
size_t rp_len;
u16 count;
@@ -328,7 +332,10 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
read_lock(&hci_dev_list_lock);
count = 0;
- list_for_each(p, &hci_dev_list) {
+ list_for_each_entry(d, &hci_dev_list, list) {
+ if (!mgmt_valid_hdev(d))
+ continue;
+
count++;
}
@@ -346,6 +353,9 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
if (test_bit(HCI_SETUP, &d->dev_flags))
continue;
+ if (!mgmt_valid_hdev(d))
+ continue;
+
rp->index[i++] = cpu_to_le16(d->id);
BT_DBG("Added hci%u", d->id);
}
@@ -370,10 +380,10 @@ static u32 get_supported_settings(struct hci_dev *hdev)
settings |= MGMT_SETTING_DISCOVERABLE;
settings |= MGMT_SETTING_PAIRABLE;
- if (hdev->features[6] & LMP_SIMPLE_PAIR)
+ if (lmp_ssp_capable(hdev))
settings |= MGMT_SETTING_SSP;
- if (!(hdev->features[4] & LMP_NO_BREDR)) {
+ if (lmp_bredr_capable(hdev)) {
settings |= MGMT_SETTING_BREDR;
settings |= MGMT_SETTING_LINK_SECURITY;
}
@@ -381,7 +391,7 @@ static u32 get_supported_settings(struct hci_dev *hdev)
if (enable_hs)
settings |= MGMT_SETTING_HS;
- if (hdev->features[4] & LMP_LE)
+ if (lmp_le_capable(hdev))
settings |= MGMT_SETTING_LE;
return settings;
@@ -403,7 +413,7 @@ static u32 get_current_settings(struct hci_dev *hdev)
if (test_bit(HCI_PAIRABLE, &hdev->dev_flags))
settings |= MGMT_SETTING_PAIRABLE;
- if (!(hdev->features[4] & LMP_NO_BREDR))
+ if (lmp_bredr_capable(hdev))
settings |= MGMT_SETTING_BREDR;
if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
@@ -1111,7 +1121,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
hci_dev_lock(hdev);
- if (!(hdev->features[6] & LMP_SIMPLE_PAIR)) {
+ if (!lmp_ssp_capable(hdev)) {
err = cmd_status(sk, hdev->id, MGMT_OP_SET_SSP,
MGMT_STATUS_NOT_SUPPORTED);
goto failed;
@@ -1195,7 +1205,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
hci_dev_lock(hdev);
- if (!(hdev->features[4] & LMP_LE)) {
+ if (!lmp_le_capable(hdev)) {
err = cmd_status(sk, hdev->id, MGMT_OP_SET_LE,
MGMT_STATUS_NOT_SUPPORTED);
goto unlock;
@@ -2191,7 +2201,7 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (!(hdev->features[6] & LMP_SIMPLE_PAIR)) {
+ if (!lmp_ssp_capable(hdev)) {
err = cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA,
MGMT_STATUS_NOT_SUPPORTED);
goto unlock;
@@ -2820,6 +2830,9 @@ static void cmd_status_rsp(struct pending_cmd *cmd, void *data)
int mgmt_index_added(struct hci_dev *hdev)
{
+ if (!mgmt_valid_hdev(hdev))
+ return -ENOTSUPP;
+
return mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL);
}
@@ -2827,6 +2840,9 @@ int mgmt_index_removed(struct hci_dev *hdev)
{
u8 status = MGMT_STATUS_INVALID_INDEX;
+ if (!mgmt_valid_hdev(hdev))
+ return -ENOTSUPP;
+
mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status);
return mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 7e1e59645c05..b3226f3658cf 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -528,6 +528,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
BT_DBG("sock %p, sk %p", sock, sk);
+ memset(sa, 0, sizeof(*sa));
sa->rc_family = AF_BLUETOOTH;
sa->rc_channel = rfcomm_pi(sk)->channel;
if (peer)
@@ -822,6 +823,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c
}
sec.level = rfcomm_pi(sk)->sec_level;
+ sec.key_size = 0;
len = min_t(unsigned int, len, sizeof(sec));
if (copy_to_user(optval, (char *) &sec, len))
@@ -1033,8 +1035,17 @@ int __init rfcomm_init_sockets(void)
return err;
err = bt_sock_register(BTPROTO_RFCOMM, &rfcomm_sock_family_ops);
- if (err < 0)
+ if (err < 0) {
+ BT_ERR("RFCOMM socket layer registration failed");
+ goto error;
+ }
+
+ err = bt_procfs_init(THIS_MODULE, &init_net, "rfcomm", &rfcomm_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create RFCOMM proc file");
+ bt_sock_unregister(BTPROTO_RFCOMM);
goto error;
+ }
if (bt_debugfs) {
rfcomm_sock_debugfs = debugfs_create_file("rfcomm", 0444,
@@ -1048,13 +1059,14 @@ int __init rfcomm_init_sockets(void)
return 0;
error:
- BT_ERR("RFCOMM socket layer registration failed");
proto_unregister(&rfcomm_proto);
return err;
}
void __exit rfcomm_cleanup_sockets(void)
{
+ bt_procfs_cleanup(&init_net, "rfcomm");
+
debugfs_remove(rfcomm_sock_debugfs);
if (bt_sock_unregister(BTPROTO_RFCOMM) < 0)
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index cb960773c002..56f182393c4c 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -456,7 +456,7 @@ static int rfcomm_get_dev_list(void __user *arg)
size = sizeof(*dl) + dev_num * sizeof(*di);
- dl = kmalloc(size, GFP_KERNEL);
+ dl = kzalloc(size, GFP_KERNEL);
if (!dl)
return -ENOMEM;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 40bbe25dcff7..dc42b917aaaf 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -131,6 +131,15 @@ static int sco_conn_del(struct hci_conn *hcon, int err)
sco_sock_clear_timer(sk);
sco_chan_del(sk, err);
bh_unlock_sock(sk);
+
+ sco_conn_lock(conn);
+ conn->sk = NULL;
+ sco_pi(sk)->conn = NULL;
+ sco_conn_unlock(conn);
+
+ if (conn->hcon)
+ hci_conn_put(conn->hcon);
+
sco_sock_kill(sk);
}
@@ -821,16 +830,6 @@ static void sco_chan_del(struct sock *sk, int err)
BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
- if (conn) {
- sco_conn_lock(conn);
- conn->sk = NULL;
- sco_pi(sk)->conn = NULL;
- sco_conn_unlock(conn);
-
- if (conn->hcon)
- hci_conn_put(conn->hcon);
- }
-
sk->sk_state = BT_CLOSED;
sk->sk_err = err;
sk->sk_state_change(sk);
@@ -913,7 +912,7 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr)
return lm;
}
-int sco_connect_cfm(struct hci_conn *hcon, __u8 status)
+void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
{
BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
if (!status) {
@@ -924,16 +923,13 @@ int sco_connect_cfm(struct hci_conn *hcon, __u8 status)
sco_conn_ready(conn);
} else
sco_conn_del(hcon, bt_to_errno(status));
-
- return 0;
}
-int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
+void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
{
BT_DBG("hcon %p reason %d", hcon, reason);
sco_conn_del(hcon, bt_to_errno(reason));
- return 0;
}
int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
@@ -1026,6 +1022,13 @@ int __init sco_init(void)
goto error;
}
+ err = bt_procfs_init(THIS_MODULE, &init_net, "sco", &sco_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create SCO proc file");
+ bt_sock_unregister(BTPROTO_SCO);
+ goto error;
+ }
+
if (bt_debugfs) {
sco_debugfs = debugfs_create_file("sco", 0444, bt_debugfs,
NULL, &sco_debugfs_fops);
@@ -1044,6 +1047,8 @@ error:
void __exit sco_exit(void)
{
+ bt_procfs_cleanup(&init_net, "sco");
+
debugfs_remove(sco_debugfs);
if (bt_sock_unregister(BTPROTO_SCO) < 0)
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 16ef0dc85a0a..901a616c8083 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -579,8 +579,11 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags))
smp = smp_chan_create(conn);
+ else
+ smp = conn->smp_chan;
- smp = conn->smp_chan;
+ if (!smp)
+ return SMP_UNSPECIFIED;
smp->preq[0] = SMP_CMD_PAIRING_REQ;
memcpy(&smp->preq[1], req, sizeof(*req));
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 333484537600..070e8a68cfc6 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -31,9 +31,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
struct net_bridge_mdb_entry *mdst;
struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
+ rcu_read_lock();
#ifdef CONFIG_BRIDGE_NETFILTER
if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
br_nf_pre_routing_finish_bridge_slow(skb);
+ rcu_read_unlock();
return NETDEV_TX_OK;
}
#endif
@@ -48,7 +50,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
- rcu_read_lock();
if (is_broadcast_ether_addr(dest))
br_flood_deliver(br, skb);
else if (is_multicast_ether_addr(dest)) {
@@ -206,24 +207,23 @@ static void br_poll_controller(struct net_device *br_dev)
static void br_netpoll_cleanup(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
- struct net_bridge_port *p, *n;
+ struct net_bridge_port *p;
- list_for_each_entry_safe(p, n, &br->port_list, list) {
+ list_for_each_entry(p, &br->port_list, list)
br_netpoll_disable(p);
- }
}
-static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
+static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
+ gfp_t gfp)
{
struct net_bridge *br = netdev_priv(dev);
- struct net_bridge_port *p, *n;
+ struct net_bridge_port *p;
int err = 0;
- list_for_each_entry_safe(p, n, &br->port_list, list) {
+ list_for_each_entry(p, &br->port_list, list) {
if (!p->dev)
continue;
-
- err = br_netpoll_enable(p);
+ err = br_netpoll_enable(p, gfp);
if (err)
goto fail;
}
@@ -236,17 +236,17 @@ fail:
goto out;
}
-int br_netpoll_enable(struct net_bridge_port *p)
+int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
{
struct netpoll *np;
int err = 0;
- np = kzalloc(sizeof(*p->np), GFP_KERNEL);
+ np = kzalloc(sizeof(*p->np), gfp);
err = -ENOMEM;
if (!np)
goto out;
- err = __netpoll_setup(np, p->dev);
+ err = __netpoll_setup(np, p->dev, gfp);
if (err) {
kfree(np);
goto out;
@@ -267,11 +267,7 @@ void br_netpoll_disable(struct net_bridge_port *p)
p->np = NULL;
- /* Wait for transmitting packets to finish before freeing. */
- synchronize_rcu_bh();
-
- __netpoll_cleanup(np);
- kfree(np);
+ __netpoll_free_rcu(np);
}
#endif
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index d21f32383517..9ce430b4657c 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -312,7 +312,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
fe->is_local = f->is_local;
if (!f->is_static)
- fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated);
+ fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
++fe;
++num;
}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index e9466d412707..02015a505d2a 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -65,7 +65,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
{
skb->dev = to->dev;
- if (unlikely(netpoll_tx_running(to->dev))) {
+ if (unlikely(netpoll_tx_running(to->br->dev))) {
if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
kfree_skb(skb);
else {
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index e1144e1617be..1c8fdc3558cd 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -361,7 +361,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (err)
goto err2;
- if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
+ if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL))))
goto err3;
err = netdev_set_master(dev, br->dev);
@@ -427,6 +427,10 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
if (!p || p->br != br)
return -EINVAL;
+ /* Since more than one interface can be attached to a bridge,
+ * there still maybe an alternate path for netconsole to use;
+ * therefore there is no reason for a NETDEV_RELEASE event.
+ */
del_nbp(p);
spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a768b2408edf..f507d2af9646 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -316,7 +316,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
netpoll_send_skb(np, skb);
}
-extern int br_netpoll_enable(struct net_bridge_port *p);
+extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp);
extern void br_netpoll_disable(struct net_bridge_port *p);
#else
static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
@@ -329,7 +329,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
{
}
-static inline int br_netpoll_enable(struct net_bridge_port *p)
+static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
{
return 0;
}
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index a6747e673426..c3530a81a33b 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -170,5 +170,5 @@ void br_stp_port_timer_init(struct net_bridge_port *p)
unsigned long br_timer_value(const struct timer_list *timer)
{
return timer_pending(timer)
- ? jiffies_to_clock_t(timer->expires - jiffies) : 0;
+ ? jiffies_delta_to_clock_t(timer->expires - jiffies) : 0;
}
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 6229b62749e8..13b36bdc76a7 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -27,7 +27,7 @@ struct brport_attribute {
};
#define BRPORT_ATTR(_name,_mode,_show,_store) \
-struct brport_attribute brport_attr_##_name = { \
+const struct brport_attribute brport_attr_##_name = { \
.attr = {.name = __stringify(_name), \
.mode = _mode }, \
.show = _show, \
@@ -164,7 +164,7 @@ static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router,
store_multicast_router);
#endif
-static struct brport_attribute *brport_attrs[] = {
+static const struct brport_attribute *brport_attrs[] = {
&brport_attr_path_cost,
&brport_attr_priority,
&brport_attr_port_id,
@@ -241,7 +241,7 @@ const struct sysfs_ops brport_sysfs_ops = {
int br_sysfs_addif(struct net_bridge_port *p)
{
struct net_bridge *br = p->br;
- struct brport_attribute **a;
+ const struct brport_attribute **a;
int err;
err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj,
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 78f1cdad5b33..095259f83902 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -141,7 +141,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
err = sk_filter(sk, skb);
if (err)
return err;
- if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
+ if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) {
set_rx_flow_off(cf_sk);
net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n");
caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 69771c04ba8f..e597733affb8 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -94,6 +94,10 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
/* check the version of IP */
ip_version = skb_header_pointer(skb, 0, 1, &buf);
+ if (!ip_version) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
switch (*ip_version >> 4) {
case 4:
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index ba4323bce0e9..69e38db28e5f 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -17,6 +17,7 @@
#include <linux/string.h>
+#include <linux/ceph/ceph_features.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/debugfs.h>
#include <linux/ceph/decode.h>
@@ -460,27 +461,23 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
client->auth_err = 0;
client->extra_mon_dispatch = NULL;
- client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT |
+ client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT |
supported_features;
- client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT |
+ client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT |
required_features;
/* msgr */
if (ceph_test_opt(client, MYIP))
myaddr = &client->options->my_addr;
- client->msgr = ceph_messenger_create(myaddr,
- client->supported_features,
- client->required_features);
- if (IS_ERR(client->msgr)) {
- err = PTR_ERR(client->msgr);
- goto fail;
- }
- client->msgr->nocrc = ceph_test_opt(client, NOCRC);
+ ceph_messenger_init(&client->msgr, myaddr,
+ client->supported_features,
+ client->required_features,
+ ceph_test_opt(client, NOCRC));
/* subsystems */
err = ceph_monc_init(&client->monc, client);
if (err < 0)
- goto fail_msgr;
+ goto fail;
err = ceph_osdc_init(&client->osdc, client);
if (err < 0)
goto fail_monc;
@@ -489,8 +486,6 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
fail_monc:
ceph_monc_stop(&client->monc);
-fail_msgr:
- ceph_messenger_destroy(client->msgr);
fail:
kfree(client);
return ERR_PTR(err);
@@ -501,6 +496,8 @@ void ceph_destroy_client(struct ceph_client *client)
{
dout("destroy_client %p\n", client);
+ atomic_set(&client->msgr.stopping, 1);
+
/* unmount */
ceph_osdc_stop(&client->osdc);
@@ -508,8 +505,6 @@ void ceph_destroy_client(struct ceph_client *client)
ceph_debugfs_client_cleanup(client);
- ceph_messenger_destroy(client->msgr);
-
ceph_destroy_options(client->options);
kfree(client);
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index d7edc24333b8..35fce755ce10 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -306,7 +306,6 @@ static int crush_choose(const struct crush_map *map,
int item = 0;
int itemtype;
int collide, reject;
- const unsigned int orig_tries = 5; /* attempts before we fall back to search */
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
bucket->id, x, outpos, numrep);
@@ -351,8 +350,9 @@ static int crush_choose(const struct crush_map *map,
reject = 1;
goto reject;
}
- if (flocal >= (in->size>>1) &&
- flocal > orig_tries)
+ if (map->choose_local_fallback_tries > 0 &&
+ flocal >= (in->size>>1) &&
+ flocal > map->choose_local_fallback_tries)
item = bucket_perm_choose(in, x, r);
else
item = crush_bucket_choose(in, x, r);
@@ -422,13 +422,14 @@ reject:
ftotal++;
flocal++;
- if (collide && flocal < 3)
+ if (collide && flocal <= map->choose_local_tries)
/* retry locally a few times */
retry_bucket = 1;
- else if (flocal <= in->size + orig_tries)
+ else if (map->choose_local_fallback_tries > 0 &&
+ flocal <= in->size + map->choose_local_fallback_tries)
/* exhaustive bucket search */
retry_bucket = 1;
- else if (ftotal < 20)
+ else if (ftotal <= map->choose_total_tries)
/* then retry descent */
retry_descent = 1;
else
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index b780cb7947dd..9da7fdd3cd8a 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -466,6 +466,7 @@ void ceph_key_destroy(struct key *key) {
struct ceph_crypto_key *ckey = key->payload.data;
ceph_crypto_key_destroy(ckey);
+ kfree(ckey);
}
struct key_type key_type_ceph = {
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 1919d1550d75..3572dc518bc9 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -16,7 +16,8 @@ struct ceph_crypto_key {
static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
{
- kfree(key->key);
+ if (key)
+ kfree(key->key);
}
extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 10255e81be79..b9796750034a 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -29,6 +29,74 @@
* the sender.
*/
+/*
+ * We track the state of the socket on a given connection using
+ * values defined below. The transition to a new socket state is
+ * handled by a function which verifies we aren't coming from an
+ * unexpected state.
+ *
+ * --------
+ * | NEW* | transient initial state
+ * --------
+ * | con_sock_state_init()
+ * v
+ * ----------
+ * | CLOSED | initialized, but no socket (and no
+ * ---------- TCP connection)
+ * ^ \
+ * | \ con_sock_state_connecting()
+ * | ----------------------
+ * | \
+ * + con_sock_state_closed() \
+ * |+--------------------------- \
+ * | \ \ \
+ * | ----------- \ \
+ * | | CLOSING | socket event; \ \
+ * | ----------- await close \ \
+ * | ^ \ |
+ * | | \ |
+ * | + con_sock_state_closing() \ |
+ * | / \ | |
+ * | / --------------- | |
+ * | / \ v v
+ * | / --------------
+ * | / -----------------| CONNECTING | socket created, TCP
+ * | | / -------------- connect initiated
+ * | | | con_sock_state_connected()
+ * | | v
+ * -------------
+ * | CONNECTED | TCP connection established
+ * -------------
+ *
+ * State values for ceph_connection->sock_state; NEW is assumed to be 0.
+ */
+
+#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */
+#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */
+#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */
+#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */
+#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */
+
+/*
+ * connection states
+ */
+#define CON_STATE_CLOSED 1 /* -> PREOPEN */
+#define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */
+#define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */
+#define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */
+#define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */
+#define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */
+
+/*
+ * ceph_connection flag bits
+ */
+#define CON_FLAG_LOSSYTX 0 /* we can close channel or drop
+ * messages on errors */
+#define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
+#define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */
+#define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */
+#define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */
+
/* static tag bytes (protocol control messages) */
static char tag_msg = CEPH_MSGR_TAG_MSG;
static char tag_ack = CEPH_MSGR_TAG_ACK;
@@ -147,72 +215,130 @@ void ceph_msgr_flush(void)
}
EXPORT_SYMBOL(ceph_msgr_flush);
+/* Connection socket state transition functions */
+
+static void con_sock_state_init(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSED);
+}
+
+static void con_sock_state_connecting(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CONNECTING);
+}
+
+static void con_sock_state_connected(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CONNECTED);
+}
+
+static void con_sock_state_closing(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
+ old_state != CON_SOCK_STATE_CONNECTED &&
+ old_state != CON_SOCK_STATE_CLOSING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSING);
+}
+
+static void con_sock_state_closed(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
+ old_state != CON_SOCK_STATE_CLOSING &&
+ old_state != CON_SOCK_STATE_CONNECTING &&
+ old_state != CON_SOCK_STATE_CLOSED))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSED);
+}
/*
* socket callback functions
*/
/* data available on socket, or listen socket received a connect */
-static void ceph_data_ready(struct sock *sk, int count_unused)
+static void ceph_sock_data_ready(struct sock *sk, int count_unused)
{
struct ceph_connection *con = sk->sk_user_data;
+ if (atomic_read(&con->msgr->stopping)) {
+ return;
+ }
if (sk->sk_state != TCP_CLOSE_WAIT) {
- dout("ceph_data_ready on %p state = %lu, queueing work\n",
+ dout("%s on %p state = %lu, queueing work\n", __func__,
con, con->state);
queue_con(con);
}
}
/* socket has buffer space for writing */
-static void ceph_write_space(struct sock *sk)
+static void ceph_sock_write_space(struct sock *sk)
{
struct ceph_connection *con = sk->sk_user_data;
/* only queue to workqueue if there is data we want to write,
* and there is sufficient space in the socket buffer to accept
- * more data. clear SOCK_NOSPACE so that ceph_write_space()
+ * more data. clear SOCK_NOSPACE so that ceph_sock_write_space()
* doesn't get called again until try_write() fills the socket
* buffer. See net/ipv4/tcp_input.c:tcp_check_space()
* and net/core/stream.c:sk_stream_write_space().
*/
- if (test_bit(WRITE_PENDING, &con->state)) {
+ if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) {
if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
- dout("ceph_write_space %p queueing write work\n", con);
+ dout("%s %p queueing write work\n", __func__, con);
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
queue_con(con);
}
} else {
- dout("ceph_write_space %p nothing to write\n", con);
+ dout("%s %p nothing to write\n", __func__, con);
}
}
/* socket's state has changed */
-static void ceph_state_change(struct sock *sk)
+static void ceph_sock_state_change(struct sock *sk)
{
struct ceph_connection *con = sk->sk_user_data;
- dout("ceph_state_change %p state = %lu sk_state = %u\n",
+ dout("%s %p state = %lu sk_state = %u\n", __func__,
con, con->state, sk->sk_state);
- if (test_bit(CLOSED, &con->state))
- return;
-
switch (sk->sk_state) {
case TCP_CLOSE:
- dout("ceph_state_change TCP_CLOSE\n");
+ dout("%s TCP_CLOSE\n", __func__);
case TCP_CLOSE_WAIT:
- dout("ceph_state_change TCP_CLOSE_WAIT\n");
- if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) {
- if (test_bit(CONNECTING, &con->state))
- con->error_msg = "connection failed";
- else
- con->error_msg = "socket closed";
- queue_con(con);
- }
+ dout("%s TCP_CLOSE_WAIT\n", __func__);
+ con_sock_state_closing(con);
+ set_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
+ queue_con(con);
break;
case TCP_ESTABLISHED:
- dout("ceph_state_change TCP_ESTABLISHED\n");
+ dout("%s TCP_ESTABLISHED\n", __func__);
+ con_sock_state_connected(con);
queue_con(con);
break;
default: /* Everything else is uninteresting */
@@ -228,9 +354,9 @@ static void set_sock_callbacks(struct socket *sock,
{
struct sock *sk = sock->sk;
sk->sk_user_data = con;
- sk->sk_data_ready = ceph_data_ready;
- sk->sk_write_space = ceph_write_space;
- sk->sk_state_change = ceph_state_change;
+ sk->sk_data_ready = ceph_sock_data_ready;
+ sk->sk_write_space = ceph_sock_write_space;
+ sk->sk_state_change = ceph_sock_state_change;
}
@@ -262,6 +388,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
+ con_sock_state_connecting(con);
ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
O_NONBLOCK);
if (ret == -EINPROGRESS) {
@@ -277,7 +404,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
return ret;
}
con->sock = sock;
-
return 0;
}
@@ -333,16 +459,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
*/
static int con_close_socket(struct ceph_connection *con)
{
- int rc;
+ int rc = 0;
dout("con_close_socket on %p sock %p\n", con, con->sock);
- if (!con->sock)
- return 0;
- set_bit(SOCK_CLOSED, &con->state);
- rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
- sock_release(con->sock);
- con->sock = NULL;
- clear_bit(SOCK_CLOSED, &con->state);
+ if (con->sock) {
+ rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
+ sock_release(con->sock);
+ con->sock = NULL;
+ }
+
+ /*
+ * Forcibly clear the SOCK_CLOSED flag. It gets set
+ * independent of the connection mutex, and we could have
+ * received a socket close event before we had the chance to
+ * shut the socket down.
+ */
+ clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
+
+ con_sock_state_closed(con);
return rc;
}
@@ -353,6 +487,10 @@ static int con_close_socket(struct ceph_connection *con)
static void ceph_msg_remove(struct ceph_msg *msg)
{
list_del_init(&msg->list_head);
+ BUG_ON(msg->con == NULL);
+ msg->con->ops->put(msg->con);
+ msg->con = NULL;
+
ceph_msg_put(msg);
}
static void ceph_msg_remove_list(struct list_head *head)
@@ -372,8 +510,11 @@ static void reset_connection(struct ceph_connection *con)
ceph_msg_remove_list(&con->out_sent);
if (con->in_msg) {
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
+ con->ops->put(con);
}
con->connect_seq = 0;
@@ -391,32 +532,44 @@ static void reset_connection(struct ceph_connection *con)
*/
void ceph_con_close(struct ceph_connection *con)
{
+ mutex_lock(&con->mutex);
dout("con_close %p peer %s\n", con,
ceph_pr_addr(&con->peer_addr.in_addr));
- set_bit(CLOSED, &con->state); /* in case there's queued work */
- clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
- clear_bit(LOSSYTX, &con->state); /* so we retry next connect */
- clear_bit(KEEPALIVE_PENDING, &con->state);
- clear_bit(WRITE_PENDING, &con->state);
- mutex_lock(&con->mutex);
+ con->state = CON_STATE_CLOSED;
+
+ clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */
+ clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
+ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+ clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
+ clear_bit(CON_FLAG_BACKOFF, &con->flags);
+
reset_connection(con);
con->peer_global_seq = 0;
cancel_delayed_work(&con->work);
+ con_close_socket(con);
mutex_unlock(&con->mutex);
- queue_con(con);
}
EXPORT_SYMBOL(ceph_con_close);
/*
* Reopen a closed connection, with a new peer address.
*/
-void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
+void ceph_con_open(struct ceph_connection *con,
+ __u8 entity_type, __u64 entity_num,
+ struct ceph_entity_addr *addr)
{
+ mutex_lock(&con->mutex);
dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
- set_bit(OPENING, &con->state);
- clear_bit(CLOSED, &con->state);
+
+ BUG_ON(con->state != CON_STATE_CLOSED);
+ con->state = CON_STATE_PREOPEN;
+
+ con->peer_name.type = (__u8) entity_type;
+ con->peer_name.num = cpu_to_le64(entity_num);
+
memcpy(&con->peer_addr, addr, sizeof(*addr));
con->delay = 0; /* reset backoff memory */
+ mutex_unlock(&con->mutex);
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_open);
@@ -430,42 +583,26 @@ bool ceph_con_opened(struct ceph_connection *con)
}
/*
- * generic get/put
- */
-struct ceph_connection *ceph_con_get(struct ceph_connection *con)
-{
- int nref = __atomic_add_unless(&con->nref, 1, 0);
-
- dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1);
-
- return nref ? con : NULL;
-}
-
-void ceph_con_put(struct ceph_connection *con)
-{
- int nref = atomic_dec_return(&con->nref);
-
- BUG_ON(nref < 0);
- if (nref == 0) {
- BUG_ON(con->sock);
- kfree(con);
- }
- dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref);
-}
-
-/*
* initialize a new connection.
*/
-void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
+void ceph_con_init(struct ceph_connection *con, void *private,
+ const struct ceph_connection_operations *ops,
+ struct ceph_messenger *msgr)
{
dout("con_init %p\n", con);
memset(con, 0, sizeof(*con));
- atomic_set(&con->nref, 1);
+ con->private = private;
+ con->ops = ops;
con->msgr = msgr;
+
+ con_sock_state_init(con);
+
mutex_init(&con->mutex);
INIT_LIST_HEAD(&con->out_queue);
INIT_LIST_HEAD(&con->out_sent);
INIT_DELAYED_WORK(&con->work, con_work);
+
+ con->state = CON_STATE_CLOSED;
}
EXPORT_SYMBOL(ceph_con_init);
@@ -486,14 +623,14 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
return ret;
}
-static void ceph_con_out_kvec_reset(struct ceph_connection *con)
+static void con_out_kvec_reset(struct ceph_connection *con)
{
con->out_kvec_left = 0;
con->out_kvec_bytes = 0;
con->out_kvec_cur = &con->out_kvec[0];
}
-static void ceph_con_out_kvec_add(struct ceph_connection *con,
+static void con_out_kvec_add(struct ceph_connection *con,
size_t size, void *data)
{
int index;
@@ -507,6 +644,53 @@ static void ceph_con_out_kvec_add(struct ceph_connection *con,
con->out_kvec_bytes += size;
}
+#ifdef CONFIG_BLOCK
+static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
+{
+ if (!bio) {
+ *iter = NULL;
+ *seg = 0;
+ return;
+ }
+ *iter = bio;
+ *seg = bio->bi_idx;
+}
+
+static void iter_bio_next(struct bio **bio_iter, int *seg)
+{
+ if (*bio_iter == NULL)
+ return;
+
+ BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
+
+ (*seg)++;
+ if (*seg == (*bio_iter)->bi_vcnt)
+ init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
+}
+#endif
+
+static void prepare_write_message_data(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->out_msg;
+
+ BUG_ON(!msg);
+ BUG_ON(!msg->hdr.data_len);
+
+ /* initialize page iterator */
+ con->out_msg_pos.page = 0;
+ if (msg->pages)
+ con->out_msg_pos.page_pos = msg->page_alignment;
+ else
+ con->out_msg_pos.page_pos = 0;
+#ifdef CONFIG_BLOCK
+ if (msg->bio)
+ init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
+#endif
+ con->out_msg_pos.data_pos = 0;
+ con->out_msg_pos.did_page_crc = false;
+ con->out_more = 1; /* data + footer will follow */
+}
+
/*
* Prepare footer for currently outgoing message, and finish things
* off. Assumes out_kvec* are already valid.. we just add on to the end.
@@ -516,6 +700,8 @@ static void prepare_write_message_footer(struct ceph_connection *con)
struct ceph_msg *m = con->out_msg;
int v = con->out_kvec_left;
+ m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
+
dout("prepare_write_message_footer %p\n", con);
con->out_kvec_is_msg = true;
con->out_kvec[v].iov_base = &m->footer;
@@ -534,7 +720,7 @@ static void prepare_write_message(struct ceph_connection *con)
struct ceph_msg *m;
u32 crc;
- ceph_con_out_kvec_reset(con);
+ con_out_kvec_reset(con);
con->out_kvec_is_msg = true;
con->out_msg_done = false;
@@ -542,14 +728,16 @@ static void prepare_write_message(struct ceph_connection *con)
* TCP packet that's a good thing. */
if (con->in_seq > con->in_seq_acked) {
con->in_seq_acked = con->in_seq;
- ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
- ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
&con->out_temp_ack);
}
+ BUG_ON(list_empty(&con->out_queue));
m = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
con->out_msg = m;
+ BUG_ON(m->con != con);
/* put message on sent list */
ceph_msg_get(m);
@@ -576,18 +764,18 @@ static void prepare_write_message(struct ceph_connection *con)
BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
/* tag + hdr + front + middle */
- ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
- ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
- ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
+ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
+ con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+ con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
if (m->middle)
- ceph_con_out_kvec_add(con, m->middle->vec.iov_len,
+ con_out_kvec_add(con, m->middle->vec.iov_len,
m->middle->vec.iov_base);
/* fill in crc (except data pages), footer */
crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
con->out_msg->hdr.crc = cpu_to_le32(crc);
- con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE;
+ con->out_msg->footer.flags = 0;
crc = crc32c(0, m->front.iov_base, m->front.iov_len);
con->out_msg->footer.front_crc = cpu_to_le32(crc);
@@ -597,28 +785,19 @@ static void prepare_write_message(struct ceph_connection *con)
con->out_msg->footer.middle_crc = cpu_to_le32(crc);
} else
con->out_msg->footer.middle_crc = 0;
- con->out_msg->footer.data_crc = 0;
- dout("prepare_write_message front_crc %u data_crc %u\n",
+ dout("%s front_crc %u middle_crc %u\n", __func__,
le32_to_cpu(con->out_msg->footer.front_crc),
le32_to_cpu(con->out_msg->footer.middle_crc));
/* is there a data payload? */
- if (le32_to_cpu(m->hdr.data_len) > 0) {
- /* initialize page iterator */
- con->out_msg_pos.page = 0;
- if (m->pages)
- con->out_msg_pos.page_pos = m->page_alignment;
- else
- con->out_msg_pos.page_pos = 0;
- con->out_msg_pos.data_pos = 0;
- con->out_msg_pos.did_page_crc = false;
- con->out_more = 1; /* data + footer will follow */
- } else {
+ con->out_msg->footer.data_crc = 0;
+ if (m->hdr.data_len)
+ prepare_write_message_data(con);
+ else
/* no, queue up footer too and be done */
prepare_write_message_footer(con);
- }
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
}
/*
@@ -630,16 +809,16 @@ static void prepare_write_ack(struct ceph_connection *con)
con->in_seq_acked, con->in_seq);
con->in_seq_acked = con->in_seq;
- ceph_con_out_kvec_reset(con);
+ con_out_kvec_reset(con);
- ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
- ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
&con->out_temp_ack);
con->out_more = 1; /* more will follow.. eventually.. */
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
}
/*
@@ -648,9 +827,9 @@ static void prepare_write_ack(struct ceph_connection *con)
static void prepare_write_keepalive(struct ceph_connection *con)
{
dout("prepare_write_keepalive %p\n", con);
- ceph_con_out_kvec_reset(con);
- ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
- set_bit(WRITE_PENDING, &con->state);
+ con_out_kvec_reset(con);
+ con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
}
/*
@@ -665,27 +844,21 @@ static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection
if (!con->ops->get_authorizer) {
con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
con->out_connect.authorizer_len = 0;
-
return NULL;
}
/* Can't hold the mutex while getting authorizer */
-
mutex_unlock(&con->mutex);
-
auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
-
mutex_lock(&con->mutex);
if (IS_ERR(auth))
return auth;
- if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->state))
+ if (con->state != CON_STATE_NEGOTIATING)
return ERR_PTR(-EAGAIN);
con->auth_reply_buf = auth->authorizer_reply_buf;
con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
-
-
return auth;
}
@@ -694,12 +867,12 @@ static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection
*/
static void prepare_write_banner(struct ceph_connection *con)
{
- ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
- ceph_con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
+ con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
+ con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
&con->msgr->my_enc_addr);
con->out_more = 0;
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
}
static int prepare_write_connect(struct ceph_connection *con)
@@ -742,14 +915,15 @@ static int prepare_write_connect(struct ceph_connection *con)
con->out_connect.authorizer_len = auth ?
cpu_to_le32(auth->authorizer_buf_len) : 0;
- ceph_con_out_kvec_add(con, sizeof (con->out_connect),
+ con_out_kvec_reset(con);
+ con_out_kvec_add(con, sizeof (con->out_connect),
&con->out_connect);
if (auth && auth->authorizer_buf_len)
- ceph_con_out_kvec_add(con, auth->authorizer_buf_len,
+ con_out_kvec_add(con, auth->authorizer_buf_len,
auth->authorizer_buf);
con->out_more = 0;
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
return 0;
}
@@ -797,30 +971,34 @@ out:
return ret; /* done! */
}
-#ifdef CONFIG_BLOCK
-static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
+static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
+ size_t len, size_t sent, bool in_trail)
{
- if (!bio) {
- *iter = NULL;
- *seg = 0;
- return;
- }
- *iter = bio;
- *seg = bio->bi_idx;
-}
+ struct ceph_msg *msg = con->out_msg;
-static void iter_bio_next(struct bio **bio_iter, int *seg)
-{
- if (*bio_iter == NULL)
- return;
+ BUG_ON(!msg);
+ BUG_ON(!sent);
- BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
+ con->out_msg_pos.data_pos += sent;
+ con->out_msg_pos.page_pos += sent;
+ if (sent < len)
+ return;
- (*seg)++;
- if (*seg == (*bio_iter)->bi_vcnt)
- init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
-}
+ BUG_ON(sent != len);
+ con->out_msg_pos.page_pos = 0;
+ con->out_msg_pos.page++;
+ con->out_msg_pos.did_page_crc = false;
+ if (in_trail)
+ list_move_tail(&page->lru,
+ &msg->trail->head);
+ else if (msg->pagelist)
+ list_move_tail(&page->lru,
+ &msg->pagelist->head);
+#ifdef CONFIG_BLOCK
+ else if (msg->bio)
+ iter_bio_next(&msg->bio_iter, &msg->bio_seg);
#endif
+}
/*
* Write as much message data payload as we can. If we finish, queue
@@ -837,41 +1015,36 @@ static int write_partial_msg_pages(struct ceph_connection *con)
bool do_datacrc = !con->msgr->nocrc;
int ret;
int total_max_write;
- int in_trail = 0;
- size_t trail_len = (msg->trail ? msg->trail->length : 0);
+ bool in_trail = false;
+ const size_t trail_len = (msg->trail ? msg->trail->length : 0);
+ const size_t trail_off = data_len - trail_len;
dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
- con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages,
+ con, msg, con->out_msg_pos.page, msg->nr_pages,
con->out_msg_pos.page_pos);
-#ifdef CONFIG_BLOCK
- if (msg->bio && !msg->bio_iter)
- init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
-#endif
-
+ /*
+ * Iterate through each page that contains data to be
+ * written, and send as much as possible for each.
+ *
+ * If we are calculating the data crc (the default), we will
+ * need to map the page. If we have no pages, they have
+ * been revoked, so use the zero page.
+ */
while (data_len > con->out_msg_pos.data_pos) {
struct page *page = NULL;
int max_write = PAGE_SIZE;
int bio_offset = 0;
- total_max_write = data_len - trail_len -
- con->out_msg_pos.data_pos;
-
- /*
- * if we are calculating the data crc (the default), we need
- * to map the page. if our pages[] has been revoked, use the
- * zero page.
- */
-
- /* have we reached the trail part of the data? */
- if (con->out_msg_pos.data_pos >= data_len - trail_len) {
- in_trail = 1;
+ in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off;
+ if (!in_trail)
+ total_max_write = trail_off - con->out_msg_pos.data_pos;
+ if (in_trail) {
total_max_write = data_len - con->out_msg_pos.data_pos;
page = list_first_entry(&msg->trail->head,
struct page, lru);
- max_write = PAGE_SIZE;
} else if (msg->pages) {
page = msg->pages[con->out_msg_pos.page];
} else if (msg->pagelist) {
@@ -894,15 +1067,14 @@ static int write_partial_msg_pages(struct ceph_connection *con)
if (do_datacrc && !con->out_msg_pos.did_page_crc) {
void *base;
- u32 crc;
- u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc);
+ u32 crc = le32_to_cpu(msg->footer.data_crc);
char *kaddr;
kaddr = kmap(page);
BUG_ON(kaddr == NULL);
base = kaddr + con->out_msg_pos.page_pos + bio_offset;
- crc = crc32c(tmpcrc, base, len);
- con->out_msg->footer.data_crc = cpu_to_le32(crc);
+ crc = crc32c(crc, base, len);
+ msg->footer.data_crc = cpu_to_le32(crc);
con->out_msg_pos.did_page_crc = true;
}
ret = ceph_tcp_sendpage(con->sock, page,
@@ -915,31 +1087,15 @@ static int write_partial_msg_pages(struct ceph_connection *con)
if (ret <= 0)
goto out;
- con->out_msg_pos.data_pos += ret;
- con->out_msg_pos.page_pos += ret;
- if (ret == len) {
- con->out_msg_pos.page_pos = 0;
- con->out_msg_pos.page++;
- con->out_msg_pos.did_page_crc = false;
- if (in_trail)
- list_move_tail(&page->lru,
- &msg->trail->head);
- else if (msg->pagelist)
- list_move_tail(&page->lru,
- &msg->pagelist->head);
-#ifdef CONFIG_BLOCK
- else if (msg->bio)
- iter_bio_next(&msg->bio_iter, &msg->bio_seg);
-#endif
- }
+ out_msg_pos_next(con, page, len, (size_t) ret, in_trail);
}
dout("write_partial_msg_pages %p msg %p done\n", con, msg);
/* prepare and queue up footer, too */
if (!do_datacrc)
- con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
- ceph_con_out_kvec_reset(con);
+ msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
+ con_out_kvec_reset(con);
prepare_write_message_footer(con);
ret = 1;
out:
@@ -1351,20 +1507,14 @@ static int process_banner(struct ceph_connection *con)
ceph_pr_addr(&con->msgr->inst.addr.in_addr));
}
- set_bit(NEGOTIATING, &con->state);
- prepare_read_connect(con);
return 0;
}
static void fail_protocol(struct ceph_connection *con)
{
reset_connection(con);
- set_bit(CLOSED, &con->state); /* in case there's queued work */
-
- mutex_unlock(&con->mutex);
- if (con->ops->bad_proto)
- con->ops->bad_proto(con);
- mutex_lock(&con->mutex);
+ BUG_ON(con->state != CON_STATE_NEGOTIATING);
+ con->state = CON_STATE_CLOSED;
}
static int process_connect(struct ceph_connection *con)
@@ -1407,7 +1557,6 @@ static int process_connect(struct ceph_connection *con)
return -1;
}
con->auth_retry = 1;
- ceph_con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
return ret;
@@ -1428,7 +1577,6 @@ static int process_connect(struct ceph_connection *con)
ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr));
reset_connection(con);
- ceph_con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
return ret;
@@ -1440,8 +1588,7 @@ static int process_connect(struct ceph_connection *con)
if (con->ops->peer_reset)
con->ops->peer_reset(con);
mutex_lock(&con->mutex);
- if (test_bit(CLOSED, &con->state) ||
- test_bit(OPENING, &con->state))
+ if (con->state != CON_STATE_NEGOTIATING)
return -EAGAIN;
break;
@@ -1454,7 +1601,6 @@ static int process_connect(struct ceph_connection *con)
le32_to_cpu(con->out_connect.connect_seq),
le32_to_cpu(con->in_reply.connect_seq));
con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
- ceph_con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
return ret;
@@ -1471,7 +1617,6 @@ static int process_connect(struct ceph_connection *con)
le32_to_cpu(con->in_reply.global_seq));
get_global_seq(con->msgr,
le32_to_cpu(con->in_reply.global_seq));
- ceph_con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
return ret;
@@ -1489,7 +1634,10 @@ static int process_connect(struct ceph_connection *con)
fail_protocol(con);
return -1;
}
- clear_bit(CONNECTING, &con->state);
+
+ BUG_ON(con->state != CON_STATE_NEGOTIATING);
+ con->state = CON_STATE_OPEN;
+
con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
con->connect_seq++;
con->peer_features = server_feat;
@@ -1501,7 +1649,9 @@ static int process_connect(struct ceph_connection *con)
le32_to_cpu(con->in_reply.connect_seq));
if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
- set_bit(LOSSYTX, &con->state);
+ set_bit(CON_FLAG_LOSSYTX, &con->flags);
+
+ con->delay = 0; /* reset backoff memory */
prepare_read_tag(con);
break;
@@ -1587,10 +1737,7 @@ static int read_partial_message_section(struct ceph_connection *con,
return 1;
}
-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
- struct ceph_msg_header *hdr,
- int *skip);
-
+static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
static int read_partial_message_pages(struct ceph_connection *con,
struct page **pages,
@@ -1633,9 +1780,6 @@ static int read_partial_message_bio(struct ceph_connection *con,
void *p;
int ret, left;
- if (IS_ERR(bv))
- return PTR_ERR(bv);
-
left = min((int)(data_len - con->in_msg_pos.data_pos),
(int)(bv->bv_len - con->in_msg_pos.page_pos));
@@ -1672,7 +1816,6 @@ static int read_partial_message(struct ceph_connection *con)
int ret;
unsigned int front_len, middle_len, data_len;
bool do_datacrc = !con->msgr->nocrc;
- int skip;
u64 seq;
u32 crc;
@@ -1723,10 +1866,13 @@ static int read_partial_message(struct ceph_connection *con)
/* allocate message? */
if (!con->in_msg) {
+ int skip = 0;
+
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
con->in_hdr.front_len, con->in_hdr.data_len);
- skip = 0;
- con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
+ ret = ceph_con_in_msg_alloc(con, &skip);
+ if (ret < 0)
+ return ret;
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
@@ -1737,11 +1883,9 @@ static int read_partial_message(struct ceph_connection *con)
con->in_seq++;
return 0;
}
- if (!con->in_msg) {
- con->error_msg =
- "error allocating memory for incoming message";
- return -ENOMEM;
- }
+
+ BUG_ON(!con->in_msg);
+ BUG_ON(con->in_msg->con != con);
m = con->in_msg;
m->front.iov_len = 0; /* haven't read it yet */
if (m->middle)
@@ -1753,6 +1897,11 @@ static int read_partial_message(struct ceph_connection *con)
else
con->in_msg_pos.page_pos = 0;
con->in_msg_pos.data_pos = 0;
+
+#ifdef CONFIG_BLOCK
+ if (m->bio)
+ init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
+#endif
}
/* front */
@@ -1769,10 +1918,6 @@ static int read_partial_message(struct ceph_connection *con)
if (ret <= 0)
return ret;
}
-#ifdef CONFIG_BLOCK
- if (m->bio && !m->bio_iter)
- init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
-#endif
/* (page) data */
while (con->in_msg_pos.data_pos < data_len) {
@@ -1783,7 +1928,7 @@ static int read_partial_message(struct ceph_connection *con)
return ret;
#ifdef CONFIG_BLOCK
} else if (m->bio) {
-
+ BUG_ON(!m->bio_iter);
ret = read_partial_message_bio(con,
&m->bio_iter, &m->bio_seg,
data_len, do_datacrc);
@@ -1837,8 +1982,11 @@ static void process_message(struct ceph_connection *con)
{
struct ceph_msg *msg;
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
msg = con->in_msg;
con->in_msg = NULL;
+ con->ops->put(con);
/* if first message, set peer_name */
if (con->peer_name.type == 0)
@@ -1858,7 +2006,6 @@ static void process_message(struct ceph_connection *con)
con->ops->dispatch(con, msg);
mutex_lock(&con->mutex);
- prepare_read_tag(con);
}
@@ -1870,22 +2017,19 @@ static int try_write(struct ceph_connection *con)
{
int ret = 1;
- dout("try_write start %p state %lu nref %d\n", con, con->state,
- atomic_read(&con->nref));
+ dout("try_write start %p state %lu\n", con, con->state);
more:
dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
/* open the socket first? */
- if (con->sock == NULL) {
- ceph_con_out_kvec_reset(con);
+ if (con->state == CON_STATE_PREOPEN) {
+ BUG_ON(con->sock);
+ con->state = CON_STATE_CONNECTING;
+
+ con_out_kvec_reset(con);
prepare_write_banner(con);
- ret = prepare_write_connect(con);
- if (ret < 0)
- goto out;
prepare_read_banner(con);
- set_bit(CONNECTING, &con->state);
- clear_bit(NEGOTIATING, &con->state);
BUG_ON(con->in_msg);
con->in_tag = CEPH_MSGR_TAG_READY;
@@ -1932,7 +2076,7 @@ more_kvec:
}
do_next:
- if (!test_bit(CONNECTING, &con->state)) {
+ if (con->state == CON_STATE_OPEN) {
/* is anything else pending? */
if (!list_empty(&con->out_queue)) {
prepare_write_message(con);
@@ -1942,14 +2086,15 @@ do_next:
prepare_write_ack(con);
goto more;
}
- if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) {
+ if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING,
+ &con->flags)) {
prepare_write_keepalive(con);
goto more;
}
}
/* Nothing to do! */
- clear_bit(WRITE_PENDING, &con->state);
+ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
dout("try_write nothing else to write.\n");
ret = 0;
out:
@@ -1966,38 +2111,42 @@ static int try_read(struct ceph_connection *con)
{
int ret = -1;
- if (!con->sock)
- return 0;
-
- if (test_bit(STANDBY, &con->state))
+more:
+ dout("try_read start on %p state %lu\n", con, con->state);
+ if (con->state != CON_STATE_CONNECTING &&
+ con->state != CON_STATE_NEGOTIATING &&
+ con->state != CON_STATE_OPEN)
return 0;
- dout("try_read start on %p\n", con);
+ BUG_ON(!con->sock);
-more:
dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
con->in_base_pos);
- /*
- * process_connect and process_message drop and re-take
- * con->mutex. make sure we handle a racing close or reopen.
- */
- if (test_bit(CLOSED, &con->state) ||
- test_bit(OPENING, &con->state)) {
- ret = -EAGAIN;
+ if (con->state == CON_STATE_CONNECTING) {
+ dout("try_read connecting\n");
+ ret = read_partial_banner(con);
+ if (ret <= 0)
+ goto out;
+ ret = process_banner(con);
+ if (ret < 0)
+ goto out;
+
+ BUG_ON(con->state != CON_STATE_CONNECTING);
+ con->state = CON_STATE_NEGOTIATING;
+
+ /* Banner is good, exchange connection info */
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ goto out;
+ prepare_read_connect(con);
+
+ /* Send connection info before awaiting response */
goto out;
}
- if (test_bit(CONNECTING, &con->state)) {
- if (!test_bit(NEGOTIATING, &con->state)) {
- dout("try_read connecting\n");
- ret = read_partial_banner(con);
- if (ret <= 0)
- goto out;
- ret = process_banner(con);
- if (ret < 0)
- goto out;
- }
+ if (con->state == CON_STATE_NEGOTIATING) {
+ dout("try_read negotiating\n");
ret = read_partial_connect(con);
if (ret <= 0)
goto out;
@@ -2007,6 +2156,8 @@ more:
goto more;
}
+ BUG_ON(con->state != CON_STATE_OPEN);
+
if (con->in_base_pos < 0) {
/*
* skipping + discarding content.
@@ -2040,7 +2191,8 @@ more:
prepare_read_ack(con);
break;
case CEPH_MSGR_TAG_CLOSE:
- set_bit(CLOSED, &con->state); /* fixme */
+ con_close_socket(con);
+ con->state = CON_STATE_CLOSED;
goto out;
default:
goto bad_tag;
@@ -2063,6 +2215,8 @@ more:
if (con->in_tag == CEPH_MSGR_TAG_READY)
goto more;
process_message(con);
+ if (con->state == CON_STATE_OPEN)
+ prepare_read_tag(con);
goto more;
}
if (con->in_tag == CEPH_MSGR_TAG_ACK) {
@@ -2091,12 +2245,6 @@ bad_tag:
*/
static void queue_con(struct ceph_connection *con)
{
- if (test_bit(DEAD, &con->state)) {
- dout("queue_con %p ignoring: DEAD\n",
- con);
- return;
- }
-
if (!con->ops->get(con)) {
dout("queue_con %p ref count 0\n", con);
return;
@@ -2121,7 +2269,26 @@ static void con_work(struct work_struct *work)
mutex_lock(&con->mutex);
restart:
- if (test_and_clear_bit(BACKOFF, &con->state)) {
+ if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) {
+ switch (con->state) {
+ case CON_STATE_CONNECTING:
+ con->error_msg = "connection failed";
+ break;
+ case CON_STATE_NEGOTIATING:
+ con->error_msg = "negotiation failed";
+ break;
+ case CON_STATE_OPEN:
+ con->error_msg = "socket closed";
+ break;
+ default:
+ dout("unrecognized con state %d\n", (int)con->state);
+ con->error_msg = "unrecognized con state";
+ BUG();
+ }
+ goto fault;
+ }
+
+ if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) {
dout("con_work %p backing off\n", con);
if (queue_delayed_work(ceph_msgr_wq, &con->work,
round_jiffies_relative(con->delay))) {
@@ -2135,35 +2302,35 @@ restart:
}
}
- if (test_bit(STANDBY, &con->state)) {
+ if (con->state == CON_STATE_STANDBY) {
dout("con_work %p STANDBY\n", con);
goto done;
}
- if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
- dout("con_work CLOSED\n");
- con_close_socket(con);
+ if (con->state == CON_STATE_CLOSED) {
+ dout("con_work %p CLOSED\n", con);
+ BUG_ON(con->sock);
goto done;
}
- if (test_and_clear_bit(OPENING, &con->state)) {
- /* reopen w/ new peer */
+ if (con->state == CON_STATE_PREOPEN) {
dout("con_work OPENING\n");
- con_close_socket(con);
+ BUG_ON(con->sock);
}
- if (test_and_clear_bit(SOCK_CLOSED, &con->state))
- goto fault;
-
ret = try_read(con);
if (ret == -EAGAIN)
goto restart;
- if (ret < 0)
+ if (ret < 0) {
+ con->error_msg = "socket error on read";
goto fault;
+ }
ret = try_write(con);
if (ret == -EAGAIN)
goto restart;
- if (ret < 0)
+ if (ret < 0) {
+ con->error_msg = "socket error on write";
goto fault;
+ }
done:
mutex_unlock(&con->mutex);
@@ -2172,7 +2339,6 @@ done_unlocked:
return;
fault:
- mutex_unlock(&con->mutex);
ceph_fault(con); /* error/fault path */
goto done_unlocked;
}
@@ -2183,26 +2349,31 @@ fault:
* exponential backoff
*/
static void ceph_fault(struct ceph_connection *con)
+ __releases(con->mutex)
{
pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
dout("fault %p state %lu to peer %s\n",
con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
- if (test_bit(LOSSYTX, &con->state)) {
- dout("fault on LOSSYTX channel\n");
- goto out;
- }
-
- mutex_lock(&con->mutex);
- if (test_bit(CLOSED, &con->state))
- goto out_unlock;
+ BUG_ON(con->state != CON_STATE_CONNECTING &&
+ con->state != CON_STATE_NEGOTIATING &&
+ con->state != CON_STATE_OPEN);
con_close_socket(con);
+ if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) {
+ dout("fault on LOSSYTX channel, marking CLOSED\n");
+ con->state = CON_STATE_CLOSED;
+ goto out_unlock;
+ }
+
if (con->in_msg) {
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
+ con->ops->put(con);
}
/* Requeue anything that hasn't been acked */
@@ -2211,12 +2382,13 @@ static void ceph_fault(struct ceph_connection *con)
/* If there are no messages queued or keepalive pending, place
* the connection in a STANDBY state */
if (list_empty(&con->out_queue) &&
- !test_bit(KEEPALIVE_PENDING, &con->state)) {
+ !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) {
dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
- clear_bit(WRITE_PENDING, &con->state);
- set_bit(STANDBY, &con->state);
+ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+ con->state = CON_STATE_STANDBY;
} else {
/* retry after a delay. */
+ con->state = CON_STATE_PREOPEN;
if (con->delay == 0)
con->delay = BASE_DELAY_INTERVAL;
else if (con->delay < MAX_DELAY_INTERVAL)
@@ -2237,13 +2409,12 @@ static void ceph_fault(struct ceph_connection *con)
* that when con_work restarts we schedule the
* delay then.
*/
- set_bit(BACKOFF, &con->state);
+ set_bit(CON_FLAG_BACKOFF, &con->flags);
}
}
out_unlock:
mutex_unlock(&con->mutex);
-out:
/*
* in case we faulted due to authentication, invalidate our
* current tickets so that we can get new ones.
@@ -2260,18 +2431,14 @@ out:
/*
- * create a new messenger instance
+ * initialize a new messenger instance
*/
-struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
- u32 supported_features,
- u32 required_features)
+void ceph_messenger_init(struct ceph_messenger *msgr,
+ struct ceph_entity_addr *myaddr,
+ u32 supported_features,
+ u32 required_features,
+ bool nocrc)
{
- struct ceph_messenger *msgr;
-
- msgr = kzalloc(sizeof(*msgr), GFP_KERNEL);
- if (msgr == NULL)
- return ERR_PTR(-ENOMEM);
-
msgr->supported_features = supported_features;
msgr->required_features = required_features;
@@ -2284,30 +2451,23 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
msgr->inst.addr.type = 0;
get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
encode_my_addr(msgr);
+ msgr->nocrc = nocrc;
- dout("messenger_create %p\n", msgr);
- return msgr;
-}
-EXPORT_SYMBOL(ceph_messenger_create);
+ atomic_set(&msgr->stopping, 0);
-void ceph_messenger_destroy(struct ceph_messenger *msgr)
-{
- dout("destroy %p\n", msgr);
- kfree(msgr);
- dout("destroyed messenger %p\n", msgr);
+ dout("%s %p\n", __func__, msgr);
}
-EXPORT_SYMBOL(ceph_messenger_destroy);
+EXPORT_SYMBOL(ceph_messenger_init);
static void clear_standby(struct ceph_connection *con)
{
/* come back from STANDBY? */
- if (test_and_clear_bit(STANDBY, &con->state)) {
- mutex_lock(&con->mutex);
+ if (con->state == CON_STATE_STANDBY) {
dout("clear_standby %p and ++connect_seq\n", con);
+ con->state = CON_STATE_PREOPEN;
con->connect_seq++;
- WARN_ON(test_bit(WRITE_PENDING, &con->state));
- WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
- mutex_unlock(&con->mutex);
+ WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags));
+ WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags));
}
}
@@ -2316,21 +2476,24 @@ static void clear_standby(struct ceph_connection *con)
*/
void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
{
- if (test_bit(CLOSED, &con->state)) {
- dout("con_send %p closed, dropping %p\n", con, msg);
- ceph_msg_put(msg);
- return;
- }
-
/* set src+dst */
msg->hdr.src = con->msgr->inst.name;
-
BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
-
msg->needs_out_seq = true;
- /* queue */
mutex_lock(&con->mutex);
+
+ if (con->state == CON_STATE_CLOSED) {
+ dout("con_send %p closed, dropping %p\n", con, msg);
+ ceph_msg_put(msg);
+ mutex_unlock(&con->mutex);
+ return;
+ }
+
+ BUG_ON(msg->con != NULL);
+ msg->con = con->ops->get(con);
+ BUG_ON(msg->con == NULL);
+
BUG_ON(!list_empty(&msg->list_head));
list_add_tail(&msg->list_head, &con->out_queue);
dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
@@ -2339,12 +2502,13 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
le32_to_cpu(msg->hdr.front_len),
le32_to_cpu(msg->hdr.middle_len),
le32_to_cpu(msg->hdr.data_len));
+
+ clear_standby(con);
mutex_unlock(&con->mutex);
/* if there wasn't anything waiting to send before, queue
* new work */
- clear_standby(con);
- if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
+ if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_send);
@@ -2352,24 +2516,34 @@ EXPORT_SYMBOL(ceph_con_send);
/*
* Revoke a message that was previously queued for send
*/
-void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
+void ceph_msg_revoke(struct ceph_msg *msg)
{
+ struct ceph_connection *con = msg->con;
+
+ if (!con)
+ return; /* Message not in our possession */
+
mutex_lock(&con->mutex);
if (!list_empty(&msg->list_head)) {
- dout("con_revoke %p msg %p - was on queue\n", con, msg);
+ dout("%s %p msg %p - was on queue\n", __func__, con, msg);
list_del_init(&msg->list_head);
- ceph_msg_put(msg);
+ BUG_ON(msg->con == NULL);
+ msg->con->ops->put(msg->con);
+ msg->con = NULL;
msg->hdr.seq = 0;
+
+ ceph_msg_put(msg);
}
if (con->out_msg == msg) {
- dout("con_revoke %p msg %p - was sending\n", con, msg);
+ dout("%s %p msg %p - was sending\n", __func__, con, msg);
con->out_msg = NULL;
if (con->out_kvec_is_msg) {
con->out_skip = con->out_kvec_bytes;
con->out_kvec_is_msg = false;
}
- ceph_msg_put(msg);
msg->hdr.seq = 0;
+
+ ceph_msg_put(msg);
}
mutex_unlock(&con->mutex);
}
@@ -2377,17 +2551,27 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
/*
* Revoke a message that we may be reading data into
*/
-void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
+void ceph_msg_revoke_incoming(struct ceph_msg *msg)
{
+ struct ceph_connection *con;
+
+ BUG_ON(msg == NULL);
+ if (!msg->con) {
+ dout("%s msg %p null con\n", __func__, msg);
+
+ return; /* Message not in our possession */
+ }
+
+ con = msg->con;
mutex_lock(&con->mutex);
- if (con->in_msg && con->in_msg == msg) {
+ if (con->in_msg == msg) {
unsigned int front_len = le32_to_cpu(con->in_hdr.front_len);
unsigned int middle_len = le32_to_cpu(con->in_hdr.middle_len);
unsigned int data_len = le32_to_cpu(con->in_hdr.data_len);
/* skip rest of message */
- dout("con_revoke_pages %p msg %p revoked\n", con, msg);
- con->in_base_pos = con->in_base_pos -
+ dout("%s %p msg %p revoked\n", __func__, con, msg);
+ con->in_base_pos = con->in_base_pos -
sizeof(struct ceph_msg_header) -
front_len -
middle_len -
@@ -2398,8 +2582,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
con->in_tag = CEPH_MSGR_TAG_READY;
con->in_seq++;
} else {
- dout("con_revoke_pages %p msg %p pages %p no-op\n",
- con, con->in_msg, msg);
+ dout("%s %p in_msg %p msg %p no-op\n",
+ __func__, con, con->in_msg, msg);
}
mutex_unlock(&con->mutex);
}
@@ -2410,9 +2594,11 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
void ceph_con_keepalive(struct ceph_connection *con)
{
dout("con_keepalive %p\n", con);
+ mutex_lock(&con->mutex);
clear_standby(con);
- if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
- test_and_set_bit(WRITE_PENDING, &con->state) == 0)
+ mutex_unlock(&con->mutex);
+ if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 &&
+ test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_keepalive);
@@ -2431,6 +2617,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
if (m == NULL)
goto out;
kref_init(&m->kref);
+
+ m->con = NULL;
INIT_LIST_HEAD(&m->list_head);
m->hdr.tid = 0;
@@ -2526,46 +2714,77 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
}
/*
- * Generic message allocator, for incoming messages.
+ * Allocate a message for receiving an incoming message on a
+ * connection, and save the result in con->in_msg. Uses the
+ * connection's private alloc_msg op if available.
+ *
+ * Returns 0 on success, or a negative error code.
+ *
+ * On success, if we set *skip = 1:
+ * - the next message should be skipped and ignored.
+ * - con->in_msg == NULL
+ * or if we set *skip = 0:
+ * - con->in_msg is non-null.
+ * On error (ENOMEM, EAGAIN, ...),
+ * - con->in_msg == NULL
*/
-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
- struct ceph_msg_header *hdr,
- int *skip)
+static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
{
+ struct ceph_msg_header *hdr = &con->in_hdr;
int type = le16_to_cpu(hdr->type);
int front_len = le32_to_cpu(hdr->front_len);
int middle_len = le32_to_cpu(hdr->middle_len);
- struct ceph_msg *msg = NULL;
- int ret;
+ int ret = 0;
+
+ BUG_ON(con->in_msg != NULL);
if (con->ops->alloc_msg) {
+ struct ceph_msg *msg;
+
mutex_unlock(&con->mutex);
msg = con->ops->alloc_msg(con, hdr, skip);
mutex_lock(&con->mutex);
- if (!msg || *skip)
- return NULL;
+ if (con->state != CON_STATE_OPEN) {
+ ceph_msg_put(msg);
+ return -EAGAIN;
+ }
+ con->in_msg = msg;
+ if (con->in_msg) {
+ con->in_msg->con = con->ops->get(con);
+ BUG_ON(con->in_msg->con == NULL);
+ }
+ if (*skip) {
+ con->in_msg = NULL;
+ return 0;
+ }
+ if (!con->in_msg) {
+ con->error_msg =
+ "error allocating memory for incoming message";
+ return -ENOMEM;
+ }
}
- if (!msg) {
- *skip = 0;
- msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
- if (!msg) {
+ if (!con->in_msg) {
+ con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
+ if (!con->in_msg) {
pr_err("unable to allocate msg type %d len %d\n",
type, front_len);
- return NULL;
+ return -ENOMEM;
}
- msg->page_alignment = le16_to_cpu(hdr->data_off);
+ con->in_msg->con = con->ops->get(con);
+ BUG_ON(con->in_msg->con == NULL);
+ con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
}
- memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
+ memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
- if (middle_len && !msg->middle) {
- ret = ceph_alloc_middle(con, msg);
+ if (middle_len && !con->in_msg->middle) {
+ ret = ceph_alloc_middle(con, con->in_msg);
if (ret < 0) {
- ceph_msg_put(msg);
- return NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
}
}
- return msg;
+ return ret;
}
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index d0649a9655be..105d533b55f3 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -106,9 +106,9 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
monc->pending_auth = 1;
monc->m_auth->front.iov_len = len;
monc->m_auth->hdr.front_len = cpu_to_le32(len);
- ceph_con_revoke(monc->con, monc->m_auth);
+ ceph_msg_revoke(monc->m_auth);
ceph_msg_get(monc->m_auth); /* keep our ref */
- ceph_con_send(monc->con, monc->m_auth);
+ ceph_con_send(&monc->con, monc->m_auth);
}
/*
@@ -117,8 +117,11 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
static void __close_session(struct ceph_mon_client *monc)
{
dout("__close_session closing mon%d\n", monc->cur_mon);
- ceph_con_revoke(monc->con, monc->m_auth);
- ceph_con_close(monc->con);
+ ceph_msg_revoke(monc->m_auth);
+ ceph_msg_revoke_incoming(monc->m_auth_reply);
+ ceph_msg_revoke(monc->m_subscribe);
+ ceph_msg_revoke_incoming(monc->m_subscribe_ack);
+ ceph_con_close(&monc->con);
monc->cur_mon = -1;
monc->pending_auth = 0;
ceph_auth_reset(monc->auth);
@@ -142,9 +145,8 @@ static int __open_session(struct ceph_mon_client *monc)
monc->want_next_osdmap = !!monc->want_next_osdmap;
dout("open_session mon%d opening\n", monc->cur_mon);
- monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON;
- monc->con->peer_name.num = cpu_to_le64(monc->cur_mon);
- ceph_con_open(monc->con,
+ ceph_con_open(&monc->con,
+ CEPH_ENTITY_TYPE_MON, monc->cur_mon,
&monc->monmap->mon_inst[monc->cur_mon].addr);
/* initiatiate authentication handshake */
@@ -226,8 +228,8 @@ static void __send_subscribe(struct ceph_mon_client *monc)
msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
- ceph_con_revoke(monc->con, msg);
- ceph_con_send(monc->con, ceph_msg_get(msg));
+ ceph_msg_revoke(msg);
+ ceph_con_send(&monc->con, ceph_msg_get(msg));
monc->sub_sent = jiffies | 1; /* never 0 */
}
@@ -247,7 +249,7 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
if (monc->hunting) {
pr_info("mon%d %s session established\n",
monc->cur_mon,
- ceph_pr_addr(&monc->con->peer_addr.in_addr));
+ ceph_pr_addr(&monc->con.peer_addr.in_addr));
monc->hunting = false;
}
dout("handle_subscribe_ack after %d seconds\n", seconds);
@@ -439,6 +441,7 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
m = NULL;
} else {
dout("get_generic_reply %lld got %p\n", tid, req->reply);
+ *skip = 0;
m = ceph_msg_get(req->reply);
/*
* we don't need to track the connection reading into
@@ -461,7 +464,7 @@ static int do_generic_request(struct ceph_mon_client *monc,
req->request->hdr.tid = cpu_to_le64(req->tid);
__insert_generic_request(monc, req);
monc->num_generic_requests++;
- ceph_con_send(monc->con, ceph_msg_get(req->request));
+ ceph_con_send(&monc->con, ceph_msg_get(req->request));
mutex_unlock(&monc->mutex);
err = wait_for_completion_interruptible(&req->completion);
@@ -684,8 +687,9 @@ static void __resend_generic_request(struct ceph_mon_client *monc)
for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
req = rb_entry(p, struct ceph_mon_generic_request, node);
- ceph_con_revoke(monc->con, req->request);
- ceph_con_send(monc->con, ceph_msg_get(req->request));
+ ceph_msg_revoke(req->request);
+ ceph_msg_revoke_incoming(req->reply);
+ ceph_con_send(&monc->con, ceph_msg_get(req->request));
}
}
@@ -705,7 +709,7 @@ static void delayed_work(struct work_struct *work)
__close_session(monc);
__open_session(monc); /* continue hunting */
} else {
- ceph_con_keepalive(monc->con);
+ ceph_con_keepalive(&monc->con);
__validate_auth(monc);
@@ -760,19 +764,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
goto out;
/* connection */
- monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
- if (!monc->con)
- goto out_monmap;
- ceph_con_init(monc->client->msgr, monc->con);
- monc->con->private = monc;
- monc->con->ops = &mon_con_ops;
-
/* authentication */
monc->auth = ceph_auth_init(cl->options->name,
cl->options->key);
if (IS_ERR(monc->auth)) {
err = PTR_ERR(monc->auth);
- goto out_con;
+ goto out_monmap;
}
monc->auth->want_keys =
CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON |
@@ -801,6 +798,9 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
if (!monc->m_auth)
goto out_auth_reply;
+ ceph_con_init(&monc->con, monc, &mon_con_ops,
+ &monc->client->msgr);
+
monc->cur_mon = -1;
monc->hunting = true;
monc->sub_renew_after = jiffies;
@@ -824,8 +824,6 @@ out_subscribe_ack:
ceph_msg_put(monc->m_subscribe_ack);
out_auth:
ceph_auth_destroy(monc->auth);
-out_con:
- monc->con->ops->put(monc->con);
out_monmap:
kfree(monc->monmap);
out:
@@ -841,10 +839,6 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
mutex_lock(&monc->mutex);
__close_session(monc);
- monc->con->private = NULL;
- monc->con->ops->put(monc->con);
- monc->con = NULL;
-
mutex_unlock(&monc->mutex);
/*
@@ -888,8 +882,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
} else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
- monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
- monc->client->msgr->inst.name.num =
+ monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
+ monc->client->msgr.inst.name.num =
cpu_to_le64(monc->auth->global_id);
__send_subscribe(monc);
@@ -1000,6 +994,8 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
case CEPH_MSG_MDS_MAP:
case CEPH_MSG_OSD_MAP:
m = ceph_msg_new(type, front_len, GFP_NOFS, false);
+ if (!m)
+ return NULL; /* ENOMEM--return skip == 0 */
break;
}
@@ -1029,7 +1025,7 @@ static void mon_fault(struct ceph_connection *con)
if (!monc->hunting)
pr_info("mon%d %s session lost, "
"hunting for new mon\n", monc->cur_mon,
- ceph_pr_addr(&monc->con->peer_addr.in_addr));
+ ceph_pr_addr(&monc->con.peer_addr.in_addr));
__close_session(monc);
if (!monc->hunting) {
@@ -1044,9 +1040,23 @@ out:
mutex_unlock(&monc->mutex);
}
+/*
+ * We can ignore refcounting on the connection struct, as all references
+ * will come from the messenger workqueue, which is drained prior to
+ * mon_client destruction.
+ */
+static struct ceph_connection *con_get(struct ceph_connection *con)
+{
+ return con;
+}
+
+static void con_put(struct ceph_connection *con)
+{
+}
+
static const struct ceph_connection_operations mon_con_ops = {
- .get = ceph_con_get,
- .put = ceph_con_put,
+ .get = con_get,
+ .put = con_put,
.dispatch = dispatch,
.fault = mon_fault,
.alloc_msg = mon_alloc_msg,
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index 11d5f4196a73..ddec1c10ac80 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
struct ceph_msgpool *pool = arg;
struct ceph_msg *msg;
- msg = ceph_msg_new(0, pool->front_len, gfp_mask, true);
+ msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
if (!msg) {
dout("msgpool_alloc %s failed\n", pool->name);
} else {
@@ -32,10 +32,11 @@ static void msgpool_free(void *element, void *arg)
ceph_msg_put(msg);
}
-int ceph_msgpool_init(struct ceph_msgpool *pool,
+int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
int front_len, int size, bool blocking, const char *name)
{
dout("msgpool %s init\n", name);
+ pool->type = type;
pool->front_len = front_len;
pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
if (!pool->pool)
@@ -61,7 +62,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
WARN_ON(1);
/* try to alloc a fresh message */
- return ceph_msg_new(0, front_len, GFP_NOFS, false);
+ return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
}
msg = mempool_alloc(pool->pool, GFP_NOFS);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ca59e66c9787..42119c05e82c 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -140,10 +140,9 @@ void ceph_osdc_release_request(struct kref *kref)
if (req->r_request)
ceph_msg_put(req->r_request);
if (req->r_con_filling_msg) {
- dout("release_request revoking pages %p from con %p\n",
+ dout("%s revoking pages %p from con %p\n", __func__,
req->r_pages, req->r_con_filling_msg);
- ceph_con_revoke_message(req->r_con_filling_msg,
- req->r_reply);
+ ceph_msg_revoke_incoming(req->r_reply);
req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
}
if (req->r_reply)
@@ -214,10 +213,13 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
kref_init(&req->r_kref);
init_completion(&req->r_completion);
init_completion(&req->r_safe_completion);
+ rb_init_node(&req->r_node);
INIT_LIST_HEAD(&req->r_unsafe_item);
INIT_LIST_HEAD(&req->r_linger_item);
INIT_LIST_HEAD(&req->r_linger_osd);
INIT_LIST_HEAD(&req->r_req_lru_item);
+ INIT_LIST_HEAD(&req->r_osd_item);
+
req->r_flags = flags;
WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
@@ -243,6 +245,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
}
ceph_pagelist_init(req->r_trail);
}
+
/* create request message; allow space for oid */
msg_size += MAX_OBJ_NAME_SIZE;
if (snapc)
@@ -256,7 +259,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
return NULL;
}
- msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP);
memset(msg->front.iov_base, 0, msg->front.iov_len);
req->r_request = msg;
@@ -624,7 +626,7 @@ static void osd_reset(struct ceph_connection *con)
/*
* Track open sessions with osds.
*/
-static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
+static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
{
struct ceph_osd *osd;
@@ -634,15 +636,13 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
atomic_set(&osd->o_ref, 1);
osd->o_osdc = osdc;
+ osd->o_osd = onum;
INIT_LIST_HEAD(&osd->o_requests);
INIT_LIST_HEAD(&osd->o_linger_requests);
INIT_LIST_HEAD(&osd->o_osd_lru);
osd->o_incarnation = 1;
- ceph_con_init(osdc->client->msgr, &osd->o_con);
- osd->o_con.private = osd;
- osd->o_con.ops = &osd_con_ops;
- osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
+ ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
INIT_LIST_HEAD(&osd->o_keepalive_item);
return osd;
@@ -688,7 +688,7 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
static void remove_all_osds(struct ceph_osd_client *osdc)
{
- dout("__remove_old_osds %p\n", osdc);
+ dout("%s %p\n", __func__, osdc);
mutex_lock(&osdc->request_mutex);
while (!RB_EMPTY_ROOT(&osdc->osds)) {
struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
@@ -752,7 +752,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
ret = -EAGAIN;
} else {
ceph_con_close(&osd->o_con);
- ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
+ ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd,
+ &osdc->osdmap->osd_addr[osd->o_osd]);
osd->o_incarnation++;
}
return ret;
@@ -853,7 +854,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
if (req->r_osd) {
/* make sure the original request isn't in flight. */
- ceph_con_revoke(&req->r_osd->o_con, req->r_request);
+ ceph_msg_revoke(req->r_request);
list_del_init(&req->r_osd_item);
if (list_empty(&req->r_osd->o_requests) &&
@@ -880,7 +881,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
static void __cancel_request(struct ceph_osd_request *req)
{
if (req->r_sent && req->r_osd) {
- ceph_con_revoke(&req->r_osd->o_con, req->r_request);
+ ceph_msg_revoke(req->r_request);
req->r_sent = 0;
}
}
@@ -890,7 +891,9 @@ static void __register_linger_request(struct ceph_osd_client *osdc,
{
dout("__register_linger_request %p\n", req);
list_add_tail(&req->r_linger_item, &osdc->req_linger);
- list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
+ if (req->r_osd)
+ list_add_tail(&req->r_linger_osd,
+ &req->r_osd->o_linger_requests);
}
static void __unregister_linger_request(struct ceph_osd_client *osdc,
@@ -998,18 +1001,18 @@ static int __map_request(struct ceph_osd_client *osdc,
req->r_osd = __lookup_osd(osdc, o);
if (!req->r_osd && o >= 0) {
err = -ENOMEM;
- req->r_osd = create_osd(osdc);
+ req->r_osd = create_osd(osdc, o);
if (!req->r_osd) {
list_move(&req->r_req_lru_item, &osdc->req_notarget);
goto out;
}
dout("map_request osd %p is osd%d\n", req->r_osd, o);
- req->r_osd->o_osd = o;
- req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
__insert_osd(osdc, req->r_osd);
- ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]);
+ ceph_con_open(&req->r_osd->o_con,
+ CEPH_ENTITY_TYPE_OSD, o,
+ &osdc->osdmap->osd_addr[o]);
}
if (req->r_osd) {
@@ -1304,8 +1307,9 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
dout("kick_requests %s\n", force_resend ? " (force resend)" : "");
mutex_lock(&osdc->request_mutex);
- for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
+ for (p = rb_first(&osdc->requests); p; ) {
req = rb_entry(p, struct ceph_osd_request, r_node);
+ p = rb_next(p);
err = __map_request(osdc, req, force_resend);
if (err < 0)
continue; /* error */
@@ -1313,10 +1317,23 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
dout("%p tid %llu maps to no osd\n", req, req->r_tid);
needmap++; /* request a newer map */
} else if (err > 0) {
- dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
- req->r_osd ? req->r_osd->o_osd : -1);
- if (!req->r_linger)
+ if (!req->r_linger) {
+ dout("%p tid %llu requeued on osd%d\n", req,
+ req->r_tid,
+ req->r_osd ? req->r_osd->o_osd : -1);
req->r_flags |= CEPH_OSD_FLAG_RETRY;
+ }
+ }
+ if (req->r_linger && list_empty(&req->r_linger_item)) {
+ /*
+ * register as a linger so that we will
+ * re-submit below and get a new tid
+ */
+ dout("%p tid %llu restart on osd%d\n",
+ req, req->r_tid,
+ req->r_osd ? req->r_osd->o_osd : -1);
+ __register_linger_request(osdc, req);
+ __unregister_request(osdc, req);
}
}
@@ -1391,7 +1408,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
epoch, maplen);
newmap = osdmap_apply_incremental(&p, next,
osdc->osdmap,
- osdc->client->msgr);
+ &osdc->client->msgr);
if (IS_ERR(newmap)) {
err = PTR_ERR(newmap);
goto bad;
@@ -1839,11 +1856,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
if (!osdc->req_mempool)
goto out;
- err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true,
+ err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
+ OSD_OP_FRONT_LEN, 10, true,
"osd_op");
if (err < 0)
goto out_mempool;
- err = ceph_msgpool_init(&osdc->msgpool_op_reply,
+ err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
OSD_OPREPLY_FRONT_LEN, 10, true,
"osd_op_reply");
if (err < 0)
@@ -2019,15 +2037,15 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
if (!req) {
*skip = 1;
m = NULL;
- pr_info("get_reply unknown tid %llu from osd%d\n", tid,
- osd->o_osd);
+ dout("get_reply unknown tid %llu from osd%d\n", tid,
+ osd->o_osd);
goto out;
}
if (req->r_con_filling_msg) {
- dout("get_reply revoking msg %p from old con %p\n",
+ dout("%s revoking msg %p from old con %p\n", __func__,
req->r_reply, req->r_con_filling_msg);
- ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply);
+ ceph_msg_revoke_incoming(req->r_reply);
req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
req->r_con_filling_msg = NULL;
}
@@ -2080,6 +2098,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
int type = le16_to_cpu(hdr->type);
int front = le32_to_cpu(hdr->front_len);
+ *skip = 0;
switch (type) {
case CEPH_MSG_OSD_MAP:
case CEPH_MSG_WATCH_NOTIFY:
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 81e3b84a77ef..3124b71a8883 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -135,6 +135,21 @@ bad:
return -EINVAL;
}
+static int skip_name_map(void **p, void *end)
+{
+ int len;
+ ceph_decode_32_safe(p, end, len ,bad);
+ while (len--) {
+ int strlen;
+ *p += sizeof(u32);
+ ceph_decode_32_safe(p, end, strlen, bad);
+ *p += strlen;
+}
+ return 0;
+bad:
+ return -EINVAL;
+}
+
static struct crush_map *crush_decode(void *pbyval, void *end)
{
struct crush_map *c;
@@ -143,6 +158,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
void **p = &pbyval;
void *start = pbyval;
u32 magic;
+ u32 num_name_maps;
dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));
@@ -150,6 +166,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
if (c == NULL)
return ERR_PTR(-ENOMEM);
+ /* set tunables to default values */
+ c->choose_local_tries = 2;
+ c->choose_local_fallback_tries = 5;
+ c->choose_total_tries = 19;
+
ceph_decode_need(p, end, 4*sizeof(u32), bad);
magic = ceph_decode_32(p);
if (magic != CRUSH_MAGIC) {
@@ -297,7 +318,25 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
}
/* ignore trailing name maps. */
+ for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
+ err = skip_name_map(p, end);
+ if (err < 0)
+ goto done;
+ }
+
+ /* tunables */
+ ceph_decode_need(p, end, 3*sizeof(u32), done);
+ c->choose_local_tries = ceph_decode_32(p);
+ c->choose_local_fallback_tries = ceph_decode_32(p);
+ c->choose_total_tries = ceph_decode_32(p);
+ dout("crush decode tunable choose_local_tries = %d",
+ c->choose_local_tries);
+ dout("crush decode tunable choose_local_fallback_tries = %d",
+ c->choose_local_fallback_tries);
+ dout("crush decode tunable choose_total_tries = %d",
+ c->choose_total_tries);
+done:
dout("crush_decode success\n");
return c;
@@ -488,15 +527,16 @@ static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
ceph_decode_32_safe(p, end, pool, bad);
ceph_decode_32_safe(p, end, len, bad);
dout(" pool %d len %d\n", pool, len);
+ ceph_decode_need(p, end, len, bad);
pi = __lookup_pg_pool(&map->pg_pools, pool);
if (pi) {
+ char *name = kstrndup(*p, len, GFP_NOFS);
+
+ if (!name)
+ return -ENOMEM;
kfree(pi->name);
- pi->name = kmalloc(len + 1, GFP_NOFS);
- if (pi->name) {
- memcpy(pi->name, *p, len);
- pi->name[len] = '\0';
- dout(" name is %s\n", pi->name);
- }
+ pi->name = name;
+ dout(" name is %s\n", pi->name);
}
*p += len;
}
@@ -666,6 +706,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
ceph_decode_copy(p, &pgid, sizeof(pgid));
n = ceph_decode_32(p);
+ err = -EINVAL;
+ if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
+ goto bad;
ceph_decode_need(p, end, n * sizeof(u32), bad);
err = -ENOMEM;
pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
@@ -889,6 +932,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
(void) __remove_pg_mapping(&map->pg_temp, pgid);
/* insert */
+ if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) {
+ err = -EINVAL;
+ goto bad;
+ }
pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
if (!pg) {
err = -ENOMEM;
diff --git a/net/core/dev.c b/net/core/dev.c
index 0ebaea16632f..088923fe4066 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1055,6 +1055,8 @@ rollback:
*/
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
+ char *new_ifalias;
+
ASSERT_RTNL();
if (len >= IFALIASZ)
@@ -1068,9 +1070,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
return 0;
}
- dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
- if (!dev->ifalias)
+ new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
+ if (!new_ifalias)
return -ENOMEM;
+ dev->ifalias = new_ifalias;
strlcpy(dev->ifalias, alias, len+1);
return len;
@@ -1106,11 +1109,23 @@ void netdev_state_change(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_state_change);
-int netdev_bonding_change(struct net_device *dev, unsigned long event)
+/**
+ * netdev_notify_peers - notify network peers about existence of @dev
+ * @dev: network device
+ *
+ * Generate traffic such that interested network peers are aware of
+ * @dev, such as by generating a gratuitous ARP. This may be used when
+ * a device wants to inform the rest of the network about some sort of
+ * reconfiguration such as a failover event or virtual machine
+ * migration.
+ */
+void netdev_notify_peers(struct net_device *dev)
{
- return call_netdevice_notifiers(event, dev);
+ rtnl_lock();
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+ rtnl_unlock();
}
-EXPORT_SYMBOL(netdev_bonding_change);
+EXPORT_SYMBOL(netdev_notify_peers);
/**
* dev_load - load a network module
@@ -1172,6 +1187,7 @@ static int __dev_open(struct net_device *dev)
net_dmaengine_get();
dev_set_rx_mode(dev);
dev_activate(dev);
+ add_device_randomness(dev->dev_addr, dev->addr_len);
}
return ret;
@@ -1638,6 +1654,19 @@ static inline int deliver_skb(struct sk_buff *skb,
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
+static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
+{
+ if (ptype->af_packet_priv == NULL)
+ return false;
+
+ if (ptype->id_match)
+ return ptype->id_match(ptype, skb->sk);
+ else if ((struct sock *)ptype->af_packet_priv == skb->sk)
+ return true;
+
+ return false;
+}
+
/*
* Support routine. Sends outgoing frames to any network
* taps currently in use.
@@ -1655,8 +1684,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
* they originated from - MvS (miquels@drinkel.ow.org)
*/
if ((ptype->dev == dev || !ptype->dev) &&
- (ptype->af_packet_priv == NULL ||
- (struct sock *)ptype->af_packet_priv != skb->sk)) {
+ (!skb_loop_sk(ptype, skb))) {
if (pt_prev) {
deliver_skb(skb2, pt_prev, skb->dev);
pt_prev = ptype;
@@ -2133,6 +2161,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
__be16 protocol = skb->protocol;
netdev_features_t features = skb->dev->features;
+ if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+ features &= ~NETIF_F_GSO_MASK;
+
if (protocol == htons(ETH_P_8021Q)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
@@ -3155,6 +3186,23 @@ void netdev_rx_handler_unregister(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+/*
+ * Limit the use of PFMEMALLOC reserves to those protocols that implement
+ * the special handling of PFMEMALLOC skbs.
+ */
+static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
+{
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_ARP):
+ case __constant_htons(ETH_P_IP):
+ case __constant_htons(ETH_P_IPV6):
+ case __constant_htons(ETH_P_8021Q):
+ return true;
+ default:
+ return false;
+ }
+}
+
static int __netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
@@ -3164,14 +3212,27 @@ static int __netif_receive_skb(struct sk_buff *skb)
bool deliver_exact = false;
int ret = NET_RX_DROP;
__be16 type;
+ unsigned long pflags = current->flags;
net_timestamp_check(!netdev_tstamp_prequeue, skb);
trace_netif_receive_skb(skb);
+ /*
+ * PFMEMALLOC skbs are special, they should
+ * - be delivered to SOCK_MEMALLOC sockets only
+ * - stay away from userspace
+ * - have bounded memory usage
+ *
+ * Use PF_MEMALLOC as this saves us from propagating the allocation
+ * context down to all allocation sites.
+ */
+ if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+ current->flags |= PF_MEMALLOC;
+
/* if we've gotten here through NAPI, check netpoll */
if (netpoll_receive_skb(skb))
- return NET_RX_DROP;
+ goto out;
orig_dev = skb->dev;
@@ -3191,7 +3252,7 @@ another_round:
if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
skb = vlan_untag(skb);
if (unlikely(!skb))
- goto out;
+ goto unlock;
}
#ifdef CONFIG_NET_CLS_ACT
@@ -3201,6 +3262,9 @@ another_round:
}
#endif
+ if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+ goto skip_taps;
+
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
@@ -3209,13 +3273,18 @@ another_round:
}
}
+skip_taps:
#ifdef CONFIG_NET_CLS_ACT
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (!skb)
- goto out;
+ goto unlock;
ncls:
#endif
+ if (sk_memalloc_socks() && skb_pfmemalloc(skb)
+ && !skb_pfmemalloc_protocol(skb))
+ goto drop;
+
rx_handler = rcu_dereference(skb->dev->rx_handler);
if (vlan_tx_tag_present(skb)) {
if (pt_prev) {
@@ -3225,7 +3294,7 @@ ncls:
if (vlan_do_receive(&skb, !rx_handler))
goto another_round;
else if (unlikely(!skb))
- goto out;
+ goto unlock;
}
if (rx_handler) {
@@ -3235,7 +3304,7 @@ ncls:
}
switch (rx_handler(&skb)) {
case RX_HANDLER_CONSUMED:
- goto out;
+ goto unlock;
case RX_HANDLER_ANOTHER:
goto another_round;
case RX_HANDLER_EXACT:
@@ -3268,6 +3337,7 @@ ncls:
else
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
+drop:
atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
@@ -3276,8 +3346,10 @@ ncls:
ret = NET_RX_DROP;
}
-out:
+unlock:
rcu_read_unlock();
+out:
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
return ret;
}
@@ -4801,6 +4873,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
err = ops->ndo_set_mac_address(dev, sa);
if (!err)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ add_device_randomness(dev->dev_addr, dev->addr_len);
return err;
}
EXPORT_SYMBOL(dev_set_mac_address);
@@ -5175,12 +5248,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
*/
static int dev_new_index(struct net *net)
{
- static int ifindex;
+ int ifindex = net->ifindex;
for (;;) {
if (++ifindex <= 0)
ifindex = 1;
if (!__dev_get_by_index(net, ifindex))
- return ifindex;
+ return net->ifindex = ifindex;
}
}
@@ -5533,7 +5606,12 @@ int register_netdevice(struct net_device *dev)
}
}
- dev->ifindex = dev_new_index(net);
+ ret = -EBUSY;
+ if (!dev->ifindex)
+ dev->ifindex = dev_new_index(net);
+ else if (__dev_get_by_index(net, dev->ifindex))
+ goto err_uninit;
+
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
@@ -5579,6 +5657,7 @@ int register_netdevice(struct net_device *dev)
dev_init_scheduler(dev);
dev_hold(dev);
list_netdevice(dev);
+ add_device_randomness(dev->dev_addr, dev->addr_len);
/* Notify protocols, that a new device appeared. */
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
@@ -5682,6 +5761,7 @@ EXPORT_SYMBOL(netdev_refcnt_read);
/**
* netdev_wait_allrefs - wait until all references are gone.
+ * @dev: target net_device
*
* This is called when unregistering network devices.
*
@@ -5942,6 +6022,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
dev->gso_max_size = GSO_MAX_SIZE;
+ dev->gso_max_segs = GSO_MAX_SEGS;
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
diff --git a/net/core/dst.c b/net/core/dst.c
index 069d51d29414..56d63612e1e4 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -149,7 +149,15 @@ int dst_discard(struct sk_buff *skb)
}
EXPORT_SYMBOL(dst_discard);
-const u32 dst_default_metrics[RTAX_MAX];
+const u32 dst_default_metrics[RTAX_MAX + 1] = {
+ /* This initializer is needed to force linker to place this variable
+ * into const section. Otherwise it might end into bss section.
+ * We really want to avoid false sharing on this variable, and catch
+ * any writes on it.
+ */
+ [RTAX_MAX] = 0xdeadbeef,
+};
+
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
int initial_ref, int initial_obsolete, unsigned short flags)
diff --git a/net/core/filter.c b/net/core/filter.c
index d4ce2dc712e3..907efd27ec77 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -83,6 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
int err;
struct sk_filter *filter;
+ /*
+ * If the skb was allocated from pfmemalloc reserves, only
+ * allow SOCK_MEMALLOC sockets to use it as this socket is
+ * helping free memory
+ */
+ if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
+ return -ENOMEM;
+
err = security_sock_rcv_skb(sk, skb);
if (err)
return err;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b4c90e42b443..346b1eb83a1f 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -26,6 +26,7 @@
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/if_vlan.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>
@@ -54,7 +55,7 @@ static atomic_t trapped;
MAX_UDP_CHUNK)
static void zap_completion_queue(void);
-static void arp_reply(struct sk_buff *skb);
+static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);
@@ -167,15 +168,24 @@ static void poll_napi(struct net_device *dev)
struct napi_struct *napi;
int budget = 16;
+ WARN_ON_ONCE(!irqs_disabled());
+
list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ local_irq_enable();
if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
- budget = poll_one_napi(dev->npinfo, napi, budget);
+ rcu_read_lock_bh();
+ budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
+ napi, budget);
+ rcu_read_unlock_bh();
spin_unlock(&napi->poll_lock);
- if (!budget)
+ if (!budget) {
+ local_irq_disable();
break;
+ }
}
+ local_irq_disable();
}
}
@@ -185,13 +195,14 @@ static void service_arp_queue(struct netpoll_info *npi)
struct sk_buff *skb;
while ((skb = skb_dequeue(&npi->arp_tx)))
- arp_reply(skb);
+ netpoll_arp_reply(skb, npi);
}
}
static void netpoll_poll_dev(struct net_device *dev)
{
const struct net_device_ops *ops;
+ struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
if (!dev || !netif_running(dev))
return;
@@ -206,17 +217,18 @@ static void netpoll_poll_dev(struct net_device *dev)
poll_napi(dev);
if (dev->flags & IFF_SLAVE) {
- if (dev->npinfo) {
+ if (ni) {
struct net_device *bond_dev = dev->master;
struct sk_buff *skb;
- while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
+ struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo);
+ while ((skb = skb_dequeue(&ni->arp_tx))) {
skb->dev = bond_dev;
- skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
+ skb_queue_tail(&bond_ni->arp_tx, skb);
}
}
}
- service_arp_queue(dev->npinfo);
+ service_arp_queue(ni);
zap_completion_queue();
}
@@ -302,6 +314,7 @@ static int netpoll_owner_active(struct net_device *dev)
return 0;
}
+/* call with IRQ disabled */
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
struct net_device *dev)
{
@@ -309,8 +322,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
unsigned long tries;
const struct net_device_ops *ops = dev->netdev_ops;
/* It is up to the caller to keep npinfo alive. */
- struct netpoll_info *npinfo = np->dev->npinfo;
+ struct netpoll_info *npinfo;
+
+ WARN_ON_ONCE(!irqs_disabled());
+ npinfo = rcu_dereference_bh(np->dev->npinfo);
if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
__kfree_skb(skb);
return;
@@ -319,16 +335,22 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
/* don't get messages out of order, and no recursion */
if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
struct netdev_queue *txq;
- unsigned long flags;
txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
- local_irq_save(flags);
/* try until next clock tick */
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
if (__netif_tx_trylock(txq)) {
if (!netif_xmit_stopped(txq)) {
+ if (vlan_tx_tag_present(skb) &&
+ !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) {
+ skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+ if (unlikely(!skb))
+ break;
+ skb->vlan_tci = 0;
+ }
+
status = ops->ndo_start_xmit(skb, dev);
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
@@ -347,10 +369,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
}
WARN_ONCE(!irqs_disabled(),
- "netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
+ "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
dev->name, ops->ndo_start_xmit);
- local_irq_restore(flags);
}
if (status != NETDEV_TX_OK) {
@@ -423,9 +444,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
}
EXPORT_SYMBOL(netpoll_send_udp);
-static void arp_reply(struct sk_buff *skb)
+static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
{
- struct netpoll_info *npinfo = skb->dev->npinfo;
struct arphdr *arp;
unsigned char *arp_ptr;
int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
@@ -543,13 +563,12 @@ static void arp_reply(struct sk_buff *skb)
spin_unlock_irqrestore(&npinfo->rx_lock, flags);
}
-int __netpoll_rx(struct sk_buff *skb)
+int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
{
int proto, len, ulen;
int hits = 0;
const struct iphdr *iph;
struct udphdr *uh;
- struct netpoll_info *npinfo = skb->dev->npinfo;
struct netpoll *np, *tmp;
if (list_empty(&npinfo->rx_np))
@@ -565,6 +584,12 @@ int __netpoll_rx(struct sk_buff *skb)
return 1;
}
+ if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+ skb = vlan_untag(skb);
+ if (unlikely(!skb))
+ goto out;
+ }
+
proto = ntohs(eth_hdr(skb)->h_proto);
if (proto != ETH_P_IP)
goto out;
@@ -715,7 +740,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
}
EXPORT_SYMBOL(netpoll_parse_options);
-int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
+int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
struct netpoll_info *npinfo;
const struct net_device_ops *ops;
@@ -734,7 +759,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
}
if (!ndev->npinfo) {
- npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+ npinfo = kmalloc(sizeof(*npinfo), gfp);
if (!npinfo) {
err = -ENOMEM;
goto out;
@@ -752,7 +777,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
ops = np->dev->netdev_ops;
if (ops->ndo_netpoll_setup) {
- err = ops->ndo_netpoll_setup(ndev, npinfo);
+ err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
if (err)
goto free_npinfo;
}
@@ -857,7 +882,7 @@ int netpoll_setup(struct netpoll *np)
refill_skbs();
rtnl_lock();
- err = __netpoll_setup(np, ndev);
+ err = __netpoll_setup(np, ndev, GFP_KERNEL);
rtnl_unlock();
if (err)
@@ -878,6 +903,24 @@ static int __init netpoll_init(void)
}
core_initcall(netpoll_init);
+static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
+{
+ struct netpoll_info *npinfo =
+ container_of(rcu_head, struct netpoll_info, rcu);
+
+ skb_queue_purge(&npinfo->arp_tx);
+ skb_queue_purge(&npinfo->txq);
+
+ /* we can't call cancel_delayed_work_sync here, as we are in softirq */
+ cancel_delayed_work(&npinfo->tx_work);
+
+ /* clean after last, unfinished work */
+ __skb_queue_purge(&npinfo->txq);
+ /* now cancel it again */
+ cancel_delayed_work(&npinfo->tx_work);
+ kfree(npinfo);
+}
+
void __netpoll_cleanup(struct netpoll *np)
{
struct netpoll_info *npinfo;
@@ -903,20 +946,24 @@ void __netpoll_cleanup(struct netpoll *np)
ops->ndo_netpoll_cleanup(np->dev);
RCU_INIT_POINTER(np->dev->npinfo, NULL);
+ call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
+ }
+}
+EXPORT_SYMBOL_GPL(__netpoll_cleanup);
- /* avoid racing with NAPI reading npinfo */
- synchronize_rcu_bh();
+static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
+{
+ struct netpoll *np = container_of(rcu_head, struct netpoll, rcu);
- skb_queue_purge(&npinfo->arp_tx);
- skb_queue_purge(&npinfo->txq);
- cancel_delayed_work_sync(&npinfo->tx_work);
+ __netpoll_cleanup(np);
+ kfree(np);
+}
- /* clean after last, unfinished work */
- __skb_queue_purge(&npinfo->txq);
- kfree(npinfo);
- }
+void __netpoll_free_rcu(struct netpoll *np)
+{
+ call_rcu_bh(&np->rcu, rcu_cleanup_netpoll);
}
-EXPORT_SYMBOL_GPL(__netpoll_cleanup);
+EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
void netpoll_cleanup(struct netpoll *np)
{
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index ed0c0431fcd8..c75e3f9d060f 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -101,12 +101,10 @@ static int write_update_netdev_table(struct net_device *dev)
u32 max_len;
struct netprio_map *map;
- rtnl_lock();
max_len = atomic_read(&max_prioidx) + 1;
map = rtnl_dereference(dev->priomap);
if (!map || map->priomap_len < max_len)
ret = extend_netdev_table(dev, max_len);
- rtnl_unlock();
return ret;
}
@@ -256,17 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
if (!dev)
goto out_free_devname;
+ rtnl_lock();
ret = write_update_netdev_table(dev);
if (ret < 0)
goto out_put_dev;
- rcu_read_lock();
- map = rcu_dereference(dev->priomap);
+ map = rtnl_dereference(dev->priomap);
if (map)
map->priomap[prioidx] = priority;
- rcu_read_unlock();
out_put_dev:
+ rtnl_unlock();
dev_put(dev);
out_free_devname:
@@ -277,12 +275,6 @@ out_free_devname:
void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct task_struct *p;
- char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
-
- if (!tmp) {
- pr_warn("Unable to attach cgrp due to alloc failure!\n");
- return;
- }
cgroup_taskset_for_each(p, cgrp, tset) {
unsigned int fd;
@@ -296,32 +288,24 @@ void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
continue;
}
- rcu_read_lock();
+ spin_lock(&files->file_lock);
fdt = files_fdtable(files);
for (fd = 0; fd < fdt->max_fds; fd++) {
- char *path;
struct file *file;
struct socket *sock;
- unsigned long s;
- int rv, err = 0;
+ int err;
file = fcheck_files(files, fd);
if (!file)
continue;
- path = d_path(&file->f_path, tmp, PAGE_SIZE);
- rv = sscanf(path, "socket:[%lu]", &s);
- if (rv <= 0)
- continue;
-
sock = sock_from_file(file, &err);
- if (!err)
+ if (sock)
sock_update_netprioidx(sock->sk, p);
}
- rcu_read_unlock();
+ spin_unlock(&files->file_lock);
task_unlock(p);
}
- kfree(tmp);
}
static struct cftype ss_files[] = {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 334b930e0de3..34d975b0f277 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -618,16 +618,20 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
long expires, u32 error)
{
struct rta_cacheinfo ci = {
- .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
+ .rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse),
.rta_used = dst->__use,
.rta_clntref = atomic_read(&(dst->__refcnt)),
.rta_error = error,
.rta_id = id,
};
- if (expires)
- ci.rta_expires = jiffies_to_clock_t(expires);
+ if (expires) {
+ unsigned long clock;
+ clock = jiffies_to_clock_t(abs(expires));
+ clock = min_t(unsigned long, clock, INT_MAX);
+ ci.rta_expires = (expires > 0) ? clock : -clock;
+ }
return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
}
EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
@@ -659,6 +663,12 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
}
}
+static unsigned int rtnl_dev_get_flags(const struct net_device *dev)
+{
+ return (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI)) |
+ (dev->gflags & (IFF_PROMISC | IFF_ALLMULTI));
+}
+
static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
const struct ifinfomsg *ifm)
{
@@ -667,7 +677,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
/* bugwards compatibility: ifi_change == 0 is treated as ~0 */
if (ifm->ifi_change)
flags = (flags & ifm->ifi_change) |
- (dev->flags & ~ifm->ifi_change);
+ (rtnl_dev_get_flags(dev) & ~ifm->ifi_change);
return flags;
}
@@ -1371,6 +1381,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
goto errout;
send_addr_notify = 1;
modified = 1;
+ add_device_randomness(dev->dev_addr, dev->addr_len);
}
if (tb[IFLA_MTU]) {
@@ -1801,8 +1812,6 @@ replay:
return -ENODEV;
}
- if (ifm->ifi_index)
- return -EOPNOTSUPP;
if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
return -EOPNOTSUPP;
@@ -1828,10 +1837,14 @@ replay:
return PTR_ERR(dest_net);
dev = rtnl_create_link(net, dest_net, ifname, ops, tb);
-
- if (IS_ERR(dev))
+ if (IS_ERR(dev)) {
err = PTR_ERR(dev);
- else if (ops->newlink)
+ goto out;
+ }
+
+ dev->ifindex = ifm->ifi_index;
+
+ if (ops->newlink)
err = ops->newlink(net, dev, tb, data);
else
err = register_netdevice(dev);
diff --git a/net/core/scm.c b/net/core/scm.c
index 8f6ccfd68ef4..040cebeed45b 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -265,6 +265,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax;
i++, cmfptr++)
{
+ struct socket *sock;
int new_fd;
err = security_file_receive(fp[i]);
if (err)
@@ -281,6 +282,9 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
}
/* Bump the usage count and install the file. */
get_file(fp[i]);
+ sock = sock_from_file(fp[i], &err);
+ if (sock)
+ sock_update_netprioidx(sock->sk, current);
fd_install(new_fd, fp[i]);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 368f65c15e4f..fe00d1208167 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
BUG();
}
+
+/*
+ * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
+ * the caller if emergency pfmemalloc reserves are being used. If it is and
+ * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
+ * may be used. Otherwise, the packet data may be discarded until enough
+ * memory is free
+ */
+#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
+ __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
+void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
+ bool *pfmemalloc)
+{
+ void *obj;
+ bool ret_pfmemalloc = false;
+
+ /*
+ * Try a regular allocation, when that fails and we're not entitled
+ * to the reserves, fail.
+ */
+ obj = kmalloc_node_track_caller(size,
+ flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+ node);
+ if (obj || !(gfp_pfmemalloc_allowed(flags)))
+ goto out;
+
+ /* Try again but now we are using pfmemalloc reserves */
+ ret_pfmemalloc = true;
+ obj = kmalloc_node_track_caller(size, flags, node);
+
+out:
+ if (pfmemalloc)
+ *pfmemalloc = ret_pfmemalloc;
+
+ return obj;
+}
+
/* Allocate a new skbuff. We do this ourselves so we can fill in a few
* 'private' fields and also do memory statistics to find all the
* [BEEP] leaks.
@@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
* __alloc_skb - allocate a network buffer
* @size: size to allocate
* @gfp_mask: allocation mask
- * @fclone: allocate from fclone cache instead of head cache
- * and allocate a cloned (child) skb
+ * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
+ * instead of head cache and allocate a cloned (child) skb.
+ * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ * allocations in case the data is required for writeback
* @node: numa node to allocate memory on
*
* Allocate a new &sk_buff. The returned buffer has no headroom and a
@@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
* %GFP_ATOMIC.
*/
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
- int fclone, int node)
+ int flags, int node)
{
struct kmem_cache *cache;
struct skb_shared_info *shinfo;
struct sk_buff *skb;
u8 *data;
+ bool pfmemalloc;
- cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+ cache = (flags & SKB_ALLOC_FCLONE)
+ ? skbuff_fclone_cache : skbuff_head_cache;
+
+ if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
+ gfp_mask |= __GFP_MEMALLOC;
/* Get the HEAD */
skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
@@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
*/
size = SKB_DATA_ALIGN(size);
size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- data = kmalloc_node_track_caller(size, gfp_mask, node);
+ data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
if (!data)
goto nodata;
/* kmalloc(size) might give us more room than requested.
@@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
memset(skb, 0, offsetof(struct sk_buff, tail));
/* Account for allocated memory : skb + skb->head */
skb->truesize = SKB_TRUESIZE(size);
+ skb->pfmemalloc = pfmemalloc;
atomic_set(&skb->users, 1);
skb->head = data;
skb->data = data;
@@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
atomic_set(&shinfo->dataref, 1);
kmemcheck_annotate_variable(shinfo->destructor_arg);
- if (fclone) {
+ if (flags & SKB_ALLOC_FCLONE) {
struct sk_buff *child = skb + 1;
atomic_t *fclone_ref = (atomic_t *) (child + 1);
@@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
atomic_set(fclone_ref, 1);
child->fclone = SKB_FCLONE_UNAVAILABLE;
+ child->pfmemalloc = pfmemalloc;
}
out:
return skb;
@@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
-/**
- * netdev_alloc_frag - allocate a page fragment
- * @fragsz: fragment size
- *
- * Allocates a frag from a page for receive buffer.
- * Uses GFP_ATOMIC allocations.
- */
-void *netdev_alloc_frag(unsigned int fragsz)
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
struct netdev_alloc_cache *nc;
void *data = NULL;
@@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz)
nc = &__get_cpu_var(netdev_alloc_cache);
if (unlikely(!nc->page)) {
refill:
- nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+ nc->page = alloc_page(gfp_mask);
if (unlikely(!nc->page))
goto end;
recycle:
@@ -343,6 +382,18 @@ end:
local_irq_restore(flags);
return data;
}
+
+/**
+ * netdev_alloc_frag - allocate a page fragment
+ * @fragsz: fragment size
+ *
+ * Allocates a frag from a page for receive buffer.
+ * Uses GFP_ATOMIC allocations.
+ */
+void *netdev_alloc_frag(unsigned int fragsz)
+{
+ return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
EXPORT_SYMBOL(netdev_alloc_frag);
/**
@@ -366,7 +417,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
- void *data = netdev_alloc_frag(fragsz);
+ void *data;
+
+ if (sk_memalloc_socks())
+ gfp_mask |= __GFP_MEMALLOC;
+
+ data = __netdev_alloc_frag(fragsz, gfp_mask);
if (likely(data)) {
skb = build_skb(data, fragsz);
@@ -374,7 +430,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
put_page(virt_to_head_page(data));
}
} else {
- skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
+ skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
+ SKB_ALLOC_RX, NUMA_NO_NODE);
}
if (likely(skb)) {
skb_reserve(skb, NET_SKB_PAD);
@@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#if IS_ENABLED(CONFIG_IP_VS)
new->ipvs_property = old->ipvs_property;
#endif
+ new->pfmemalloc = old->pfmemalloc;
new->protocol = old->protocol;
new->mark = old->mark;
new->skb_iif = old->skb_iif;
@@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
n->fclone = SKB_FCLONE_CLONE;
atomic_inc(fclone_ref);
} else {
+ if (skb_pfmemalloc(skb))
+ gfp_mask |= __GFP_MEMALLOC;
+
n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
if (!n)
return NULL;
@@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
}
+static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
+{
+ if (skb_pfmemalloc(skb))
+ return SKB_ALLOC_RX;
+ return 0;
+}
+
/**
* skb_copy - create private copy of an sk_buff
* @skb: buffer to copy
@@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
int headerlen = skb_headroom(skb);
unsigned int size = skb_end_offset(skb) + skb->data_len;
- struct sk_buff *n = alloc_skb(size, gfp_mask);
+ struct sk_buff *n = __alloc_skb(size, gfp_mask,
+ skb_alloc_rx_flag(skb), NUMA_NO_NODE);
if (!n)
return NULL;
@@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy);
struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
{
unsigned int size = skb_headlen(skb) + headroom;
- struct sk_buff *n = alloc_skb(size, gfp_mask);
+ struct sk_buff *n = __alloc_skb(size, gfp_mask,
+ skb_alloc_rx_flag(skb), NUMA_NO_NODE);
if (!n)
goto out;
@@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
size = SKB_DATA_ALIGN(size);
- data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
- gfp_mask);
+ if (skb_pfmemalloc(skb))
+ gfp_mask |= __GFP_MEMALLOC;
+ data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+ gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
goto nodata;
size = SKB_WITH_OVERHEAD(ksize(data));
@@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
/*
* Allocate the copy buffer
*/
- struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
- gfp_mask);
+ struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
+ gfp_mask, skb_alloc_rx_flag(skb),
+ NUMA_NO_NODE);
int oldheadroom = skb_headroom(skb);
int head_copy_len, head_copy_off;
int off;
@@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
skb_release_head_state(nskb);
__skb_push(nskb, doffset);
} else {
- nskb = alloc_skb(hsize + doffset + headroom,
- GFP_ATOMIC);
+ nskb = __alloc_skb(hsize + doffset + headroom,
+ GFP_ATOMIC, skb_alloc_rx_flag(skb),
+ NUMA_NO_NODE);
if (unlikely(!nskb))
goto err;
diff --git a/net/core/sock.c b/net/core/sock.c
index 2676a88f533e..8f67ced8d6a8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -142,7 +142,7 @@
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
struct proto *proto;
@@ -271,6 +271,61 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);
+struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(memalloc_socks);
+
+/**
+ * sk_set_memalloc - sets %SOCK_MEMALLOC
+ * @sk: socket to set it on
+ *
+ * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
+ * It's the responsibility of the admin to adjust min_free_kbytes
+ * to meet the requirements
+ */
+void sk_set_memalloc(struct sock *sk)
+{
+ sock_set_flag(sk, SOCK_MEMALLOC);
+ sk->sk_allocation |= __GFP_MEMALLOC;
+ static_key_slow_inc(&memalloc_socks);
+}
+EXPORT_SYMBOL_GPL(sk_set_memalloc);
+
+void sk_clear_memalloc(struct sock *sk)
+{
+ sock_reset_flag(sk, SOCK_MEMALLOC);
+ sk->sk_allocation &= ~__GFP_MEMALLOC;
+ static_key_slow_dec(&memalloc_socks);
+
+ /*
+ * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
+ * progress of swapping. However, if SOCK_MEMALLOC is cleared while
+ * it has rmem allocations there is a risk that the user of the
+ * socket cannot make forward progress due to exceeding the rmem
+ * limits. By rights, sk_clear_memalloc() should only be called
+ * on sockets being torn down but warn and reset the accounting if
+ * that assumption breaks.
+ */
+ if (WARN_ON(sk->sk_forward_alloc))
+ sk_mem_reclaim(sk);
+}
+EXPORT_SYMBOL_GPL(sk_clear_memalloc);
+
+int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ int ret;
+ unsigned long pflags = current->flags;
+
+ /* these should have been dropped before queueing */
+ BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
+
+ current->flags |= PF_MEMALLOC;
+ ret = sk->sk_backlog_rcv(sk, skb);
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
+
+ return ret;
+}
+EXPORT_SYMBOL(__sk_backlog_rcv);
+
#if defined(CONFIG_CGROUPS)
#if !defined(CONFIG_NET_CLS_CGROUP)
int net_cls_subsys_id = -1;
@@ -353,7 +408,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (err)
return err;
- if (!sk_rmem_schedule(sk, skb->truesize)) {
+ if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
atomic_inc(&sk->sk_drops);
return -ENOBUFS;
}
@@ -1403,6 +1458,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
} else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
sk->sk_gso_max_size = dst->dev->gso_max_size;
+ sk->sk_gso_max_segs = dst->dev->gso_max_segs;
}
}
}
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 75c3582a7678..fb85d371a8de 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -246,7 +246,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
u32 __user *optval, int __user *optlen)
{
int rc = -ENOPROTOOPT;
- if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
+ if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len,
optval, optlen);
return rc;
@@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
u32 __user *optval, int __user *optlen)
{
int rc = -ENOPROTOOPT;
- if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
+ if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len,
optval, optlen);
return rc;
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index d65e98798eca..119c04317d48 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -535,6 +535,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
case DCCP_SOCKOPT_CCID_TX_INFO:
if (len < sizeof(tfrc))
return -EINVAL;
+ memset(&tfrc, 0, sizeof(tfrc));
tfrc.tfrctx_x = hc->tx_x;
tfrc.tfrctx_x_recv = hc->tx_x_recv;
tfrc.tfrctx_x_calc = hc->tx_x_calc;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 85a3604c87c8..c855e8d0738f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -961,7 +961,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o
.saddr = oldflp->saddr,
.flowidn_scope = RT_SCOPE_UNIVERSE,
.flowidn_mark = oldflp->flowidn_mark,
- .flowidn_iif = init_net.loopback_dev->ifindex,
+ .flowidn_iif = LOOPBACK_IFINDEX,
.flowidn_oif = oldflp->flowidn_oif,
};
struct dn_route *rt = NULL;
@@ -979,7 +979,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o
"dn_route_output_slow: dst=%04x src=%04x mark=%d"
" iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr),
le16_to_cpu(oldflp->saddr),
- oldflp->flowidn_mark, init_net.loopback_dev->ifindex,
+ oldflp->flowidn_mark, LOOPBACK_IFINDEX,
oldflp->flowidn_oif);
/* If we have an output interface, verify its a DECnet device */
@@ -1042,7 +1042,7 @@ source_ok:
if (!fld.daddr)
goto out;
}
- fld.flowidn_oif = init_net.loopback_dev->ifindex;
+ fld.flowidn_oif = LOOPBACK_IFINDEX;
res.type = RTN_LOCAL;
goto make_route;
}
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index ae2ccf2890e4..15ca63ec604e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -49,7 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
-obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
+obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fe4582ca969a..6681ccf5c3ee 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1364,7 +1364,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
if (*(u8 *)iph != 0x45)
goto out_unlock;
- if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
+ if (unlikely(ip_fast_csum((u8 *)iph, 5)))
goto out_unlock;
id = ntohl(*(__be32 *)&iph->id);
@@ -1380,7 +1380,6 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
iph2 = ip_hdr(p);
if ((iph->protocol ^ iph2->protocol) |
- (iph->tos ^ iph2->tos) |
((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
NAPI_GRO_CB(p)->same_flow = 0;
@@ -1390,6 +1389,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
/* All fields must match except length and checksum. */
NAPI_GRO_CB(p)->flush |=
(iph->ttl ^ iph2->ttl) |
+ (iph->tos ^ iph2->tos) |
((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
NAPI_GRO_CB(p)->flush |= flush;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 44bf82e3aef7..adf273f8ad2e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -94,25 +94,22 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
-/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
- * value. So if you change this define, make appropriate changes to
- * inet_addr_hash as well.
- */
-#define IN4_ADDR_HSIZE 256
+#define IN4_ADDR_HSIZE_SHIFT 8
+#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
+
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);
-static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
+static u32 inet_addr_hash(struct net *net, __be32 addr)
{
- u32 val = (__force u32) addr ^ hash_ptr(net, 8);
+ u32 val = (__force u32) addr ^ net_hash_mix(net);
- return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
- (IN4_ADDR_HSIZE - 1));
+ return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
}
static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
{
- unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
+ u32 hash = inet_addr_hash(net, ifa->ifa_local);
spin_lock(&inet_addr_hash_lock);
hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
@@ -136,18 +133,18 @@ static void inet_hash_remove(struct in_ifaddr *ifa)
*/
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
- unsigned int hash = inet_addr_hash(net, addr);
+ u32 hash = inet_addr_hash(net, addr);
struct net_device *result = NULL;
struct in_ifaddr *ifa;
struct hlist_node *node;
rcu_read_lock();
hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
- struct net_device *dev = ifa->ifa_dev->dev;
-
- if (!net_eq(dev_net(dev), net))
- continue;
if (ifa->ifa_local == addr) {
+ struct net_device *dev = ifa->ifa_dev->dev;
+
+ if (!net_eq(dev_net(dev), net))
+ continue;
result = dev;
break;
}
@@ -182,10 +179,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
-static inline void devinet_sysctl_register(struct in_device *idev)
+static void devinet_sysctl_register(struct in_device *idev)
{
}
-static inline void devinet_sysctl_unregister(struct in_device *idev)
+static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
@@ -205,7 +202,7 @@ static void inet_rcu_free_ifa(struct rcu_head *head)
kfree(ifa);
}
-static inline void inet_free_ifa(struct in_ifaddr *ifa)
+static void inet_free_ifa(struct in_ifaddr *ifa)
{
call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
@@ -659,7 +656,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
* Determine a default network mask, based on the IP address.
*/
-static inline int inet_abc_len(__be32 addr)
+static int inet_abc_len(__be32 addr)
{
int rc = -1; /* Something else, probably a multicast. */
@@ -1124,7 +1121,7 @@ skip:
}
}
-static inline bool inetdev_valid_mtu(unsigned int mtu)
+static bool inetdev_valid_mtu(unsigned int mtu)
{
return mtu >= 68;
}
@@ -1239,7 +1236,7 @@ static struct notifier_block ip_netdev_notifier = {
.notifier_call = inetdev_event,
};
-static inline size_t inet_nlmsg_size(void)
+static size_t inet_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+ nla_total_size(4) /* IFA_ADDRESS */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8732cc7920ed..7f073a38c87d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -218,7 +218,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
scope = RT_SCOPE_UNIVERSE;
if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = net->loopback_dev->ifindex;
+ fl4.flowi4_iif = LOOPBACK_IFINDEX;
fl4.daddr = ip_hdr(skb)->saddr;
fl4.saddr = 0;
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
@@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
if (event == NETDEV_UNREGISTER) {
fib_disable_ip(dev, 2, -1);
+ rt_flush_dev(dev);
return NOTIFY_DONE;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index da0cc2e6b250..da80dc14cc76 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
},
};
+static void rt_fibinfo_free(struct rtable __rcu **rtp)
+{
+ struct rtable *rt = rcu_dereference_protected(*rtp, 1);
+
+ if (!rt)
+ return;
+
+ /* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
+ * because we waited an RCU grace period before calling
+ * free_fib_info_rcu()
+ */
+
+ dst_free(&rt->dst);
+}
+
static void free_nh_exceptions(struct fib_nh *nh)
{
struct fnhe_hash_bucket *hash = nh->nh_exceptions;
@@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh)
struct fib_nh_exception *next;
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
+
+ rt_fibinfo_free(&fnhe->fnhe_rth);
+
kfree(fnhe);
fnhe = next;
@@ -161,6 +179,23 @@ static void free_nh_exceptions(struct fib_nh *nh)
kfree(hash);
}
+static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
+{
+ int cpu;
+
+ if (!rtp)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct rtable *rt;
+
+ rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
+ if (rt)
+ dst_free(&rt->dst);
+ }
+ free_percpu(rtp);
+}
+
/* Release a nexthop info record */
static void free_fib_info_rcu(struct rcu_head *head)
{
@@ -171,10 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
dev_put(nexthop_nh->nh_dev);
if (nexthop_nh->nh_exceptions)
free_nh_exceptions(nexthop_nh);
- if (nexthop_nh->nh_rth_output)
- dst_free(&nexthop_nh->nh_rth_output->dst);
- if (nexthop_nh->nh_rth_input)
- dst_free(&nexthop_nh->nh_rth_input->dst);
+ rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
+ rt_fibinfo_free(&nexthop_nh->nh_rth_input);
} endfor_nexthops(fi);
release_net(fi->fib_net);
@@ -804,6 +837,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi->fib_nhs = nhs;
change_nexthops(fi) {
nexthop_nh->nh_parent = fi;
+ nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
} endfor_nexthops(fi)
if (cfg->fc_mx) {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 18cbc15b20d5..3c820dae235e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -159,7 +159,6 @@ struct trie {
#endif
};
-static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n);
static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
int wasfull);
static struct rt_trie_node *resize(struct trie *t, struct tnode *tn);
@@ -368,7 +367,7 @@ static void __leaf_free_rcu(struct rcu_head *head)
static inline void free_leaf(struct leaf *l)
{
- call_rcu_bh(&l->rcu, __leaf_free_rcu);
+ call_rcu(&l->rcu, __leaf_free_rcu);
}
static inline void free_leaf_info(struct leaf_info *leaf)
@@ -473,7 +472,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
}
pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
- sizeof(struct rt_trie_node) << bits);
+ sizeof(struct rt_trie_node *) << bits);
return tn;
}
@@ -490,7 +489,7 @@ static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *
return ((struct tnode *) n)->pos == tn->pos + tn->bits;
}
-static inline void put_child(struct trie *t, struct tnode *tn, int i,
+static inline void put_child(struct tnode *tn, int i,
struct rt_trie_node *n)
{
tnode_put_child_reorg(tn, i, n, -1);
@@ -754,8 +753,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
goto nomem;
}
- put_child(t, tn, 2*i, (struct rt_trie_node *) left);
- put_child(t, tn, 2*i+1, (struct rt_trie_node *) right);
+ put_child(tn, 2*i, (struct rt_trie_node *) left);
+ put_child(tn, 2*i+1, (struct rt_trie_node *) right);
}
}
@@ -776,9 +775,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
if (tkey_extract_bits(node->key,
oldtnode->pos + oldtnode->bits,
1) == 0)
- put_child(t, tn, 2*i, node);
+ put_child(tn, 2*i, node);
else
- put_child(t, tn, 2*i+1, node);
+ put_child(tn, 2*i+1, node);
continue;
}
@@ -786,8 +785,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
inode = (struct tnode *) node;
if (inode->bits == 1) {
- put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
- put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
+ put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
+ put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
tnode_free_safe(inode);
continue;
@@ -817,22 +816,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
*/
left = (struct tnode *) tnode_get_child(tn, 2*i);
- put_child(t, tn, 2*i, NULL);
+ put_child(tn, 2*i, NULL);
BUG_ON(!left);
right = (struct tnode *) tnode_get_child(tn, 2*i+1);
- put_child(t, tn, 2*i+1, NULL);
+ put_child(tn, 2*i+1, NULL);
BUG_ON(!right);
size = tnode_child_length(left);
for (j = 0; j < size; j++) {
- put_child(t, left, j, rtnl_dereference(inode->child[j]));
- put_child(t, right, j, rtnl_dereference(inode->child[j + size]));
+ put_child(left, j, rtnl_dereference(inode->child[j]));
+ put_child(right, j, rtnl_dereference(inode->child[j + size]));
}
- put_child(t, tn, 2*i, resize(t, left));
- put_child(t, tn, 2*i+1, resize(t, right));
+ put_child(tn, 2*i, resize(t, left));
+ put_child(tn, 2*i+1, resize(t, right));
tnode_free_safe(inode);
}
@@ -877,7 +876,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
if (!newn)
goto nomem;
- put_child(t, tn, i/2, (struct rt_trie_node *)newn);
+ put_child(tn, i/2, (struct rt_trie_node *)newn);
}
}
@@ -892,21 +891,21 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
if (left == NULL) {
if (right == NULL) /* Both are empty */
continue;
- put_child(t, tn, i/2, right);
+ put_child(tn, i/2, right);
continue;
}
if (right == NULL) {
- put_child(t, tn, i/2, left);
+ put_child(tn, i/2, left);
continue;
}
/* Two nonempty children */
newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
- put_child(t, tn, i/2, NULL);
- put_child(t, newBinNode, 0, left);
- put_child(t, newBinNode, 1, right);
- put_child(t, tn, i/2, resize(t, newBinNode));
+ put_child(tn, i/2, NULL);
+ put_child(newBinNode, 0, left);
+ put_child(newBinNode, 1, right);
+ put_child(tn, i/2, resize(t, newBinNode));
}
tnode_free_safe(oldtnode);
return tn;
@@ -1125,7 +1124,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
node_set_parent((struct rt_trie_node *)l, tp);
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
- put_child(t, tp, cindex, (struct rt_trie_node *)l);
+ put_child(tp, cindex, (struct rt_trie_node *)l);
} else {
/* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
/*
@@ -1155,12 +1154,12 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
node_set_parent((struct rt_trie_node *)tn, tp);
missbit = tkey_extract_bits(key, newpos, 1);
- put_child(t, tn, missbit, (struct rt_trie_node *)l);
- put_child(t, tn, 1-missbit, n);
+ put_child(tn, missbit, (struct rt_trie_node *)l);
+ put_child(tn, 1-missbit, n);
if (tp) {
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
- put_child(t, tp, cindex, (struct rt_trie_node *)tn);
+ put_child(tp, cindex, (struct rt_trie_node *)tn);
} else {
rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
tp = tn;
@@ -1551,7 +1550,8 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
* state.directly.
*/
if (pref_mismatch) {
- int mp = KEYLENGTH - fls(pref_mismatch);
+ /* fls(x) = __fls(x) + 1 */
+ int mp = KEYLENGTH - __fls(pref_mismatch) - 1;
if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
goto backtrace;
@@ -1619,7 +1619,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
if (tp) {
t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
- put_child(t, tp, cindex, NULL);
+ put_child(tp, cindex, NULL);
trie_rebalance(t, tp);
} else
RCU_INIT_POINTER(t->trie, NULL);
@@ -1656,7 +1656,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
if (!l)
return -ESRCH;
- fa_head = get_fa_head(l, plen);
+ li = find_leaf_info(l, plen);
+
+ if (!li)
+ return -ESRCH;
+
+ fa_head = &li->falh;
fa = fib_find_alias(fa_head, tos, 0);
if (!fa)
@@ -1692,9 +1697,6 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
- l = fib_find_node(t, key);
- li = find_leaf_info(l, plen);
-
list_del_rcu(&fa->fa_list);
if (!plen)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6699f23e6f55..0b5580c69f2d 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2435,6 +2435,8 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
struct ip_mc_list *im = (struct ip_mc_list *)v;
struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
char *querier;
+ long delta;
+
#ifdef CONFIG_IP_MULTICAST
querier = IGMP_V1_SEEN(state->in_dev) ? "V1" :
IGMP_V2_SEEN(state->in_dev) ? "V2" :
@@ -2448,11 +2450,12 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
}
+ delta = im->timer.expires - jiffies;
seq_printf(seq,
"\t\t\t\t%08X %5d %d:%08lX\t\t%d\n",
im->multiaddr, im->users,
- im->tm_running, im->tm_running ?
- jiffies_to_clock_t(im->timer.expires-jiffies) : 0,
+ im->tm_running,
+ im->tm_running ? jiffies_delta_to_clock_t(delta) : 0,
im->reporter);
}
return 0;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index db0cf17c00f7..7f75f21d7b83 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -404,12 +404,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct inet_sock *newinet = inet_sk(newsk);
- struct ip_options_rcu *opt = ireq->opt;
+ struct ip_options_rcu *opt;
struct net *net = sock_net(sk);
struct flowi4 *fl4;
struct rtable *rt;
fl4 = &newinet->cork.fl.u.ip4;
+
+ rcu_read_lock();
+ opt = rcu_dereference(newinet->inet_opt);
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -421,11 +424,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_gateway)
goto route_err;
+ rcu_read_unlock();
return &rt->dst;
route_err:
ip_rt_put(rt);
no_route:
+ rcu_read_unlock();
IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 981ff1eef28c..f1395a6fb35f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -325,14 +325,12 @@ static int ip_rcv_finish(struct sk_buff *skb)
const struct net_protocol *ipprot;
int protocol = iph->protocol;
- rcu_read_lock();
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && ipprot->early_demux) {
ipprot->early_demux(skb);
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}
- rcu_read_unlock();
}
/*
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ba39a52d18c1..c196d749daf2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -197,7 +197,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
- if (neigh) {
+ if (!IS_ERR(neigh)) {
int res = dst_neigh_output(dst, neigh, skb);
rcu_read_unlock_bh();
@@ -1338,10 +1338,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph->ihl = 5;
iph->tos = inet->tos;
iph->frag_off = df;
- ip_select_ident(iph, &rt->dst, sk);
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
+ ip_select_ident(iph, &rt->dst, sk);
if (opt) {
iph->ihl += opt->optlen>>2;
@@ -1366,9 +1366,8 @@ out:
return skb;
}
-int ip_send_skb(struct sk_buff *skb)
+int ip_send_skb(struct net *net, struct sk_buff *skb)
{
- struct net *net = sock_net(skb->sk);
int err;
err = ip_local_out(skb);
@@ -1391,7 +1390,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4)
return 0;
/* Netfilter gets whole the not fragmented skb. */
- return ip_send_skb(skb);
+ return ip_send_skb(sock_net(sk), skb);
}
/*
@@ -1536,6 +1535,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
+ skb_orphan(nskb);
skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
ip_push_pending_frames(sk, &fl4);
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8eec8f4a0536..3a57570c8ee5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1798,7 +1798,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
.flowi4_oif = (rt_is_output_route(rt) ?
skb->dev->ifindex : 0),
.flowi4_iif = (rt_is_output_route(rt) ?
- net->loopback_dev->ifindex :
+ LOOPBACK_IFINDEX :
skb->dev->ifindex),
.flowi4_mark = skb->mark,
};
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 31371be8174b..c30130062cd6 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
return ipv4_is_local_multicast(iph->daddr) ^ invert;
flow.flowi4_iif = 0;
} else {
- flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex;
+ flow.flowi4_iif = LOOPBACK_IFINDEX;
}
flow.daddr = iph->saddr;
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index ea4a23813d26..4ad9cf173992 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
hdr, NULL, &matchoff, &matchlen,
&addr, &port) > 0) {
- unsigned int matchend, poff, plen, buflen, n;
+ unsigned int olen, matchend, poff, plen, buflen, n;
char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
/* We're only interested in headers related to this
@@ -163,17 +163,18 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
goto next;
}
+ olen = *datalen;
if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
&addr, port))
return NF_DROP;
- matchend = matchoff + matchlen;
+ matchend = matchoff + matchlen + *datalen - olen;
/* The maddr= parameter (RFC 2361) specifies where to send
* the reply. */
if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
"maddr=", &poff, &plen,
- &addr) > 0 &&
+ &addr, true) > 0 &&
addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
buflen = sprintf(buffer, "%pI4",
@@ -187,7 +188,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
* from which the server received the request. */
if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
"received=", &poff, &plen,
- &addr) > 0 &&
+ &addr, false) > 0 &&
addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
buflen = sprintf(buffer, "%pI4",
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fc1a81ca79a7..50f6d3adb474 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -70,7 +70,6 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/bootmem.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -80,7 +79,6 @@
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
-#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
@@ -88,11 +86,9 @@
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
-#include <linux/jhash.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
-#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
@@ -147,6 +143,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
+static void ipv4_dst_destroy(struct dst_entry *dst);
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
int how)
@@ -170,6 +167,7 @@ static struct dst_ops ipv4_dst_ops = {
.default_advmss = ipv4_default_advmss,
.mtu = ipv4_mtu,
.cow_metrics = ipv4_cow_metrics,
+ .destroy = ipv4_dst_destroy,
.ifdown = ipv4_dst_ifdown,
.negative_advice = ipv4_negative_advice,
.link_failure = ipv4_link_failure,
@@ -587,11 +585,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
build_sk_flow_key(fl4, sk);
}
-static DEFINE_SEQLOCK(fnhe_seqlock);
+static inline void rt_free(struct rtable *rt)
+{
+ call_rcu(&rt->dst.rcu_head, dst_rcu_free);
+}
+
+static DEFINE_SPINLOCK(fnhe_lock);
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
struct fib_nh_exception *fnhe, *oldest;
+ struct rtable *orig;
oldest = rcu_dereference(hash->chain);
for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -599,6 +603,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
oldest = fnhe;
}
+ orig = rcu_dereference(oldest->fnhe_rth);
+ if (orig) {
+ RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
+ rt_free(orig);
+ }
return oldest;
}
@@ -620,7 +629,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
int depth;
u32 hval = fnhe_hashfun(daddr);
- write_seqlock_bh(&fnhe_seqlock);
+ spin_lock_bh(&fnhe_lock);
hash = nh->nh_exceptions;
if (!hash) {
@@ -667,7 +676,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_stamp = jiffies;
out_unlock:
- write_sequnlock_bh(&fnhe_seqlock);
+ spin_unlock_bh(&fnhe_lock);
return;
}
@@ -1164,53 +1173,62 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
return NULL;
}
-static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
+static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
__be32 daddr)
{
- __be32 fnhe_daddr, gw;
- unsigned long expires;
- unsigned int seq;
- u32 pmtu;
-
-restart:
- seq = read_seqbegin(&fnhe_seqlock);
- fnhe_daddr = fnhe->fnhe_daddr;
- gw = fnhe->fnhe_gw;
- pmtu = fnhe->fnhe_pmtu;
- expires = fnhe->fnhe_expires;
- if (read_seqretry(&fnhe_seqlock, seq))
- goto restart;
-
- if (daddr != fnhe_daddr)
- return;
+ bool ret = false;
+
+ spin_lock_bh(&fnhe_lock);
+
+ if (daddr == fnhe->fnhe_daddr) {
+ struct rtable *orig;
- if (pmtu) {
- unsigned long diff = expires - jiffies;
+ if (fnhe->fnhe_pmtu) {
+ unsigned long expires = fnhe->fnhe_expires;
+ unsigned long diff = expires - jiffies;
- if (time_before(jiffies, expires)) {
- rt->rt_pmtu = pmtu;
- dst_set_expires(&rt->dst, diff);
+ if (time_before(jiffies, expires)) {
+ rt->rt_pmtu = fnhe->fnhe_pmtu;
+ dst_set_expires(&rt->dst, diff);
+ }
}
+ if (fnhe->fnhe_gw) {
+ rt->rt_flags |= RTCF_REDIRECTED;
+ rt->rt_gateway = fnhe->fnhe_gw;
+ }
+
+ orig = rcu_dereference(fnhe->fnhe_rth);
+ rcu_assign_pointer(fnhe->fnhe_rth, rt);
+ if (orig)
+ rt_free(orig);
+
+ fnhe->fnhe_stamp = jiffies;
+ ret = true;
+ } else {
+ /* Routes we intend to cache in nexthop exception have
+ * the DST_NOCACHE bit clear. However, if we are
+ * unsuccessful at storing this route into the cache
+ * we really need to set it.
+ */
+ rt->dst.flags |= DST_NOCACHE;
}
- if (gw) {
- rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = gw;
- }
- fnhe->fnhe_stamp = jiffies;
-}
+ spin_unlock_bh(&fnhe_lock);
-static inline void rt_free(struct rtable *rt)
-{
- call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
+ return ret;
}
-static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
+static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
- struct rtable *orig, *prev, **p = &nh->nh_rth_output;
-
- if (rt_is_input_route(rt))
- p = &nh->nh_rth_input;
+ struct rtable *orig, *prev, **p;
+ bool ret = true;
+ if (rt_is_input_route(rt)) {
+ p = (struct rtable **)&nh->nh_rth_input;
+ } else {
+ if (!nh->nh_pcpu_rth_output)
+ goto nocache;
+ p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
+ }
orig = *p;
prev = cmpxchg(p, orig, rt);
@@ -1223,7 +1241,50 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
* unsuccessful at storing this route into the cache
* we really need to set it.
*/
+nocache:
rt->dst.flags |= DST_NOCACHE;
+ ret = false;
+ }
+
+ return ret;
+}
+
+static DEFINE_SPINLOCK(rt_uncached_lock);
+static LIST_HEAD(rt_uncached_list);
+
+static void rt_add_uncached_list(struct rtable *rt)
+{
+ spin_lock_bh(&rt_uncached_lock);
+ list_add_tail(&rt->rt_uncached, &rt_uncached_list);
+ spin_unlock_bh(&rt_uncached_lock);
+}
+
+static void ipv4_dst_destroy(struct dst_entry *dst)
+{
+ struct rtable *rt = (struct rtable *) dst;
+
+ if (dst->flags & DST_NOCACHE) {
+ spin_lock_bh(&rt_uncached_lock);
+ list_del(&rt->rt_uncached);
+ spin_unlock_bh(&rt_uncached_lock);
+ }
+}
+
+void rt_flush_dev(struct net_device *dev)
+{
+ if (!list_empty(&rt_uncached_list)) {
+ struct net *net = dev_net(dev);
+ struct rtable *rt;
+
+ spin_lock_bh(&rt_uncached_lock);
+ list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
+ if (rt->dst.dev != dev)
+ continue;
+ rt->dst.dev = net->loopback_dev;
+ dev_hold(rt->dst.dev);
+ dev_put(dev);
+ }
+ spin_unlock_bh(&rt_uncached_lock);
}
}
@@ -1239,20 +1300,24 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
struct fib_nh_exception *fnhe,
struct fib_info *fi, u16 type, u32 itag)
{
+ bool cached = false;
+
if (fi) {
struct fib_nh *nh = &FIB_RES_NH(*res);
if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = nh->nh_gw;
- if (unlikely(fnhe))
- rt_bind_exception(rt, fnhe, daddr);
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
- if (!(rt->dst.flags & DST_NOCACHE))
- rt_cache_route(nh, rt);
+ if (unlikely(fnhe))
+ cached = rt_bind_exception(rt, fnhe, daddr);
+ else if (!(rt->dst.flags & DST_NOCACHE))
+ cached = rt_cache_route(nh, rt);
}
+ if (unlikely(!cached))
+ rt_add_uncached_list(rt);
#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1319,6 +1384,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
if (our) {
rth->dst.input= ip_local_deliver;
rth->rt_flags |= RTCF_LOCAL;
@@ -1420,7 +1486,7 @@ static int __mkroute_input(struct sk_buff *skb,
do_cache = false;
if (res->fi) {
if (!itag) {
- rth = FIB_RES_NH(*res).nh_rth_input;
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
@@ -1444,6 +1510,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
@@ -1520,11 +1587,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (ipv4_is_zeronet(daddr))
goto martian_destination;
- if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) {
- if (ipv4_is_loopback(daddr))
+ /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
+ * and call it once if daddr or/and saddr are loopback addresses
+ */
+ if (ipv4_is_loopback(daddr)) {
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
goto martian_destination;
-
- if (ipv4_is_loopback(saddr))
+ } else if (ipv4_is_loopback(saddr)) {
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
goto martian_source;
}
@@ -1549,7 +1619,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (res.type == RTN_LOCAL) {
err = fib_validate_source(skb, saddr, daddr, tos,
- net->loopback_dev->ifindex,
+ LOOPBACK_IFINDEX,
dev, in_dev, &itag);
if (err < 0)
goto martian_source_keep_err;
@@ -1582,7 +1652,7 @@ local_input:
do_cache = false;
if (res.fi) {
if (!itag) {
- rth = FIB_RES_NH(res).nh_rth_input;
+ rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
err = 0;
@@ -1610,6 +1680,7 @@ local_input:
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
@@ -1748,19 +1819,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
fnhe = NULL;
if (fi) {
+ struct rtable __rcu **prth;
+
fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
- if (!fnhe) {
- rth = FIB_RES_NH(*res).nh_rth_output;
- if (rt_cache_valid(rth)) {
- dst_hold(&rth->dst);
- return rth;
- }
+ if (fnhe)
+ prth = &fnhe->fnhe_rth;
+ else
+ prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
+ rth = rcu_dereference(*prth);
+ if (rt_cache_valid(rth)) {
+ dst_hold(&rth->dst);
+ return rth;
}
}
rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(in_dev, NOXFRM),
- fi && !fnhe);
+ fi);
if (!rth)
return ERR_PTR(-ENOBUFS);
@@ -1773,6 +1848,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_iif = orig_oif ? : 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
RT_CACHE_STAT_INC(out_slow_tot);
@@ -1819,7 +1895,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
orig_oif = fl4->flowi4_oif;
- fl4->flowi4_iif = net->loopback_dev->ifindex;
+ fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
@@ -1908,7 +1984,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
if (!fl4->daddr)
fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
dev_out = net->loopback_dev;
- fl4->flowi4_oif = net->loopback_dev->ifindex;
+ fl4->flowi4_oif = LOOPBACK_IFINDEX;
res.type = RTN_LOCAL;
flags |= RTCF_LOCAL;
goto make_route;
@@ -1955,7 +2031,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
}
dev_out = net->loopback_dev;
fl4->flowi4_oif = dev_out->ifindex;
- res.fi = NULL;
flags |= RTCF_LOCAL;
goto make_route;
}
@@ -2052,6 +2127,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_type = ort->rt_type;
rt->rt_gateway = ort->rt_gateway;
+ INIT_LIST_HEAD(&rt->rt_uncached);
+
dst_free(new);
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5840c3255721..1b5ce96707a3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -184,7 +184,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
int ret;
unsigned long vec[3];
struct net *net = current->nsproxy->net_ns;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
struct mem_cgroup *memcg;
#endif
@@ -203,7 +203,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
if (ret)
return ret;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
rcu_read_lock();
memcg = mem_cgroup_from_task(current);
@@ -784,13 +784,6 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "rt_cache_rebuild_count",
- .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "ping_group_range",
.data = &init_net.ipv4.sysctl_ping_group_range,
.maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range),
@@ -829,8 +822,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
table[5].data =
&net->ipv4.sysctl_icmp_ratemask;
table[6].data =
- &net->ipv4.sysctl_rt_cache_rebuild_count;
- table[7].data =
&net->ipv4.sysctl_ping_group_range;
}
@@ -842,8 +833,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
net->ipv4.sysctl_ping_group_range[0] = 1;
net->ipv4.sysctl_ping_group_range[1] = 0;
- net->ipv4.sysctl_rt_cache_rebuild_count = 4;
-
tcp_init_mem(net);
net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 581ecf02c6b5..2109ff4a1daf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -811,7 +811,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
old_size_goal + mss_now > xmit_size_goal)) {
xmit_size_goal = old_size_goal;
} else {
- tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
+ tp->xmit_size_goal_segs =
+ min_t(u16, xmit_size_goal / mss_now,
+ sk->sk_gso_max_segs);
xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
}
}
@@ -2681,7 +2683,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Cap the max timeout in ms TCP will retry/retrans
* before giving up and aborting (ETIMEDOUT) a connection.
*/
- icsk->icsk_user_timeout = msecs_to_jiffies(val);
+ if (val < 0)
+ err = -EINVAL;
+ else
+ icsk->icsk_user_timeout = msecs_to_jiffies(val);
break;
default:
err = -ENOPROTOOPT;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 4d4db16e336e..1432cdb0644c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
left = tp->snd_cwnd - in_flight;
if (sk_can_gso(sk) &&
left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
- left * tp->mss_cache < sk->sk_gso_max_size)
+ left * tp->mss_cache < sk->sk_gso_max_size &&
+ left < sk->sk_gso_max_segs)
return true;
return left <= tcp_max_tso_deferred_mss(tp);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3e07a64ca44e..bcfccc5cb8d0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -237,7 +237,11 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s
tcp_enter_quickack_mode((struct sock *)tp);
break;
case INET_ECN_CE:
- tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+ /* Better not delay acks, sender can have a very low cwnd */
+ tcp_enter_quickack_mode((struct sock *)tp);
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ }
/* fallinto */
default:
tp->ecn_flags |= TCP_ECN_SEEN;
@@ -4351,19 +4355,20 @@ static void tcp_ofo_queue(struct sock *sk)
static bool tcp_prune_ofo_queue(struct sock *sk);
static int tcp_prune_queue(struct sock *sk);
-static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
+static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
+ unsigned int size)
{
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- !sk_rmem_schedule(sk, size)) {
+ !sk_rmem_schedule(sk, skb, size)) {
if (tcp_prune_queue(sk) < 0)
return -1;
- if (!sk_rmem_schedule(sk, size)) {
+ if (!sk_rmem_schedule(sk, skb, size)) {
if (!tcp_prune_ofo_queue(sk))
return -1;
- if (!sk_rmem_schedule(sk, size))
+ if (!sk_rmem_schedule(sk, skb, size))
return -1;
}
}
@@ -4418,7 +4423,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
TCP_ECN_check_ce(tp, skb);
- if (unlikely(tcp_try_rmem_schedule(sk, skb->truesize))) {
+ if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
__kfree_skb(skb);
return;
@@ -4552,17 +4557,17 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
struct tcphdr *th;
bool fragstolen;
- if (tcp_try_rmem_schedule(sk, size + sizeof(*th)))
- goto err;
-
skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
if (!skb)
goto err;
+ if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th)))
+ goto err_free;
+
th = (struct tcphdr *)skb_put(skb, sizeof(*th));
skb_reset_transport_header(skb);
memset(th, 0, sizeof(*th));
@@ -4633,7 +4638,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if (eaten <= 0) {
queue_and_out:
if (eaten < 0 &&
- tcp_try_rmem_schedule(sk, skb->truesize))
+ tcp_try_rmem_schedule(sk, skb, skb->truesize))
goto drop;
eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
@@ -5391,6 +5396,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
+ if (unlikely(sk->sk_rx_dst == NULL))
+ inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
* Header prediction.
* The code loosely follows the one in the famous
@@ -5475,7 +5482,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (tp->copied_seq == tp->rcv_nxt &&
len - tcp_header_len <= tp->ucopy.len) {
#ifdef CONFIG_NET_DMA
- if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
+ if (tp->ucopy.task == current &&
+ sock_owned_by_user(sk) &&
+ tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
copied_early = 1;
eaten = 1;
}
@@ -5602,7 +5611,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
tcp_set_state(sk, TCP_ESTABLISHED);
if (skb != NULL) {
- sk->sk_rx_dst = dst_clone(skb_dst(skb));
+ icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
security_inet_conn_established(sk, skb);
}
@@ -5737,7 +5746,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
TCP_ECN_rcv_synack(tp, th);
- tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
+ tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
tcp_ack(sk, skb, FLAG_SLOWPATH);
/* Ok.. it's good. Set up sequence numbers and
@@ -5750,7 +5759,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* never scaled.
*/
tp->snd_wnd = ntohs(th->window);
- tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
if (!tp->rx_opt.wscale_ok) {
tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b6b07c93924c..1e15c5be04e7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -417,10 +417,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
tp->mtu_info = info;
- if (!sock_owned_by_user(sk))
+ if (!sock_owned_by_user(sk)) {
tcp_v4_mtu_reduced(sk);
- else
- set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
+ } else {
+ if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
+ sock_hold(sk);
+ }
goto out;
}
@@ -1462,6 +1464,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
goto exit_nonewsk;
newsk->sk_gso_type = SKB_GSO_TCPV4;
+ inet_sk_rx_dst_set(newsk, skb);
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
@@ -1617,21 +1620,16 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
#endif
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+ struct dst_entry *dst = sk->sk_rx_dst;
+
sock_rps_save_rxhash(sk, skb);
- if (sk->sk_rx_dst) {
- struct dst_entry *dst = sk->sk_rx_dst;
- if (dst->ops->check(dst, 0) == NULL) {
+ if (dst) {
+ if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+ dst->ops->check(dst, 0) == NULL) {
dst_release(dst);
sk->sk_rx_dst = NULL;
}
}
- if (unlikely(sk->sk_rx_dst == NULL)) {
- struct inet_sock *icsk = inet_sk(sk);
- struct rtable *rt = skb_rtable(skb);
-
- sk->sk_rx_dst = dst_clone(&rt->dst);
- icsk->rx_dst_ifindex = inet_iif(skb);
- }
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
@@ -1709,11 +1707,11 @@ void tcp_v4_early_demux(struct sk_buff *skb)
skb->destructor = sock_edemux;
if (sk->sk_state != TCP_TIME_WAIT) {
struct dst_entry *dst = sk->sk_rx_dst;
- struct inet_sock *icsk = inet_sk(sk);
+
if (dst)
dst = dst_check(dst, 0);
if (dst &&
- icsk->rx_dst_ifindex == skb->skb_iif)
+ inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
skb_dst_set_noref(skb, dst);
}
}
@@ -1874,10 +1872,21 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = {
.twsk_destructor= tcp_twsk_destructor,
};
+void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+ inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+}
+EXPORT_SYMBOL(inet_sk_rx_dst_set);
+
const struct inet_connection_sock_af_ops ipv4_specific = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
+ .sk_rx_dst_set = inet_sk_rx_dst_set,
.conn_request = tcp_v4_conn_request,
.syn_recv_sock = tcp_v4_syn_recv_sock,
.net_header_len = sizeof(struct iphdr),
@@ -2387,7 +2396,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
struct seq_file *f, int i, int uid, int *len)
{
const struct inet_request_sock *ireq = inet_rsk(req);
- int ttd = req->expires - jiffies;
+ long delta = req->expires - jiffies;
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
@@ -2399,7 +2408,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
TCP_SYN_RECV,
0, 0, /* could print option size, but that is af dependent. */
1, /* timers active (only the expire timer) */
- jiffies_to_clock_t(ttd),
+ jiffies_delta_to_clock_t(delta),
req->retrans,
uid,
0, /* non standard timer */
@@ -2450,7 +2459,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
tp->write_seq - tp->snd_una,
rx_queue,
timer_active,
- jiffies_to_clock_t(timer_expires - jiffies),
+ jiffies_delta_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
sock_i_uid(sk),
icsk->icsk_probes_out,
@@ -2469,10 +2478,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
{
__be32 dest, src;
__u16 destp, srcp;
- int ttd = tw->tw_ttd - jiffies;
-
- if (ttd < 0)
- ttd = 0;
+ long delta = tw->tw_ttd - jiffies;
dest = tw->tw_daddr;
src = tw->tw_rcv_saddr;
@@ -2482,7 +2488,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
- 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
atomic_read(&tw->tw_refcnt), tw, len);
}
@@ -2635,7 +2641,7 @@ struct proto tcp_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
.init_cgroup = tcp_init_cgroup,
.destroy_cgroup = tcp_destroy_cgroup,
.proto_cgroup = tcp_proto_cgroup,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 2288a6399e1e..0abe67bb4d3a 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -731,6 +731,18 @@ static int __net_init tcp_net_metrics_init(struct net *net)
static void __net_exit tcp_net_metrics_exit(struct net *net)
{
+ unsigned int i;
+
+ for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) {
+ struct tcp_metrics_block *tm, *next;
+
+ tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1);
+ while (tm) {
+ next = rcu_dereference_protected(tm->tcpm_next, 1);
+ kfree(tm);
+ tm = next;
+ }
+ }
kfree(net->ipv4.tcp_metrics_hash);
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 5912ac3fd240..6ff7f10dce9d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -387,8 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
struct tcp_sock *oldtp = tcp_sk(sk);
struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
- newsk->sk_rx_dst = dst_clone(skb_dst(skb));
-
/* TCP Cookie Transactions require space for the cookie pair,
* as it differs for each connection. There is no need to
* copy any s_data_payload stored at the original socket.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 33cd065cfbd8..d04632673a9e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -910,14 +910,18 @@ void tcp_release_cb(struct sock *sk)
if (flags & (1UL << TCP_TSQ_DEFERRED))
tcp_tsq_handler(sk);
- if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED))
+ if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
tcp_write_timer_handler(sk);
-
- if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED))
+ __sock_put(sk);
+ }
+ if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
tcp_delack_timer_handler(sk);
-
- if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED))
+ __sock_put(sk);
+ }
+ if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
sk->sk_prot->mtu_reduced(sk);
+ __sock_put(sk);
+ }
}
EXPORT_SYMBOL(tcp_release_cb);
@@ -940,7 +944,7 @@ void __init tcp_tasklet_init(void)
* We cant xmit new skbs from this context, as we might already
* hold qdisc lock.
*/
-void tcp_wfree(struct sk_buff *skb)
+static void tcp_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
@@ -1522,21 +1526,21 @@ static void tcp_cwnd_validate(struct sock *sk)
* when we would be allowed to send the split-due-to-Nagle skb fully.
*/
static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
- unsigned int mss_now, unsigned int cwnd)
+ unsigned int mss_now, unsigned int max_segs)
{
const struct tcp_sock *tp = tcp_sk(sk);
- u32 needed, window, cwnd_len;
+ u32 needed, window, max_len;
window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
- cwnd_len = mss_now * cwnd;
+ max_len = mss_now * max_segs;
- if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
- return cwnd_len;
+ if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
+ return max_len;
needed = min(skb->len, window);
- if (cwnd_len <= needed)
- return cwnd_len;
+ if (max_len <= needed)
+ return max_len;
return needed - needed % mss_now;
}
@@ -1765,7 +1769,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
limit = min(send_win, cong_win);
/* If a full-sized TSO skb can be sent, do it. */
- if (limit >= sk->sk_gso_max_size)
+ if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
+ sk->sk_gso_max_segs * tp->mss_cache))
goto send_now;
/* Middle in queue won't get any more data, full sendable already? */
@@ -1999,7 +2004,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
- cwnd_quota);
+ min_t(unsigned int,
+ cwnd_quota,
+ sk->sk_gso_max_segs));
if (skb->len > limit &&
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
@@ -2045,7 +2052,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
if (unlikely(sk->sk_state == TCP_CLOSE))
return;
- if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC))
+ if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
+ sk_gfp_atomic(sk, GFP_ATOMIC)))
tcp_check_probe_timer(sk);
}
@@ -2666,7 +2674,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
s_data_desired = cvp->s_data_desired;
- skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC);
+ skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
if (unlikely(!skb)) {
dst_release(dst);
return NULL;
@@ -3064,7 +3073,7 @@ void tcp_send_ack(struct sock *sk)
* tcp_transmit_skb() will set the ownership to this
* sock.
*/
- buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+ buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
if (buff == NULL) {
inet_csk_schedule_ack(sk);
inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
@@ -3079,7 +3088,7 @@ void tcp_send_ack(struct sock *sk)
/* Send it off, this clears delayed acks for us. */
TCP_SKB_CB(buff)->when = tcp_time_stamp;
- tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
+ tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
}
/* This routine sends a packet with an out of date sequence
@@ -3099,7 +3108,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
struct sk_buff *skb;
/* We don't queue it, tcp_transmit_skb() sets ownership. */
- skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+ skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
if (skb == NULL)
return -1;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6df36ad55a38..b774a03bd1dc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data)
inet_csk(sk)->icsk_ack.blocked = 1;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
/* deleguate our work to tcp_release_cb() */
- set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+ if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+ sock_hold(sk);
}
bh_unlock_sock(sk);
sock_put(sk);
@@ -481,7 +482,8 @@ static void tcp_write_timer(unsigned long data)
tcp_write_timer_handler(sk);
} else {
/* deleguate our work to tcp_release_cb() */
- set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+ if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+ sock_hold(sk);
}
bh_unlock_sock(sk);
sock_put(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b4c3582a991f..6f6d1aca3c3d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
uh->check = CSUM_MANGLED_0;
send:
- err = ip_send_skb(skb);
+ err = ip_send_skb(sock_net(sk), skb);
if (err) {
if (err == -ENOBUFS && !inet->recverr) {
UDP_INC_STATS_USER(sock_net(sk),
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c6281847f16a..681ea2f413e2 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_type = rt->rt_type;
xdst->u.rt.rt_gateway = rt->rt_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+ INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
return 0;
}
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 5728695b5449..4f7fe7270e37 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -201,6 +201,22 @@ config IPV6_TUNNEL
If unsure, say N.
+config IPV6_GRE
+ tristate "IPv6: GRE tunnel"
+ select IPV6_TUNNEL
+ ---help---
+ Tunneling means encapsulating data of one protocol type within
+ another protocol and sending it over a channel that understands the
+ encapsulating protocol. This particular tunneling driver implements
+ GRE (Generic Routing Encapsulation) and at this time allows
+ encapsulating of IPv4 or IPv6 over existing IPv6 infrastructure.
+ This driver is useful if the other endpoint is a Cisco router: Cisco
+ likes GRE much better than the other Linux tunneling driver ("IP
+ tunneling" above). In addition, GRE allows multicast redistribution
+ through the tunnel.
+
+ Saying M here will produce a module called ip6_gre. If unsure, say N.
+
config IPV6_MULTIPLE_TABLES
bool "IPv6: Multiple Routing Tables"
depends on EXPERIMENTAL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 686934acfac1..b6d3f79151e2 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IPV6_SIT) += sit.o
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
+obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
obj-y += addrconf_core.o exthdrs_core.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 79181819a24f..6bc85f7c31e3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -494,8 +494,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
struct net_device *dev;
struct inet6_dev *idev;
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
+ for_each_netdev(net, dev) {
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -504,7 +503,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
dev_forward_change(idev);
}
}
- rcu_read_unlock();
}
static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
new file mode 100644
index 000000000000..a84ad5dc4fcf
--- /dev/null
+++ b/net/ipv6/ip6_gre.c
@@ -0,0 +1,1790 @@
+/*
+ * GRE over IPv6 protocol decoder.
+ *
+ * Authors: Dmitry Kozlov (xeb@mail.ru)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/hash.h>
+#include <linux/if_tunnel.h>
+#include <linux/ip6_tunnel.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/addrconf.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/rtnetlink.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/ip6_tunnel.h>
+
+
+#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
+#define IPV6_TCLASS_SHIFT 20
+
+#define HASH_SIZE_SHIFT 5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+
+static int ip6gre_net_id __read_mostly;
+struct ip6gre_net {
+ struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
+
+ struct net_device *fb_tunnel_dev;
+};
+
+static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
+static int ip6gre_tunnel_init(struct net_device *dev);
+static void ip6gre_tunnel_setup(struct net_device *dev);
+static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
+
+/* Tunnel hash table */
+
+/*
+ 4 hash tables:
+
+ 3: (remote,local)
+ 2: (remote,*)
+ 1: (*,local)
+ 0: (*,*)
+
+ We require exact key match i.e. if a key is present in packet
+ it will match only tunnel with the same key; if it is not present,
+ it will match only keyless tunnel.
+
+ All keysless packets, if not matched configured keyless tunnels
+ will match fallback tunnel.
+ */
+
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+static u32 HASH_ADDR(const struct in6_addr *addr)
+{
+ u32 hash = ipv6_addr_hash(addr);
+
+ return hash_32(hash, HASH_SIZE_SHIFT);
+}
+
+#define tunnels_r_l tunnels[3]
+#define tunnels_r tunnels[2]
+#define tunnels_l tunnels[1]
+#define tunnels_wc tunnels[0]
+/*
+ * Locking : hash tables are protected by RCU and RTNL
+ */
+
+#define for_each_ip_tunnel_rcu(start) \
+ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
+/* often modified stats are per cpu, other are shared (netdev->stats) */
+struct pcpu_tstats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+ struct u64_stats_sync syncp;
+};
+
+static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *tot)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_bh(&tstats->syncp);
+ rx_packets = tstats->rx_packets;
+ tx_packets = tstats->tx_packets;
+ rx_bytes = tstats->rx_bytes;
+ tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+ tot->rx_packets += rx_packets;
+ tot->tx_packets += tx_packets;
+ tot->rx_bytes += rx_bytes;
+ tot->tx_bytes += tx_bytes;
+ }
+
+ tot->multicast = dev->stats.multicast;
+ tot->rx_crc_errors = dev->stats.rx_crc_errors;
+ tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+ tot->rx_length_errors = dev->stats.rx_length_errors;
+ tot->rx_errors = dev->stats.rx_errors;
+ tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+ tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+ tot->tx_dropped = dev->stats.tx_dropped;
+ tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+ tot->tx_errors = dev->stats.tx_errors;
+
+ return tot;
+}
+
+/* Given src, dst and key, find appropriate for input tunnel. */
+
+static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
+ const struct in6_addr *remote, const struct in6_addr *local,
+ __be32 key, __be16 gre_proto)
+{
+ struct net *net = dev_net(dev);
+ int link = dev->ifindex;
+ unsigned int h0 = HASH_ADDR(remote);
+ unsigned int h1 = HASH_KEY(key);
+ struct ip6_tnl *t, *cand = NULL;
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+ ARPHRD_ETHER : ARPHRD_IP6GRE;
+ int score, cand_score = 4;
+
+ for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_equal(remote, &t->parms.raddr) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
+ if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
+ if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
+ (!ipv6_addr_equal(local, &t->parms.raddr) ||
+ !ipv6_addr_is_multicast(local))) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
+ if (t->parms.i_key != key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ if (cand != NULL)
+ return cand;
+
+ dev = ign->fb_tunnel_dev;
+ if (dev->flags & IFF_UP)
+ return netdev_priv(dev);
+
+ return NULL;
+}
+
+static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
+ const struct __ip6_tnl_parm *p)
+{
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ unsigned int h = HASH_KEY(p->i_key);
+ int prio = 0;
+
+ if (!ipv6_addr_any(local))
+ prio |= 1;
+ if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) {
+ prio |= 2;
+ h ^= HASH_ADDR(remote);
+ }
+
+ return &ign->tunnels[prio][h];
+}
+
+static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
+ const struct ip6_tnl *t)
+{
+ return __ip6gre_bucket(ign, &t->parms);
+}
+
+static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
+
+ rcu_assign_pointer(t->next, rtnl_dereference(*tp));
+ rcu_assign_pointer(*tp, t);
+}
+
+static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp;
+ struct ip6_tnl *iter;
+
+ for (tp = ip6gre_bucket(ign, t);
+ (iter = rtnl_dereference(*tp)) != NULL;
+ tp = &iter->next) {
+ if (t == iter) {
+ rcu_assign_pointer(*tp, t->next);
+ break;
+ }
+ }
+}
+
+static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
+ const struct __ip6_tnl_parm *parms,
+ int type)
+{
+ const struct in6_addr *remote = &parms->raddr;
+ const struct in6_addr *local = &parms->laddr;
+ __be32 key = parms->i_key;
+ int link = parms->link;
+ struct ip6_tnl *t;
+ struct ip6_tnl __rcu **tp;
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ for (tp = __ip6gre_bucket(ign, parms);
+ (t = rtnl_dereference(*tp)) != NULL;
+ tp = &t->next)
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ key == t->parms.i_key &&
+ link == t->parms.link &&
+ type == t->dev->type)
+ break;
+
+ return t;
+}
+
+static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
+ const struct __ip6_tnl_parm *parms, int create)
+{
+ struct ip6_tnl *t, *nt;
+ struct net_device *dev;
+ char name[IFNAMSIZ];
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
+ if (t || !create)
+ return t;
+
+ if (parms->name[0])
+ strlcpy(name, parms->name, IFNAMSIZ);
+ else
+ strcpy(name, "ip6gre%d");
+
+ dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);
+ if (!dev)
+ return NULL;
+
+ dev_net_set(dev, net);
+
+ nt = netdev_priv(dev);
+ nt->parms = *parms;
+ dev->rtnl_link_ops = &ip6gre_link_ops;
+
+ nt->dev = dev;
+ ip6gre_tnl_link_config(nt, 1);
+
+ if (register_netdevice(dev) < 0)
+ goto failed_free;
+
+ /* Can use a lockless transmit, unless we generate output sequences */
+ if (!(nt->parms.o_flags & GRE_SEQ))
+ dev->features |= NETIF_F_LLTX;
+
+ dev_hold(dev);
+ ip6gre_tunnel_link(ign, nt);
+ return nt;
+
+failed_free:
+ free_netdev(dev);
+ return NULL;
+}
+
+static void ip6gre_tunnel_uninit(struct net_device *dev)
+{
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ ip6gre_tunnel_unlink(ign, netdev_priv(dev));
+ dev_put(dev);
+}
+
+
+static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
+ __be16 *p = (__be16 *)(ipv6h + 1);
+ int grehlen = sizeof(ipv6h) + 4;
+ struct ip6_tnl *t;
+ __be16 flags;
+
+ flags = p[0];
+ if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
+ if (flags&(GRE_VERSION|GRE_ROUTING))
+ return;
+ if (flags&GRE_KEY) {
+ grehlen += 4;
+ if (flags&GRE_CSUM)
+ grehlen += 4;
+ }
+ }
+
+ /* If only 8 bytes returned, keyed message will be dropped here */
+ if (skb_headlen(skb) < grehlen)
+ return;
+
+ rcu_read_lock();
+
+ t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
+ flags & GRE_KEY ?
+ *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
+ p[1]);
+ if (t == NULL)
+ goto out;
+
+ switch (type) {
+ __u32 teli;
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 mtu;
+ case ICMPV6_DEST_UNREACH:
+ net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
+ t->parms.name);
+ break;
+ case ICMPV6_TIME_EXCEED:
+ if (code == ICMPV6_EXC_HOPLIMIT) {
+ net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+ t->parms.name);
+ }
+ break;
+ case ICMPV6_PARAMPROB:
+ teli = 0;
+ if (code == ICMPV6_HDR_FIELD)
+ teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
+
+ if (teli && teli == info - 2) {
+ tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
+ if (tel->encap_limit == 0) {
+ net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
+ t->parms.name);
+ }
+ } else {
+ net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+ t->parms.name);
+ }
+ break;
+ case ICMPV6_PKT_TOOBIG:
+ mtu = info - offset;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ t->dev->mtu = mtu;
+ break;
+ }
+
+ if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
+ t->err_count++;
+ else
+ t->err_count = 1;
+ t->err_time = jiffies;
+out:
+ rcu_read_unlock();
+}
+
+static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h, struct sk_buff *skb)
+{
+ __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
+
+ if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+ ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
+
+ if (INET_ECN_is_ce(dsfield))
+ IP_ECN_set_ce(ip_hdr(skb));
+}
+
+static inline void ip6gre_ecn_decapsulate_ipv6(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h, struct sk_buff *skb)
+{
+ if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+ ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
+
+ if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
+ IP6_ECN_set_ce(ipv6_hdr(skb));
+}
+
+static int ip6gre_rcv(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ u8 *h;
+ __be16 flags;
+ __sum16 csum = 0;
+ __be32 key = 0;
+ u32 seqno = 0;
+ struct ip6_tnl *tunnel;
+ int offset = 4;
+ __be16 gre_proto;
+
+ if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+ goto drop_nolock;
+
+ ipv6h = ipv6_hdr(skb);
+ h = skb->data;
+ flags = *(__be16 *)h;
+
+ if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
+ /* - Version must be 0.
+ - We do not support routing headers.
+ */
+ if (flags&(GRE_VERSION|GRE_ROUTING))
+ goto drop_nolock;
+
+ if (flags&GRE_CSUM) {
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ csum = csum_fold(skb->csum);
+ if (!csum)
+ break;
+ /* fall through */
+ case CHECKSUM_NONE:
+ skb->csum = 0;
+ csum = __skb_checksum_complete(skb);
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ }
+ offset += 4;
+ }
+ if (flags&GRE_KEY) {
+ key = *(__be32 *)(h + offset);
+ offset += 4;
+ }
+ if (flags&GRE_SEQ) {
+ seqno = ntohl(*(__be32 *)(h + offset));
+ offset += 4;
+ }
+ }
+
+ gre_proto = *(__be16 *)(h + 2);
+
+ rcu_read_lock();
+ tunnel = ip6gre_tunnel_lookup(skb->dev,
+ &ipv6h->saddr, &ipv6h->daddr, key,
+ gre_proto);
+ if (tunnel) {
+ struct pcpu_tstats *tstats;
+
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+
+ if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) {
+ tunnel->dev->stats.rx_dropped++;
+ goto drop;
+ }
+
+ secpath_reset(skb);
+
+ skb->protocol = gre_proto;
+ /* WCCP version 1 and 2 protocol decoding.
+ * - Change protocol to IP
+ * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+ */
+ if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
+ skb->protocol = htons(ETH_P_IP);
+ if ((*(h + offset) & 0xF0) != 0x40)
+ offset += 4;
+ }
+
+ skb->mac_header = skb->network_header;
+ __pskb_pull(skb, offset);
+ skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
+ skb->pkt_type = PACKET_HOST;
+
+ if (((flags&GRE_CSUM) && csum) ||
+ (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
+ tunnel->dev->stats.rx_crc_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
+ if (tunnel->parms.i_flags&GRE_SEQ) {
+ if (!(flags&GRE_SEQ) ||
+ (tunnel->i_seqno &&
+ (s32)(seqno - tunnel->i_seqno) < 0)) {
+ tunnel->dev->stats.rx_fifo_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
+ tunnel->i_seqno = seqno + 1;
+ }
+
+ /* Warning: All skb pointers will be invalidated! */
+ if (tunnel->dev->type == ARPHRD_ETHER) {
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ tunnel->dev->stats.rx_length_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
+
+ ipv6h = ipv6_hdr(skb);
+ skb->protocol = eth_type_trans(skb, tunnel->dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ }
+
+ tstats = this_cpu_ptr(tunnel->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ __skb_tunnel_rx(skb, tunnel->dev);
+
+ skb_reset_network_header(skb);
+ if (skb->protocol == htons(ETH_P_IP))
+ ip6gre_ecn_decapsulate_ipv4(tunnel, ipv6h, skb);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ ip6gre_ecn_decapsulate_ipv6(tunnel, ipv6h, skb);
+
+ netif_rx(skb);
+
+ rcu_read_unlock();
+ return 0;
+ }
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+ rcu_read_unlock();
+drop_nolock:
+ kfree_skb(skb);
+ return 0;
+}
+
+struct ipv6_tel_txoption {
+ struct ipv6_txoptions ops;
+ __u8 dst_opt[8];
+};
+
+static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
+{
+ memset(opt, 0, sizeof(struct ipv6_tel_txoption));
+
+ opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
+ opt->dst_opt[3] = 1;
+ opt->dst_opt[4] = encap_limit;
+ opt->dst_opt[5] = IPV6_TLV_PADN;
+ opt->dst_opt[6] = 1;
+
+ opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
+ opt->ops.opt_nflen = 8;
+}
+
+static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
+ struct net_device *dev,
+ __u8 dsfield,
+ struct flowi6 *fl6,
+ int encap_limit,
+ __u32 *pmtu)
+{
+ struct net *net = dev_net(dev);
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+ struct net_device *tdev; /* Device to other host */
+ struct ipv6hdr *ipv6h; /* Our new IP header */
+ unsigned int max_headroom; /* The extra header space needed */
+ int gre_hlen;
+ struct ipv6_tel_txoption opt;
+ int mtu;
+ struct dst_entry *dst = NULL, *ndst = NULL;
+ struct net_device_stats *stats = &tunnel->dev->stats;
+ int err = -1;
+ u8 proto;
+ int pkt_len;
+ struct sk_buff *new_skb;
+
+ if (dev->type == ARPHRD_ETHER)
+ IPCB(skb)->flags = 0;
+
+ if (dev->header_ops && dev->type == ARPHRD_IP6GRE) {
+ gre_hlen = 0;
+ ipv6h = (struct ipv6hdr *)skb->data;
+ fl6->daddr = ipv6h->daddr;
+ } else {
+ gre_hlen = tunnel->hlen;
+ fl6->daddr = tunnel->parms.raddr;
+ }
+
+ if (!fl6->flowi6_mark)
+ dst = ip6_tnl_dst_check(tunnel);
+
+ if (!dst) {
+ ndst = ip6_route_output(net, NULL, fl6);
+
+ if (ndst->error)
+ goto tx_err_link_failure;
+ ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
+ if (IS_ERR(ndst)) {
+ err = PTR_ERR(ndst);
+ ndst = NULL;
+ goto tx_err_link_failure;
+ }
+ dst = ndst;
+ }
+
+ tdev = dst->dev;
+
+ if (tdev == dev) {
+ stats->collisions++;
+ net_warn_ratelimited("%s: Local routing loop detected!\n",
+ tunnel->parms.name);
+ goto tx_err_dst_release;
+ }
+
+ mtu = dst_mtu(dst) - sizeof(*ipv6h);
+ if (encap_limit >= 0) {
+ max_headroom += 8;
+ mtu -= 8;
+ }
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ if (skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ if (skb->len > mtu) {
+ *pmtu = mtu;
+ err = -EMSGSIZE;
+ goto tx_err_dst_release;
+ }
+
+ if (tunnel->err_count > 0) {
+ if (time_before(jiffies,
+ tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) {
+ tunnel->err_count--;
+
+ dst_link_failure(skb);
+ } else
+ tunnel->err_count = 0;
+ }
+
+ max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+
+ if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
+ (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
+ new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (max_headroom > dev->needed_headroom)
+ dev->needed_headroom = max_headroom;
+ if (!new_skb)
+ goto tx_err_dst_release;
+
+ if (skb->sk)
+ skb_set_owner_w(new_skb, skb->sk);
+ consume_skb(skb);
+ skb = new_skb;
+ }
+
+ skb_dst_drop(skb);
+
+ if (fl6->flowi6_mark) {
+ skb_dst_set(skb, dst);
+ ndst = NULL;
+ } else {
+ skb_dst_set_noref(skb, dst);
+ }
+
+ skb->transport_header = skb->network_header;
+
+ proto = NEXTHDR_GRE;
+ if (encap_limit >= 0) {
+ init_tel_txopt(&opt, encap_limit);
+ ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
+ }
+
+ skb_push(skb, gre_hlen);
+ skb_reset_network_header(skb);
+
+ /*
+ * Push down and install the IP header.
+ */
+ ipv6h = ipv6_hdr(skb);
+ *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000);
+ dsfield = INET_ECN_encapsulate(0, dsfield);
+ ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
+ ipv6h->hop_limit = tunnel->parms.hop_limit;
+ ipv6h->nexthdr = proto;
+ ipv6h->saddr = fl6->saddr;
+ ipv6h->daddr = fl6->daddr;
+
+ ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
+ ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
+ htons(ETH_P_TEB) : skb->protocol;
+
+ if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
+ __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
+
+ if (tunnel->parms.o_flags&GRE_SEQ) {
+ ++tunnel->o_seqno;
+ *ptr = htonl(tunnel->o_seqno);
+ ptr--;
+ }
+ if (tunnel->parms.o_flags&GRE_KEY) {
+ *ptr = tunnel->parms.o_key;
+ ptr--;
+ }
+ if (tunnel->parms.o_flags&GRE_CSUM) {
+ *ptr = 0;
+ *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1),
+ skb->len - sizeof(struct ipv6hdr));
+ }
+ }
+
+ nf_reset(skb);
+ pkt_len = skb->len;
+ err = ip6_local_out(skb);
+
+ if (net_xmit_eval(err) == 0) {
+ struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats);
+
+ tstats->tx_bytes += pkt_len;
+ tstats->tx_packets++;
+ } else {
+ stats->tx_errors++;
+ stats->tx_aborted_errors++;
+ }
+
+ if (ndst)
+ ip6_tnl_dst_store(tunnel, ndst);
+
+ return 0;
+tx_err_link_failure:
+ stats->tx_carrier_errors++;
+ dst_link_failure(skb);
+tx_err_dst_release:
+ dst_release(ndst);
+ return err;
+}
+
+static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ const struct iphdr *iph = ip_hdr(skb);
+ int encap_limit = -1;
+ struct flowi6 fl6;
+ __u8 dsfield;
+ __u32 mtu;
+ int err;
+
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPIP;
+
+ dsfield = ipv4_get_dsfield(iph);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+ & IPV6_TCLASS_MASK;
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+
+ err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ if (err != 0) {
+ /* XXX: send ICMP error even if DF is not set. */
+ if (err == -EMSGSIZE)
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ int encap_limit = -1;
+ __u16 offset;
+ struct flowi6 fl6;
+ __u8 dsfield;
+ __u32 mtu;
+ int err;
+
+ if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+ return -1;
+
+ offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ if (offset > 0) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+ if (tel->encap_limit == 0) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2);
+ return -1;
+ }
+ encap_limit = tel->encap_limit - 1;
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPV6;
+
+ dsfield = ipv6_get_dsfield(ipv6h);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+
+ err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ if (err != 0) {
+ if (err == -EMSGSIZE)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ * @t: the outgoing tunnel device
+ * @hdr: IPv6 header from the incoming packet
+ *
+ * Description:
+ * Avoid trivial tunneling loop by checking that tunnel exit-point
+ * doesn't match source of incoming packet.
+ *
+ * Return:
+ * 1 if conflict,
+ * 0 else
+ **/
+
+static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t,
+ const struct ipv6hdr *hdr)
+{
+ return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
+}
+
+static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ int encap_limit = -1;
+ struct flowi6 fl6;
+ __u32 mtu;
+ int err;
+
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = skb->protocol;
+
+ err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu);
+
+ return err;
+}
+
+static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->dev->stats;
+ int ret;
+
+ if (!ip6_tnl_xmit_ctl(t))
+ return -1;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ret = ip6gre_xmit_ipv4(skb, dev);
+ break;
+ case htons(ETH_P_IPV6):
+ ret = ip6gre_xmit_ipv6(skb, dev);
+ break;
+ default:
+ ret = ip6gre_xmit_other(skb, dev);
+ break;
+ }
+
+ if (ret < 0)
+ goto tx_err;
+
+ return NETDEV_TX_OK;
+
+tx_err:
+ stats->tx_errors++;
+ stats->tx_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+{
+ struct net_device *dev = t->dev;
+ struct __ip6_tnl_parm *p = &t->parms;
+ struct flowi6 *fl6 = &t->fl.u.ip6;
+ int addend = sizeof(struct ipv6hdr) + 4;
+
+ if (dev->type != ARPHRD_ETHER) {
+ memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+ memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+ }
+
+ /* Set up flowi template */
+ fl6->saddr = p->laddr;
+ fl6->daddr = p->raddr;
+ fl6->flowi6_oif = p->link;
+ fl6->flowlabel = 0;
+
+ if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
+ fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
+ if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
+ fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
+
+ p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
+ p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
+
+ if (p->flags&IP6_TNL_F_CAP_XMIT &&
+ p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER)
+ dev->flags |= IFF_POINTOPOINT;
+ else
+ dev->flags &= ~IFF_POINTOPOINT;
+
+ dev->iflink = p->link;
+
+ /* Precalculate GRE options length */
+ if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
+ if (t->parms.o_flags&GRE_CSUM)
+ addend += 4;
+ if (t->parms.o_flags&GRE_KEY)
+ addend += 4;
+ if (t->parms.o_flags&GRE_SEQ)
+ addend += 4;
+ }
+
+ if (p->flags & IP6_TNL_F_CAP_XMIT) {
+ int strict = (ipv6_addr_type(&p->raddr) &
+ (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
+
+ struct rt6_info *rt = rt6_lookup(dev_net(dev),
+ &p->raddr, &p->laddr,
+ p->link, strict);
+
+ if (rt == NULL)
+ return;
+
+ if (rt->dst.dev) {
+ dev->hard_header_len = rt->dst.dev->hard_header_len + addend;
+
+ if (set_mtu) {
+ dev->mtu = rt->dst.dev->mtu - addend;
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu -= 8;
+
+ if (dev->mtu < IPV6_MIN_MTU)
+ dev->mtu = IPV6_MIN_MTU;
+ }
+ }
+ dst_release(&rt->dst);
+ }
+
+ t->hlen = addend;
+}
+
+static int ip6gre_tnl_change(struct ip6_tnl *t,
+ const struct __ip6_tnl_parm *p, int set_mtu)
+{
+ t->parms.laddr = p->laddr;
+ t->parms.raddr = p->raddr;
+ t->parms.flags = p->flags;
+ t->parms.hop_limit = p->hop_limit;
+ t->parms.encap_limit = p->encap_limit;
+ t->parms.flowinfo = p->flowinfo;
+ t->parms.link = p->link;
+ t->parms.proto = p->proto;
+ t->parms.i_key = p->i_key;
+ t->parms.o_key = p->o_key;
+ t->parms.i_flags = p->i_flags;
+ t->parms.o_flags = p->o_flags;
+ ip6_tnl_dst_reset(t);
+ ip6gre_tnl_link_config(t, set_mtu);
+ return 0;
+}
+
+static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
+ const struct ip6_tnl_parm2 *u)
+{
+ p->laddr = u->laddr;
+ p->raddr = u->raddr;
+ p->flags = u->flags;
+ p->hop_limit = u->hop_limit;
+ p->encap_limit = u->encap_limit;
+ p->flowinfo = u->flowinfo;
+ p->link = u->link;
+ p->i_key = u->i_key;
+ p->o_key = u->o_key;
+ p->i_flags = u->i_flags;
+ p->o_flags = u->o_flags;
+ memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
+ const struct __ip6_tnl_parm *p)
+{
+ u->proto = IPPROTO_GRE;
+ u->laddr = p->laddr;
+ u->raddr = p->raddr;
+ u->flags = p->flags;
+ u->hop_limit = p->hop_limit;
+ u->encap_limit = p->encap_limit;
+ u->flowinfo = p->flowinfo;
+ u->link = p->link;
+ u->i_key = p->i_key;
+ u->o_key = p->o_key;
+ u->i_flags = p->i_flags;
+ u->o_flags = p->o_flags;
+ memcpy(u->name, p->name, sizeof(u->name));
+}
+
+static int ip6gre_tunnel_ioctl(struct net_device *dev,
+ struct ifreq *ifr, int cmd)
+{
+ int err = 0;
+ struct ip6_tnl_parm2 p;
+ struct __ip6_tnl_parm p1;
+ struct ip6_tnl *t;
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ t = NULL;
+ if (dev == ign->fb_tunnel_dev) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ err = -EFAULT;
+ break;
+ }
+ ip6gre_tnl_parm_from_user(&p1, &p);
+ t = ip6gre_tunnel_locate(net, &p1, 0);
+ }
+ if (t == NULL)
+ t = netdev_priv(dev);
+ ip6gre_tnl_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+ break;
+
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ goto done;
+
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+
+ err = -EINVAL;
+ if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))
+ goto done;
+
+ if (!(p.i_flags&GRE_KEY))
+ p.i_key = 0;
+ if (!(p.o_flags&GRE_KEY))
+ p.o_key = 0;
+
+ ip6gre_tnl_parm_from_user(&p1, &p);
+ t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL);
+
+ if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+ if (t != NULL) {
+ if (t->dev != dev) {
+ err = -EEXIST;
+ break;
+ }
+ } else {
+ t = netdev_priv(dev);
+
+ ip6gre_tunnel_unlink(ign, t);
+ synchronize_net();
+ ip6gre_tnl_change(t, &p1, 1);
+ ip6gre_tunnel_link(ign, t);
+ netdev_state_change(dev);
+ }
+ }
+
+ if (t) {
+ err = 0;
+
+ ip6gre_tnl_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+
+ case SIOCDELTUNNEL:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ goto done;
+
+ if (dev == ign->fb_tunnel_dev) {
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+ err = -ENOENT;
+ ip6gre_tnl_parm_from_user(&p1, &p);
+ t = ip6gre_tunnel_locate(net, &p1, 0);
+ if (t == NULL)
+ goto done;
+ err = -EPERM;
+ if (t == netdev_priv(ign->fb_tunnel_dev))
+ goto done;
+ dev = t->dev;
+ }
+ unregister_netdevice(dev);
+ err = 0;
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+done:
+ return err;
+}
+
+static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+ if (new_mtu < 68 ||
+ new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type,
+ const void *daddr, const void *saddr, unsigned int len)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
+ __be16 *p = (__be16 *)(ipv6h+1);
+
+ *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000);
+ ipv6h->hop_limit = t->parms.hop_limit;
+ ipv6h->nexthdr = NEXTHDR_GRE;
+ ipv6h->saddr = t->parms.laddr;
+ ipv6h->daddr = t->parms.raddr;
+
+ p[0] = t->parms.o_flags;
+ p[1] = htons(type);
+
+ /*
+ * Set the source hardware address.
+ */
+
+ if (saddr)
+ memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr));
+ if (daddr)
+ memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr));
+ if (!ipv6_addr_any(&ipv6h->daddr))
+ return t->hlen;
+
+ return -t->hlen;
+}
+
+static int ip6gre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
+{
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb_mac_header(skb);
+ memcpy(haddr, &ipv6h->saddr, sizeof(struct in6_addr));
+ return sizeof(struct in6_addr);
+}
+
+static const struct header_ops ip6gre_header_ops = {
+ .create = ip6gre_header,
+ .parse = ip6gre_header_parse,
+};
+
+static const struct net_device_ops ip6gre_netdev_ops = {
+ .ndo_init = ip6gre_tunnel_init,
+ .ndo_uninit = ip6gre_tunnel_uninit,
+ .ndo_start_xmit = ip6gre_tunnel_xmit,
+ .ndo_do_ioctl = ip6gre_tunnel_ioctl,
+ .ndo_change_mtu = ip6gre_tunnel_change_mtu,
+ .ndo_get_stats64 = ip6gre_get_stats64,
+};
+
+static void ip6gre_dev_free(struct net_device *dev)
+{
+ free_percpu(dev->tstats);
+ free_netdev(dev);
+}
+
+static void ip6gre_tunnel_setup(struct net_device *dev)
+{
+ struct ip6_tnl *t;
+
+ dev->netdev_ops = &ip6gre_netdev_ops;
+ dev->destructor = ip6gre_dev_free;
+
+ dev->type = ARPHRD_IP6GRE;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4;
+ dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4;
+ t = netdev_priv(dev);
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu -= 8;
+ dev->flags |= IFF_NOARP;
+ dev->iflink = 0;
+ dev->addr_len = sizeof(struct in6_addr);
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+}
+
+static int ip6gre_tunnel_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel;
+
+ tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+ strcpy(tunnel->parms.name, dev->name);
+
+ memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
+ memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
+
+ if (ipv6_addr_any(&tunnel->parms.raddr))
+ dev->header_ops = &ip6gre_header_ops;
+
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void ip6gre_fb_tunnel_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+ strcpy(tunnel->parms.name, dev->name);
+
+ tunnel->hlen = sizeof(struct ipv6hdr) + 4;
+
+ dev_hold(dev);
+}
+
+
+static struct inet6_protocol ip6gre_protocol __read_mostly = {
+ .handler = ip6gre_rcv,
+ .err_handler = ip6gre_err,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
+ struct list_head *head)
+{
+ int prio;
+
+ for (prio = 0; prio < 4; prio++) {
+ int h;
+ for (h = 0; h < HASH_SIZE; h++) {
+ struct ip6_tnl *t;
+
+ t = rtnl_dereference(ign->tunnels[prio][h]);
+
+ while (t != NULL) {
+ unregister_netdevice_queue(t->dev, head);
+ t = rtnl_dereference(t->next);
+ }
+ }
+ }
+}
+
+static int __net_init ip6gre_init_net(struct net *net)
+{
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ int err;
+
+ ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
+ ip6gre_tunnel_setup);
+ if (!ign->fb_tunnel_dev) {
+ err = -ENOMEM;
+ goto err_alloc_dev;
+ }
+ dev_net_set(ign->fb_tunnel_dev, net);
+
+ ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
+ ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
+
+ err = register_netdev(ign->fb_tunnel_dev);
+ if (err)
+ goto err_reg_dev;
+
+ rcu_assign_pointer(ign->tunnels_wc[0],
+ netdev_priv(ign->fb_tunnel_dev));
+ return 0;
+
+err_reg_dev:
+ ip6gre_dev_free(ign->fb_tunnel_dev);
+err_alloc_dev:
+ return err;
+}
+
+static void __net_exit ip6gre_exit_net(struct net *net)
+{
+ struct ip6gre_net *ign;
+ LIST_HEAD(list);
+
+ ign = net_generic(net, ip6gre_net_id);
+ rtnl_lock();
+ ip6gre_destroy_tunnels(ign, &list);
+ unregister_netdevice_many(&list);
+ rtnl_unlock();
+}
+
+static struct pernet_operations ip6gre_net_ops = {
+ .init = ip6gre_init_net,
+ .exit = ip6gre_exit_net,
+ .id = &ip6gre_net_id,
+ .size = sizeof(struct ip6gre_net),
+};
+
+static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ __be16 flags;
+
+ if (!data)
+ return 0;
+
+ flags = 0;
+ if (data[IFLA_GRE_IFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ if (data[IFLA_GRE_OFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ if (flags & (GRE_VERSION|GRE_ROUTING))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ struct in6_addr daddr;
+
+ if (tb[IFLA_ADDRESS]) {
+ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+ return -EINVAL;
+ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+ return -EADDRNOTAVAIL;
+ }
+
+ if (!data)
+ goto out;
+
+ if (data[IFLA_GRE_REMOTE]) {
+ nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
+ if (ipv6_addr_any(&daddr))
+ return -EINVAL;
+ }
+
+out:
+ return ip6gre_tunnel_validate(tb, data);
+}
+
+
+static void ip6gre_netlink_parms(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ if (!data)
+ return;
+
+ if (data[IFLA_GRE_LINK])
+ parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
+
+ if (data[IFLA_GRE_IFLAGS])
+ parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
+
+ if (data[IFLA_GRE_OFLAGS])
+ parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
+
+ if (data[IFLA_GRE_IKEY])
+ parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
+
+ if (data[IFLA_GRE_OKEY])
+ parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
+
+ if (data[IFLA_GRE_LOCAL])
+ nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr));
+
+ if (data[IFLA_GRE_REMOTE])
+ nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
+
+ if (data[IFLA_GRE_TTL])
+ parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);
+
+ if (data[IFLA_GRE_ENCAP_LIMIT])
+ parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
+
+ if (data[IFLA_GRE_FLOWINFO])
+ parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
+
+ if (data[IFLA_GRE_FLAGS])
+ parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
+}
+
+static int ip6gre_tap_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel;
+
+ tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+ strcpy(tunnel->parms.name, dev->name);
+
+ ip6gre_tnl_link_config(tunnel, 1);
+
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static const struct net_device_ops ip6gre_tap_netdev_ops = {
+ .ndo_init = ip6gre_tap_init,
+ .ndo_uninit = ip6gre_tunnel_uninit,
+ .ndo_start_xmit = ip6gre_tunnel_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ip6gre_tunnel_change_mtu,
+ .ndo_get_stats64 = ip6gre_get_stats64,
+};
+
+static void ip6gre_tap_setup(struct net_device *dev)
+{
+
+ ether_setup(dev);
+
+ dev->netdev_ops = &ip6gre_tap_netdev_ops;
+ dev->destructor = ip6gre_dev_free;
+
+ dev->iflink = 0;
+ dev->features |= NETIF_F_NETNS_LOCAL;
+}
+
+static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct ip6_tnl *nt;
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ int err;
+
+ nt = netdev_priv(dev);
+ ip6gre_netlink_parms(data, &nt->parms);
+
+ if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+ return -EEXIST;
+
+ if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
+ eth_hw_addr_random(dev);
+
+ nt->dev = dev;
+ ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+
+ /* Can use a lockless transmit, unless we generate output sequences */
+ if (!(nt->parms.o_flags & GRE_SEQ))
+ dev->features |= NETIF_F_LLTX;
+
+ err = register_netdevice(dev);
+ if (err)
+ goto out;
+
+ dev_hold(dev);
+ ip6gre_tunnel_link(ign, nt);
+
+out:
+ return err;
+}
+
+static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct ip6_tnl *t, *nt;
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct __ip6_tnl_parm p;
+
+ if (dev == ign->fb_tunnel_dev)
+ return -EINVAL;
+
+ nt = netdev_priv(dev);
+ ip6gre_netlink_parms(data, &p);
+
+ t = ip6gre_tunnel_locate(net, &p, 0);
+
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
+ } else {
+ t = nt;
+
+ ip6gre_tunnel_unlink(ign, t);
+ ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+ ip6gre_tunnel_link(ign, t);
+ netdev_state_change(dev);
+ }
+
+ return 0;
+}
+
+static size_t ip6gre_get_size(const struct net_device *dev)
+{
+ return
+ /* IFLA_GRE_LINK */
+ nla_total_size(4) +
+ /* IFLA_GRE_IFLAGS */
+ nla_total_size(2) +
+ /* IFLA_GRE_OFLAGS */
+ nla_total_size(2) +
+ /* IFLA_GRE_IKEY */
+ nla_total_size(4) +
+ /* IFLA_GRE_OKEY */
+ nla_total_size(4) +
+ /* IFLA_GRE_LOCAL */
+ nla_total_size(4) +
+ /* IFLA_GRE_REMOTE */
+ nla_total_size(4) +
+ /* IFLA_GRE_TTL */
+ nla_total_size(1) +
+ /* IFLA_GRE_TOS */
+ nla_total_size(1) +
+ /* IFLA_GRE_ENCAP_LIMIT */
+ nla_total_size(1) +
+ /* IFLA_GRE_FLOWINFO */
+ nla_total_size(4) +
+ /* IFLA_GRE_FLAGS */
+ nla_total_size(4) +
+ 0;
+}
+
+static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct __ip6_tnl_parm *p = &t->parms;
+
+ if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
+ nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
+ nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
+ nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->raddr) ||
+ nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->laddr) ||
+ nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
+ /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
+ nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
+ nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
+ nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
+ [IFLA_GRE_LINK] = { .type = NLA_U32 },
+ [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
+ [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
+ [IFLA_GRE_IKEY] = { .type = NLA_U32 },
+ [IFLA_GRE_OKEY] = { .type = NLA_U32 },
+ [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
+ [IFLA_GRE_TTL] = { .type = NLA_U8 },
+ [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
+ [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
+ [IFLA_GRE_FLAGS] = { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
+ .kind = "ip6gre",
+ .maxtype = IFLA_GRE_MAX,
+ .policy = ip6gre_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = ip6gre_tunnel_setup,
+ .validate = ip6gre_tunnel_validate,
+ .newlink = ip6gre_newlink,
+ .changelink = ip6gre_changelink,
+ .get_size = ip6gre_get_size,
+ .fill_info = ip6gre_fill_info,
+};
+
+static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
+ .kind = "ip6gretap",
+ .maxtype = IFLA_GRE_MAX,
+ .policy = ip6gre_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = ip6gre_tap_setup,
+ .validate = ip6gre_tap_validate,
+ .newlink = ip6gre_newlink,
+ .changelink = ip6gre_changelink,
+ .get_size = ip6gre_get_size,
+ .fill_info = ip6gre_fill_info,
+};
+
+/*
+ * And now the modules code and kernel interface.
+ */
+
+static int __init ip6gre_init(void)
+{
+ int err;
+
+ pr_info("GRE over IPv6 tunneling driver\n");
+
+ err = register_pernet_device(&ip6gre_net_ops);
+ if (err < 0)
+ return err;
+
+ err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE);
+ if (err < 0) {
+ pr_info("%s: can't add protocol\n", __func__);
+ goto add_proto_failed;
+ }
+
+ err = rtnl_link_register(&ip6gre_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
+
+ err = rtnl_link_register(&ip6gre_tap_ops);
+ if (err < 0)
+ goto tap_ops_failed;
+
+out:
+ return err;
+
+tap_ops_failed:
+ rtnl_link_unregister(&ip6gre_link_ops);
+rtnl_link_failed:
+ inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
+add_proto_failed:
+ unregister_pernet_device(&ip6gre_net_ops);
+ goto out;
+}
+
+static void __exit ip6gre_fini(void)
+{
+ rtnl_link_unregister(&ip6gre_tap_ops);
+ rtnl_link_unregister(&ip6gre_link_ops);
+ inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
+ unregister_pernet_device(&ip6gre_net_ops);
+}
+
+module_init(ip6gre_init);
+module_exit(ip6gre_fini);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
+MODULE_ALIAS_RTNL_LINK("ip6gre");
+MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 47975e363fcd..a52d864d562b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -52,11 +52,9 @@ int ip6_rcv_finish(struct sk_buff *skb)
if (sysctl_ip_early_demux && !skb_dst(skb)) {
const struct inet6_protocol *ipprot;
- rcu_read_lock();
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
if (ipprot && ipprot->early_demux)
ipprot->early_demux(skb);
- rcu_read_unlock();
}
if (!skb_dst(skb))
ip6_route_input(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9a1d5fe6aef8..cb7e2ded6f08 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -126,7 +126,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
* Locking : hash tables are protected by RCU and RTNL
*/
-static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
+struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
{
struct dst_entry *dst = t->dst_cache;
@@ -139,20 +139,23 @@ static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
return dst;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
-static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
+void ip6_tnl_dst_reset(struct ip6_tnl *t)
{
dst_release(t->dst_cache);
t->dst_cache = NULL;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
-static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
+void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *) dst;
t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
dst_release(t->dst_cache);
t->dst_cache = dst;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
@@ -200,7 +203,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
**/
static struct ip6_tnl __rcu **
-ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p)
+ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
{
const struct in6_addr *remote = &p->raddr;
const struct in6_addr *local = &p->laddr;
@@ -267,7 +270,7 @@ static void ip6_dev_free(struct net_device *dev)
* created tunnel or NULL
**/
-static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
+static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
{
struct net_device *dev;
struct ip6_tnl *t;
@@ -322,7 +325,7 @@ failed:
**/
static struct ip6_tnl *ip6_tnl_locate(struct net *net,
- struct ip6_tnl_parm *p, int create)
+ struct __ip6_tnl_parm *p, int create)
{
const struct in6_addr *remote = &p->raddr;
const struct in6_addr *local = &p->laddr;
@@ -374,8 +377,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
* else index to encapsulation limit
**/
-static __u16
-parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
+__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
{
const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
__u8 nexthdr = ipv6h->nexthdr;
@@ -425,6 +427,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
}
return 0;
}
+EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
/**
* ip6_tnl_err - tunnel error handler
@@ -480,7 +483,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
case ICMPV6_PARAMPROB:
teli = 0;
if ((*code) == ICMPV6_HDR_FIELD)
- teli = parse_tlv_tnl_enc_lim(skb, skb->data);
+ teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
if (teli && teli == *info - 2) {
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
@@ -693,11 +696,11 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
IP6_ECN_set_ce(ipv6_hdr(skb));
}
-static __u32 ip6_tnl_get_cap(struct ip6_tnl *t,
+__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
const struct in6_addr *laddr,
const struct in6_addr *raddr)
{
- struct ip6_tnl_parm *p = &t->parms;
+ struct __ip6_tnl_parm *p = &t->parms;
int ltype = ipv6_addr_type(laddr);
int rtype = ipv6_addr_type(raddr);
__u32 flags = 0;
@@ -715,13 +718,14 @@ static __u32 ip6_tnl_get_cap(struct ip6_tnl *t,
}
return flags;
}
+EXPORT_SYMBOL(ip6_tnl_get_cap);
/* called with rcu_read_lock() */
-static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
+int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
const struct in6_addr *laddr,
const struct in6_addr *raddr)
{
- struct ip6_tnl_parm *p = &t->parms;
+ struct __ip6_tnl_parm *p = &t->parms;
int ret = 0;
struct net *net = dev_net(t->dev);
@@ -740,6 +744,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
}
return ret;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
/**
* ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
@@ -859,9 +864,9 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
-static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
+int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
{
- struct ip6_tnl_parm *p = &t->parms;
+ struct __ip6_tnl_parm *p = &t->parms;
int ret = 0;
struct net *net = dev_net(t->dev);
@@ -885,6 +890,8 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
}
return ret;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
+
/**
* ip6_tnl_xmit2 - encapsulate packet and send
* @skb: the outgoing socket buffer
@@ -1085,7 +1092,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
!ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
return -1;
- offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
+ offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
@@ -1152,7 +1159,7 @@ tx_err:
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
- struct ip6_tnl_parm *p = &t->parms;
+ struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -1215,7 +1222,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
**/
static int
-ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
{
t->parms.laddr = p->laddr;
t->parms.raddr = p->raddr;
@@ -1230,6 +1237,34 @@ ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
return 0;
}
+static void
+ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
+{
+ p->laddr = u->laddr;
+ p->raddr = u->raddr;
+ p->flags = u->flags;
+ p->hop_limit = u->hop_limit;
+ p->encap_limit = u->encap_limit;
+ p->flowinfo = u->flowinfo;
+ p->link = u->link;
+ p->proto = u->proto;
+ memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void
+ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
+{
+ u->laddr = p->laddr;
+ u->raddr = p->raddr;
+ u->flags = p->flags;
+ u->hop_limit = p->hop_limit;
+ u->encap_limit = p->encap_limit;
+ u->flowinfo = p->flowinfo;
+ u->link = p->link;
+ u->proto = p->proto;
+ memcpy(u->name, p->name, sizeof(u->name));
+}
+
/**
* ip6_tnl_ioctl - configure ipv6 tunnels from userspace
* @dev: virtual device associated with tunnel
@@ -1263,6 +1298,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
int err = 0;
struct ip6_tnl_parm p;
+ struct __ip6_tnl_parm p1;
struct ip6_tnl *t = NULL;
struct net *net = dev_net(dev);
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
@@ -1274,11 +1310,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EFAULT;
break;
}
- t = ip6_tnl_locate(net, &p, 0);
+ ip6_tnl_parm_from_user(&p1, &p);
+ t = ip6_tnl_locate(net, &p1, 0);
+ } else {
+ memset(&p, 0, sizeof(p));
}
if (t == NULL)
t = netdev_priv(dev);
- memcpy(&p, &t->parms, sizeof (p));
+ ip6_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
err = -EFAULT;
}
@@ -1295,7 +1334,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
p.proto != 0)
break;
- t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL);
+ ip6_tnl_parm_from_user(&p1, &p);
+ t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
@@ -1307,13 +1347,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
ip6_tnl_unlink(ip6n, t);
synchronize_net();
- err = ip6_tnl_change(t, &p);
+ err = ip6_tnl_change(t, &p1);
ip6_tnl_link(ip6n, t);
netdev_state_change(dev);
}
if (t) {
err = 0;
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p)))
+ ip6_tnl_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
err = -EFAULT;
} else
@@ -1329,7 +1370,9 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
break;
err = -ENOENT;
- if ((t = ip6_tnl_locate(net, &p, 0)) == NULL)
+ ip6_tnl_parm_from_user(&p1, &p);
+ t = ip6_tnl_locate(net, &p1, 0);
+ if (t == NULL)
break;
err = -EPERM;
if (t->dev == ip6n->fb_tnl_dev)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index da2e92d05c15..745a32042950 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -307,10 +307,10 @@ static int __net_init ipv6_proc_init_net(struct net *net)
goto proc_dev_snmp6_fail;
return 0;
+proc_dev_snmp6_fail:
+ proc_net_remove(net, "snmp6");
proc_snmp6_fail:
proc_net_remove(net, "sockstat6");
-proc_dev_snmp6_fail:
- proc_net_remove(net, "dev_snmp6");
return -ENOMEM;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cf02cb97bbdd..0ddf2d132e7f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -965,7 +965,7 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
{
int flags = 0;
- fl6->flowi6_iif = net->loopback_dev->ifindex;
+ fl6->flowi6_iif = LOOPBACK_IFINDEX;
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
flags |= RT6_LOOKUP_F_IFACE;
@@ -2480,12 +2480,8 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
goto nla_put_failure;
- if (!(rt->rt6i_flags & RTF_EXPIRES))
- expires = 0;
- else if (rt->dst.expires - jiffies < INT_MAX)
- expires = rt->dst.expires - jiffies;
- else
- expires = INT_MAX;
+
+ expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
goto nla_put_failure;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 221224e72507..cd49de3678fb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -94,6 +94,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
}
#endif
+static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+ inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+ if (rt->rt6i_node)
+ inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+}
+
static void tcp_v6_hash(struct sock *sk)
{
if (sk->sk_state != TCP_CLOSE) {
@@ -1270,6 +1282,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_gso_type = SKB_GSO_TCPV6;
__ip6_dst_store(newsk, dst, NULL, NULL);
+ inet6_sk_rx_dst_set(newsk, skb);
newtcp6sk = (struct tcp6_sock *)newsk;
inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1299,7 +1312,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
/* Clone pktoptions received with SYN */
newnp->pktoptions = NULL;
if (treq->pktopts != NULL) {
- newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
+ newnp->pktoptions = skb_clone(treq->pktopts,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
consume_skb(treq->pktopts);
treq->pktopts = NULL;
if (newnp->pktoptions)
@@ -1349,7 +1363,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
* across. Shucks.
*/
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
- AF_INET6, key->key, key->keylen, GFP_ATOMIC);
+ AF_INET6, key->key, key->keylen,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
}
#endif
@@ -1442,10 +1457,20 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, GFP_ATOMIC);
+ opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+ struct dst_entry *dst = sk->sk_rx_dst;
+
sock_rps_save_rxhash(sk, skb);
+ if (dst) {
+ if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+ dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
+ dst_release(dst);
+ sk->sk_rx_dst = NULL;
+ }
+ }
+
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
goto reset;
if (opt_skb)
@@ -1703,9 +1728,9 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
struct dst_entry *dst = sk->sk_rx_dst;
struct inet_sock *icsk = inet_sk(sk);
if (dst)
- dst = dst_check(dst, 0);
+ dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
if (dst &&
- icsk->rx_dst_ifindex == inet6_iif(skb))
+ icsk->rx_dst_ifindex == skb->skb_iif)
skb_dst_set_noref(skb, dst);
}
}
@@ -1721,6 +1746,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
.rebuild_header = inet6_sk_rebuild_header,
+ .sk_rx_dst_set = inet6_sk_rx_dst_set,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
.net_header_len = sizeof(struct ipv6hdr),
@@ -1752,6 +1778,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
+ .sk_rx_dst_set = inet_sk_rx_dst_set,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
.net_header_len = sizeof(struct iphdr),
@@ -1873,7 +1900,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
tp->write_seq-tp->snd_una,
(sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
timer_active,
- jiffies_to_clock_t(timer_expires - jiffies),
+ jiffies_delta_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
sock_i_uid(sp),
icsk->icsk_probes_out,
@@ -1893,10 +1920,7 @@ static void get_timewait6_sock(struct seq_file *seq,
const struct in6_addr *dest, *src;
__u16 destp, srcp;
const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
- int ttd = tw->tw_ttd - jiffies;
-
- if (ttd < 0)
- ttd = 0;
+ long delta = tw->tw_ttd - jiffies;
dest = &tw6->tw_v6_daddr;
src = &tw6->tw_v6_rcv_saddr;
@@ -1912,7 +1936,7 @@ static void get_timewait6_sock(struct seq_file *seq,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
tw->tw_substate, 0, 0,
- 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
atomic_read(&tw->tw_refcnt), tw);
}
@@ -2015,7 +2039,7 @@ struct proto tcpv6_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
.proto_cgroup = tcp_proto_cgroup,
#endif
};
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ef39812107b1..f8c4c08ffb60 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -73,6 +73,13 @@ static int xfrm6_get_tos(const struct flowi *fl)
return 0;
}
+static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
+{
+ struct rt6_info *rt = (struct rt6_info *)xdst;
+
+ rt6_init_peer(rt, net->ipv6.peers);
+}
+
static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
@@ -286,6 +293,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
.get_tos = xfrm6_get_tos,
+ .init_dst = xfrm6_init_dst,
.init_path = xfrm6_init_path,
.fill_dst = xfrm6_fill_dst,
.blackhole_route = ip6_blackhole_route,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 34e418508a67..ec7d161c129b 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3024,7 +3024,7 @@ static u32 get_acqseq(void)
return res;
}
-static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp, int dir)
+static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp)
{
struct sk_buff *skb;
struct sadb_msg *hdr;
@@ -3105,7 +3105,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
- pol->sadb_x_policy_dir = dir+1;
+ pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1;
pol->sadb_x_policy_id = xp->index;
/* Set sadb_comb's. */
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 35e1e4bde587..927547171bc7 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -410,6 +410,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
lsa->l2tp_family = AF_INET6;
lsa->l2tp_flowinfo = 0;
lsa->l2tp_scope_id = 0;
+ lsa->l2tp_unused = 0;
if (peer) {
if (!lsk->peer_conn_id)
return -ENOTCONN;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index f6fe4d400502..c2190005a114 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -969,14 +969,13 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
struct sockaddr_llc sllc;
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
- int rc = 0;
+ int rc = -EBADF;
memset(&sllc, 0, sizeof(sllc));
lock_sock(sk);
if (sock_flag(sk, SOCK_ZAPPED))
goto out;
*uaddrlen = sizeof(sllc);
- memset(uaddr, 0, *uaddrlen);
if (peer) {
rc = -ENOTCONN;
if (sk->sk_state != TCP_ESTABLISHED)
@@ -1206,7 +1205,7 @@ static int __init llc2_init(void)
rc = llc_proc_init();
if (rc != 0) {
printk(llc_proc_err_msg);
- goto out_unregister_llc_proto;
+ goto out_station;
}
rc = llc_sysctl_init();
if (rc) {
@@ -1226,7 +1225,8 @@ out_sysctl:
llc_sysctl_exit();
out_proc:
llc_proc_exit();
-out_unregister_llc_proto:
+out_station:
+ llc_station_exit();
proto_unregister(&llc_proto);
goto out;
}
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index e32cab44ea95..dd3e83328ad5 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -42,6 +42,7 @@ static void (*llc_type_handlers[2])(struct llc_sap *sap,
void llc_add_pack(int type, void (*handler)(struct llc_sap *sap,
struct sk_buff *skb))
{
+ smp_wmb(); /* ensure initialisation is complete before it's called */
if (type == LLC_DEST_SAP || type == LLC_DEST_CONN)
llc_type_handlers[type - 1] = handler;
}
@@ -50,11 +51,19 @@ void llc_remove_pack(int type)
{
if (type == LLC_DEST_SAP || type == LLC_DEST_CONN)
llc_type_handlers[type - 1] = NULL;
+ synchronize_net();
}
void llc_set_station_handler(void (*handler)(struct sk_buff *skb))
{
+ /* Ensure initialisation is complete before it's called */
+ if (handler)
+ smp_wmb();
+
llc_station_handler = handler;
+
+ if (!handler)
+ synchronize_net();
}
/**
@@ -150,6 +159,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
int dest;
int (*rcv)(struct sk_buff *, struct net_device *,
struct packet_type *, struct net_device *);
+ void (*sta_handler)(struct sk_buff *skb);
+ void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
if (!net_eq(dev_net(dev), &init_net))
goto drop;
@@ -182,7 +193,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
*/
rcv = rcu_dereference(sap->rcv_func);
dest = llc_pdu_type(skb);
- if (unlikely(!dest || !llc_type_handlers[dest - 1])) {
+ sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL;
+ if (unlikely(!sap_handler)) {
if (rcv)
rcv(skb, dev, pt, orig_dev);
else
@@ -193,7 +205,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
if (cskb)
rcv(cskb, dev, pt, orig_dev);
}
- llc_type_handlers[dest - 1](sap, skb);
+ sap_handler(sap, skb);
}
llc_sap_put(sap);
out:
@@ -202,9 +214,10 @@ drop:
kfree_skb(skb);
goto out;
handle_station:
- if (!llc_station_handler)
+ sta_handler = ACCESS_ONCE(llc_station_handler);
+ if (!sta_handler)
goto drop;
- llc_station_handler(skb);
+ sta_handler(skb);
goto out;
}
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 39a8d8924b9c..b2f2bac2c2a2 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -268,7 +268,7 @@ static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb)
out:
return rc;
free:
- kfree_skb(skb);
+ kfree_skb(nskb);
goto out;
}
@@ -293,7 +293,7 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb)
out:
return rc;
free:
- kfree_skb(skb);
+ kfree_skb(nskb);
goto out;
}
@@ -322,7 +322,7 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb)
out:
return rc;
free:
- kfree_skb(skb);
+ kfree_skb(nskb);
goto out;
}
@@ -687,12 +687,8 @@ static void llc_station_rcv(struct sk_buff *skb)
llc_station_state_process(skb);
}
-int __init llc_station_init(void)
+void __init llc_station_init(void)
{
- int rc = -ENOBUFS;
- struct sk_buff *skb;
- struct llc_station_state_ev *ev;
-
skb_queue_head_init(&llc_main_station.mac_pdu_q);
skb_queue_head_init(&llc_main_station.ev_q.list);
spin_lock_init(&llc_main_station.ev_q.lock);
@@ -700,23 +696,12 @@ int __init llc_station_init(void)
(unsigned long)&llc_main_station);
llc_main_station.ack_timer.expires = jiffies +
sysctl_llc_station_ack_timeout;
- skb = alloc_skb(0, GFP_ATOMIC);
- if (!skb)
- goto out;
- rc = 0;
- llc_set_station_handler(llc_station_rcv);
- ev = llc_station_ev(skb);
- memset(ev, 0, sizeof(*ev));
llc_main_station.maximum_retry = 1;
- llc_main_station.state = LLC_STATION_STATE_DOWN;
- ev->type = LLC_STATION_EV_TYPE_SIMPLE;
- ev->prim_type = LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK;
- rc = llc_station_next_state(skb);
-out:
- return rc;
+ llc_main_station.state = LLC_STATION_STATE_UP;
+ llc_set_station_handler(llc_station_rcv);
}
-void __exit llc_station_exit(void)
+void llc_station_exit(void)
{
llc_set_station_handler(NULL);
}
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 6fac18c0423f..0e2f83e71277 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -136,10 +136,13 @@ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie)
* mesh_accept_plinks_update - update accepting_plink in local mesh beacons
*
* @sdata: mesh interface in which mesh beacons are going to be updated
+ *
+ * Returns: beacon changed flag if the beacon content changed.
*/
-void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
+u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
{
bool free_plinks;
+ u32 changed = 0;
/* In case mesh_plink_free_count > 0 and mesh_plinktbl_capacity == 0,
* the mesh interface might be able to establish plinks with peers that
@@ -149,8 +152,12 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
*/
free_plinks = mesh_plink_availables(sdata);
- if (free_plinks != sdata->u.mesh.accepting_plinks)
- ieee80211_mesh_housekeeping_timer((unsigned long) sdata);
+ if (free_plinks != sdata->u.mesh.accepting_plinks) {
+ sdata->u.mesh.accepting_plinks = free_plinks;
+ changed = BSS_CHANGED_BEACON;
+ }
+
+ return changed;
}
int mesh_rmc_init(struct ieee80211_sub_if_data *sdata)
@@ -262,7 +269,6 @@ mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
neighbors = (neighbors > 15) ? 15 : neighbors;
*pos++ = neighbors << 1;
/* Mesh capability */
- ifmsh->accepting_plinks = mesh_plink_availables(sdata);
*pos = MESHCONF_CAPAB_FORWARDING;
*pos |= ifmsh->accepting_plinks ?
MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
@@ -521,14 +527,13 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh)
{
- bool free_plinks;
+ u32 changed;
ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT);
mesh_path_expire(sdata);
- free_plinks = mesh_plink_availables(sdata);
- if (free_plinks != sdata->u.mesh.accepting_plinks)
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
+ changed = mesh_accept_plinks_update(sdata);
+ ieee80211_bss_info_change_notify(sdata, changed);
mod_timer(&ifmsh->housekeeping_timer,
round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL));
@@ -622,6 +627,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
del_timer_sync(&sdata->u.mesh.housekeeping_timer);
del_timer_sync(&sdata->u.mesh.mesh_path_root_timer);
+ del_timer_sync(&sdata->u.mesh.mesh_path_timer);
/*
* If the timer fired while we waited for it, it will have
* requeued the work. Now the work will be running again
@@ -634,6 +640,8 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
local->fif_other_bss--;
atomic_dec(&local->iff_allmultis);
ieee80211_configure_filter(local);
+
+ sdata->u.mesh.timers_running = 0;
}
static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index faaa39bcfd10..13fd5b5fdb0a 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -282,7 +282,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
u8 *hw_addr,
struct ieee802_11_elems *ie);
bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
-void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
+u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
void mesh_plink_broken(struct sta_info *sta);
void mesh_plink_deactivate(struct sta_info *sta);
int mesh_plink_open(struct sta_info *sta);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index af671b984df3..f20e9f26d137 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -48,17 +48,17 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
u8 *da, __le16 llid, __le16 plid, __le16 reason);
static inline
-void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata)
+u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata)
{
atomic_inc(&sdata->u.mesh.mshstats.estab_plinks);
- mesh_accept_plinks_update(sdata);
+ return mesh_accept_plinks_update(sdata);
}
static inline
-void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata)
+u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata)
{
atomic_dec(&sdata->u.mesh.mshstats.estab_plinks);
- mesh_accept_plinks_update(sdata);
+ return mesh_accept_plinks_update(sdata);
}
/**
@@ -170,22 +170,21 @@ out:
* @sta: mesh peer link to deactivate
*
* All mesh paths with this peer as next hop will be flushed
+ * Returns beacon changed flag if the beacon content changed.
*
* Locking: the caller must hold sta->lock
*/
-static bool __mesh_plink_deactivate(struct sta_info *sta)
+static u32 __mesh_plink_deactivate(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- bool deactivated = false;
+ u32 changed = 0;
- if (sta->plink_state == NL80211_PLINK_ESTAB) {
- mesh_plink_dec_estab_count(sdata);
- deactivated = true;
- }
+ if (sta->plink_state == NL80211_PLINK_ESTAB)
+ changed = mesh_plink_dec_estab_count(sdata);
sta->plink_state = NL80211_PLINK_BLOCKED;
mesh_path_flush_by_nexthop(sta);
- return deactivated;
+ return changed;
}
/**
@@ -198,18 +197,17 @@ static bool __mesh_plink_deactivate(struct sta_info *sta)
void mesh_plink_deactivate(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- bool deactivated;
+ u32 changed;
spin_lock_bh(&sta->lock);
- deactivated = __mesh_plink_deactivate(sta);
+ changed = __mesh_plink_deactivate(sta);
sta->reason = cpu_to_le16(WLAN_REASON_MESH_PEER_CANCELED);
mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE,
sta->sta.addr, sta->llid, sta->plid,
sta->reason);
spin_unlock_bh(&sta->lock);
- if (deactivated)
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
+ ieee80211_bss_info_change_notify(sdata, changed);
}
static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
@@ -541,15 +539,14 @@ int mesh_plink_open(struct sta_info *sta)
void mesh_plink_block(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- bool deactivated;
+ u32 changed;
spin_lock_bh(&sta->lock);
- deactivated = __mesh_plink_deactivate(sta);
+ changed = __mesh_plink_deactivate(sta);
sta->plink_state = NL80211_PLINK_BLOCKED;
spin_unlock_bh(&sta->lock);
- if (deactivated)
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
+ ieee80211_bss_info_change_notify(sdata, changed);
}
@@ -852,9 +849,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
del_timer(&sta->plink_timer);
sta->plink_state = NL80211_PLINK_ESTAB;
spin_unlock_bh(&sta->lock);
- mesh_plink_inc_estab_count(sdata);
+ changed |= mesh_plink_inc_estab_count(sdata);
changed |= mesh_set_ht_prot_mode(sdata);
- changed |= BSS_CHANGED_BEACON;
mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n",
sta->sta.addr);
break;
@@ -888,9 +884,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
del_timer(&sta->plink_timer);
sta->plink_state = NL80211_PLINK_ESTAB;
spin_unlock_bh(&sta->lock);
- mesh_plink_inc_estab_count(sdata);
+ changed |= mesh_plink_inc_estab_count(sdata);
changed |= mesh_set_ht_prot_mode(sdata);
- changed |= BSS_CHANGED_BEACON;
mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n",
sta->sta.addr);
mesh_plink_frame_tx(sdata,
@@ -908,13 +903,12 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
case CLS_ACPT:
reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE);
sta->reason = reason;
- __mesh_plink_deactivate(sta);
+ changed |= __mesh_plink_deactivate(sta);
sta->plink_state = NL80211_PLINK_HOLDING;
llid = sta->llid;
mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
spin_unlock_bh(&sta->lock);
changed |= mesh_set_ht_prot_mode(sdata);
- changed |= BSS_CHANGED_BEACON;
mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE,
sta->sta.addr, llid, plid, reason);
break;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cef0c9e79aba..a4a5acdbaa4d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1430,6 +1430,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
del_timer_sync(&sdata->u.mgd.timer);
del_timer_sync(&sdata->u.mgd.chswitch_timer);
+
+ sdata->u.mgd.timers_running = 0;
}
void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index bcaee5d12839..839dd9737989 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -299,7 +299,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
if (local->scan_req != local->int_scan_req)
cfg80211_scan_done(local->scan_req, aborted);
local->scan_req = NULL;
- local->scan_sdata = NULL;
+ rcu_assign_pointer(local->scan_sdata, NULL);
local->scanning = 0;
local->scan_channel = NULL;
@@ -984,7 +984,6 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata)
kfree(local->sched_scan_ies.ie[i]);
drv_sched_scan_stop(local, sdata);
- rcu_assign_pointer(local->sched_scan_sdata, NULL);
}
out:
mutex_unlock(&local->mtx);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 03d3fc6d9d64..3c601378d27e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2765,6 +2765,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
struct ip_vs_timeout_user t;
+ memset(&t, 0, sizeof(t));
__ip_vs_get_timeouts(net, &t);
if (copy_to_user(user, &t, sizeof(t)) != 0)
ret = -EFAULT;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 45cf602a76bc..527651a53a45 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -361,23 +361,6 @@ static void evict_oldest_expect(struct nf_conn *master,
}
}
-static inline int refresh_timer(struct nf_conntrack_expect *i)
-{
- struct nf_conn_help *master_help = nfct_help(i->master);
- const struct nf_conntrack_expect_policy *p;
-
- if (!del_timer(&i->timeout))
- return 0;
-
- p = &rcu_dereference_protected(
- master_help->helper,
- lockdep_is_held(&nf_conntrack_lock)
- )->expect_policy[i->class];
- i->timeout.expires = jiffies + p->timeout * HZ;
- add_timer(&i->timeout);
- return 1;
-}
-
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
const struct nf_conntrack_expect_policy *p;
@@ -386,7 +369,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
struct nf_conn_help *master_help = nfct_help(master);
struct nf_conntrack_helper *helper;
struct net *net = nf_ct_exp_net(expect);
- struct hlist_node *n;
+ struct hlist_node *n, *next;
unsigned int h;
int ret = 1;
@@ -395,12 +378,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
goto out;
}
h = nf_ct_expect_dst_hash(&expect->tuple);
- hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
+ hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) {
if (expect_matches(i, expect)) {
- /* Refresh timer: if it's dying, ignore.. */
- if (refresh_timer(i)) {
- ret = 0;
- goto out;
+ if (del_timer(&i->timeout)) {
+ nf_ct_unlink_expect(i);
+ nf_ct_expect_put(i);
+ break;
}
} else if (expect_clash(i, expect)) {
ret = -EBUSY;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 14f67a2cbcb5..da4fc37a8578 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1896,10 +1896,15 @@ static int
ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
{
struct nlattr *cda[CTA_MAX+1];
+ int ret;
nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy);
- return ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
+ spin_lock_bh(&nf_conntrack_lock);
+ ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
+ spin_unlock_bh(&nf_conntrack_lock);
+
+ return ret;
}
static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 758a1bacc126..5c0a112aeee6 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -183,12 +183,12 @@ static int media_len(const struct nf_conn *ct, const char *dptr,
return len + digits_len(ct, dptr, limit, shift);
}
-static int parse_addr(const struct nf_conn *ct, const char *cp,
- const char **endp, union nf_inet_addr *addr,
- const char *limit)
+static int sip_parse_addr(const struct nf_conn *ct, const char *cp,
+ const char **endp, union nf_inet_addr *addr,
+ const char *limit, bool delim)
{
const char *end;
- int ret = 0;
+ int ret;
if (!ct)
return 0;
@@ -197,16 +197,28 @@ static int parse_addr(const struct nf_conn *ct, const char *cp,
switch (nf_ct_l3num(ct)) {
case AF_INET:
ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end);
+ if (ret == 0)
+ return 0;
break;
case AF_INET6:
+ if (cp < limit && *cp == '[')
+ cp++;
+ else if (delim)
+ return 0;
+
ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end);
+ if (ret == 0)
+ return 0;
+
+ if (end < limit && *end == ']')
+ end++;
+ else if (delim)
+ return 0;
break;
default:
BUG();
}
- if (ret == 0 || end == cp)
- return 0;
if (endp)
*endp = end;
return 1;
@@ -219,7 +231,7 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr,
union nf_inet_addr addr;
const char *aux = dptr;
- if (!parse_addr(ct, dptr, &dptr, &addr, limit)) {
+ if (!sip_parse_addr(ct, dptr, &dptr, &addr, limit, true)) {
pr_debug("ip: %s parse failed.!\n", dptr);
return 0;
}
@@ -296,7 +308,7 @@ int ct_sip_parse_request(const struct nf_conn *ct,
return 0;
dptr += shift;
- if (!parse_addr(ct, dptr, &end, addr, limit))
+ if (!sip_parse_addr(ct, dptr, &end, addr, limit, true))
return -1;
if (end < limit && *end == ':') {
end++;
@@ -550,7 +562,7 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
if (ret == 0)
return ret;
- if (!parse_addr(ct, dptr + *matchoff, &c, addr, limit))
+ if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true))
return -1;
if (*c == ':') {
c++;
@@ -599,7 +611,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
unsigned int dataoff, unsigned int datalen,
const char *name,
unsigned int *matchoff, unsigned int *matchlen,
- union nf_inet_addr *addr)
+ union nf_inet_addr *addr, bool delim)
{
const char *limit = dptr + datalen;
const char *start, *end;
@@ -613,7 +625,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
return 0;
start += strlen(name);
- if (!parse_addr(ct, start, &end, addr, limit))
+ if (!sip_parse_addr(ct, start, &end, addr, limit, delim))
return 0;
*matchoff = start - dptr;
*matchlen = end - start;
@@ -675,6 +687,47 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
return 1;
}
+static int sdp_parse_addr(const struct nf_conn *ct, const char *cp,
+ const char **endp, union nf_inet_addr *addr,
+ const char *limit)
+{
+ const char *end;
+ int ret;
+
+ memset(addr, 0, sizeof(*addr));
+ switch (nf_ct_l3num(ct)) {
+ case AF_INET:
+ ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end);
+ break;
+ case AF_INET6:
+ ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end);
+ break;
+ default:
+ BUG();
+ }
+
+ if (ret == 0)
+ return 0;
+ if (endp)
+ *endp = end;
+ return 1;
+}
+
+/* skip ip address. returns its length. */
+static int sdp_addr_len(const struct nf_conn *ct, const char *dptr,
+ const char *limit, int *shift)
+{
+ union nf_inet_addr addr;
+ const char *aux = dptr;
+
+ if (!sdp_parse_addr(ct, dptr, &dptr, &addr, limit)) {
+ pr_debug("ip: %s parse failed.!\n", dptr);
+ return 0;
+ }
+
+ return dptr - aux;
+}
+
/* SDP header parsing: a SDP session description contains an ordered set of
* headers, starting with a section containing general session parameters,
* optionally followed by multiple media descriptions.
@@ -686,10 +739,10 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
*/
static const struct sip_header ct_sdp_hdrs[] = {
[SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len),
- [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", epaddr_len),
- [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", epaddr_len),
- [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", epaddr_len),
- [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", epaddr_len),
+ [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", sdp_addr_len),
+ [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", sdp_addr_len),
+ [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", sdp_addr_len),
+ [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", sdp_addr_len),
[SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len),
};
@@ -775,8 +828,8 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr,
if (ret <= 0)
return ret;
- if (!parse_addr(ct, dptr + *matchoff, NULL, addr,
- dptr + *matchoff + *matchlen))
+ if (!sdp_parse_addr(ct, dptr + *matchoff, NULL, addr,
+ dptr + *matchoff + *matchlen))
return -1;
return 1;
}
@@ -1515,7 +1568,6 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff,
}
static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly;
-static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly;
static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = {
[SIP_EXPECT_SIGNALLING] = {
@@ -1585,9 +1637,9 @@ static int __init nf_conntrack_sip_init(void)
sip[i][j].me = THIS_MODULE;
if (ports[i] == SIP_PORT)
- sprintf(sip_names[i][j], "sip");
+ sprintf(sip[i][j].name, "sip");
else
- sprintf(sip_names[i][j], "sip-%u", i);
+ sprintf(sip[i][j].name, "sip-%u", i);
pr_debug("port #%u: %u\n", i, ports[i]);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5463969da45b..1445d73533ed 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1362,7 +1362,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (NULL == siocb->scm)
siocb->scm = &scm;
- err = scm_send(sock, msg, siocb->scm);
+ err = scm_send(sock, msg, siocb->scm, true);
if (err < 0)
return err;
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 320fa0e6951a..f3f96badf5aa 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -325,9 +325,6 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
}
}
- if (!acts_list)
- return 0;
-
return do_execute_actions(dp, skb, nla_data(acts_list),
nla_len(acts_list), true);
}
diff --git a/net/packet/Kconfig b/net/packet/Kconfig
index 0060e3b396b7..cc55b35f80e5 100644
--- a/net/packet/Kconfig
+++ b/net/packet/Kconfig
@@ -14,3 +14,11 @@ config PACKET
be called af_packet.
If unsure, say Y.
+
+config PACKET_DIAG
+ tristate "Packet: sockets monitoring interface"
+ depends on PACKET
+ default n
+ ---help---
+ Support for PF_PACKET sockets monitoring interface used by the ss tool.
+ If unsure, say Y.
diff --git a/net/packet/Makefile b/net/packet/Makefile
index 81183eabfdec..9df61347a3c3 100644
--- a/net/packet/Makefile
+++ b/net/packet/Makefile
@@ -3,3 +3,5 @@
#
obj-$(CONFIG_PACKET) += af_packet.o
+obj-$(CONFIG_PACKET_DIAG) += af_packet_diag.o
+af_packet_diag-y += diag.o
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ceaca7c134a0..fe0912f161ce 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -93,6 +93,8 @@
#include <net/inet_common.h>
#endif
+#include "internal.h"
+
/*
Assumptions:
- if device has no dev->hard_header routine, it adds and removes ll header
@@ -146,14 +148,6 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it)
/* Private packet socket structures. */
-struct packet_mclist {
- struct packet_mclist *next;
- int ifindex;
- int count;
- unsigned short type;
- unsigned short alen;
- unsigned char addr[MAX_ADDR_LEN];
-};
/* identical to struct packet_mreq except it has
* a longer address field.
*/
@@ -175,63 +169,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
#define BLK_PLUS_PRIV(sz_of_priv) \
(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
-/* kbdq - kernel block descriptor queue */
-struct tpacket_kbdq_core {
- struct pgv *pkbdq;
- unsigned int feature_req_word;
- unsigned int hdrlen;
- unsigned char reset_pending_on_curr_blk;
- unsigned char delete_blk_timer;
- unsigned short kactive_blk_num;
- unsigned short blk_sizeof_priv;
-
- /* last_kactive_blk_num:
- * trick to see if user-space has caught up
- * in order to avoid refreshing timer when every single pkt arrives.
- */
- unsigned short last_kactive_blk_num;
-
- char *pkblk_start;
- char *pkblk_end;
- int kblk_size;
- unsigned int knum_blocks;
- uint64_t knxt_seq_num;
- char *prev;
- char *nxt_offset;
- struct sk_buff *skb;
-
- atomic_t blk_fill_in_prog;
-
- /* Default is set to 8ms */
-#define DEFAULT_PRB_RETIRE_TOV (8)
-
- unsigned short retire_blk_tov;
- unsigned short version;
- unsigned long tov_in_jiffies;
-
- /* timer to retire an outstanding block */
- struct timer_list retire_blk_timer;
-};
-
#define PGV_FROM_VMALLOC 1
-struct pgv {
- char *buffer;
-};
-
-struct packet_ring_buffer {
- struct pgv *pg_vec;
- unsigned int head;
- unsigned int frames_per_block;
- unsigned int frame_size;
- unsigned int frame_max;
-
- unsigned int pg_vec_order;
- unsigned int pg_vec_pages;
- unsigned int pg_vec_len;
-
- struct tpacket_kbdq_core prb_bdqc;
- atomic_t pending;
-};
#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
@@ -269,52 +207,6 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);
-struct packet_fanout;
-struct packet_sock {
- /* struct sock has to be the first member of packet_sock */
- struct sock sk;
- struct packet_fanout *fanout;
- struct tpacket_stats stats;
- union tpacket_stats_u stats_u;
- struct packet_ring_buffer rx_ring;
- struct packet_ring_buffer tx_ring;
- int copy_thresh;
- spinlock_t bind_lock;
- struct mutex pg_vec_lock;
- unsigned int running:1, /* prot_hook is attached*/
- auxdata:1,
- origdev:1,
- has_vnet_hdr:1;
- int ifindex; /* bound device */
- __be16 num;
- struct packet_mclist *mclist;
- atomic_t mapped;
- enum tpacket_versions tp_version;
- unsigned int tp_hdrlen;
- unsigned int tp_reserve;
- unsigned int tp_loss:1;
- unsigned int tp_tstamp;
- struct packet_type prot_hook ____cacheline_aligned_in_smp;
-};
-
-#define PACKET_FANOUT_MAX 256
-
-struct packet_fanout {
-#ifdef CONFIG_NET_NS
- struct net *net;
-#endif
- unsigned int num_members;
- u16 id;
- u8 type;
- u8 defrag;
- atomic_t rr_cur;
- struct list_head list;
- struct sock *arr[PACKET_FANOUT_MAX];
- spinlock_t lock;
- atomic_t sk_ref;
- struct packet_type prot_hook ____cacheline_aligned_in_smp;
-};
-
struct packet_skb_cb {
unsigned int origlen;
union {
@@ -334,11 +226,6 @@ struct packet_skb_cb {
(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
((x)->kactive_blk_num+1) : 0)
-static struct packet_sock *pkt_sk(struct sock *sk)
-{
- return (struct packet_sock *)sk;
-}
-
static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);
@@ -1079,7 +966,7 @@ static void *packet_current_rx_frame(struct packet_sock *po,
default:
WARN(1, "TPACKET version not supported\n");
BUG();
- return 0;
+ return NULL;
}
}
@@ -1243,7 +1130,8 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}
-static DEFINE_MUTEX(fanout_mutex);
+DEFINE_MUTEX(fanout_mutex);
+EXPORT_SYMBOL_GPL(fanout_mutex);
static LIST_HEAD(fanout_list);
static void __fanout_link(struct sock *sk, struct packet_sock *po)
@@ -1273,6 +1161,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
spin_unlock(&f->lock);
}
+bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
+{
+ if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout)
+ return true;
+
+ return false;
+}
+
static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{
struct packet_sock *po = pkt_sk(sk);
@@ -1325,6 +1221,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
match->prot_hook.af_packet_priv = match;
+ match->prot_hook.id_match = match_fanout_group;
dev_add_pack(&match->prot_hook);
list_add(&match->list, &fanout_list);
}
@@ -1355,9 +1252,9 @@ static void fanout_release(struct sock *sk)
if (!f)
return;
+ mutex_lock(&fanout_mutex);
po->fanout = NULL;
- mutex_lock(&fanout_mutex);
if (atomic_dec_and_test(&f->sk_ref)) {
list_del(&f->list);
dev_remove_pack(&f->prot_hook);
@@ -1936,7 +1833,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
if (likely(po->tx_ring.pg_vec)) {
ph = skb_shinfo(skb)->destructor_arg;
- BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
atomic_dec(&po->tx_ring.pending);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
diff --git a/net/packet/diag.c b/net/packet/diag.c
new file mode 100644
index 000000000000..bc33fbe8a5ef
--- /dev/null
+++ b/net/packet/diag.c
@@ -0,0 +1,242 @@
+#include <linux/module.h>
+#include <linux/sock_diag.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/packet_diag.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#include "internal.h"
+
+static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
+{
+ struct packet_diag_info pinfo;
+
+ pinfo.pdi_index = po->ifindex;
+ pinfo.pdi_version = po->tp_version;
+ pinfo.pdi_reserve = po->tp_reserve;
+ pinfo.pdi_copy_thresh = po->copy_thresh;
+ pinfo.pdi_tstamp = po->tp_tstamp;
+
+ pinfo.pdi_flags = 0;
+ if (po->running)
+ pinfo.pdi_flags |= PDI_RUNNING;
+ if (po->auxdata)
+ pinfo.pdi_flags |= PDI_AUXDATA;
+ if (po->origdev)
+ pinfo.pdi_flags |= PDI_ORIGDEV;
+ if (po->has_vnet_hdr)
+ pinfo.pdi_flags |= PDI_VNETHDR;
+ if (po->tp_loss)
+ pinfo.pdi_flags |= PDI_LOSS;
+
+ return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo);
+}
+
+static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb)
+{
+ struct nlattr *mca;
+ struct packet_mclist *ml;
+
+ mca = nla_nest_start(nlskb, PACKET_DIAG_MCLIST);
+ if (!mca)
+ return -EMSGSIZE;
+
+ rtnl_lock();
+ for (ml = po->mclist; ml; ml = ml->next) {
+ struct packet_diag_mclist *dml;
+
+ dml = nla_reserve_nohdr(nlskb, sizeof(*dml));
+ if (!dml) {
+ rtnl_unlock();
+ nla_nest_cancel(nlskb, mca);
+ return -EMSGSIZE;
+ }
+
+ dml->pdmc_index = ml->ifindex;
+ dml->pdmc_type = ml->type;
+ dml->pdmc_alen = ml->alen;
+ dml->pdmc_count = ml->count;
+ BUILD_BUG_ON(sizeof(dml->pdmc_addr) != sizeof(ml->addr));
+ memcpy(dml->pdmc_addr, ml->addr, sizeof(ml->addr));
+ }
+
+ rtnl_unlock();
+ nla_nest_end(nlskb, mca);
+
+ return 0;
+}
+
+static int pdiag_put_ring(struct packet_ring_buffer *ring, int ver, int nl_type,
+ struct sk_buff *nlskb)
+{
+ struct packet_diag_ring pdr;
+
+ if (!ring->pg_vec || ((ver > TPACKET_V2) &&
+ (nl_type == PACKET_DIAG_TX_RING)))
+ return 0;
+
+ pdr.pdr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
+ pdr.pdr_block_nr = ring->pg_vec_len;
+ pdr.pdr_frame_size = ring->frame_size;
+ pdr.pdr_frame_nr = ring->frame_max + 1;
+
+ if (ver > TPACKET_V2) {
+ pdr.pdr_retire_tmo = ring->prb_bdqc.retire_blk_tov;
+ pdr.pdr_sizeof_priv = ring->prb_bdqc.blk_sizeof_priv;
+ pdr.pdr_features = ring->prb_bdqc.feature_req_word;
+ } else {
+ pdr.pdr_retire_tmo = 0;
+ pdr.pdr_sizeof_priv = 0;
+ pdr.pdr_features = 0;
+ }
+
+ return nla_put(nlskb, nl_type, sizeof(pdr), &pdr);
+}
+
+static int pdiag_put_rings_cfg(struct packet_sock *po, struct sk_buff *skb)
+{
+ int ret;
+
+ mutex_lock(&po->pg_vec_lock);
+ ret = pdiag_put_ring(&po->rx_ring, po->tp_version,
+ PACKET_DIAG_RX_RING, skb);
+ if (!ret)
+ ret = pdiag_put_ring(&po->tx_ring, po->tp_version,
+ PACKET_DIAG_TX_RING, skb);
+ mutex_unlock(&po->pg_vec_lock);
+
+ return ret;
+}
+
+static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
+{
+ int ret = 0;
+
+ mutex_lock(&fanout_mutex);
+ if (po->fanout) {
+ u32 val;
+
+ val = (u32)po->fanout->id | ((u32)po->fanout->type << 16);
+ ret = nla_put_u32(nlskb, PACKET_DIAG_FANOUT, val);
+ }
+ mutex_unlock(&fanout_mutex);
+
+ return ret;
+}
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req,
+ u32 pid, u32 seq, u32 flags, int sk_ino)
+{
+ struct nlmsghdr *nlh;
+ struct packet_diag_msg *rp;
+ struct packet_sock *po = pkt_sk(sk);
+
+ nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ rp = nlmsg_data(nlh);
+ rp->pdiag_family = AF_PACKET;
+ rp->pdiag_type = sk->sk_type;
+ rp->pdiag_num = ntohs(po->num);
+ rp->pdiag_ino = sk_ino;
+ sock_diag_save_cookie(sk, rp->pdiag_cookie);
+
+ if ((req->pdiag_show & PACKET_SHOW_INFO) &&
+ pdiag_put_info(po, skb))
+ goto out_nlmsg_trim;
+
+ if ((req->pdiag_show & PACKET_SHOW_MCLIST) &&
+ pdiag_put_mclist(po, skb))
+ goto out_nlmsg_trim;
+
+ if ((req->pdiag_show & PACKET_SHOW_RING_CFG) &&
+ pdiag_put_rings_cfg(po, skb))
+ goto out_nlmsg_trim;
+
+ if ((req->pdiag_show & PACKET_SHOW_FANOUT) &&
+ pdiag_put_fanout(po, skb))
+ goto out_nlmsg_trim;
+
+ return nlmsg_end(skb, nlh);
+
+out_nlmsg_trim:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int num = 0, s_num = cb->args[0];
+ struct packet_diag_req *req;
+ struct net *net;
+ struct sock *sk;
+ struct hlist_node *node;
+
+ net = sock_net(skb->sk);
+ req = nlmsg_data(cb->nlh);
+
+ rcu_read_lock();
+ sk_for_each_rcu(sk, node, &net->packet.sklist) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (num < s_num)
+ goto next;
+
+ if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ sock_i_ino(sk)) < 0)
+ goto done;
+next:
+ num++;
+ }
+done:
+ rcu_read_unlock();
+ cb->args[0] = num;
+
+ return skb->len;
+}
+
+static int packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+ int hdrlen = sizeof(struct packet_diag_req);
+ struct net *net = sock_net(skb->sk);
+ struct packet_diag_req *req;
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ req = nlmsg_data(h);
+ /* Make it possible to support protocol filtering later */
+ if (req->sdiag_protocol)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = packet_diag_dump,
+ };
+ return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ } else
+ return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler packet_diag_handler = {
+ .family = AF_PACKET,
+ .dump = packet_diag_handler_dump,
+};
+
+static int __init packet_diag_init(void)
+{
+ return sock_diag_register(&packet_diag_handler);
+}
+
+static void __exit packet_diag_exit(void)
+{
+ sock_diag_unregister(&packet_diag_handler);
+}
+
+module_init(packet_diag_init);
+module_exit(packet_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */);
diff --git a/net/packet/internal.h b/net/packet/internal.h
new file mode 100644
index 000000000000..44945f6b7252
--- /dev/null
+++ b/net/packet/internal.h
@@ -0,0 +1,121 @@
+#ifndef __PACKET_INTERNAL_H__
+#define __PACKET_INTERNAL_H__
+
+struct packet_mclist {
+ struct packet_mclist *next;
+ int ifindex;
+ int count;
+ unsigned short type;
+ unsigned short alen;
+ unsigned char addr[MAX_ADDR_LEN];
+};
+
+/* kbdq - kernel block descriptor queue */
+struct tpacket_kbdq_core {
+ struct pgv *pkbdq;
+ unsigned int feature_req_word;
+ unsigned int hdrlen;
+ unsigned char reset_pending_on_curr_blk;
+ unsigned char delete_blk_timer;
+ unsigned short kactive_blk_num;
+ unsigned short blk_sizeof_priv;
+
+ /* last_kactive_blk_num:
+ * trick to see if user-space has caught up
+ * in order to avoid refreshing timer when every single pkt arrives.
+ */
+ unsigned short last_kactive_blk_num;
+
+ char *pkblk_start;
+ char *pkblk_end;
+ int kblk_size;
+ unsigned int knum_blocks;
+ uint64_t knxt_seq_num;
+ char *prev;
+ char *nxt_offset;
+ struct sk_buff *skb;
+
+ atomic_t blk_fill_in_prog;
+
+ /* Default is set to 8ms */
+#define DEFAULT_PRB_RETIRE_TOV (8)
+
+ unsigned short retire_blk_tov;
+ unsigned short version;
+ unsigned long tov_in_jiffies;
+
+ /* timer to retire an outstanding block */
+ struct timer_list retire_blk_timer;
+};
+
+struct pgv {
+ char *buffer;
+};
+
+struct packet_ring_buffer {
+ struct pgv *pg_vec;
+ unsigned int head;
+ unsigned int frames_per_block;
+ unsigned int frame_size;
+ unsigned int frame_max;
+
+ unsigned int pg_vec_order;
+ unsigned int pg_vec_pages;
+ unsigned int pg_vec_len;
+
+ struct tpacket_kbdq_core prb_bdqc;
+ atomic_t pending;
+};
+
+extern struct mutex fanout_mutex;
+#define PACKET_FANOUT_MAX 256
+
+struct packet_fanout {
+#ifdef CONFIG_NET_NS
+ struct net *net;
+#endif
+ unsigned int num_members;
+ u16 id;
+ u8 type;
+ u8 defrag;
+ atomic_t rr_cur;
+ struct list_head list;
+ struct sock *arr[PACKET_FANOUT_MAX];
+ spinlock_t lock;
+ atomic_t sk_ref;
+ struct packet_type prot_hook ____cacheline_aligned_in_smp;
+};
+
+struct packet_sock {
+ /* struct sock has to be the first member of packet_sock */
+ struct sock sk;
+ struct packet_fanout *fanout;
+ struct tpacket_stats stats;
+ union tpacket_stats_u stats_u;
+ struct packet_ring_buffer rx_ring;
+ struct packet_ring_buffer tx_ring;
+ int copy_thresh;
+ spinlock_t bind_lock;
+ struct mutex pg_vec_lock;
+ unsigned int running:1, /* prot_hook is attached*/
+ auxdata:1,
+ origdev:1,
+ has_vnet_hdr:1;
+ int ifindex; /* bound device */
+ __be16 num;
+ struct packet_mclist *mclist;
+ atomic_t mapped;
+ enum tpacket_versions tp_version;
+ unsigned int tp_hdrlen;
+ unsigned int tp_reserve;
+ unsigned int tp_loss:1;
+ unsigned int tp_tstamp;
+ struct packet_type prot_hook ____cacheline_aligned_in_smp;
+};
+
+static struct packet_sock *pkt_sk(struct sock *sk)
+{
+ return (struct packet_sock *)sk;
+}
+
+#endif
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index f10fb8256442..05d60859d8e3 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -67,6 +67,9 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
struct tcf_common *pc;
int ret = 0;
int err;
+#ifdef CONFIG_GACT_PROB
+ struct tc_gact_p *p_parm = NULL;
+#endif
if (nla == NULL)
return -EINVAL;
@@ -82,6 +85,12 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
#ifndef CONFIG_GACT_PROB
if (tb[TCA_GACT_PROB] != NULL)
return -EOPNOTSUPP;
+#else
+ if (tb[TCA_GACT_PROB]) {
+ p_parm = nla_data(tb[TCA_GACT_PROB]);
+ if (p_parm->ptype >= MAX_RAND)
+ return -EINVAL;
+ }
#endif
pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info);
@@ -103,8 +112,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
spin_lock_bh(&gact->tcf_lock);
gact->tcf_action = parm->action;
#ifdef CONFIG_GACT_PROB
- if (tb[TCA_GACT_PROB] != NULL) {
- struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]);
+ if (p_parm) {
gact->tcfg_paction = p_parm->paction;
gact->tcfg_pval = p_parm->pval;
gact->tcfg_ptype = p_parm->ptype;
@@ -133,7 +141,7 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&gact->tcf_lock);
#ifdef CONFIG_GACT_PROB
- if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL)
+ if (gact->tcfg_ptype)
action = gact_rand[gact->tcfg_ptype](gact);
else
action = gact->tcf_action;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 60e281ad0f07..58fb3c7aab9e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -185,7 +185,12 @@ err3:
err2:
kfree(tname);
err1:
- kfree(pc);
+ if (ret == ACT_P_CREATED) {
+ if (est)
+ gen_kill_estimator(&pc->tcfc_bstats,
+ &pc->tcfc_rate_est);
+ kfree_rcu(pc, tcfc_rcu);
+ }
return err;
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fe81cc18e9e0..9c0fd0c78814 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -200,13 +200,12 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
out:
if (err) {
m->tcf_qstats.overlimits++;
- /* should we be asking for packet to be dropped?
- * may make sense for redirect case only
- */
- retval = TC_ACT_SHOT;
- } else {
+ if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
+ retval = TC_ACT_SHOT;
+ else
+ retval = m->tcf_action;
+ } else
retval = m->tcf_action;
- }
spin_unlock(&m->tcf_lock);
return retval;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 26aa2f6ce257..45c53ab067a6 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -74,7 +74,10 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
p = to_pedit(pc);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
- kfree(pc);
+ if (est)
+ gen_kill_estimator(&pc->tcfc_bstats,
+ &pc->tcfc_rate_est);
+ kfree_rcu(pc, tcfc_rcu);
return -ENOMEM;
}
ret = ACT_P_CREATED;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 3922f2a2821b..3714f60f0b3c 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -131,7 +131,10 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
d = to_defact(pc);
ret = alloc_defdata(d, defdata);
if (ret < 0) {
- kfree(pc);
+ if (est)
+ gen_kill_estimator(&pc->tcfc_bstats,
+ &pc->tcfc_rate_est);
+ kfree_rcu(pc, tcfc_rcu);
return ret;
}
d->tcf_action = parm->action;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 511323e89cec..6c4d5fe53ce8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -324,24 +324,6 @@ void netif_carrier_off(struct net_device *dev)
}
EXPORT_SYMBOL(netif_carrier_off);
-/**
- * netif_notify_peers - notify network peers about existence of @dev
- * @dev: network device
- *
- * Generate traffic such that interested network peers are aware of
- * @dev, such as by generating a gratuitous ARP. This may be used when
- * a device wants to inform the rest of the network about some sort of
- * reconfiguration such as a failover event or virtual machine
- * migration.
- */
-void netif_notify_peers(struct net_device *dev)
-{
- rtnl_lock();
- call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
- rtnl_unlock();
-}
-EXPORT_SYMBOL(netif_notify_peers);
-
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
under all circumstances. It is difficult to invent anything faster or
cheaper.
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 9af01f3df18c..e4723d31fdd5 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -203,6 +203,34 @@ out:
return index;
}
+/* Length of the next packet (0 if the queue is empty). */
+static unsigned int qdisc_peek_len(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+
+ skb = sch->ops->peek(sch);
+ return skb ? qdisc_pkt_len(skb) : 0;
+}
+
+static void qfq_deactivate_class(struct qfq_sched *, struct qfq_class *);
+static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl,
+ unsigned int len);
+
+static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl,
+ u32 lmax, u32 inv_w, int delta_w)
+{
+ int i;
+
+ /* update qfq-specific data */
+ cl->lmax = lmax;
+ cl->inv_w = inv_w;
+ i = qfq_calc_index(cl->inv_w, cl->lmax);
+
+ cl->grp = &q->groups[i];
+
+ q->wsum += delta_w;
+}
+
static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
struct nlattr **tca, unsigned long *arg)
{
@@ -250,6 +278,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
lmax = 1UL << QFQ_MTU_SHIFT;
if (cl != NULL) {
+ bool need_reactivation = false;
+
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
qdisc_root_sleeping_lock(sch),
@@ -258,12 +288,29 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return err;
}
- if (inv_w != cl->inv_w) {
- sch_tree_lock(sch);
- q->wsum += delta_w;
- cl->inv_w = inv_w;
- sch_tree_unlock(sch);
+ if (lmax == cl->lmax && inv_w == cl->inv_w)
+ return 0; /* nothing to update */
+
+ i = qfq_calc_index(inv_w, lmax);
+ sch_tree_lock(sch);
+ if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
+ /*
+ * shift cl->F back, to not charge the
+ * class for the not-yet-served head
+ * packet
+ */
+ cl->F = cl->S;
+ /* remove class from its slot in the old group */
+ qfq_deactivate_class(q, cl);
+ need_reactivation = true;
}
+
+ qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
+
+ if (need_reactivation) /* activate in new group */
+ qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
+ sch_tree_unlock(sch);
+
return 0;
}
@@ -273,11 +320,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->refcnt = 1;
cl->common.classid = classid;
- cl->lmax = lmax;
- cl->inv_w = inv_w;
- i = qfq_calc_index(cl->inv_w, cl->lmax);
- cl->grp = &q->groups[i];
+ qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
&pfifo_qdisc_ops, classid);
@@ -294,7 +338,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return err;
}
}
- q->wsum += weight;
sch_tree_lock(sch);
qdisc_class_hash_insert(&q->clhash, &cl->common);
@@ -711,15 +754,6 @@ static void qfq_update_eligible(struct qfq_sched *q, u64 old_V)
}
}
-/* What is length of next packet in queue (0 if queue is empty) */
-static unsigned int qdisc_peek_len(struct Qdisc *sch)
-{
- struct sk_buff *skb;
-
- skb = sch->ops->peek(sch);
- return skb ? qdisc_pkt_len(skb) : 0;
-}
-
/*
* Updates the class, returns true if also the group needs to be updated.
*/
@@ -843,11 +877,8 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct qfq_sched *q = qdisc_priv(sch);
- struct qfq_group *grp;
struct qfq_class *cl;
int err;
- u64 roundedS;
- int s;
cl = qfq_classify(skb, sch, &err);
if (cl == NULL) {
@@ -876,11 +907,25 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return err;
/* If reach this point, queue q was idle */
- grp = cl->grp;
+ qfq_activate_class(q, cl, qdisc_pkt_len(skb));
+
+ return err;
+}
+
+/*
+ * Handle class switch from idle to backlogged.
+ */
+static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl,
+ unsigned int pkt_len)
+{
+ struct qfq_group *grp = cl->grp;
+ u64 roundedS;
+ int s;
+
qfq_update_start(q, cl);
/* compute new finish time and rounded start. */
- cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w;
+ cl->F = cl->S + (u64)pkt_len * cl->inv_w;
roundedS = qfq_round_down(cl->S, grp->slot_shift);
/*
@@ -917,8 +962,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
skip_update:
qfq_slot_insert(grp, cl, roundedS);
-
- return err;
}
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index ebaef3ed6065..b1ef3bc301a5 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -82,6 +82,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
sctp_scope_t scope,
gfp_t gfp)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp;
int i;
sctp_paramhdr_t *p;
@@ -124,7 +125,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
* socket values.
*/
asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt;
- asoc->pf_retrans = sctp_pf_retrans;
+ asoc->pf_retrans = net->sctp.pf_retrans;
asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial);
asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max);
@@ -175,7 +176,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0;
asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay;
asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
- min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ;
+ min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ;
/* Initializes the timers */
for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
@@ -281,7 +282,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
* and will revert old behavior.
*/
asoc->peer.asconf_capable = 0;
- if (sctp_addip_noauth)
+ if (net->sctp.addip_noauth)
asoc->peer.asconf_capable = 1;
asoc->asconf_addr_del_pending = NULL;
asoc->src_out_of_asoc_ok = 0;
@@ -641,6 +642,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
const gfp_t gfp,
const int peer_state)
{
+ struct net *net = sock_net(asoc->base.sk);
struct sctp_transport *peer;
struct sctp_sock *sp;
unsigned short port;
@@ -674,7 +676,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
return peer;
}
- peer = sctp_transport_new(addr, gfp);
+ peer = sctp_transport_new(net, addr, gfp);
if (!peer)
return NULL;
@@ -1089,13 +1091,15 @@ out:
/* Is this the association we are looking for? */
struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
+ struct net *net,
const union sctp_addr *laddr,
const union sctp_addr *paddr)
{
struct sctp_transport *transport;
if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) &&
- (htons(asoc->peer.port) == paddr->v4.sin_port)) {
+ (htons(asoc->peer.port) == paddr->v4.sin_port) &&
+ net_eq(sock_net(asoc->base.sk), net)) {
transport = sctp_assoc_lookup_paddr(asoc, paddr);
if (!transport)
goto out;
@@ -1116,6 +1120,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
struct sctp_association *asoc =
container_of(work, struct sctp_association,
base.inqueue.immediate);
+ struct net *net = sock_net(asoc->base.sk);
struct sctp_endpoint *ep;
struct sctp_chunk *chunk;
struct sctp_inq *inqueue;
@@ -1148,13 +1153,13 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
if (sctp_chunk_is_data(chunk))
asoc->peer.last_data_from = chunk->transport;
else
- SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_INCTRLCHUNKS);
if (chunk->transport)
chunk->transport->last_time_heard = jiffies;
/* Run through the state machine. */
- error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype,
+ error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype,
state, ep, asoc, chunk, GFP_ATOMIC);
/* Check to see if the association is freed in response to
@@ -1414,6 +1419,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
/* Should we send a SACK to update our peer? */
static inline int sctp_peer_needs_update(struct sctp_association *asoc)
{
+ struct net *net = sock_net(asoc->base.sk);
switch (asoc->state) {
case SCTP_STATE_ESTABLISHED:
case SCTP_STATE_SHUTDOWN_PENDING:
@@ -1421,7 +1427,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc)
case SCTP_STATE_SHUTDOWN_SENT:
if ((asoc->rwnd > asoc->a_rwnd) &&
((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32,
- (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift),
+ (asoc->base.sk->sk_rcvbuf >> net->sctp.rwnd_upd_shift),
asoc->pathmtu)))
return 1;
break;
@@ -1542,7 +1548,8 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
if (asoc->peer.ipv6_address)
flags |= SCTP_ADDR6_PEERSUPP;
- return sctp_bind_addr_copy(&asoc->base.bind_addr,
+ return sctp_bind_addr_copy(sock_net(asoc->base.sk),
+ &asoc->base.bind_addr,
&asoc->ep->base.bind_addr,
scope, gfp, flags);
}
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index bf812048cf6f..159b9bc5d633 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -392,13 +392,14 @@ nomem:
*/
int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
{
+ struct net *net = sock_net(asoc->base.sk);
struct sctp_auth_bytes *secret;
struct sctp_shared_key *ep_key;
/* If we don't support AUTH, or peer is not capable
* we don't need to do anything.
*/
- if (!sctp_auth_enable || !asoc->peer.auth_capable)
+ if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
return 0;
/* If the key_id is non-zero and we couldn't find an
@@ -445,11 +446,12 @@ struct sctp_shared_key *sctp_auth_get_shkey(
*/
int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
{
+ struct net *net = sock_net(ep->base.sk);
struct crypto_hash *tfm = NULL;
__u16 id;
/* if the transforms are already allocted, we are done */
- if (!sctp_auth_enable) {
+ if (!net->sctp.auth_enable) {
ep->auth_hmacs = NULL;
return 0;
}
@@ -674,7 +676,12 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
/* Check if peer requested that this chunk is authenticated */
int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
{
- if (!sctp_auth_enable || !asoc || !asoc->peer.auth_capable)
+ struct net *net;
+ if (!asoc)
+ return 0;
+
+ net = sock_net(asoc->base.sk);
+ if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
return 0;
return __sctp_auth_cid(chunk, asoc->peer.peer_chunks);
@@ -683,7 +690,12 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
/* Check if we requested that peer authenticate this chunk. */
int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
{
- if (!sctp_auth_enable || !asoc)
+ struct net *net;
+ if (!asoc)
+ return 0;
+
+ net = sock_net(asoc->base.sk);
+ if (!net->sctp.auth_enable)
return 0;
return __sctp_auth_cid(chunk,
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 4ece451c8d27..d886b3bf84f5 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -52,8 +52,8 @@
#include <net/sctp/sm.h>
/* Forward declarations for internal helpers. */
-static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *,
- sctp_scope_t scope, gfp_t gfp,
+static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *,
+ union sctp_addr *, sctp_scope_t scope, gfp_t gfp,
int flags);
static void sctp_bind_addr_clean(struct sctp_bind_addr *);
@@ -62,7 +62,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *);
/* Copy 'src' to 'dest' taking 'scope' into account. Omit addresses
* in 'src' which have a broader scope than 'scope'.
*/
-int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
+int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
const struct sctp_bind_addr *src,
sctp_scope_t scope, gfp_t gfp,
int flags)
@@ -75,7 +75,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
/* Extract the addresses which are relevant for this scope. */
list_for_each_entry(addr, &src->address_list, list) {
- error = sctp_copy_one_addr(dest, &addr->a, scope,
+ error = sctp_copy_one_addr(net, dest, &addr->a, scope,
gfp, flags);
if (error < 0)
goto out;
@@ -87,7 +87,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
*/
if (list_empty(&dest->address_list) && (SCTP_SCOPE_GLOBAL == scope)) {
list_for_each_entry(addr, &src->address_list, list) {
- error = sctp_copy_one_addr(dest, &addr->a,
+ error = sctp_copy_one_addr(net, dest, &addr->a,
SCTP_SCOPE_LINK, gfp,
flags);
if (error < 0)
@@ -448,7 +448,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
}
/* Copy out addresses from the global local address list. */
-static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
+static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
union sctp_addr *addr,
sctp_scope_t scope, gfp_t gfp,
int flags)
@@ -456,8 +456,8 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
int error = 0;
if (sctp_is_any(NULL, addr)) {
- error = sctp_copy_local_addr_list(dest, scope, gfp, flags);
- } else if (sctp_in_scope(addr, scope)) {
+ error = sctp_copy_local_addr_list(net, dest, scope, gfp, flags);
+ } else if (sctp_in_scope(net, addr, scope)) {
/* Now that the address is in scope, check to see if
* the address type is supported by local sock as
* well as the remote peer.
@@ -494,7 +494,7 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr)
}
/* Is 'addr' valid for 'scope'? */
-int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope)
+int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope)
{
sctp_scope_t addr_scope = sctp_scope(addr);
@@ -512,7 +512,7 @@ int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope)
* Address scoping can be selectively controlled via sysctl
* option
*/
- switch (sctp_scope_policy) {
+ switch (net->sctp.scope_policy) {
case SCTP_SCOPE_POLICY_DISABLE:
return 1;
case SCTP_SCOPE_POLICY_ENABLE:
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 6c8556459a75..7c2df9c33df3 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -257,7 +257,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
offset = 0;
if ((whole > 1) || (whole && over))
- SCTP_INC_STATS_USER(SCTP_MIB_FRAGUSRMSGS);
+ SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS);
/* Create chunks for all the full sized DATA chunks. */
for (i=0, len=first_len; i < whole; i++) {
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 68a385d7c3bd..1859e2bc83d1 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -65,6 +65,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
struct sock *sk,
gfp_t gfp)
{
+ struct net *net = sock_net(sk);
struct sctp_hmac_algo_param *auth_hmacs = NULL;
struct sctp_chunks_param *auth_chunks = NULL;
struct sctp_shared_key *null_key;
@@ -74,7 +75,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
if (!ep->digest)
return NULL;
- if (sctp_auth_enable) {
+ if (net->sctp.auth_enable) {
/* Allocate space for HMACS and CHUNKS authentication
* variables. There are arrays that we encode directly
* into parameters to make the rest of the operations easier.
@@ -106,7 +107,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
/* If the Add-IP functionality is enabled, we must
* authenticate, ASCONF and ASCONF-ACK chunks
*/
- if (sctp_addip_enable) {
+ if (net->sctp.addip_enable) {
auth_chunks->chunks[0] = SCTP_CID_ASCONF;
auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
auth_chunks->param_hdr.length =
@@ -140,14 +141,14 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
INIT_LIST_HEAD(&ep->asocs);
/* Use SCTP specific send buffer space queues. */
- ep->sndbuf_policy = sctp_sndbuf_policy;
+ ep->sndbuf_policy = net->sctp.sndbuf_policy;
sk->sk_data_ready = sctp_data_ready;
sk->sk_write_space = sctp_write_space;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
/* Get the receive buffer policy for this endpoint */
- ep->rcvbuf_policy = sctp_rcvbuf_policy;
+ ep->rcvbuf_policy = net->sctp.rcvbuf_policy;
/* Initialize the secret key used with cookie. */
get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE);
@@ -302,11 +303,13 @@ void sctp_endpoint_put(struct sctp_endpoint *ep)
/* Is this the endpoint we are looking for? */
struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
+ struct net *net,
const union sctp_addr *laddr)
{
struct sctp_endpoint *retval = NULL;
- if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) {
+ if ((htons(ep->base.bind_addr.port) == laddr->v4.sin_port) &&
+ net_eq(sock_net(ep->base.sk), net)) {
if (sctp_bind_addr_match(&ep->base.bind_addr, laddr,
sctp_sk(ep->base.sk)))
retval = ep;
@@ -343,7 +346,8 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
rport = ntohs(paddr->v4.sin_port);
- hash = sctp_assoc_hashfn(ep->base.bind_addr.port, rport);
+ hash = sctp_assoc_hashfn(sock_net(ep->base.sk), ep->base.bind_addr.port,
+ rport);
head = &sctp_assoc_hashtable[hash];
read_lock(&head->lock);
sctp_for_each_hentry(epb, node, &head->chain) {
@@ -386,13 +390,14 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
{
struct sctp_sockaddr_entry *addr;
struct sctp_bind_addr *bp;
+ struct net *net = sock_net(ep->base.sk);
bp = &ep->base.bind_addr;
/* This function is called with the socket lock held,
* so the address_list can not change.
*/
list_for_each_entry(addr, &bp->address_list, list) {
- if (sctp_has_association(&addr->a, paddr))
+ if (sctp_has_association(net, &addr->a, paddr))
return 1;
}
@@ -409,6 +414,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
base.inqueue.immediate);
struct sctp_association *asoc;
struct sock *sk;
+ struct net *net;
struct sctp_transport *transport;
struct sctp_chunk *chunk;
struct sctp_inq *inqueue;
@@ -423,6 +429,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
asoc = NULL;
inqueue = &ep->base.inqueue;
sk = ep->base.sk;
+ net = sock_net(sk);
while (NULL != (chunk = sctp_inq_pop(inqueue))) {
subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type);
@@ -474,12 +481,12 @@ normal:
if (asoc && sctp_chunk_is_data(chunk))
asoc->peer.last_data_from = chunk->transport;
else
- SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS);
+ SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_INCTRLCHUNKS);
if (chunk->transport)
chunk->transport->last_time_heard = jiffies;
- error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype, state,
+ error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state,
ep, asoc, chunk, GFP_ATOMIC);
if (error && chunk)
diff --git a/net/sctp/input.c b/net/sctp/input.c
index e64d5210ed13..25dfe7380479 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -66,12 +66,15 @@
/* Forward declarations for internal helpers. */
static int sctp_rcv_ootb(struct sk_buff *);
-static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_lookup(struct net *net,
+ struct sk_buff *skb,
const union sctp_addr *laddr,
const union sctp_addr *paddr,
struct sctp_transport **transportp);
-static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr);
+static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
+ const union sctp_addr *laddr);
static struct sctp_association *__sctp_lookup_association(
+ struct net *net,
const union sctp_addr *local,
const union sctp_addr *peer,
struct sctp_transport **pt);
@@ -80,7 +83,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
/* Calculate the SCTP checksum of an SCTP packet. */
-static inline int sctp_rcv_checksum(struct sk_buff *skb)
+static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb)
{
struct sctphdr *sh = sctp_hdr(skb);
__le32 cmp = sh->checksum;
@@ -96,7 +99,7 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb)
if (val != cmp) {
/* CRC failure, dump it. */
- SCTP_INC_STATS_BH(SCTP_MIB_CHECKSUMERRORS);
+ SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS);
return -1;
}
return 0;
@@ -129,11 +132,12 @@ int sctp_rcv(struct sk_buff *skb)
union sctp_addr dest;
int family;
struct sctp_af *af;
+ struct net *net = dev_net(skb->dev);
if (skb->pkt_type!=PACKET_HOST)
goto discard_it;
- SCTP_INC_STATS_BH(SCTP_MIB_INSCTPPACKS);
+ SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS);
if (skb_linearize(skb))
goto discard_it;
@@ -145,7 +149,7 @@ int sctp_rcv(struct sk_buff *skb)
if (skb->len < sizeof(struct sctphdr))
goto discard_it;
if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) &&
- sctp_rcv_checksum(skb) < 0)
+ sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
skb_pull(skb, sizeof(struct sctphdr));
@@ -178,10 +182,10 @@ int sctp_rcv(struct sk_buff *skb)
!af->addr_valid(&dest, NULL, skb))
goto discard_it;
- asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport);
+ asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport);
if (!asoc)
- ep = __sctp_rcv_lookup_endpoint(&dest);
+ ep = __sctp_rcv_lookup_endpoint(net, &dest);
/* Retrieve the common input handling substructure. */
rcvr = asoc ? &asoc->base : &ep->base;
@@ -200,7 +204,7 @@ int sctp_rcv(struct sk_buff *skb)
sctp_endpoint_put(ep);
ep = NULL;
}
- sk = sctp_get_ctl_sock();
+ sk = net->sctp.ctl_sock;
ep = sctp_sk(sk)->ep;
sctp_endpoint_hold(ep);
rcvr = &ep->base;
@@ -216,7 +220,7 @@ int sctp_rcv(struct sk_buff *skb)
*/
if (!asoc) {
if (sctp_rcv_ootb(skb)) {
- SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES);
+ SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES);
goto discard_release;
}
}
@@ -272,9 +276,9 @@ int sctp_rcv(struct sk_buff *skb)
skb = NULL; /* sctp_chunk_free already freed the skb */
goto discard_release;
}
- SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG);
+ SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG);
} else {
- SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ);
+ SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ);
sctp_inq_push(&chunk->rcvr->inqueue, chunk);
}
@@ -289,7 +293,7 @@ int sctp_rcv(struct sk_buff *skb)
return 0;
discard_it:
- SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS);
+ SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS);
kfree_skb(skb);
return 0;
@@ -462,11 +466,13 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
}
} else {
+ struct net *net = sock_net(sk);
+
if (timer_pending(&t->proto_unreach_timer) &&
del_timer(&t->proto_unreach_timer))
sctp_association_put(asoc);
- sctp_do_sm(SCTP_EVENT_T_OTHER,
+ sctp_do_sm(net, SCTP_EVENT_T_OTHER,
SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
asoc->state, asoc->ep, asoc, t,
GFP_ATOMIC);
@@ -474,7 +480,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
}
/* Common lookup code for icmp/icmpv6 error handler. */
-struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
+struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
struct sctphdr *sctphdr,
struct sctp_association **app,
struct sctp_transport **tpp)
@@ -503,7 +509,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
/* Look for an association that matches the incoming ICMP error
* packet.
*/
- asoc = __sctp_lookup_association(&saddr, &daddr, &transport);
+ asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport);
if (!asoc)
return NULL;
@@ -539,7 +545,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
* servers this needs to be solved differently.
*/
if (sock_owned_by_user(sk))
- NET_INC_STATS_BH(&init_net, LINUX_MIB_LOCKDROPPEDICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
*app = asoc;
*tpp = transport;
@@ -586,9 +592,10 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
struct inet_sock *inet;
sk_buff_data_t saveip, savesctp;
int err;
+ struct net *net = dev_net(skb->dev);
if (skb->len < ihlen + 8) {
- ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
}
@@ -597,12 +604,12 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
savesctp = skb->transport_header;
skb_reset_network_header(skb);
skb_set_transport_header(skb, ihlen);
- sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
+ sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
/* Put back, the original values. */
skb->network_header = saveip;
skb->transport_header = savesctp;
if (!sk) {
- ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
}
/* Warning: The sock lock is held. Remember to call
@@ -723,12 +730,13 @@ discard:
/* Insert endpoint into the hash table. */
static void __sctp_hash_endpoint(struct sctp_endpoint *ep)
{
+ struct net *net = sock_net(ep->base.sk);
struct sctp_ep_common *epb;
struct sctp_hashbucket *head;
epb = &ep->base;
- epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);
+ epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
head = &sctp_ep_hashtable[epb->hashent];
sctp_write_lock(&head->lock);
@@ -747,12 +755,13 @@ void sctp_hash_endpoint(struct sctp_endpoint *ep)
/* Remove endpoint from the hash table. */
static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
{
+ struct net *net = sock_net(ep->base.sk);
struct sctp_hashbucket *head;
struct sctp_ep_common *epb;
epb = &ep->base;
- epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);
+ epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
head = &sctp_ep_hashtable[epb->hashent];
@@ -770,7 +779,8 @@ void sctp_unhash_endpoint(struct sctp_endpoint *ep)
}
/* Look up an endpoint. */
-static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr)
+static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
+ const union sctp_addr *laddr)
{
struct sctp_hashbucket *head;
struct sctp_ep_common *epb;
@@ -778,16 +788,16 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l
struct hlist_node *node;
int hash;
- hash = sctp_ep_hashfn(ntohs(laddr->v4.sin_port));
+ hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port));
head = &sctp_ep_hashtable[hash];
read_lock(&head->lock);
sctp_for_each_hentry(epb, node, &head->chain) {
ep = sctp_ep(epb);
- if (sctp_endpoint_is_match(ep, laddr))
+ if (sctp_endpoint_is_match(ep, net, laddr))
goto hit;
}
- ep = sctp_sk((sctp_get_ctl_sock()))->ep;
+ ep = sctp_sk(net->sctp.ctl_sock)->ep;
hit:
sctp_endpoint_hold(ep);
@@ -798,13 +808,15 @@ hit:
/* Insert association into the hash table. */
static void __sctp_hash_established(struct sctp_association *asoc)
{
+ struct net *net = sock_net(asoc->base.sk);
struct sctp_ep_common *epb;
struct sctp_hashbucket *head;
epb = &asoc->base;
/* Calculate which chain this entry will belong to. */
- epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port, asoc->peer.port);
+ epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port,
+ asoc->peer.port);
head = &sctp_assoc_hashtable[epb->hashent];
@@ -827,12 +839,13 @@ void sctp_hash_established(struct sctp_association *asoc)
/* Remove association from the hash table. */
static void __sctp_unhash_established(struct sctp_association *asoc)
{
+ struct net *net = sock_net(asoc->base.sk);
struct sctp_hashbucket *head;
struct sctp_ep_common *epb;
epb = &asoc->base;
- epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port,
+ epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port,
asoc->peer.port);
head = &sctp_assoc_hashtable[epb->hashent];
@@ -855,6 +868,7 @@ void sctp_unhash_established(struct sctp_association *asoc)
/* Look up an association. */
static struct sctp_association *__sctp_lookup_association(
+ struct net *net,
const union sctp_addr *local,
const union sctp_addr *peer,
struct sctp_transport **pt)
@@ -869,12 +883,13 @@ static struct sctp_association *__sctp_lookup_association(
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
- hash = sctp_assoc_hashfn(ntohs(local->v4.sin_port), ntohs(peer->v4.sin_port));
+ hash = sctp_assoc_hashfn(net, ntohs(local->v4.sin_port),
+ ntohs(peer->v4.sin_port));
head = &sctp_assoc_hashtable[hash];
read_lock(&head->lock);
sctp_for_each_hentry(epb, node, &head->chain) {
asoc = sctp_assoc(epb);
- transport = sctp_assoc_is_match(asoc, local, peer);
+ transport = sctp_assoc_is_match(asoc, net, local, peer);
if (transport)
goto hit;
}
@@ -892,27 +907,29 @@ hit:
/* Look up an association. BH-safe. */
SCTP_STATIC
-struct sctp_association *sctp_lookup_association(const union sctp_addr *laddr,
+struct sctp_association *sctp_lookup_association(struct net *net,
+ const union sctp_addr *laddr,
const union sctp_addr *paddr,
struct sctp_transport **transportp)
{
struct sctp_association *asoc;
sctp_local_bh_disable();
- asoc = __sctp_lookup_association(laddr, paddr, transportp);
+ asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
sctp_local_bh_enable();
return asoc;
}
/* Is there an association matching the given local and peer addresses? */
-int sctp_has_association(const union sctp_addr *laddr,
+int sctp_has_association(struct net *net,
+ const union sctp_addr *laddr,
const union sctp_addr *paddr)
{
struct sctp_association *asoc;
struct sctp_transport *transport;
- if ((asoc = sctp_lookup_association(laddr, paddr, &transport))) {
+ if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) {
sctp_association_put(asoc);
return 1;
}
@@ -938,7 +955,8 @@ int sctp_has_association(const union sctp_addr *laddr,
* in certain circumstances.
*
*/
-static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
+ struct sk_buff *skb,
const union sctp_addr *laddr, struct sctp_transport **transportp)
{
struct sctp_association *asoc;
@@ -978,7 +996,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
af->from_addr_param(paddr, params.addr, sh->source, 0);
- asoc = __sctp_lookup_association(laddr, paddr, &transport);
+ asoc = __sctp_lookup_association(net, laddr, paddr, &transport);
if (asoc)
return asoc;
}
@@ -1001,6 +1019,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
* subsequent ASCONF Chunks. If found, proceed to rule D4.
*/
static struct sctp_association *__sctp_rcv_asconf_lookup(
+ struct net *net,
sctp_chunkhdr_t *ch,
const union sctp_addr *laddr,
__be16 peer_port,
@@ -1020,7 +1039,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
af->from_addr_param(&paddr, param, peer_port, 0);
- return __sctp_lookup_association(laddr, &paddr, transportp);
+ return __sctp_lookup_association(net, laddr, &paddr, transportp);
}
@@ -1033,7 +1052,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
* This means that any chunks that can help us identify the association need
* to be looked at to find this association.
*/
-static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
+ struct sk_buff *skb,
const union sctp_addr *laddr,
struct sctp_transport **transportp)
{
@@ -1074,8 +1094,9 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
break;
case SCTP_CID_ASCONF:
- if (have_auth || sctp_addip_noauth)
- asoc = __sctp_rcv_asconf_lookup(ch, laddr,
+ if (have_auth || net->sctp.addip_noauth)
+ asoc = __sctp_rcv_asconf_lookup(
+ net, ch, laddr,
sctp_hdr(skb)->source,
transportp);
default:
@@ -1098,7 +1119,8 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
* include looking inside of INIT/INIT-ACK chunks or after the AUTH
* chunks.
*/
-static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
+ struct sk_buff *skb,
const union sctp_addr *laddr,
struct sctp_transport **transportp)
{
@@ -1118,11 +1140,11 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
switch (ch->type) {
case SCTP_CID_INIT:
case SCTP_CID_INIT_ACK:
- return __sctp_rcv_init_lookup(skb, laddr, transportp);
+ return __sctp_rcv_init_lookup(net, skb, laddr, transportp);
break;
default:
- return __sctp_rcv_walk_lookup(skb, laddr, transportp);
+ return __sctp_rcv_walk_lookup(net, skb, laddr, transportp);
break;
}
@@ -1131,21 +1153,22 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
}
/* Lookup an association for an inbound skb. */
-static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_lookup(struct net *net,
+ struct sk_buff *skb,
const union sctp_addr *paddr,
const union sctp_addr *laddr,
struct sctp_transport **transportp)
{
struct sctp_association *asoc;
- asoc = __sctp_lookup_association(laddr, paddr, transportp);
+ asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
/* Further lookup for INIT/INIT-ACK packets.
* SCTP Implementors Guide, 2.18 Handling of address
* parameters within the INIT or INIT-ACK.
*/
if (!asoc)
- asoc = __sctp_rcv_lookup_harder(skb, laddr, transportp);
+ asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp);
return asoc;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ed7139ea7978..ea14cb445295 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -99,6 +99,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
struct sctp_sockaddr_entry *addr = NULL;
struct sctp_sockaddr_entry *temp;
+ struct net *net = dev_net(ifa->idev->dev);
int found = 0;
switch (ev) {
@@ -110,27 +111,27 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
addr->a.v6.sin6_addr = ifa->addr;
addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
addr->valid = 1;
- spin_lock_bh(&sctp_local_addr_lock);
- list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
- sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW);
- spin_unlock_bh(&sctp_local_addr_lock);
+ spin_lock_bh(&net->sctp.local_addr_lock);
+ list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list);
+ sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW);
+ spin_unlock_bh(&net->sctp.local_addr_lock);
}
break;
case NETDEV_DOWN:
- spin_lock_bh(&sctp_local_addr_lock);
+ spin_lock_bh(&net->sctp.local_addr_lock);
list_for_each_entry_safe(addr, temp,
- &sctp_local_addr_list, list) {
+ &net->sctp.local_addr_list, list) {
if (addr->a.sa.sa_family == AF_INET6 &&
ipv6_addr_equal(&addr->a.v6.sin6_addr,
&ifa->addr)) {
- sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL);
+ sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
found = 1;
addr->valid = 0;
list_del_rcu(&addr->list);
break;
}
}
- spin_unlock_bh(&sctp_local_addr_lock);
+ spin_unlock_bh(&net->sctp.local_addr_lock);
if (found)
kfree_rcu(addr, rcu);
break;
@@ -154,6 +155,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct ipv6_pinfo *np;
sk_buff_data_t saveip, savesctp;
int err;
+ struct net *net = dev_net(skb->dev);
idev = in6_dev_get(skb->dev);
@@ -162,12 +164,12 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
savesctp = skb->transport_header;
skb_reset_network_header(skb);
skb_set_transport_header(skb, offset);
- sk = sctp_err_lookup(AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
+ sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
/* Put back, the original pointers. */
skb->network_header = saveip;
skb->transport_header = savesctp;
if (!sk) {
- ICMP6_INC_STATS_BH(dev_net(skb->dev), idev, ICMP6_MIB_INERRORS);
+ ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_INERRORS);
goto out;
}
@@ -241,7 +243,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
__func__, skb, skb->len,
&fl6.saddr, &fl6.daddr);
- SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
+ SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
if (!(transport->param_flags & SPP_PMTUD_ENABLE))
skb->local_df = 1;
@@ -580,7 +582,7 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
if (!(type & IPV6_ADDR_UNICAST))
return 0;
- return ipv6_chk_addr(&init_net, in6, NULL, 0);
+ return ipv6_chk_addr(sock_net(&sp->inet.sk), in6, NULL, 0);
}
/* This function checks if the address is a valid address to be used for
@@ -857,14 +859,14 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
struct net_device *dev;
if (type & IPV6_ADDR_LINKLOCAL) {
+ struct net *net;
if (!addr->v6.sin6_scope_id)
return 0;
+ net = sock_net(&opt->inet.sk);
rcu_read_lock();
- dev = dev_get_by_index_rcu(&init_net,
- addr->v6.sin6_scope_id);
+ dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id);
if (!dev ||
- !ipv6_chk_addr(&init_net, &addr->v6.sin6_addr,
- dev, 0)) {
+ !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) {
rcu_read_unlock();
return 0;
}
@@ -897,7 +899,7 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
if (!addr->v6.sin6_scope_id)
return 0;
rcu_read_lock();
- dev = dev_get_by_index_rcu(&init_net,
+ dev = dev_get_by_index_rcu(sock_net(&opt->inet.sk),
addr->v6.sin6_scope_id);
rcu_read_unlock();
if (!dev)
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index 8ef8e7d9eb61..fe012c44f8df 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -129,20 +129,20 @@ static const struct file_operations sctp_objcnt_ops = {
};
/* Initialize the objcount in the proc filesystem. */
-void sctp_dbg_objcnt_init(void)
+void sctp_dbg_objcnt_init(struct net *net)
{
struct proc_dir_entry *ent;
ent = proc_create("sctp_dbg_objcnt", 0,
- proc_net_sctp, &sctp_objcnt_ops);
+ net->sctp.proc_net_sctp, &sctp_objcnt_ops);
if (!ent)
pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
}
/* Cleanup the objcount entry in the proc filesystem. */
-void sctp_dbg_objcnt_exit(void)
+void sctp_dbg_objcnt_exit(struct net *net)
{
- remove_proc_entry("sctp_dbg_objcnt", proc_net_sctp);
+ remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp);
}
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 838e18b4d7ea..0c6359bb973c 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -597,7 +597,7 @@ out:
return err;
no_route:
kfree_skb(nskb);
- IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
/* FIXME: Returning the 'err' will effect all the associations
* associated with a socket, although only one of the paths of the
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index e7aa177c9522..072bf6ae3c26 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -299,6 +299,7 @@ void sctp_outq_free(struct sctp_outq *q)
/* Put a new chunk in an sctp_outq. */
int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
{
+ struct net *net = sock_net(q->asoc->base.sk);
int error = 0;
SCTP_DEBUG_PRINTK("sctp_outq_tail(%p, %p[%s])\n",
@@ -337,15 +338,15 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
sctp_outq_tail_data(q, chunk);
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
- SCTP_INC_STATS(SCTP_MIB_OUTUNORDERCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
else
- SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
q->empty = 0;
break;
}
} else {
list_add_tail(&chunk->list, &q->control_chunk_list);
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
}
if (error < 0)
@@ -478,11 +479,12 @@ void sctp_retransmit_mark(struct sctp_outq *q,
void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
sctp_retransmit_reason_t reason)
{
+ struct net *net = sock_net(q->asoc->base.sk);
int error = 0;
switch(reason) {
case SCTP_RTXR_T3_RTX:
- SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS);
+ SCTP_INC_STATS(net, SCTP_MIB_T3_RETRANSMITS);
sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX);
/* Update the retran path if the T3-rtx timer has expired for
* the current retran path.
@@ -493,15 +495,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
transport->asoc->unack_data;
break;
case SCTP_RTXR_FAST_RTX:
- SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
+ SCTP_INC_STATS(net, SCTP_MIB_FAST_RETRANSMITS);
sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
q->fast_rtx = 1;
break;
case SCTP_RTXR_PMTUD:
- SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
+ SCTP_INC_STATS(net, SCTP_MIB_PMTUD_RETRANSMITS);
break;
case SCTP_RTXR_T1_RTX:
- SCTP_INC_STATS(SCTP_MIB_T1_RETRANSMITS);
+ SCTP_INC_STATS(net, SCTP_MIB_T1_RETRANSMITS);
transport->asoc->init_retries++;
break;
default:
@@ -1914,6 +1916,6 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
if (ftsn_chunk) {
list_add_tail(&ftsn_chunk->list, &q->control_chunk_list);
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS);
}
}
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index 534c7eae9d15..794bb14decde 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -57,7 +57,7 @@
#define DECLARE_PRIMITIVE(name) \
/* This is called in the code as sctp_primitive_ ## name. */ \
-int sctp_primitive_ ## name(struct sctp_association *asoc, \
+int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \
void *arg) { \
int error = 0; \
sctp_event_t event_type; sctp_subtype_t subtype; \
@@ -69,7 +69,7 @@ int sctp_primitive_ ## name(struct sctp_association *asoc, \
state = asoc ? asoc->state : SCTP_STATE_CLOSED; \
ep = asoc ? asoc->ep : NULL; \
\
- error = sctp_do_sm(event_type, subtype, state, ep, asoc, \
+ error = sctp_do_sm(net, event_type, subtype, state, ep, asoc, \
arg, GFP_KERNEL); \
return error; \
}
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 1e2eee88c3ea..d9cb2ab149fe 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -80,11 +80,12 @@ static const struct snmp_mib sctp_snmp_list[] = {
/* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */
static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
{
+ struct net *net = seq->private;
int i;
for (i = 0; sctp_snmp_list[i].name != NULL; i++)
seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
- snmp_fold_field((void __percpu **)sctp_statistics,
+ snmp_fold_field((void __percpu **)net->sctp.sctp_statistics,
sctp_snmp_list[i].entry));
return 0;
@@ -93,7 +94,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
/* Initialize the seq file operations for 'snmp' object. */
static int sctp_snmp_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, sctp_snmp_seq_show, NULL);
+ return single_open_net(inode, file, sctp_snmp_seq_show);
}
static const struct file_operations sctp_snmp_seq_fops = {
@@ -105,11 +106,12 @@ static const struct file_operations sctp_snmp_seq_fops = {
};
/* Set up the proc fs entry for 'snmp' object. */
-int __init sctp_snmp_proc_init(void)
+int __net_init sctp_snmp_proc_init(struct net *net)
{
struct proc_dir_entry *p;
- p = proc_create("snmp", S_IRUGO, proc_net_sctp, &sctp_snmp_seq_fops);
+ p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_snmp_seq_fops);
if (!p)
return -ENOMEM;
@@ -117,9 +119,9 @@ int __init sctp_snmp_proc_init(void)
}
/* Cleanup the proc fs entry for 'snmp' object. */
-void sctp_snmp_proc_exit(void)
+void sctp_snmp_proc_exit(struct net *net)
{
- remove_proc_entry("snmp", proc_net_sctp);
+ remove_proc_entry("snmp", net->sctp.proc_net_sctp);
}
/* Dump local addresses of an association/endpoint. */
@@ -213,6 +215,8 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
sctp_for_each_hentry(epb, node, &head->chain) {
ep = sctp_ep(epb);
sk = epb->sk;
+ if (!net_eq(sock_net(sk), seq_file_net(seq)))
+ continue;
seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk,
sctp_sk(sk)->type, sk->sk_state, hash,
epb->bind_addr.port,
@@ -238,7 +242,8 @@ static const struct seq_operations sctp_eps_ops = {
/* Initialize the seq file operations for 'eps' object. */
static int sctp_eps_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &sctp_eps_ops);
+ return seq_open_net(inode, file, &sctp_eps_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations sctp_eps_seq_fops = {
@@ -249,11 +254,12 @@ static const struct file_operations sctp_eps_seq_fops = {
};
/* Set up the proc fs entry for 'eps' object. */
-int __init sctp_eps_proc_init(void)
+int __net_init sctp_eps_proc_init(struct net *net)
{
struct proc_dir_entry *p;
- p = proc_create("eps", S_IRUGO, proc_net_sctp, &sctp_eps_seq_fops);
+ p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_eps_seq_fops);
if (!p)
return -ENOMEM;
@@ -261,9 +267,9 @@ int __init sctp_eps_proc_init(void)
}
/* Cleanup the proc fs entry for 'eps' object. */
-void sctp_eps_proc_exit(void)
+void sctp_eps_proc_exit(struct net *net)
{
- remove_proc_entry("eps", proc_net_sctp);
+ remove_proc_entry("eps", net->sctp.proc_net_sctp);
}
@@ -316,6 +322,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
sctp_for_each_hentry(epb, node, &head->chain) {
assoc = sctp_assoc(epb);
sk = epb->sk;
+ if (!net_eq(sock_net(sk), seq_file_net(seq)))
+ continue;
seq_printf(seq,
"%8pK %8pK %-3d %-3d %-2d %-4d "
"%4d %8d %8d %7d %5lu %-5d %5d ",
@@ -354,7 +362,8 @@ static const struct seq_operations sctp_assoc_ops = {
/* Initialize the seq file operations for 'assocs' object. */
static int sctp_assocs_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &sctp_assoc_ops);
+ return seq_open_net(inode, file, &sctp_assoc_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations sctp_assocs_seq_fops = {
@@ -365,11 +374,11 @@ static const struct file_operations sctp_assocs_seq_fops = {
};
/* Set up the proc fs entry for 'assocs' object. */
-int __init sctp_assocs_proc_init(void)
+int __net_init sctp_assocs_proc_init(struct net *net)
{
struct proc_dir_entry *p;
- p = proc_create("assocs", S_IRUGO, proc_net_sctp,
+ p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
&sctp_assocs_seq_fops);
if (!p)
return -ENOMEM;
@@ -378,9 +387,9 @@ int __init sctp_assocs_proc_init(void)
}
/* Cleanup the proc fs entry for 'assocs' object. */
-void sctp_assocs_proc_exit(void)
+void sctp_assocs_proc_exit(struct net *net)
{
- remove_proc_entry("assocs", proc_net_sctp);
+ remove_proc_entry("assocs", net->sctp.proc_net_sctp);
}
static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos)
@@ -426,6 +435,8 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
sctp_local_bh_disable();
read_lock(&head->lock);
sctp_for_each_hentry(epb, node, &head->chain) {
+ if (!net_eq(sock_net(epb->sk), seq_file_net(seq)))
+ continue;
assoc = sctp_assoc(epb);
list_for_each_entry(tsp, &assoc->peer.transport_addr_list,
transports) {
@@ -489,14 +500,15 @@ static const struct seq_operations sctp_remaddr_ops = {
};
/* Cleanup the proc fs entry for 'remaddr' object. */
-void sctp_remaddr_proc_exit(void)
+void sctp_remaddr_proc_exit(struct net *net)
{
- remove_proc_entry("remaddr", proc_net_sctp);
+ remove_proc_entry("remaddr", net->sctp.proc_net_sctp);
}
static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &sctp_remaddr_ops);
+ return seq_open_net(inode, file, &sctp_remaddr_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations sctp_remaddr_seq_fops = {
@@ -506,11 +518,12 @@ static const struct file_operations sctp_remaddr_seq_fops = {
.release = seq_release,
};
-int __init sctp_remaddr_proc_init(void)
+int __net_init sctp_remaddr_proc_init(struct net *net)
{
struct proc_dir_entry *p;
- p = proc_create("remaddr", S_IRUGO, proc_net_sctp, &sctp_remaddr_seq_fops);
+ p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_remaddr_seq_fops);
if (!p)
return -ENOMEM;
return 0;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1f89c4e69645..2d518425d598 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -69,21 +69,10 @@
/* Global data structures. */
struct sctp_globals sctp_globals __read_mostly;
-DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly;
-
-#ifdef CONFIG_PROC_FS
-struct proc_dir_entry *proc_net_sctp;
-#endif
struct idr sctp_assocs_id;
DEFINE_SPINLOCK(sctp_assocs_id_lock);
-/* This is the global socket data structure used for responding to
- * the Out-of-the-blue (OOTB) packets. A control sock will be created
- * for this socket at the initialization time.
- */
-static struct sock *sctp_ctl_sock;
-
static struct sctp_pf *sctp_pf_inet6_specific;
static struct sctp_pf *sctp_pf_inet_specific;
static struct sctp_af *sctp_af_v4_specific;
@@ -96,74 +85,54 @@ long sysctl_sctp_mem[3];
int sysctl_sctp_rmem[3];
int sysctl_sctp_wmem[3];
-/* Return the address of the control sock. */
-struct sock *sctp_get_ctl_sock(void)
-{
- return sctp_ctl_sock;
-}
-
/* Set up the proc fs entry for the SCTP protocol. */
-static __init int sctp_proc_init(void)
+static __net_init int sctp_proc_init(struct net *net)
{
- if (percpu_counter_init(&sctp_sockets_allocated, 0))
- goto out_nomem;
#ifdef CONFIG_PROC_FS
- if (!proc_net_sctp) {
- proc_net_sctp = proc_mkdir("sctp", init_net.proc_net);
- if (!proc_net_sctp)
- goto out_free_percpu;
- }
-
- if (sctp_snmp_proc_init())
+ net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
+ if (!net->sctp.proc_net_sctp)
+ goto out_proc_net_sctp;
+ if (sctp_snmp_proc_init(net))
goto out_snmp_proc_init;
- if (sctp_eps_proc_init())
+ if (sctp_eps_proc_init(net))
goto out_eps_proc_init;
- if (sctp_assocs_proc_init())
+ if (sctp_assocs_proc_init(net))
goto out_assocs_proc_init;
- if (sctp_remaddr_proc_init())
+ if (sctp_remaddr_proc_init(net))
goto out_remaddr_proc_init;
return 0;
out_remaddr_proc_init:
- sctp_assocs_proc_exit();
+ sctp_assocs_proc_exit(net);
out_assocs_proc_init:
- sctp_eps_proc_exit();
+ sctp_eps_proc_exit(net);
out_eps_proc_init:
- sctp_snmp_proc_exit();
+ sctp_snmp_proc_exit(net);
out_snmp_proc_init:
- if (proc_net_sctp) {
- proc_net_sctp = NULL;
- remove_proc_entry("sctp", init_net.proc_net);
- }
-out_free_percpu:
- percpu_counter_destroy(&sctp_sockets_allocated);
-#else
- return 0;
-#endif /* CONFIG_PROC_FS */
-
-out_nomem:
+ remove_proc_entry("sctp", net->proc_net);
+ net->sctp.proc_net_sctp = NULL;
+out_proc_net_sctp:
return -ENOMEM;
+#endif /* CONFIG_PROC_FS */
+ return 0;
}
/* Clean up the proc fs entry for the SCTP protocol.
* Note: Do not make this __exit as it is used in the init error
* path.
*/
-static void sctp_proc_exit(void)
+static void sctp_proc_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
- sctp_snmp_proc_exit();
- sctp_eps_proc_exit();
- sctp_assocs_proc_exit();
- sctp_remaddr_proc_exit();
-
- if (proc_net_sctp) {
- proc_net_sctp = NULL;
- remove_proc_entry("sctp", init_net.proc_net);
- }
+ sctp_snmp_proc_exit(net);
+ sctp_eps_proc_exit(net);
+ sctp_assocs_proc_exit(net);
+ sctp_remaddr_proc_exit(net);
+
+ remove_proc_entry("sctp", net->proc_net);
+ net->sctp.proc_net_sctp = NULL;
#endif
- percpu_counter_destroy(&sctp_sockets_allocated);
}
/* Private helper to extract ipv4 address and stash them in
@@ -201,29 +170,29 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
/* Extract our IP addresses from the system and stash them in the
* protocol structure.
*/
-static void sctp_get_local_addr_list(void)
+static void sctp_get_local_addr_list(struct net *net)
{
struct net_device *dev;
struct list_head *pos;
struct sctp_af *af;
rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
+ for_each_netdev_rcu(net, dev) {
__list_for_each(pos, &sctp_address_families) {
af = list_entry(pos, struct sctp_af, list);
- af->copy_addrlist(&sctp_local_addr_list, dev);
+ af->copy_addrlist(&net->sctp.local_addr_list, dev);
}
}
rcu_read_unlock();
}
/* Free the existing local addresses. */
-static void sctp_free_local_addr_list(void)
+static void sctp_free_local_addr_list(struct net *net)
{
struct sctp_sockaddr_entry *addr;
struct list_head *pos, *temp;
- list_for_each_safe(pos, temp, &sctp_local_addr_list) {
+ list_for_each_safe(pos, temp, &net->sctp.local_addr_list) {
addr = list_entry(pos, struct sctp_sockaddr_entry, list);
list_del(pos);
kfree(addr);
@@ -231,17 +200,17 @@ static void sctp_free_local_addr_list(void)
}
/* Copy the local addresses which are valid for 'scope' into 'bp'. */
-int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
- gfp_t gfp, int copy_flags)
+int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
+ sctp_scope_t scope, gfp_t gfp, int copy_flags)
{
struct sctp_sockaddr_entry *addr;
int error = 0;
rcu_read_lock();
- list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+ list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) {
if (!addr->valid)
continue;
- if (sctp_in_scope(&addr->a, scope)) {
+ if (sctp_in_scope(net, &addr->a, scope)) {
/* Now that the address is in scope, check to see if
* the address type is really supported by the local
* sock as well as the remote peer.
@@ -397,7 +366,8 @@ static int sctp_v4_addr_valid(union sctp_addr *addr,
/* Should this be available for binding? */
static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
{
- int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr);
+ struct net *net = sock_net(&sp->inet.sk);
+ int ret = inet_addr_type(net, addr->v4.sin_addr.s_addr);
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
@@ -484,7 +454,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
__func__, &fl4->daddr, &fl4->saddr);
- rt = ip_route_output_key(&init_net, fl4);
+ rt = ip_route_output_key(sock_net(sk), fl4);
if (!IS_ERR(rt))
dst = &rt->dst;
@@ -530,7 +500,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
(AF_INET == laddr->a.sa.sa_family)) {
fl4->saddr = laddr->a.v4.sin_addr.s_addr;
fl4->fl4_sport = laddr->a.v4.sin_port;
- rt = ip_route_output_key(&init_net, fl4);
+ rt = ip_route_output_key(sock_net(sk), fl4);
if (!IS_ERR(rt)) {
dst = &rt->dst;
goto out_unlock;
@@ -627,14 +597,15 @@ static void sctp_v4_ecn_capable(struct sock *sk)
void sctp_addr_wq_timeout_handler(unsigned long arg)
{
+ struct net *net = (struct net *)arg;
struct sctp_sockaddr_entry *addrw, *temp;
struct sctp_sock *sp;
- spin_lock_bh(&sctp_addr_wq_lock);
+ spin_lock_bh(&net->sctp.addr_wq_lock);
- list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) {
+ list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) {
SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ",
- " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state,
+ " for cmd %d at entry %p\n", &net->sctp.addr_waitq, &addrw->a, addrw->state,
addrw);
#if IS_ENABLED(CONFIG_IPV6)
@@ -648,7 +619,7 @@ void sctp_addr_wq_timeout_handler(unsigned long arg)
goto free_next;
in6 = (struct in6_addr *)&addrw->a.v6.sin6_addr;
- if (ipv6_chk_addr(&init_net, in6, NULL, 0) == 0 &&
+ if (ipv6_chk_addr(net, in6, NULL, 0) == 0 &&
addrw->state == SCTP_ADDR_NEW) {
unsigned long timeo_val;
@@ -656,12 +627,12 @@ void sctp_addr_wq_timeout_handler(unsigned long arg)
SCTP_ADDRESS_TICK_DELAY);
timeo_val = jiffies;
timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY);
- mod_timer(&sctp_addr_wq_timer, timeo_val);
+ mod_timer(&net->sctp.addr_wq_timer, timeo_val);
break;
}
}
#endif
- list_for_each_entry(sp, &sctp_auto_asconf_splist, auto_asconf_list) {
+ list_for_each_entry(sp, &net->sctp.auto_asconf_splist, auto_asconf_list) {
struct sock *sk;
sk = sctp_opt2sk(sp);
@@ -679,31 +650,32 @@ free_next:
list_del(&addrw->list);
kfree(addrw);
}
- spin_unlock_bh(&sctp_addr_wq_lock);
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
}
-static void sctp_free_addr_wq(void)
+static void sctp_free_addr_wq(struct net *net)
{
struct sctp_sockaddr_entry *addrw;
struct sctp_sockaddr_entry *temp;
- spin_lock_bh(&sctp_addr_wq_lock);
- del_timer(&sctp_addr_wq_timer);
- list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) {
+ spin_lock_bh(&net->sctp.addr_wq_lock);
+ del_timer(&net->sctp.addr_wq_timer);
+ list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) {
list_del(&addrw->list);
kfree(addrw);
}
- spin_unlock_bh(&sctp_addr_wq_lock);
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
}
/* lookup the entry for the same address in the addr_waitq
* sctp_addr_wq MUST be locked
*/
-static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entry *addr)
+static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct net *net,
+ struct sctp_sockaddr_entry *addr)
{
struct sctp_sockaddr_entry *addrw;
- list_for_each_entry(addrw, &sctp_addr_waitq, list) {
+ list_for_each_entry(addrw, &net->sctp.addr_waitq, list) {
if (addrw->a.sa.sa_family != addr->a.sa.sa_family)
continue;
if (addrw->a.sa.sa_family == AF_INET) {
@@ -719,7 +691,7 @@ static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entr
return NULL;
}
-void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd)
+void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cmd)
{
struct sctp_sockaddr_entry *addrw;
unsigned long timeo_val;
@@ -730,38 +702,38 @@ void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd)
* new address after a couple of addition and deletion of that address
*/
- spin_lock_bh(&sctp_addr_wq_lock);
+ spin_lock_bh(&net->sctp.addr_wq_lock);
/* Offsets existing events in addr_wq */
- addrw = sctp_addr_wq_lookup(addr);
+ addrw = sctp_addr_wq_lookup(net, addr);
if (addrw) {
if (addrw->state != cmd) {
SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ",
" in wq %p\n", addrw->state, &addrw->a,
- &sctp_addr_waitq);
+ &net->sctp.addr_waitq);
list_del(&addrw->list);
kfree(addrw);
}
- spin_unlock_bh(&sctp_addr_wq_lock);
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
return;
}
/* OK, we have to add the new address to the wait queue */
addrw = kmemdup(addr, sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
if (addrw == NULL) {
- spin_unlock_bh(&sctp_addr_wq_lock);
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
return;
}
addrw->state = cmd;
- list_add_tail(&addrw->list, &sctp_addr_waitq);
+ list_add_tail(&addrw->list, &net->sctp.addr_waitq);
SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ",
- " in wq %p\n", addrw->state, &addrw->a, &sctp_addr_waitq);
+ " in wq %p\n", addrw->state, &addrw->a, &net->sctp.addr_waitq);
- if (!timer_pending(&sctp_addr_wq_timer)) {
+ if (!timer_pending(&net->sctp.addr_wq_timer)) {
timeo_val = jiffies;
timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY);
- mod_timer(&sctp_addr_wq_timer, timeo_val);
+ mod_timer(&net->sctp.addr_wq_timer, timeo_val);
}
- spin_unlock_bh(&sctp_addr_wq_lock);
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
}
/* Event handler for inet address addition/deletion events.
@@ -776,11 +748,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
struct sctp_sockaddr_entry *addr = NULL;
struct sctp_sockaddr_entry *temp;
+ struct net *net = dev_net(ifa->ifa_dev->dev);
int found = 0;
- if (!net_eq(dev_net(ifa->ifa_dev->dev), &init_net))
- return NOTIFY_DONE;
-
switch (ev) {
case NETDEV_UP:
addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
@@ -789,27 +759,27 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
addr->a.v4.sin_port = 0;
addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
addr->valid = 1;
- spin_lock_bh(&sctp_local_addr_lock);
- list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
- sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW);
- spin_unlock_bh(&sctp_local_addr_lock);
+ spin_lock_bh(&net->sctp.local_addr_lock);
+ list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list);
+ sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW);
+ spin_unlock_bh(&net->sctp.local_addr_lock);
}
break;
case NETDEV_DOWN:
- spin_lock_bh(&sctp_local_addr_lock);
+ spin_lock_bh(&net->sctp.local_addr_lock);
list_for_each_entry_safe(addr, temp,
- &sctp_local_addr_list, list) {
+ &net->sctp.local_addr_list, list) {
if (addr->a.sa.sa_family == AF_INET &&
addr->a.v4.sin_addr.s_addr ==
ifa->ifa_local) {
- sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL);
+ sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
found = 1;
addr->valid = 0;
list_del_rcu(&addr->list);
break;
}
}
- spin_unlock_bh(&sctp_local_addr_lock);
+ spin_unlock_bh(&net->sctp.local_addr_lock);
if (found)
kfree_rcu(addr, rcu);
break;
@@ -822,7 +792,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
* Initialize the control inode/socket with a control endpoint data
* structure. This endpoint is reserved exclusively for the OOTB processing.
*/
-static int sctp_ctl_sock_init(void)
+static int sctp_ctl_sock_init(struct net *net)
{
int err;
sa_family_t family = PF_INET;
@@ -830,14 +800,14 @@ static int sctp_ctl_sock_init(void)
if (sctp_get_pf_specific(PF_INET6))
family = PF_INET6;
- err = inet_ctl_sock_create(&sctp_ctl_sock, family,
- SOCK_SEQPACKET, IPPROTO_SCTP, &init_net);
+ err = inet_ctl_sock_create(&net->sctp.ctl_sock, family,
+ SOCK_SEQPACKET, IPPROTO_SCTP, net);
/* If IPv6 socket could not be created, try the IPv4 socket */
if (err < 0 && family == PF_INET6)
- err = inet_ctl_sock_create(&sctp_ctl_sock, AF_INET,
+ err = inet_ctl_sock_create(&net->sctp.ctl_sock, AF_INET,
SOCK_SEQPACKET, IPPROTO_SCTP,
- &init_net);
+ net);
if (err < 0) {
pr_err("Failed to create the SCTP control socket\n");
@@ -990,7 +960,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ?
IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
- SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
+ SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
return ip_queue_xmit(skb, &transport->fl);
}
@@ -1063,6 +1033,7 @@ static const struct net_protocol sctp_protocol = {
.handler = sctp_rcv,
.err_handler = sctp_v4_err,
.no_policy = 1,
+ .netns_ok = 1,
};
/* IPv4 address related functions. */
@@ -1130,16 +1101,16 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family)
return 1;
}
-static inline int init_sctp_mibs(void)
+static inline int init_sctp_mibs(struct net *net)
{
- return snmp_mib_init((void __percpu **)sctp_statistics,
+ return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics,
sizeof(struct sctp_mib),
__alignof__(struct sctp_mib));
}
-static inline void cleanup_sctp_mibs(void)
+static inline void cleanup_sctp_mibs(struct net *net)
{
- snmp_mib_free((void __percpu **)sctp_statistics);
+ snmp_mib_free((void __percpu **)net->sctp.sctp_statistics);
}
static void sctp_v4_pf_init(void)
@@ -1194,6 +1165,143 @@ static void sctp_v4_del_protocol(void)
unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
}
+static int sctp_net_init(struct net *net)
+{
+ int status;
+
+ /*
+ * 14. Suggested SCTP Protocol Parameter Values
+ */
+ /* The following protocol parameters are RECOMMENDED: */
+ /* RTO.Initial - 3 seconds */
+ net->sctp.rto_initial = SCTP_RTO_INITIAL;
+ /* RTO.Min - 1 second */
+ net->sctp.rto_min = SCTP_RTO_MIN;
+ /* RTO.Max - 60 seconds */
+ net->sctp.rto_max = SCTP_RTO_MAX;
+ /* RTO.Alpha - 1/8 */
+ net->sctp.rto_alpha = SCTP_RTO_ALPHA;
+ /* RTO.Beta - 1/4 */
+ net->sctp.rto_beta = SCTP_RTO_BETA;
+
+ /* Valid.Cookie.Life - 60 seconds */
+ net->sctp.valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE;
+
+ /* Whether Cookie Preservative is enabled(1) or not(0) */
+ net->sctp.cookie_preserve_enable = 1;
+
+ /* Max.Burst - 4 */
+ net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST;
+
+ /* Association.Max.Retrans - 10 attempts
+ * Path.Max.Retrans - 5 attempts (per destination address)
+ * Max.Init.Retransmits - 8 attempts
+ */
+ net->sctp.max_retrans_association = 10;
+ net->sctp.max_retrans_path = 5;
+ net->sctp.max_retrans_init = 8;
+
+ /* Sendbuffer growth - do per-socket accounting */
+ net->sctp.sndbuf_policy = 0;
+
+ /* Rcvbuffer growth - do per-socket accounting */
+ net->sctp.rcvbuf_policy = 0;
+
+ /* HB.interval - 30 seconds */
+ net->sctp.hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT;
+
+ /* delayed SACK timeout */
+ net->sctp.sack_timeout = SCTP_DEFAULT_TIMEOUT_SACK;
+
+ /* Disable ADDIP by default. */
+ net->sctp.addip_enable = 0;
+ net->sctp.addip_noauth = 0;
+ net->sctp.default_auto_asconf = 0;
+
+ /* Enable PR-SCTP by default. */
+ net->sctp.prsctp_enable = 1;
+
+ /* Disable AUTH by default. */
+ net->sctp.auth_enable = 0;
+
+ /* Set SCOPE policy to enabled */
+ net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE;
+
+ /* Set the default rwnd update threshold */
+ net->sctp.rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT;
+
+ /* Initialize maximum autoclose timeout. */
+ net->sctp.max_autoclose = INT_MAX / HZ;
+
+ status = sctp_sysctl_net_register(net);
+ if (status)
+ goto err_sysctl_register;
+
+ /* Allocate and initialise sctp mibs. */
+ status = init_sctp_mibs(net);
+ if (status)
+ goto err_init_mibs;
+
+ /* Initialize proc fs directory. */
+ status = sctp_proc_init(net);
+ if (status)
+ goto err_init_proc;
+
+ sctp_dbg_objcnt_init(net);
+
+ /* Initialize the control inode/socket for handling OOTB packets. */
+ if ((status = sctp_ctl_sock_init(net))) {
+ pr_err("Failed to initialize the SCTP control sock\n");
+ goto err_ctl_sock_init;
+ }
+
+ /* Initialize the local address list. */
+ INIT_LIST_HEAD(&net->sctp.local_addr_list);
+ spin_lock_init(&net->sctp.local_addr_lock);
+ sctp_get_local_addr_list(net);
+
+ /* Initialize the address event list */
+ INIT_LIST_HEAD(&net->sctp.addr_waitq);
+ INIT_LIST_HEAD(&net->sctp.auto_asconf_splist);
+ spin_lock_init(&net->sctp.addr_wq_lock);
+ net->sctp.addr_wq_timer.expires = 0;
+ setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler,
+ (unsigned long)net);
+
+ return 0;
+
+err_ctl_sock_init:
+ sctp_dbg_objcnt_exit(net);
+ sctp_proc_exit(net);
+err_init_proc:
+ cleanup_sctp_mibs(net);
+err_init_mibs:
+ sctp_sysctl_net_unregister(net);
+err_sysctl_register:
+ return status;
+}
+
+static void sctp_net_exit(struct net *net)
+{
+ /* Free the local address list */
+ sctp_free_addr_wq(net);
+ sctp_free_local_addr_list(net);
+
+ /* Free the control endpoint. */
+ inet_ctl_sock_destroy(net->sctp.ctl_sock);
+
+ sctp_dbg_objcnt_exit(net);
+
+ sctp_proc_exit(net);
+ cleanup_sctp_mibs(net);
+ sctp_sysctl_net_unregister(net);
+}
+
+static struct pernet_operations sctp_net_ops = {
+ .init = sctp_net_init,
+ .exit = sctp_net_exit,
+};
+
/* Initialize the universe into something sensible. */
SCTP_STATIC __init int sctp_init(void)
{
@@ -1224,62 +1332,9 @@ SCTP_STATIC __init int sctp_init(void)
if (!sctp_chunk_cachep)
goto err_chunk_cachep;
- /* Allocate and initialise sctp mibs. */
- status = init_sctp_mibs();
+ status = percpu_counter_init(&sctp_sockets_allocated, 0);
if (status)
- goto err_init_mibs;
-
- /* Initialize proc fs directory. */
- status = sctp_proc_init();
- if (status)
- goto err_init_proc;
-
- /* Initialize object count debugging. */
- sctp_dbg_objcnt_init();
-
- /*
- * 14. Suggested SCTP Protocol Parameter Values
- */
- /* The following protocol parameters are RECOMMENDED: */
- /* RTO.Initial - 3 seconds */
- sctp_rto_initial = SCTP_RTO_INITIAL;
- /* RTO.Min - 1 second */
- sctp_rto_min = SCTP_RTO_MIN;
- /* RTO.Max - 60 seconds */
- sctp_rto_max = SCTP_RTO_MAX;
- /* RTO.Alpha - 1/8 */
- sctp_rto_alpha = SCTP_RTO_ALPHA;
- /* RTO.Beta - 1/4 */
- sctp_rto_beta = SCTP_RTO_BETA;
-
- /* Valid.Cookie.Life - 60 seconds */
- sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE;
-
- /* Whether Cookie Preservative is enabled(1) or not(0) */
- sctp_cookie_preserve_enable = 1;
-
- /* Max.Burst - 4 */
- sctp_max_burst = SCTP_DEFAULT_MAX_BURST;
-
- /* Association.Max.Retrans - 10 attempts
- * Path.Max.Retrans - 5 attempts (per destination address)
- * Max.Init.Retransmits - 8 attempts
- */
- sctp_max_retrans_association = 10;
- sctp_max_retrans_path = 5;
- sctp_max_retrans_init = 8;
-
- /* Sendbuffer growth - do per-socket accounting */
- sctp_sndbuf_policy = 0;
-
- /* Rcvbuffer growth - do per-socket accounting */
- sctp_rcvbuf_policy = 0;
-
- /* HB.interval - 30 seconds */
- sctp_hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT;
-
- /* delayed SACK timeout */
- sctp_sack_timeout = SCTP_DEFAULT_TIMEOUT_SACK;
+ goto err_percpu_counter_init;
/* Implementation specific variables. */
@@ -1287,9 +1342,6 @@ SCTP_STATIC __init int sctp_init(void)
sctp_max_instreams = SCTP_DEFAULT_INSTREAMS;
sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS;
- /* Initialize maximum autoclose timeout. */
- sctp_max_autoclose = INT_MAX / HZ;
-
/* Initialize handle used for association ids. */
idr_init(&sctp_assocs_id);
@@ -1376,41 +1428,12 @@ SCTP_STATIC __init int sctp_init(void)
pr_info("Hash tables configured (established %d bind %d)\n",
sctp_assoc_hashsize, sctp_port_hashsize);
- /* Disable ADDIP by default. */
- sctp_addip_enable = 0;
- sctp_addip_noauth = 0;
- sctp_default_auto_asconf = 0;
-
- /* Enable PR-SCTP by default. */
- sctp_prsctp_enable = 1;
-
- /* Disable AUTH by default. */
- sctp_auth_enable = 0;
-
- /* Set SCOPE policy to enabled */
- sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE;
-
- /* Set the default rwnd update threshold */
- sctp_rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT;
-
sctp_sysctl_register();
INIT_LIST_HEAD(&sctp_address_families);
sctp_v4_pf_init();
sctp_v6_pf_init();
- /* Initialize the local address list. */
- INIT_LIST_HEAD(&sctp_local_addr_list);
- spin_lock_init(&sctp_local_addr_lock);
- sctp_get_local_addr_list();
-
- /* Initialize the address event list */
- INIT_LIST_HEAD(&sctp_addr_waitq);
- INIT_LIST_HEAD(&sctp_auto_asconf_splist);
- spin_lock_init(&sctp_addr_wq_lock);
- sctp_addr_wq_timer.expires = 0;
- setup_timer(&sctp_addr_wq_timer, sctp_addr_wq_timeout_handler, 0);
-
status = sctp_v4_protosw_init();
if (status)
@@ -1420,11 +1443,9 @@ SCTP_STATIC __init int sctp_init(void)
if (status)
goto err_v6_protosw_init;
- /* Initialize the control inode/socket for handling OOTB packets. */
- if ((status = sctp_ctl_sock_init())) {
- pr_err("Failed to initialize the SCTP control sock\n");
- goto err_ctl_sock_init;
- }
+ status = register_pernet_subsys(&sctp_net_ops);
+ if (status)
+ goto err_register_pernet_subsys;
status = sctp_v4_add_protocol();
if (status)
@@ -1441,13 +1462,12 @@ out:
err_v6_add_protocol:
sctp_v4_del_protocol();
err_add_protocol:
- inet_ctl_sock_destroy(sctp_ctl_sock);
-err_ctl_sock_init:
+ unregister_pernet_subsys(&sctp_net_ops);
+err_register_pernet_subsys:
sctp_v6_protosw_exit();
err_v6_protosw_init:
sctp_v4_protosw_exit();
err_protosw_init:
- sctp_free_local_addr_list();
sctp_v4_pf_exit();
sctp_v6_pf_exit();
sctp_sysctl_unregister();
@@ -1461,11 +1481,8 @@ err_ehash_alloc:
get_order(sctp_assoc_hashsize *
sizeof(struct sctp_hashbucket)));
err_ahash_alloc:
- sctp_dbg_objcnt_exit();
- sctp_proc_exit();
-err_init_proc:
- cleanup_sctp_mibs();
-err_init_mibs:
+ percpu_counter_destroy(&sctp_sockets_allocated);
+err_percpu_counter_init:
kmem_cache_destroy(sctp_chunk_cachep);
err_chunk_cachep:
kmem_cache_destroy(sctp_bucket_cachep);
@@ -1482,18 +1499,13 @@ SCTP_STATIC __exit void sctp_exit(void)
/* Unregister with inet6/inet layers. */
sctp_v6_del_protocol();
sctp_v4_del_protocol();
- sctp_free_addr_wq();
- /* Free the control endpoint. */
- inet_ctl_sock_destroy(sctp_ctl_sock);
+ unregister_pernet_subsys(&sctp_net_ops);
/* Free protosw registrations */
sctp_v6_protosw_exit();
sctp_v4_protosw_exit();
- /* Free the local address list. */
- sctp_free_local_addr_list();
-
/* Unregister with socket layer. */
sctp_v6_pf_exit();
sctp_v4_pf_exit();
@@ -1508,9 +1520,7 @@ SCTP_STATIC __exit void sctp_exit(void)
get_order(sctp_port_hashsize *
sizeof(struct sctp_bind_hashbucket)));
- sctp_dbg_objcnt_exit();
- sctp_proc_exit();
- cleanup_sctp_mibs();
+ percpu_counter_destroy(&sctp_sockets_allocated);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 479a70ef6ff8..fbe1636309a7 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -198,6 +198,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
const struct sctp_bind_addr *bp,
gfp_t gfp, int vparam_len)
{
+ struct net *net = sock_net(asoc->base.sk);
sctp_inithdr_t init;
union sctp_params addrs;
size_t chunksize;
@@ -237,7 +238,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
chunksize += sizeof(ecap_param);
- if (sctp_prsctp_enable)
+ if (net->sctp.prsctp_enable)
chunksize += sizeof(prsctp_param);
/* ADDIP: Section 4.2.7:
@@ -245,7 +246,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
* the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and
* INIT-ACK parameters.
*/
- if (sctp_addip_enable) {
+ if (net->sctp.addip_enable) {
extensions[num_ext] = SCTP_CID_ASCONF;
extensions[num_ext+1] = SCTP_CID_ASCONF_ACK;
num_ext += 2;
@@ -257,7 +258,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
chunksize += vparam_len;
/* Account for AUTH related parameters */
- if (sctp_auth_enable) {
+ if (net->sctp.auth_enable) {
/* Add random parameter length*/
chunksize += sizeof(asoc->c.auth_random);
@@ -331,7 +332,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
sctp_addto_param(retval, num_ext, extensions);
}
- if (sctp_prsctp_enable)
+ if (net->sctp.prsctp_enable)
sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
if (sp->adaptation_ind) {
@@ -342,7 +343,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
}
/* Add SCTP-AUTH chunks to the parameter list */
- if (sctp_auth_enable) {
+ if (net->sctp.auth_enable) {
sctp_addto_chunk(retval, sizeof(asoc->c.auth_random),
asoc->c.auth_random);
if (auth_hmacs)
@@ -1940,7 +1941,7 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
return 0;
}
-static int sctp_verify_ext_param(union sctp_params param)
+static int sctp_verify_ext_param(struct net *net, union sctp_params param)
{
__u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
int have_auth = 0;
@@ -1964,10 +1965,10 @@ static int sctp_verify_ext_param(union sctp_params param)
* only if ADD-IP is turned on and we are not backward-compatible
* mode.
*/
- if (sctp_addip_noauth)
+ if (net->sctp.addip_noauth)
return 1;
- if (sctp_addip_enable && !have_auth && have_asconf)
+ if (net->sctp.addip_enable && !have_auth && have_asconf)
return 0;
return 1;
@@ -1976,13 +1977,14 @@ static int sctp_verify_ext_param(union sctp_params param)
static void sctp_process_ext_param(struct sctp_association *asoc,
union sctp_params param)
{
+ struct net *net = sock_net(asoc->base.sk);
__u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
int i;
for (i = 0; i < num_ext; i++) {
switch (param.ext->chunks[i]) {
case SCTP_CID_FWD_TSN:
- if (sctp_prsctp_enable &&
+ if (net->sctp.prsctp_enable &&
!asoc->peer.prsctp_capable)
asoc->peer.prsctp_capable = 1;
break;
@@ -1990,12 +1992,12 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
/* if the peer reports AUTH, assume that he
* supports AUTH.
*/
- if (sctp_auth_enable)
+ if (net->sctp.auth_enable)
asoc->peer.auth_capable = 1;
break;
case SCTP_CID_ASCONF:
case SCTP_CID_ASCONF_ACK:
- if (sctp_addip_enable)
+ if (net->sctp.addip_enable)
asoc->peer.asconf_capable = 1;
break;
default:
@@ -2081,7 +2083,8 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
* SCTP_IERROR_ERROR - stop processing, trigger an ERROR
* SCTP_IERROR_NO_ERROR - continue with the chunk
*/
-static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
+static sctp_ierror_t sctp_verify_param(struct net *net,
+ const struct sctp_association *asoc,
union sctp_params param,
sctp_cid_t cid,
struct sctp_chunk *chunk,
@@ -2110,12 +2113,12 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
break;
case SCTP_PARAM_SUPPORTED_EXT:
- if (!sctp_verify_ext_param(param))
+ if (!sctp_verify_ext_param(net, param))
return SCTP_IERROR_ABORT;
break;
case SCTP_PARAM_SET_PRIMARY:
- if (sctp_addip_enable)
+ if (net->sctp.addip_enable)
break;
goto fallthrough;
@@ -2126,12 +2129,12 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
break;
case SCTP_PARAM_FWD_TSN_SUPPORT:
- if (sctp_prsctp_enable)
+ if (net->sctp.prsctp_enable)
break;
goto fallthrough;
case SCTP_PARAM_RANDOM:
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
goto fallthrough;
/* SCTP-AUTH: Secion 6.1
@@ -2148,7 +2151,7 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
break;
case SCTP_PARAM_CHUNKS:
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
goto fallthrough;
/* SCTP-AUTH: Section 3.2
@@ -2164,7 +2167,7 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
break;
case SCTP_PARAM_HMAC_ALGO:
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
goto fallthrough;
hmacs = (struct sctp_hmac_algo_param *)param.p;
@@ -2198,7 +2201,7 @@ fallthrough:
}
/* Verify the INIT packet before we process it. */
-int sctp_verify_init(const struct sctp_association *asoc,
+int sctp_verify_init(struct net *net, const struct sctp_association *asoc,
sctp_cid_t cid,
sctp_init_chunk_t *peer_init,
struct sctp_chunk *chunk,
@@ -2245,7 +2248,7 @@ int sctp_verify_init(const struct sctp_association *asoc,
/* Verify all the variable length parameters */
sctp_walk_params(param, peer_init, init_hdr.params) {
- result = sctp_verify_param(asoc, param, cid, chunk, errp);
+ result = sctp_verify_param(net, asoc, param, cid, chunk, errp);
switch (result) {
case SCTP_IERROR_ABORT:
case SCTP_IERROR_NOMEM:
@@ -2270,6 +2273,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
const union sctp_addr *peer_addr,
sctp_init_chunk_t *peer_init, gfp_t gfp)
{
+ struct net *net = sock_net(asoc->base.sk);
union sctp_params param;
struct sctp_transport *transport;
struct list_head *pos, *temp;
@@ -2326,7 +2330,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
* also give us an option to silently ignore the packet, which
* is what we'll do here.
*/
- if (!sctp_addip_noauth &&
+ if (!net->sctp.addip_noauth &&
(asoc->peer.asconf_capable && !asoc->peer.auth_capable)) {
asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP |
SCTP_PARAM_DEL_IP |
@@ -2466,6 +2470,7 @@ static int sctp_process_param(struct sctp_association *asoc,
const union sctp_addr *peer_addr,
gfp_t gfp)
{
+ struct net *net = sock_net(asoc->base.sk);
union sctp_addr addr;
int i;
__u16 sat;
@@ -2494,13 +2499,13 @@ do_addr_param:
af = sctp_get_af_specific(param_type2af(param.p->type));
af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0);
scope = sctp_scope(peer_addr);
- if (sctp_in_scope(&addr, scope))
+ if (sctp_in_scope(net, &addr, scope))
if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED))
return 0;
break;
case SCTP_PARAM_COOKIE_PRESERVATIVE:
- if (!sctp_cookie_preserve_enable)
+ if (!net->sctp.cookie_preserve_enable)
break;
stale = ntohl(param.life->lifespan_increment);
@@ -2580,7 +2585,7 @@ do_addr_param:
break;
case SCTP_PARAM_SET_PRIMARY:
- if (!sctp_addip_enable)
+ if (!net->sctp.addip_enable)
goto fall_through;
addr_param = param.v + sizeof(sctp_addip_param_t);
@@ -2607,7 +2612,7 @@ do_addr_param:
break;
case SCTP_PARAM_FWD_TSN_SUPPORT:
- if (sctp_prsctp_enable) {
+ if (net->sctp.prsctp_enable) {
asoc->peer.prsctp_capable = 1;
break;
}
@@ -2615,7 +2620,7 @@ do_addr_param:
goto fall_through;
case SCTP_PARAM_RANDOM:
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
goto fall_through;
/* Save peer's random parameter */
@@ -2628,7 +2633,7 @@ do_addr_param:
break;
case SCTP_PARAM_HMAC_ALGO:
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
goto fall_through;
/* Save peer's HMAC list */
@@ -2644,7 +2649,7 @@ do_addr_param:
break;
case SCTP_PARAM_CHUNKS:
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
goto fall_through;
asoc->peer.peer_chunks = kmemdup(param.p,
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index fe99628e1257..bcfebb91559d 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -251,6 +251,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
int error;
struct sctp_transport *transport = (struct sctp_transport *) peer;
struct sctp_association *asoc = transport->asoc;
+ struct net *net = sock_net(asoc->base.sk);
/* Check whether a task is in the sock. */
@@ -271,7 +272,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
goto out_unlock;
/* Run through the state machine. */
- error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT,
+ error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX),
asoc->state,
asoc->ep, asoc,
@@ -291,6 +292,7 @@ out_unlock:
static void sctp_generate_timeout_event(struct sctp_association *asoc,
sctp_event_timeout_t timeout_type)
{
+ struct net *net = sock_net(asoc->base.sk);
int error = 0;
sctp_bh_lock_sock(asoc->base.sk);
@@ -312,7 +314,7 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc,
goto out_unlock;
/* Run through the state machine. */
- error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT,
+ error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(timeout_type),
asoc->state, asoc->ep, asoc,
(void *)timeout_type, GFP_ATOMIC);
@@ -371,6 +373,7 @@ void sctp_generate_heartbeat_event(unsigned long data)
int error = 0;
struct sctp_transport *transport = (struct sctp_transport *) data;
struct sctp_association *asoc = transport->asoc;
+ struct net *net = sock_net(asoc->base.sk);
sctp_bh_lock_sock(asoc->base.sk);
if (sock_owned_by_user(asoc->base.sk)) {
@@ -388,7 +391,7 @@ void sctp_generate_heartbeat_event(unsigned long data)
if (transport->dead)
goto out_unlock;
- error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT,
+ error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT),
asoc->state, asoc->ep, asoc,
transport, GFP_ATOMIC);
@@ -408,6 +411,7 @@ void sctp_generate_proto_unreach_event(unsigned long data)
{
struct sctp_transport *transport = (struct sctp_transport *) data;
struct sctp_association *asoc = transport->asoc;
+ struct net *net = sock_net(asoc->base.sk);
sctp_bh_lock_sock(asoc->base.sk);
if (sock_owned_by_user(asoc->base.sk)) {
@@ -426,7 +430,7 @@ void sctp_generate_proto_unreach_event(unsigned long data)
if (asoc->base.dead)
goto out_unlock;
- sctp_do_sm(SCTP_EVENT_T_OTHER,
+ sctp_do_sm(net, SCTP_EVENT_T_OTHER,
SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC);
@@ -753,8 +757,10 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
int err = 0;
if (sctp_outq_sack(&asoc->outqueue, sackh)) {
+ struct net *net = sock_net(asoc->base.sk);
+
/* There are no more TSNs awaiting SACK. */
- err = sctp_do_sm(SCTP_EVENT_T_OTHER,
+ err = sctp_do_sm(net, SCTP_EVENT_T_OTHER,
SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN),
asoc->state, asoc->ep, asoc, NULL,
GFP_ATOMIC);
@@ -1042,6 +1048,8 @@ static int sctp_cmd_send_msg(struct sctp_association *asoc,
*/
static void sctp_cmd_send_asconf(struct sctp_association *asoc)
{
+ struct net *net = sock_net(asoc->base.sk);
+
/* Send the next asconf chunk from the addip chunk
* queue.
*/
@@ -1053,7 +1061,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
/* Hold the chunk until an ASCONF_ACK is received. */
sctp_chunk_hold(asconf);
- if (sctp_primitive_ASCONF(asoc, asconf))
+ if (sctp_primitive_ASCONF(net, asoc, asconf))
sctp_chunk_free(asconf);
else
asoc->addip_last_asconf = asconf;
@@ -1089,7 +1097,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
* If you want to understand all of lksctp, this is a
* good place to start.
*/
-int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype,
+int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
sctp_state_t state,
struct sctp_endpoint *ep,
struct sctp_association *asoc,
@@ -1110,12 +1118,12 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype,
/* Look up the state function, run it, and then process the
* side effects. These three steps are the heart of lksctp.
*/
- state_fn = sctp_sm_lookup_event(event_type, state, subtype);
+ state_fn = sctp_sm_lookup_event(net, event_type, state, subtype);
sctp_init_cmd_seq(&commands);
DEBUG_PRE;
- status = (*state_fn->fn)(ep, asoc, subtype, event_arg, &commands);
+ status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands);
DEBUG_POST;
error = sctp_side_effects(event_type, subtype, state,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 9fca10357350..094813b6c3c3 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -66,7 +66,8 @@
#include <net/sctp/sm.h>
#include <net/sctp/structs.h>
-static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
+static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
const void *payload,
@@ -74,36 +75,43 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
static int sctp_eat_data(const struct sctp_association *asoc,
struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands);
-static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc,
+static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
+ const struct sctp_association *asoc,
const struct sctp_chunk *chunk);
-static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
+static void sctp_send_stale_cookie_err(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands,
struct sctp_chunk *err_chunk);
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands);
-static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands);
-static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands);
static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk);
-static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
+static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
+ sctp_cmd_seq_t *commands,
__be16 error, int sk_err,
const struct sctp_association *asoc,
struct sctp_transport *transport);
static sctp_disposition_t sctp_sf_abort_violation(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
void *arg,
@@ -112,6 +120,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
const size_t paylen);
static sctp_disposition_t sctp_sf_violation_chunklen(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -119,6 +128,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
sctp_cmd_seq_t *commands);
static sctp_disposition_t sctp_sf_violation_paramlen(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -126,6 +136,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
sctp_cmd_seq_t *commands);
static sctp_disposition_t sctp_sf_violation_ctsn(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -133,18 +144,21 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
sctp_cmd_seq_t *commands);
static sctp_disposition_t sctp_sf_violation_chunk(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands);
-static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep,
+static sctp_ierror_t sctp_sf_authenticate(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
struct sctp_chunk *chunk);
-static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
+static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -204,7 +218,8 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_4_C(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -214,7 +229,7 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
struct sctp_ulpevent *ev;
if (!sctp_vtag_verify_either(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* RFC 2960 6.10 Bundling
*
@@ -222,11 +237,11 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
* SHUTDOWN COMPLETE with any other chunks.
*/
if (!chunk->singleton)
- return sctp_sf_violation_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* RFC 2960 10.2 SCTP-to-ULP
@@ -259,8 +274,8 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
- SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
@@ -289,7 +304,8 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -313,21 +329,21 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
* with an INIT chunk that is bundled with other chunks.
*/
if (!chunk->singleton)
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* If the packet is an OOTB packet which is temporarily on the
* control endpoint, respond with an ABORT.
*/
- if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) {
- SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ if (ep == sctp_sk(net->sctp.ctl_sock)->ep) {
+ SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
}
/* 3.1 A packet containing an INIT chunk MUST have a zero Verification
* Tag.
*/
if (chunk->sctp_hdr->vtag != 0)
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
/* Make sure that the INIT chunk has a valid length.
* Normally, this would cause an ABORT with a Protocol Violation
@@ -335,7 +351,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
* just discard the packet.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* If the INIT is coming toward a closing socket, we'll send back
* and ABORT. Essentially, this catches the race of INIT being
@@ -344,18 +360,18 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
* can treat this OOTB
*/
if (sctp_sstate(ep->base.sk, CLOSING))
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
- if (!sctp_verify_init(asoc, chunk->chunk_hdr->type,
+ if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
(sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
&err_chunk)) {
/* This chunk contains fatal error. It is to be discarded.
* Send an ABORT, with causes if there is any.
*/
if (err_chunk) {
- packet = sctp_abort_pkt_new(ep, asoc, arg,
+ packet = sctp_abort_pkt_new(net, ep, asoc, arg,
(__u8 *)(err_chunk->chunk_hdr) +
sizeof(sctp_chunkhdr_t),
ntohs(err_chunk->chunk_hdr->length) -
@@ -366,13 +382,13 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
if (packet) {
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
return SCTP_DISPOSITION_CONSUME;
} else {
return SCTP_DISPOSITION_NOMEM;
}
} else {
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg,
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg,
commands);
}
}
@@ -484,7 +500,8 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -496,25 +513,25 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
struct sctp_packet *packet;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* 6.10 Bundling
* An endpoint MUST NOT bundle INIT, INIT ACK or
* SHUTDOWN COMPLETE with any other chunks.
*/
if (!chunk->singleton)
- return sctp_sf_violation_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the INIT-ACK chunk has a valid length */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Grab the INIT header. */
chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
- if (!sctp_verify_init(asoc, chunk->chunk_hdr->type,
+ if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
(sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
&err_chunk)) {
@@ -526,7 +543,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
* the association.
*/
if (err_chunk) {
- packet = sctp_abort_pkt_new(ep, asoc, arg,
+ packet = sctp_abort_pkt_new(net, ep, asoc, arg,
(__u8 *)(err_chunk->chunk_hdr) +
sizeof(sctp_chunkhdr_t),
ntohs(err_chunk->chunk_hdr->length) -
@@ -537,7 +554,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
if (packet) {
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
error = SCTP_ERROR_INV_PARAM;
}
}
@@ -554,10 +571,10 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
* was malformed.
*/
if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED,
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED,
asoc, chunk->transport);
}
@@ -633,7 +650,8 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type, void *arg,
sctp_cmd_seq_t *commands)
@@ -650,9 +668,9 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
/* If the packet is an OOTB packet which is temporarily on the
* control endpoint, respond with an ABORT.
*/
- if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) {
- SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ if (ep == sctp_sk(net->sctp.ctl_sock)->ep) {
+ SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
}
/* Make sure that the COOKIE_ECHO chunk has a valid length.
@@ -661,7 +679,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
* in sctp_unpack_cookie().
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* If the endpoint is not listening or if the number of associations
* on the TCP-style socket exceed the max backlog, respond with an
@@ -670,7 +688,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
sk = ep->base.sk;
if (!sctp_sstate(sk, LISTENING) ||
(sctp_style(sk, TCP) && sk_acceptq_is_full(sk)))
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
/* "Decode" the chunk. We have no optional parameters so we
* are in good shape.
@@ -703,13 +721,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
goto nomem;
case -SCTP_IERROR_STALE_COOKIE:
- sctp_send_stale_cookie_err(ep, asoc, chunk, commands,
+ sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands,
err_chk_p);
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
case -SCTP_IERROR_BAD_SIG:
default:
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
}
@@ -756,14 +774,14 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t));
auth.transport = chunk->transport;
- ret = sctp_sf_authenticate(ep, new_asoc, type, &auth);
+ ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
/* We can now safely free the auth_chunk clone */
kfree_skb(chunk->auth_chunk);
if (ret != SCTP_IERROR_NO_ERROR) {
sctp_association_free(new_asoc);
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
}
@@ -804,8 +822,8 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
- SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS);
+ SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS);
sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
if (new_asoc->autoclose)
@@ -856,7 +874,8 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type, void *arg,
sctp_cmd_seq_t *commands)
@@ -865,13 +884,13 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
struct sctp_ulpevent *ev;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Verify that the chunk length for the COOKIE-ACK is OK.
* If we don't do this, any bundled chunks may be junked.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Reset init error count upon receipt of COOKIE-ACK,
@@ -892,8 +911,8 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
- SCTP_INC_STATS(SCTP_MIB_ACTIVEESTABS);
+ SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS);
sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
if (asoc->autoclose)
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
@@ -958,7 +977,8 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
}
/* Generate a HEARTBEAT packet on the given transport. */
-sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -972,8 +992,8 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_NO_ERROR));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_DELETE_TCB;
}
@@ -1028,7 +1048,8 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -1039,11 +1060,11 @@ sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep,
size_t paylen = 0;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the HEARTBEAT chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* 8.3 The receiver of the HEARTBEAT should immediately
@@ -1095,7 +1116,8 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -1108,12 +1130,12 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
unsigned long max_interval;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the HEARTBEAT-ACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) +
sizeof(sctp_sender_hb_info_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
@@ -1171,7 +1193,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
/* Helper function to send out an abort for the restart
* condition.
*/
-static int sctp_sf_send_restart_abort(union sctp_addr *ssa,
+static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
struct sctp_chunk *init,
sctp_cmd_seq_t *commands)
{
@@ -1197,18 +1219,18 @@ static int sctp_sf_send_restart_abort(union sctp_addr *ssa,
errhdr->length = htons(len);
/* Assign to the control socket. */
- ep = sctp_sk((sctp_get_ctl_sock()))->ep;
+ ep = sctp_sk(net->sctp.ctl_sock)->ep;
/* Association is NULL since this may be a restart attack and we
* want to send back the attacker's vtag.
*/
- pkt = sctp_abort_pkt_new(ep, NULL, init, errhdr, len);
+ pkt = sctp_abort_pkt_new(net, ep, NULL, init, errhdr, len);
if (!pkt)
goto out;
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(pkt));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
/* Discard the rest of the inbound packet. */
sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
@@ -1240,6 +1262,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc,
struct sctp_chunk *init,
sctp_cmd_seq_t *commands)
{
+ struct net *net = sock_net(new_asoc->base.sk);
struct sctp_transport *new_addr;
int ret = 1;
@@ -1258,7 +1281,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc,
transports) {
if (!list_has_sctp_addr(&asoc->peer.transport_addr_list,
&new_addr->ipaddr)) {
- sctp_sf_send_restart_abort(&new_addr->ipaddr, init,
+ sctp_sf_send_restart_abort(net, &new_addr->ipaddr, init,
commands);
ret = 0;
break;
@@ -1358,6 +1381,7 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc,
* chunk handling.
*/
static sctp_disposition_t sctp_sf_do_unexpected_init(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -1382,20 +1406,20 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
* with an INIT chunk that is bundled with other chunks.
*/
if (!chunk->singleton)
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* 3.1 A packet containing an INIT chunk MUST have a zero Verification
* Tag.
*/
if (chunk->sctp_hdr->vtag != 0)
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
/* Make sure that the INIT chunk has a valid length.
* In this case, we generate a protocol violation since we have
* an association established.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Grab the INIT header. */
chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
@@ -1405,14 +1429,14 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
- if (!sctp_verify_init(asoc, chunk->chunk_hdr->type,
+ if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
(sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
&err_chunk)) {
/* This chunk contains fatal error. It is to be discarded.
* Send an ABORT, with causes if there is any.
*/
if (err_chunk) {
- packet = sctp_abort_pkt_new(ep, asoc, arg,
+ packet = sctp_abort_pkt_new(net, ep, asoc, arg,
(__u8 *)(err_chunk->chunk_hdr) +
sizeof(sctp_chunkhdr_t),
ntohs(err_chunk->chunk_hdr->length) -
@@ -1421,14 +1445,14 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
if (packet) {
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
retval = SCTP_DISPOSITION_CONSUME;
} else {
retval = SCTP_DISPOSITION_NOMEM;
}
goto cleanup;
} else {
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg,
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg,
commands);
}
}
@@ -1570,7 +1594,8 @@ cleanup:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -1579,7 +1604,7 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep,
/* Call helper to do the real work for both simulataneous and
* duplicate INIT chunk handling.
*/
- return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands);
+ return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands);
}
/*
@@ -1623,7 +1648,8 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -1632,7 +1658,7 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep,
/* Call helper to do the real work for both simulataneous and
* duplicate INIT chunk handling.
*/
- return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands);
+ return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands);
}
@@ -1645,7 +1671,8 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep,
* An unexpected INIT ACK usually indicates the processing of an old or
* duplicated INIT chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg, sctp_cmd_seq_t *commands)
@@ -1653,10 +1680,10 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep,
/* Per the above section, we'll discard the chunk if we have an
* endpoint. If this is an OOTB INIT-ACK, treat it as such.
*/
- if (ep == sctp_sk((sctp_get_ctl_sock()))->ep)
- return sctp_sf_ootb(ep, asoc, type, arg, commands);
+ if (ep == sctp_sk(net->sctp.ctl_sock)->ep)
+ return sctp_sf_ootb(net, ep, asoc, type, arg, commands);
else
- return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
}
/* Unexpected COOKIE-ECHO handler for peer restart (Table 2, action 'A')
@@ -1664,7 +1691,8 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep,
* Section 5.2.4
* A) In this case, the peer may have restarted.
*/
-static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands,
@@ -1700,7 +1728,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
* its peer.
*/
if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) {
- disposition = sctp_sf_do_9_2_reshutack(ep, asoc,
+ disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc,
SCTP_ST_CHUNK(chunk->chunk_hdr->type),
chunk, commands);
if (SCTP_DISPOSITION_NOMEM == disposition)
@@ -1763,7 +1791,8 @@ nomem:
* after responding to the local endpoint's INIT
*/
/* This case represents an initialization collision. */
-static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands,
@@ -1784,7 +1813,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -1833,7 +1862,8 @@ nomem:
* but a new tag of its own.
*/
/* This case represents an initialization collision. */
-static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands,
@@ -1854,7 +1884,8 @@ static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep,
* enter the ESTABLISHED state, if it has not already done so.
*/
/* This case represents an initialization collision. */
-static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_dupcook_d(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands,
@@ -1876,7 +1907,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START,
SCTP_NULL());
@@ -1948,7 +1979,8 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -1967,7 +1999,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
* done later.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* "Decode" the chunk. We have no optional parameters so we
@@ -2001,12 +2033,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
goto nomem;
case -SCTP_IERROR_STALE_COOKIE:
- sctp_send_stale_cookie_err(ep, asoc, chunk, commands,
+ sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands,
err_chk_p);
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
case -SCTP_IERROR_BAD_SIG:
default:
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
}
@@ -2017,27 +2049,27 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
switch (action) {
case 'A': /* Association restart. */
- retval = sctp_sf_do_dupcook_a(ep, asoc, chunk, commands,
+ retval = sctp_sf_do_dupcook_a(net, ep, asoc, chunk, commands,
new_asoc);
break;
case 'B': /* Collision case B. */
- retval = sctp_sf_do_dupcook_b(ep, asoc, chunk, commands,
+ retval = sctp_sf_do_dupcook_b(net, ep, asoc, chunk, commands,
new_asoc);
break;
case 'C': /* Collision case C. */
- retval = sctp_sf_do_dupcook_c(ep, asoc, chunk, commands,
+ retval = sctp_sf_do_dupcook_c(net, ep, asoc, chunk, commands,
new_asoc);
break;
case 'D': /* Collision case D. */
- retval = sctp_sf_do_dupcook_d(ep, asoc, chunk, commands,
+ retval = sctp_sf_do_dupcook_d(net, ep, asoc, chunk, commands,
new_asoc);
break;
default: /* Discard packet for all others. */
- retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ retval = sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
break;
}
@@ -2063,6 +2095,7 @@ nomem:
* See sctp_sf_do_9_1_abort().
*/
sctp_disposition_t sctp_sf_shutdown_pending_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -2072,7 +2105,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
struct sctp_chunk *chunk = arg;
if (!sctp_vtag_verify_either(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ABORT chunk has a valid length.
* Since this is an ABORT chunk, we have to discard it
@@ -2085,7 +2118,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
* packet.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* ADD-IP: Special case for ABORT chunks
* F4) One special consideration is that ABORT Chunks arriving
@@ -2094,9 +2127,9 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
- return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+ return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -2104,7 +2137,8 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
*
* See sctp_sf_do_9_1_abort().
*/
-sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2113,7 +2147,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
struct sctp_chunk *chunk = arg;
if (!sctp_vtag_verify_either(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ABORT chunk has a valid length.
* Since this is an ABORT chunk, we have to discard it
@@ -2126,7 +2160,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
* packet.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* ADD-IP: Special case for ABORT chunks
* F4) One special consideration is that ABORT Chunks arriving
@@ -2135,7 +2169,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
/* Stop the T2-shutdown timer. */
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -2145,7 +2179,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
- return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+ return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -2154,6 +2188,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
* See sctp_sf_do_9_1_abort().
*/
sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -2163,7 +2198,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
/* The same T2 timer, so we should be able to use
* common function with the SHUTDOWN-SENT state.
*/
- return sctp_sf_shutdown_sent_abort(ep, asoc, type, arg, commands);
+ return sctp_sf_shutdown_sent_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -2180,7 +2215,8 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2190,13 +2226,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
sctp_errhdr_t *err;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ERROR chunk has a valid length.
* The parameter walking depends on this as well.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Process the error here */
@@ -2206,7 +2242,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
*/
sctp_walk_errors(err, chunk->chunk_hdr) {
if (SCTP_ERROR_STALE_COOKIE == err->cause)
- return sctp_sf_do_5_2_6_stale(ep, asoc, type,
+ return sctp_sf_do_5_2_6_stale(net, ep, asoc, type,
arg, commands);
}
@@ -2215,7 +2251,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
* we are discarding the packet, there should be no adverse
* affects.
*/
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
/*
@@ -2243,7 +2279,8 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2365,7 +2402,8 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2374,7 +2412,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
struct sctp_chunk *chunk = arg;
if (!sctp_vtag_verify_either(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ABORT chunk has a valid length.
* Since this is an ABORT chunk, we have to discard it
@@ -2387,7 +2425,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
* packet.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* ADD-IP: Special case for ABORT chunks
* F4) One special consideration is that ABORT Chunks arriving
@@ -2396,12 +2434,13 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
- return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+ return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
-static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
+static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2418,7 +2457,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
sctp_errhdr_t *err;
sctp_walk_errors(err, chunk->chunk_hdr);
if ((void *)err != (void *)chunk->chunk_end)
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
}
@@ -2426,8 +2465,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
/* ASSOC_FAILED will DELETE_TCB. */
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(error));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_ABORT;
}
@@ -2437,7 +2476,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
*
* See sctp_sf_do_9_1_abort() above.
*/
-sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2448,7 +2488,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
__be16 error = SCTP_ERROR_NO_ERROR;
if (!sctp_vtag_verify_either(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ABORT chunk has a valid length.
* Since this is an ABORT chunk, we have to discard it
@@ -2461,27 +2501,28 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
* packet.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* See if we have an error cause code in the chunk. */
len = ntohs(chunk->chunk_hdr->length);
if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
- return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, asoc,
+ return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc,
chunk->transport);
}
/*
* Process an incoming ICMP as an ABORT. (COOKIE-WAIT state)
*/
-sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands)
{
- return sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR,
+ return sctp_stop_t1_and_abort(net, commands, SCTP_ERROR_NO_ERROR,
ENOPROTOOPT, asoc,
(struct sctp_transport *)arg);
}
@@ -2489,7 +2530,8 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep
/*
* Process an ABORT. (COOKIE-ECHOED state)
*/
-sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2498,7 +2540,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
/* There is a single T1 timer, so we should be able to use
* common function with the COOKIE-WAIT state.
*/
- return sctp_sf_cookie_wait_abort(ep, asoc, type, arg, commands);
+ return sctp_sf_cookie_wait_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -2506,7 +2548,8 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
*
* This is common code called by several sctp_sf_*_abort() functions above.
*/
-static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
+static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
+ sctp_cmd_seq_t *commands,
__be16 error, int sk_err,
const struct sctp_association *asoc,
struct sctp_transport *transport)
@@ -2514,7 +2557,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
SCTP_DEBUG_PRINTK("ABORT received (INIT).\n");
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(sk_err));
@@ -2557,7 +2600,8 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2570,12 +2614,12 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
__u32 ctsn;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk,
sizeof(struct sctp_shutdown_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Convert the elaborate header. */
@@ -2595,7 +2639,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
* sender with an ABORT.
*/
if (!TSN_lt(ctsn, asoc->next_tsn))
- return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
+ return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands);
/* API 5.3.1.5 SCTP_SHUTDOWN_EVENT
* When a peer sends a SHUTDOWN, SCTP delivers this notification to
@@ -2619,7 +2663,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
disposition = SCTP_DISPOSITION_CONSUME;
if (sctp_outq_is_empty(&asoc->outqueue)) {
- disposition = sctp_sf_do_9_2_shutdown_ack(ep, asoc, type,
+ disposition = sctp_sf_do_9_2_shutdown_ack(net, ep, asoc, type,
arg, commands);
}
@@ -2645,7 +2689,8 @@ out:
* The Cumulative TSN Ack of the received SHUTDOWN chunk
* MUST be processed.
*/
-sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2656,12 +2701,12 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
__u32 ctsn;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk,
sizeof(struct sctp_shutdown_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
@@ -2678,7 +2723,7 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
* sender with an ABORT.
*/
if (!TSN_lt(ctsn, asoc->next_tsn))
- return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
+ return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands);
/* verify, by checking the Cumulative TSN Ack field of the
* chunk, that all its outstanding DATA chunks have been
@@ -2697,7 +2742,8 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
* that belong to this association, it should discard the INIT chunk and
* retransmit the SHUTDOWN ACK chunk.
*/
-sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2708,7 +2754,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep,
/* Make sure that the chunk has a valid length */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Since we are not going to really process this INIT, there
@@ -2760,7 +2806,8 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2771,10 +2818,10 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep,
u32 lowest_tsn;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
cwr = (sctp_cwrhdr_t *) chunk->skb->data;
@@ -2815,7 +2862,8 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_ecne(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2825,10 +2873,10 @@ sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep,
struct sctp_chunk *chunk = arg;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
ecne = (sctp_ecnehdr_t *) chunk->skb->data;
@@ -2871,7 +2919,8 @@ sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2884,11 +2933,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
error = sctp_eat_data(asoc, chunk, commands );
@@ -2897,16 +2946,16 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
break;
case SCTP_IERROR_HIGH_TSN:
case SCTP_IERROR_BAD_STREAM:
- SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
+ SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
goto discard_noforce;
case SCTP_IERROR_DUP_TSN:
case SCTP_IERROR_IGNORE_TSN:
- SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
+ SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
goto discard_force;
case SCTP_IERROR_NO_DATA:
goto consume;
case SCTP_IERROR_PROTO_VIOLATION:
- return sctp_sf_abort_violation(ep, asoc, chunk, commands,
+ return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
(u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
default:
BUG();
@@ -2992,7 +3041,8 @@ consume:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3004,11 +3054,11 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep,
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
error = sctp_eat_data(asoc, chunk, commands );
@@ -3022,7 +3072,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep,
case SCTP_IERROR_NO_DATA:
goto consume;
case SCTP_IERROR_PROTO_VIOLATION:
- return sctp_sf_abort_violation(ep, asoc, chunk, commands,
+ return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
(u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
default:
BUG();
@@ -3082,7 +3132,8 @@ consume:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3093,18 +3144,18 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
__u32 ctsn;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Pull the SACK chunk from the data buffer */
sackh = sctp_sm_pull_sack(chunk);
/* Was this a bogus SACK? */
if (!sackh)
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
chunk->subh.sack_hdr = sackh;
ctsn = ntohl(sackh->cum_tsn_ack);
@@ -3125,7 +3176,7 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
* sender with an ABORT.
*/
if (!TSN_lt(ctsn, asoc->next_tsn))
- return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
+ return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands);
/* Return this SACK for further processing. */
sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_SACKH(sackh));
@@ -3154,7 +3205,8 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3164,7 +3216,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
struct sctp_chunk *chunk = arg;
struct sctp_chunk *abort;
- packet = sctp_ootb_pkt_new(asoc, chunk);
+ packet = sctp_ootb_pkt_new(net, asoc, chunk);
if (packet) {
/* Make an ABORT. The T bit will be set if the asoc
@@ -3188,9 +3240,9 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
- sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
return SCTP_DISPOSITION_CONSUME;
}
@@ -3205,7 +3257,8 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_operr_notify(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3215,15 +3268,15 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
sctp_errhdr_t *err;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ERROR chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
sctp_walk_errors(err, chunk->chunk_hdr);
if ((void *)err != (void *)chunk->chunk_end)
- return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+ return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
(void *)err, commands);
sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR,
@@ -3242,7 +3295,8 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3253,11 +3307,11 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
struct sctp_ulpevent *ev;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* 10.2 H) SHUTDOWN COMPLETE notification
*
@@ -3290,8 +3344,8 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
- SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
/* ...and remove all record of the association. */
@@ -3324,7 +3378,8 @@ nomem:
* receiver of the OOTB packet shall discard the OOTB packet and take
* no further action.
*/
-sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_ootb(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3338,13 +3393,13 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
int ootb_shut_ack = 0;
int ootb_cookie_ack = 0;
- SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
ch = (sctp_chunkhdr_t *) chunk->chunk_hdr;
do {
/* Report violation if the chunk is less then minimal */
if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Now that we know we at least have a chunk header,
@@ -3359,7 +3414,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
* sending an ABORT of its own.
*/
if (SCTP_CID_ABORT == ch->type)
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* RFC 8.4, 7) If the packet contains a "Stale cookie" ERROR
* or a COOKIE ACK the SCTP Packet should be silently
@@ -3381,18 +3436,18 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
/* Report violation if chunk len overflows */
ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
if (ch_end > skb_tail_pointer(skb))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
ch = (sctp_chunkhdr_t *) ch_end;
} while (ch_end < skb_tail_pointer(skb));
if (ootb_shut_ack)
- return sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands);
+ return sctp_sf_shut_8_4_5(net, ep, asoc, type, arg, commands);
else if (ootb_cookie_ack)
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
else
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
}
/*
@@ -3416,7 +3471,8 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3426,7 +3482,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
struct sctp_chunk *chunk = arg;
struct sctp_chunk *shut;
- packet = sctp_ootb_pkt_new(asoc, chunk);
+ packet = sctp_ootb_pkt_new(net, asoc, chunk);
if (packet) {
/* Make an SHUTDOWN_COMPLETE.
@@ -3450,19 +3506,19 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
/* If the chunk length is invalid, we don't want to process
* the reset of the packet.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* We need to discard the rest of the packet to prevent
* potential bomming attacks from additional bundled chunks.
* This is documented in SCTP Threats ID.
*/
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
return SCTP_DISPOSITION_NOMEM;
@@ -3479,7 +3535,8 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
* chunks. --piggy ]
*
*/
-sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3489,7 +3546,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep,
/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Although we do have an association in this case, it corresponds
@@ -3497,13 +3554,14 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep,
* packet and the state function that handles OOTB SHUTDOWN_ACK is
* called with a NULL association.
*/
- SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
- return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands);
+ return sctp_sf_shut_8_4_5(net, ep, NULL, type, arg, commands);
}
/* ADDIP Section 4.2 Upon reception of an ASCONF Chunk. */
-sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_asconf(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type, void *arg,
sctp_cmd_seq_t *commands)
@@ -3519,7 +3577,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
/* ADD-IP: Section 4.1.1
@@ -3528,12 +3586,12 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
* is received unauthenticated it MUST be silently discarded as
* described in [I-D.ietf-tsvwg-sctp-auth].
*/
- if (!sctp_addip_noauth && !chunk->auth)
- return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+ if (!net->sctp.addip_noauth && !chunk->auth)
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the ASCONF ADDIP chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
hdr = (sctp_addiphdr_t *)chunk->skb->data;
@@ -3542,7 +3600,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
addr_param = (union sctp_addr_param *)hdr->params;
length = ntohs(addr_param->p.length);
if (length < sizeof(sctp_paramhdr_t))
- return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+ return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
(void *)addr_param, commands);
/* Verify the ASCONF chunk before processing it. */
@@ -3550,7 +3608,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
(sctp_paramhdr_t *)((void *)addr_param + length),
(void *)chunk->chunk_end,
&err_param))
- return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+ return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
(void *)err_param, commands);
/* ADDIP 5.2 E1) Compare the value of the serial number to the value
@@ -3630,7 +3688,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
* When building TLV parameters for the ASCONF Chunk that will add or
* delete IP addresses the D0 to D13 rules should be applied:
*/
-sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type, void *arg,
sctp_cmd_seq_t *commands)
@@ -3645,7 +3704,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
if (!sctp_vtag_verify(asconf_ack, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
/* ADD-IP, Section 4.1.2:
@@ -3654,12 +3713,12 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
* is received unauthenticated it MUST be silently discarded as
* described in [I-D.ietf-tsvwg-sctp-auth].
*/
- if (!sctp_addip_noauth && !asconf_ack->auth)
- return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+ if (!net->sctp.addip_noauth && !asconf_ack->auth)
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the ADDIP chunk has a valid length. */
if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data;
@@ -3670,7 +3729,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
(sctp_paramhdr_t *)addip_hdr->params,
(void *)asconf_ack->chunk_end,
&err_param))
- return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+ return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
(void *)err_param, commands);
if (last_asconf) {
@@ -3705,8 +3764,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_ASCONF_ACK));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_ABORT;
}
@@ -3739,8 +3798,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_ASCONF_ACK));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_ABORT;
}
@@ -3761,7 +3820,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3776,12 +3836,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep,
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
/* Make sure that the FORWARD_TSN chunk has valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data;
@@ -3828,6 +3888,7 @@ discard_noforce:
}
sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -3843,12 +3904,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
/* Make sure that the FORWARD_TSN chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data;
@@ -3915,7 +3976,8 @@ gen_shutdown:
*
* The return value is the disposition of the chunk.
*/
-static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep,
+static sctp_ierror_t sctp_sf_authenticate(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
struct sctp_chunk *chunk)
@@ -3988,7 +4050,8 @@ nomem:
return SCTP_IERROR_NOMEM;
}
-sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_auth(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4001,21 +4064,21 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep,
/* Make sure that the peer has AUTH capable */
if (!asoc->peer.auth_capable)
- return sctp_sf_unk_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands);
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
/* Make sure that the AUTH chunk has valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_auth_chunk)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
auth_hdr = (struct sctp_authhdr *)chunk->skb->data;
- error = sctp_sf_authenticate(ep, asoc, type, chunk);
+ error = sctp_sf_authenticate(net, ep, asoc, type, chunk);
switch (error) {
case SCTP_IERROR_AUTH_BAD_HMAC:
/* Generate the ERROR chunk and discard the rest
@@ -4032,10 +4095,10 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep,
/* Fall Through */
case SCTP_IERROR_AUTH_BAD_KEYID:
case SCTP_IERROR_BAD_SIG:
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
case SCTP_IERROR_PROTO_VIOLATION:
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
case SCTP_IERROR_NOMEM:
@@ -4084,7 +4147,8 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4097,20 +4161,20 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
SCTP_DEBUG_PRINTK("Processing the unknown chunk id %d.\n", type.chunk);
if (!sctp_vtag_verify(unk_chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the chunk has a valid length.
* Since we don't know the chunk type, we use a general
* chunkhdr structure to make a comparison.
*/
if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
switch (type.chunk & SCTP_CID_ACTION_MASK) {
case SCTP_CID_ACTION_DISCARD:
/* Discard the packet. */
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
break;
case SCTP_CID_ACTION_DISCARD_ERR:
/* Generate an ERROR chunk as response. */
@@ -4125,7 +4189,7 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
}
/* Discard the packet. */
- sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
return SCTP_DISPOSITION_CONSUME;
break;
case SCTP_CID_ACTION_SKIP:
@@ -4167,7 +4231,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4180,7 +4245,7 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep,
* chunkhdr structure to make a comparison.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
SCTP_DEBUG_PRINTK("Chunk %d is discarded\n", type.chunk);
@@ -4205,13 +4270,14 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_pdiscard(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands)
{
- SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS);
+ SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
return SCTP_DISPOSITION_CONSUME;
@@ -4232,7 +4298,8 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep,
* We simply tag the chunk as a violation. The state machine will log
* the violation and continue.
*/
-sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_violation(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4242,7 +4309,7 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep,
/* Make sure that the chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
return SCTP_DISPOSITION_VIOLATION;
@@ -4252,6 +4319,7 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep,
* Common function to handle a protocol violation.
*/
static sctp_disposition_t sctp_sf_abort_violation(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
void *arg,
@@ -4302,7 +4370,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
}
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
if (asoc->state <= SCTP_STATE_COOKIE_ECHOED) {
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -4316,10 +4384,10 @@ static sctp_disposition_t sctp_sf_abort_violation(
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION));
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
}
} else {
- packet = sctp_ootb_pkt_new(asoc, chunk);
+ packet = sctp_ootb_pkt_new(net, asoc, chunk);
if (!packet)
goto nomem_pkt;
@@ -4334,13 +4402,13 @@ static sctp_disposition_t sctp_sf_abort_violation(
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
}
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
discard:
- sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
+ sctp_sf_pdiscard(net, ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
return SCTP_DISPOSITION_ABORT;
nomem_pkt:
@@ -4369,6 +4437,7 @@ nomem:
* Generate an ABORT chunk and terminate the association.
*/
static sctp_disposition_t sctp_sf_violation_chunklen(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4377,7 +4446,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
{
static const char err_str[]="The following chunk had invalid length:";
- return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str,
+ return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
sizeof(err_str));
}
@@ -4388,6 +4457,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
* the length is considered as invalid.
*/
static sctp_disposition_t sctp_sf_violation_paramlen(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4407,17 +4477,17 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
goto nomem;
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION));
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
discard:
- sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
+ sctp_sf_pdiscard(net, ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
return SCTP_DISPOSITION_ABORT;
nomem:
return SCTP_DISPOSITION_NOMEM;
@@ -4430,6 +4500,7 @@ nomem:
* error code.
*/
static sctp_disposition_t sctp_sf_violation_ctsn(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4438,7 +4509,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
{
static const char err_str[]="The cumulative tsn ack beyond the max tsn currently sent:";
- return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str,
+ return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
sizeof(err_str));
}
@@ -4449,6 +4520,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
* on the path and we may not want to continue this communication.
*/
static sctp_disposition_t sctp_sf_violation_chunk(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4458,9 +4530,9 @@ static sctp_disposition_t sctp_sf_violation_chunk(
static const char err_str[]="The following chunk violates protocol:";
if (!asoc)
- return sctp_sf_violation(ep, asoc, type, arg, commands);
+ return sctp_sf_violation(net, ep, asoc, type, arg, commands);
- return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str,
+ return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
sizeof(err_str));
}
/***************************************************************************
@@ -4523,7 +4595,8 @@ static sctp_disposition_t sctp_sf_violation_chunk(
*
* The return value is a disposition.
*/
-sctp_disposition_t sctp_sf_do_prm_asoc(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4634,7 +4707,8 @@ nomem:
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4673,6 +4747,7 @@ sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep,
* The return value is the disposition.
*/
sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4694,7 +4769,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
disposition = SCTP_DISPOSITION_CONSUME;
if (sctp_outq_is_empty(&asoc->outqueue)) {
- disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type,
+ disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
arg, commands);
}
return disposition;
@@ -4728,6 +4803,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
* The return value is the disposition.
*/
sctp_disposition_t sctp_sf_do_9_1_prm_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4759,14 +4835,15 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_USER_ABORT));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return retval;
}
/* We tried an illegal operation on an association which is closed. */
-sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_error_closed(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4779,7 +4856,8 @@ sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep,
/* We tried an illegal operation on an association which is shutting
* down.
*/
-sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4805,6 +4883,7 @@ sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep,
* (timers)
*/
sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4817,7 +4896,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
- SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
+ SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS);
sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
@@ -4839,6 +4918,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
* (timers)
*/
sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4847,7 +4927,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
/* There is a single T1 timer, so we should be able to use
* common function with the COOKIE-WAIT state.
*/
- return sctp_sf_cookie_wait_prm_shutdown(ep, asoc, type, arg, commands);
+ return sctp_sf_cookie_wait_prm_shutdown(net, ep, asoc, type, arg, commands);
}
/*
@@ -4865,6 +4945,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
* (timers)
*/
sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4884,7 +4965,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
/* Even if we can't send the ABORT due to low memory delete the
* TCB. This is a departure from our typical NOMEM handling.
@@ -4914,6 +4995,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
* (timers)
*/
sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4923,7 +5005,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
/* There is a single T1 timer, so we should be able to use
* common function with the COOKIE-WAIT state.
*/
- return sctp_sf_cookie_wait_prm_abort(ep, asoc, type, arg, commands);
+ return sctp_sf_cookie_wait_prm_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -4939,6 +5021,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
* (timers)
*/
sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4949,7 +5032,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
- return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands);
+ return sctp_sf_do_9_1_prm_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -4965,6 +5048,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
* (timers)
*/
sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4979,7 +5063,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
- return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands);
+ return sctp_sf_do_9_1_prm_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -4995,6 +5079,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
* (timers)
*/
sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5004,7 +5089,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
/* The same T2 timer, so we should be able to use
* common function with the SHUTDOWN-SENT state.
*/
- return sctp_sf_shutdown_sent_prm_abort(ep, asoc, type, arg, commands);
+ return sctp_sf_shutdown_sent_prm_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -5030,6 +5115,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
* association on which a heartbeat should be issued.
*/
sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5061,7 +5147,8 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
* When an endpoint has an ASCONF signaled change to be sent to the
* remote endpoint it should do A1 to A9
*/
-sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5082,6 +5169,7 @@ sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep,
* The return value is the disposition of the primitive.
*/
sctp_disposition_t sctp_sf_ignore_primitive(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5103,6 +5191,7 @@ sctp_disposition_t sctp_sf_ignore_primitive(
* retransmit, the stack will immediately send up this notification.
*/
sctp_disposition_t sctp_sf_do_no_pending_tsn(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5134,6 +5223,7 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn(
* The return value is the disposition.
*/
sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5203,6 +5293,7 @@ nomem:
* The return value is the disposition.
*/
sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5221,11 +5312,11 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
*/
if (chunk) {
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
}
@@ -5273,7 +5364,8 @@ nomem:
*
* The return value is the disposition of the event.
*/
-sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_ignore_other(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5298,7 +5390,8 @@ sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5306,7 +5399,7 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
{
struct sctp_transport *transport = arg;
- SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_T3_RTX_EXPIREDS);
if (asoc->overall_error_count >= asoc->max_retrans) {
if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) {
@@ -5327,8 +5420,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_NO_ERROR));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_DELETE_TCB;
}
}
@@ -5384,13 +5477,14 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
* allow. However, an SCTP transmitter MUST NOT be more aggressive than
* the following algorithms allow.
*/
-sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands)
{
- SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS);
sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
return SCTP_DISPOSITION_CONSUME;
}
@@ -5414,7 +5508,8 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
* (timers, events)
*
*/
-sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5425,7 +5520,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
int attempts = asoc->init_err_counter + 1;
SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n");
- SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS);
if (attempts <= asoc->max_init_attempts) {
bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
@@ -5475,7 +5570,8 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
* (timers, events)
*
*/
-sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5485,7 +5581,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep
int attempts = asoc->init_err_counter + 1;
SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n");
- SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS);
if (attempts <= asoc->max_init_attempts) {
repl = sctp_make_cookie_echo(asoc, NULL);
@@ -5523,7 +5619,8 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep
* the T2-Shutdown timer, giving its peer ample opportunity to transmit
* all of its queued DATA chunks that have not yet been sent.
*/
-sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5532,7 +5629,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
struct sctp_chunk *reply = NULL;
SCTP_DEBUG_PRINTK("Timer T2 expired.\n");
- SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS);
((struct sctp_association *)asoc)->shutdown_retries++;
@@ -5542,8 +5639,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
/* Note: CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_NO_ERROR));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_DELETE_TCB;
}
@@ -5592,6 +5689,7 @@ nomem:
* If the T4 RTO timer expires the endpoint should do B1 to B5
*/
sctp_disposition_t sctp_sf_t4_timer_expire(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5601,7 +5699,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
struct sctp_chunk *chunk = asoc->addip_last_asconf;
struct sctp_transport *transport = chunk->transport;
- SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_T4_RTO_EXPIREDS);
/* ADDIP 4.1 B1) Increment the error counters and perform path failure
* detection on the appropriate destination address as defined in
@@ -5626,8 +5724,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
SCTP_ERROR(ETIMEDOUT));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_NO_ERROR));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_ABORT;
}
@@ -5662,7 +5760,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
* At the expiration of this timer the sender SHOULD abort the association
* by sending an ABORT chunk.
*/
-sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5671,7 +5770,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
struct sctp_chunk *reply = NULL;
SCTP_DEBUG_PRINTK("Timer T5 expired.\n");
- SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS);
reply = sctp_make_abort(asoc, NULL, 0);
if (!reply)
@@ -5683,8 +5782,8 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_NO_ERROR));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_DELETE_TCB;
nomem:
@@ -5697,6 +5796,7 @@ nomem:
* the user. So this routine looks same as sctp_sf_do_9_2_prm_shutdown().
*/
sctp_disposition_t sctp_sf_autoclose_timer_expire(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5705,7 +5805,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
{
int disposition;
- SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS);
/* From 9.2 Shutdown of an Association
* Upon receipt of the SHUTDOWN primitive from its upper
@@ -5720,7 +5820,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
disposition = SCTP_DISPOSITION_CONSUME;
if (sctp_outq_is_empty(&asoc->outqueue)) {
- disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type,
+ disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
arg, commands);
}
return disposition;
@@ -5738,7 +5838,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_not_impl(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5755,7 +5856,8 @@ sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_bug(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5775,7 +5877,8 @@ sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_timer_ignore(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5817,7 +5920,8 @@ static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk)
/* Create an ABORT packet to be sent as a response, with the specified
* error causes.
*/
-static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
+static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
const void *payload,
@@ -5826,7 +5930,7 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
struct sctp_packet *packet;
struct sctp_chunk *abort;
- packet = sctp_ootb_pkt_new(asoc, chunk);
+ packet = sctp_ootb_pkt_new(net, asoc, chunk);
if (packet) {
/* Make an ABORT.
@@ -5858,7 +5962,8 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
}
/* Allocate a packet for responding in the OOTB conditions. */
-static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc,
+static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
+ const struct sctp_association *asoc,
const struct sctp_chunk *chunk)
{
struct sctp_packet *packet;
@@ -5911,7 +6016,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc
}
/* Make a transport for the bucket, Eliza... */
- transport = sctp_transport_new(sctp_source(chunk), GFP_ATOMIC);
+ transport = sctp_transport_new(net, sctp_source(chunk), GFP_ATOMIC);
if (!transport)
goto nomem;
@@ -5919,7 +6024,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc
* the source address.
*/
sctp_transport_route(transport, (union sctp_addr *)&chunk->dest,
- sctp_sk(sctp_get_ctl_sock()));
+ sctp_sk(net->sctp.ctl_sock));
packet = sctp_packet_init(&transport->packet, transport, sport, dport);
packet = sctp_packet_config(packet, vtag, 0);
@@ -5937,7 +6042,8 @@ void sctp_ootb_pkt_free(struct sctp_packet *packet)
}
/* Send a stale cookie error when a invalid COOKIE ECHO chunk is found */
-static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
+static void sctp_send_stale_cookie_err(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands,
@@ -5946,7 +6052,7 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
struct sctp_packet *packet;
if (err_chunk) {
- packet = sctp_ootb_pkt_new(asoc, chunk);
+ packet = sctp_ootb_pkt_new(net, asoc, chunk);
if (packet) {
struct sctp_signed_cookie *cookie;
@@ -5959,7 +6065,7 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
sctp_packet_append_chunk(packet, err_chunk);
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
} else
sctp_chunk_free (err_chunk);
}
@@ -5979,6 +6085,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
__u32 tsn;
struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
u16 ssn;
u16 sid;
u8 ordered = 0;
@@ -6109,8 +6216,8 @@ static int sctp_eat_data(const struct sctp_association *asoc,
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_NO_DATA));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_IERROR_NO_DATA;
}
@@ -6120,9 +6227,9 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* if we renege and the chunk arrives again.
*/
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
- SCTP_INC_STATS(SCTP_MIB_INUNORDERCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_INUNORDERCHUNKS);
else {
- SCTP_INC_STATS(SCTP_MIB_INORDERCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS);
ordered = 1;
}
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 7c211a7f90f4..84d98d8a5a74 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -59,7 +59,8 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES];
static const sctp_sm_table_entry_t
timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid,
+static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
+ sctp_cid_t cid,
sctp_state_t state);
@@ -82,13 +83,14 @@ static const sctp_sm_table_entry_t bug = {
rtn; \
})
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
+const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
+ sctp_event_t event_type,
sctp_state_t state,
sctp_subtype_t event_subtype)
{
switch (event_type) {
case SCTP_EVENT_T_CHUNK:
- return sctp_chunk_event_lookup(event_subtype.chunk, state);
+ return sctp_chunk_event_lookup(net, event_subtype.chunk, state);
case SCTP_EVENT_T_TIMEOUT:
return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout,
timeout_event_table);
@@ -906,7 +908,8 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
};
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid,
+static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
+ sctp_cid_t cid,
sctp_state_t state)
{
if (state > SCTP_STATE_MAX)
@@ -915,12 +918,12 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid,
if (cid <= SCTP_CID_BASE_MAX)
return &chunk_event_table[cid][state];
- if (sctp_prsctp_enable) {
+ if (net->sctp.prsctp_enable) {
if (cid == SCTP_CID_FWD_TSN)
return &prsctp_chunk_event_table[0][state];
}
- if (sctp_addip_enable) {
+ if (net->sctp.addip_enable) {
if (cid == SCTP_CID_ASCONF)
return &addip_chunk_event_table[0][state];
@@ -928,7 +931,7 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid,
return &addip_chunk_event_table[1][state];
}
- if (sctp_auth_enable) {
+ if (net->sctp.auth_enable) {
if (cid == SCTP_CID_AUTH)
return &auth_chunk_event_table[0][state];
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 5e259817a7f3..d37d24ff197f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -427,6 +427,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
static int sctp_send_asconf(struct sctp_association *asoc,
struct sctp_chunk *chunk)
{
+ struct net *net = sock_net(asoc->base.sk);
int retval = 0;
/* If there is an outstanding ASCONF chunk, queue it for later
@@ -439,7 +440,7 @@ static int sctp_send_asconf(struct sctp_association *asoc,
/* Hold the chunk until an ASCONF_ACK is received. */
sctp_chunk_hold(chunk);
- retval = sctp_primitive_ASCONF(asoc, chunk);
+ retval = sctp_primitive_ASCONF(net, asoc, chunk);
if (retval)
sctp_chunk_free(chunk);
else
@@ -515,6 +516,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
struct sockaddr *addrs,
int addrcnt)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc;
@@ -529,7 +531,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
int i;
int retval = 0;
- if (!sctp_addip_enable)
+ if (!net->sctp.addip_enable)
return retval;
sp = sctp_sk(sk);
@@ -717,6 +719,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
struct sockaddr *addrs,
int addrcnt)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc;
@@ -732,7 +735,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
int stored = 0;
chunk = NULL;
- if (!sctp_addip_enable)
+ if (!net->sctp.addip_enable)
return retval;
sp = sctp_sk(sk);
@@ -1050,6 +1053,7 @@ static int __sctp_connect(struct sock* sk,
int addrs_size,
sctp_assoc_t *assoc_id)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc = NULL;
@@ -1200,7 +1204,7 @@ static int __sctp_connect(struct sock* sk,
goto out_free;
}
- err = sctp_primitive_ASSOCIATE(asoc, NULL);
+ err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
if (err < 0) {
goto out_free;
}
@@ -1458,6 +1462,7 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len,
*/
SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
{
+ struct net *net = sock_net(sk);
struct sctp_endpoint *ep;
struct sctp_association *asoc;
struct list_head *pos, *temp;
@@ -1499,9 +1504,9 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
chunk = sctp_make_abort_user(asoc, NULL, 0);
if (chunk)
- sctp_primitive_ABORT(asoc, chunk);
+ sctp_primitive_ABORT(net, asoc, chunk);
} else
- sctp_primitive_SHUTDOWN(asoc, NULL);
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
}
/* On a TCP-style socket, block for at most linger_time if set. */
@@ -1569,6 +1574,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t msg_len)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *new_asoc=NULL, *asoc=NULL;
@@ -1714,7 +1720,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
if (sinfo_flags & SCTP_EOF) {
SCTP_DEBUG_PRINTK("Shutting down association: %p\n",
asoc);
- sctp_primitive_SHUTDOWN(asoc, NULL);
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
err = 0;
goto out_unlock;
}
@@ -1727,7 +1733,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
}
SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc);
- sctp_primitive_ABORT(asoc, chunk);
+ sctp_primitive_ABORT(net, asoc, chunk);
err = 0;
goto out_unlock;
}
@@ -1900,7 +1906,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
/* Auto-connect, if we aren't connected already. */
if (sctp_state(asoc, CLOSED)) {
- err = sctp_primitive_ASSOCIATE(asoc, NULL);
+ err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
if (err < 0)
goto out_free;
SCTP_DEBUG_PRINTK("We associated primitively.\n");
@@ -1928,7 +1934,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
* works that way today. Keep it that way or this
* breaks.
*/
- err = sctp_primitive_SEND(asoc, datamsg);
+ err = sctp_primitive_SEND(net, asoc, datamsg);
/* Did the lower layer accept the chunk? */
if (err)
sctp_datamsg_free(datamsg);
@@ -2320,7 +2326,9 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
int error;
if (params->spp_flags & SPP_HB_DEMAND && trans) {
- error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans);
+ struct net *net = sock_net(trans->asoc->base.sk);
+
+ error = sctp_primitive_REQUESTHEARTBEAT(net, trans->asoc, trans);
if (error)
return error;
}
@@ -3033,6 +3041,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval,
unsigned int optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_association *asoc = NULL;
struct sctp_setpeerprim prim;
@@ -3042,7 +3051,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
sp = sctp_sk(sk);
- if (!sctp_addip_enable)
+ if (!net->sctp.addip_enable)
return -EPERM;
if (optlen != sizeof(struct sctp_setpeerprim))
@@ -3279,9 +3288,10 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authchunk val;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authchunk))
@@ -3311,11 +3321,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_hmacalgo *hmacs;
u32 idents;
int err;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (optlen < sizeof(struct sctp_hmacalgo))
@@ -3348,11 +3359,12 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authkey *authkey;
struct sctp_association *asoc;
int ret;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (optlen <= sizeof(struct sctp_authkey))
@@ -3389,10 +3401,11 @@ static int sctp_setsockopt_active_key(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authkeyid))
@@ -3417,10 +3430,11 @@ static int sctp_setsockopt_del_key(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authkeyid))
@@ -3471,7 +3485,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
sp->do_auto_asconf = 0;
} else if (val && !sp->do_auto_asconf) {
list_add_tail(&sp->auto_asconf_list,
- &sctp_auto_asconf_splist);
+ &sock_net(sk)->sctp.auto_asconf_splist);
sp->do_auto_asconf = 1;
}
return 0;
@@ -3843,6 +3857,7 @@ out:
*/
SCTP_STATIC int sctp_init_sock(struct sock *sk)
{
+ struct net *net = sock_net(sk);
struct sctp_endpoint *ep;
struct sctp_sock *sp;
@@ -3872,7 +3887,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
sp->default_timetolive = 0;
sp->default_rcv_context = 0;
- sp->max_burst = sctp_max_burst;
+ sp->max_burst = net->sctp.max_burst;
/* Initialize default setup parameters. These parameters
* can be modified with the SCTP_INITMSG socket option or
@@ -3880,24 +3895,24 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
*/
sp->initmsg.sinit_num_ostreams = sctp_max_outstreams;
sp->initmsg.sinit_max_instreams = sctp_max_instreams;
- sp->initmsg.sinit_max_attempts = sctp_max_retrans_init;
- sp->initmsg.sinit_max_init_timeo = sctp_rto_max;
+ sp->initmsg.sinit_max_attempts = net->sctp.max_retrans_init;
+ sp->initmsg.sinit_max_init_timeo = net->sctp.rto_max;
/* Initialize default RTO related parameters. These parameters can
* be modified for with the SCTP_RTOINFO socket option.
*/
- sp->rtoinfo.srto_initial = sctp_rto_initial;
- sp->rtoinfo.srto_max = sctp_rto_max;
- sp->rtoinfo.srto_min = sctp_rto_min;
+ sp->rtoinfo.srto_initial = net->sctp.rto_initial;
+ sp->rtoinfo.srto_max = net->sctp.rto_max;
+ sp->rtoinfo.srto_min = net->sctp.rto_min;
/* Initialize default association related parameters. These parameters
* can be modified with the SCTP_ASSOCINFO socket option.
*/
- sp->assocparams.sasoc_asocmaxrxt = sctp_max_retrans_association;
+ sp->assocparams.sasoc_asocmaxrxt = net->sctp.max_retrans_association;
sp->assocparams.sasoc_number_peer_destinations = 0;
sp->assocparams.sasoc_peer_rwnd = 0;
sp->assocparams.sasoc_local_rwnd = 0;
- sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life;
+ sp->assocparams.sasoc_cookie_life = net->sctp.valid_cookie_life;
/* Initialize default event subscriptions. By default, all the
* options are off.
@@ -3907,10 +3922,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
/* Default Peer Address Parameters. These defaults can
* be modified via SCTP_PEER_ADDR_PARAMS
*/
- sp->hbinterval = sctp_hb_interval;
- sp->pathmaxrxt = sctp_max_retrans_path;
+ sp->hbinterval = net->sctp.hb_interval;
+ sp->pathmaxrxt = net->sctp.max_retrans_path;
sp->pathmtu = 0; // allow default discovery
- sp->sackdelay = sctp_sack_timeout;
+ sp->sackdelay = net->sctp.sack_timeout;
sp->sackfreq = 2;
sp->param_flags = SPP_HB_ENABLE |
SPP_PMTUD_ENABLE |
@@ -3961,10 +3976,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
local_bh_disable();
percpu_counter_inc(&sctp_sockets_allocated);
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- if (sctp_default_auto_asconf) {
+ sock_prot_inuse_add(net, sk->sk_prot, 1);
+ if (net->sctp.default_auto_asconf) {
list_add_tail(&sp->auto_asconf_list,
- &sctp_auto_asconf_splist);
+ &net->sctp.auto_asconf_splist);
sp->do_auto_asconf = 1;
} else
sp->do_auto_asconf = 0;
@@ -4011,6 +4026,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk)
*/
SCTP_STATIC void sctp_shutdown(struct sock *sk, int how)
{
+ struct net *net = sock_net(sk);
struct sctp_endpoint *ep;
struct sctp_association *asoc;
@@ -4022,7 +4038,7 @@ SCTP_STATIC void sctp_shutdown(struct sock *sk, int how)
if (!list_empty(&ep->asocs)) {
asoc = list_entry(ep->asocs.next,
struct sctp_association, asocs);
- sctp_primitive_SHUTDOWN(asoc, NULL);
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
}
}
}
@@ -4653,9 +4669,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
union sctp_addr temp;
int cnt = 0;
int addrlen;
+ struct net *net = sock_net(sk);
rcu_read_lock();
- list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+ list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) {
if (!addr->valid)
continue;
@@ -5299,12 +5316,13 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_hmacalgo __user *p = (void __user *)optval;
struct sctp_hmac_algo_param *hmacs;
__u16 data_len = 0;
u32 num_idents;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
hmacs = sctp_sk(sk)->ep->auth_hmacs_list;
@@ -5328,10 +5346,11 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
static int sctp_getsockopt_active_key(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (len < sizeof(struct sctp_authkeyid))
@@ -5360,6 +5379,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authchunks __user *p = (void __user *)optval;
struct sctp_authchunks val;
struct sctp_association *asoc;
@@ -5367,7 +5387,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
u32 num_chunks = 0;
char __user *to;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (len < sizeof(struct sctp_authchunks))
@@ -5403,6 +5423,7 @@ num:
static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authchunks __user *p = (void __user *)optval;
struct sctp_authchunks val;
struct sctp_association *asoc;
@@ -5410,7 +5431,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
u32 num_chunks = 0;
char __user *to;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (len < sizeof(struct sctp_authchunks))
@@ -5769,7 +5790,7 @@ static void sctp_unhash(struct sock *sk)
* a fastreuse flag (FIXME: NPI ipg).
*/
static struct sctp_bind_bucket *sctp_bucket_create(
- struct sctp_bind_hashbucket *head, unsigned short snum);
+ struct sctp_bind_hashbucket *head, struct net *, unsigned short snum);
static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
{
@@ -5799,11 +5820,12 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
rover = low;
if (inet_is_reserved_local_port(rover))
continue;
- index = sctp_phashfn(rover);
+ index = sctp_phashfn(sock_net(sk), rover);
head = &sctp_port_hashtable[index];
sctp_spin_lock(&head->lock);
sctp_for_each_hentry(pp, node, &head->chain)
- if (pp->port == rover)
+ if ((pp->port == rover) &&
+ net_eq(sock_net(sk), pp->net))
goto next;
break;
next:
@@ -5827,10 +5849,10 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
* to the port number (snum) - we detect that with the
* port iterator, pp being NULL.
*/
- head = &sctp_port_hashtable[sctp_phashfn(snum)];
+ head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)];
sctp_spin_lock(&head->lock);
sctp_for_each_hentry(pp, node, &head->chain) {
- if (pp->port == snum)
+ if ((pp->port == snum) && net_eq(pp->net, sock_net(sk)))
goto pp_found;
}
}
@@ -5881,7 +5903,7 @@ pp_found:
pp_not_found:
/* If there was a hash table miss, create a new port. */
ret = 1;
- if (!pp && !(pp = sctp_bucket_create(head, snum)))
+ if (!pp && !(pp = sctp_bucket_create(head, sock_net(sk), snum)))
goto fail_unlock;
/* In either case (hit or miss), make sure fastreuse is 1 only
@@ -6113,7 +6135,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
********************************************************************/
static struct sctp_bind_bucket *sctp_bucket_create(
- struct sctp_bind_hashbucket *head, unsigned short snum)
+ struct sctp_bind_hashbucket *head, struct net *net, unsigned short snum)
{
struct sctp_bind_bucket *pp;
@@ -6123,6 +6145,7 @@ static struct sctp_bind_bucket *sctp_bucket_create(
pp->port = snum;
pp->fastreuse = 0;
INIT_HLIST_HEAD(&pp->owner);
+ pp->net = net;
hlist_add_head(&pp->node, &head->chain);
}
return pp;
@@ -6142,7 +6165,8 @@ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp)
static inline void __sctp_put_port(struct sock *sk)
{
struct sctp_bind_hashbucket *head =
- &sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->inet_num)];
+ &sctp_port_hashtable[sctp_phashfn(sock_net(sk),
+ inet_sk(sk)->inet_num)];
struct sctp_bind_bucket *pp;
sctp_spin_lock(&head->lock);
@@ -6809,7 +6833,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
newsp->hmac = NULL;
/* Hook this new socket in to the bind_hash list. */
- head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->inet_num)];
+ head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk),
+ inet_sk(oldsk)->inet_num)];
sctp_local_bh_disable();
sctp_spin_lock(&head->lock);
pp = sctp_sk(oldsk)->bind_hash;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 2b2bfe933ff1..70e3ba5cb50b 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -64,8 +64,34 @@ extern int sysctl_sctp_wmem[3];
static ctl_table sctp_table[] = {
{
+ .procname = "sctp_mem",
+ .data = &sysctl_sctp_mem,
+ .maxlen = sizeof(sysctl_sctp_mem),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax
+ },
+ {
+ .procname = "sctp_rmem",
+ .data = &sysctl_sctp_rmem,
+ .maxlen = sizeof(sysctl_sctp_rmem),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sctp_wmem",
+ .data = &sysctl_sctp_wmem,
+ .maxlen = sizeof(sysctl_sctp_wmem),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+
+ { /* sentinel */ }
+};
+
+static ctl_table sctp_net_table[] = {
+ {
.procname = "rto_initial",
- .data = &sctp_rto_initial,
+ .data = &init_net.sctp.rto_initial,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -74,7 +100,7 @@ static ctl_table sctp_table[] = {
},
{
.procname = "rto_min",
- .data = &sctp_rto_min,
+ .data = &init_net.sctp.rto_min,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -83,7 +109,7 @@ static ctl_table sctp_table[] = {
},
{
.procname = "rto_max",
- .data = &sctp_rto_max,
+ .data = &init_net.sctp.rto_max,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -91,17 +117,22 @@ static ctl_table sctp_table[] = {
.extra2 = &timer_max
},
{
- .procname = "valid_cookie_life",
- .data = &sctp_valid_cookie_life,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &timer_max
+ .procname = "rto_alpha_exp_divisor",
+ .data = &init_net.sctp.rto_alpha,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "rto_beta_exp_divisor",
+ .data = &init_net.sctp.rto_beta,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
},
{
.procname = "max_burst",
- .data = &sctp_max_burst,
+ .data = &init_net.sctp.max_burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -109,31 +140,42 @@ static ctl_table sctp_table[] = {
.extra2 = &int_max
},
{
- .procname = "association_max_retrans",
- .data = &sctp_max_retrans_association,
+ .procname = "cookie_preserve_enable",
+ .data = &init_net.sctp.cookie_preserve_enable,
.maxlen = sizeof(int),
.mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "valid_cookie_life",
+ .data = &init_net.sctp.valid_cookie_life,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &int_max
+ .extra1 = &one,
+ .extra2 = &timer_max
},
{
- .procname = "sndbuf_policy",
- .data = &sctp_sndbuf_policy,
+ .procname = "sack_timeout",
+ .data = &init_net.sctp.sack_timeout,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &sack_timer_min,
+ .extra2 = &sack_timer_max,
},
{
- .procname = "rcvbuf_policy",
- .data = &sctp_rcvbuf_policy,
- .maxlen = sizeof(int),
+ .procname = "hb_interval",
+ .data = &init_net.sctp.hb_interval,
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &timer_max
},
{
- .procname = "path_max_retrans",
- .data = &sctp_max_retrans_path,
+ .procname = "association_max_retrans",
+ .data = &init_net.sctp.max_retrans_association,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -141,17 +183,17 @@ static ctl_table sctp_table[] = {
.extra2 = &int_max
},
{
- .procname = "pf_retrans",
- .data = &sctp_pf_retrans,
+ .procname = "path_max_retrans",
+ .data = &init_net.sctp.max_retrans_path,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = &one,
.extra2 = &int_max
},
{
.procname = "max_init_retransmits",
- .data = &sctp_max_retrans_init,
+ .data = &init_net.sctp.max_retrans_init,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -159,103 +201,66 @@ static ctl_table sctp_table[] = {
.extra2 = &int_max
},
{
- .procname = "hb_interval",
- .data = &sctp_hb_interval,
- .maxlen = sizeof(unsigned int),
+ .procname = "pf_retrans",
+ .data = &init_net.sctp.pf_retrans,
+ .maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &timer_max
+ .extra1 = &zero,
+ .extra2 = &int_max
},
{
- .procname = "cookie_preserve_enable",
- .data = &sctp_cookie_preserve_enable,
+ .procname = "sndbuf_policy",
+ .data = &init_net.sctp.sndbuf_policy,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .procname = "rto_alpha_exp_divisor",
- .data = &sctp_rto_alpha,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "rto_beta_exp_divisor",
- .data = &sctp_rto_beta,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "addip_enable",
- .data = &sctp_addip_enable,
+ .procname = "rcvbuf_policy",
+ .data = &init_net.sctp.rcvbuf_policy,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "default_auto_asconf",
- .data = &sctp_default_auto_asconf,
+ .data = &init_net.sctp.default_auto_asconf,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .procname = "prsctp_enable",
- .data = &sctp_prsctp_enable,
+ .procname = "addip_enable",
+ .data = &init_net.sctp.addip_enable,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .procname = "sack_timeout",
- .data = &sctp_sack_timeout,
+ .procname = "addip_noauth_enable",
+ .data = &init_net.sctp.addip_noauth,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &sack_timer_min,
- .extra2 = &sack_timer_max,
- },
- {
- .procname = "sctp_mem",
- .data = &sysctl_sctp_mem,
- .maxlen = sizeof(sysctl_sctp_mem),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax
- },
- {
- .procname = "sctp_rmem",
- .data = &sysctl_sctp_rmem,
- .maxlen = sizeof(sysctl_sctp_rmem),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "sctp_wmem",
- .data = &sysctl_sctp_wmem,
- .maxlen = sizeof(sysctl_sctp_wmem),
- .mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .procname = "auth_enable",
- .data = &sctp_auth_enable,
+ .procname = "prsctp_enable",
+ .data = &init_net.sctp.prsctp_enable,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .procname = "addip_noauth_enable",
- .data = &sctp_addip_noauth,
+ .procname = "auth_enable",
+ .data = &init_net.sctp.auth_enable,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "addr_scope_policy",
- .data = &sctp_scope_policy,
+ .data = &init_net.sctp.scope_policy,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -264,7 +269,7 @@ static ctl_table sctp_table[] = {
},
{
.procname = "rwnd_update_shift",
- .data = &sctp_rwnd_upd_shift,
+ .data = &init_net.sctp.rwnd_upd_shift,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
@@ -273,7 +278,7 @@ static ctl_table sctp_table[] = {
},
{
.procname = "max_autoclose",
- .data = &sctp_max_autoclose,
+ .data = &init_net.sctp.max_autoclose,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = &proc_doulongvec_minmax,
@@ -284,6 +289,27 @@ static ctl_table sctp_table[] = {
{ /* sentinel */ }
};
+int sctp_sysctl_net_register(struct net *net)
+{
+ struct ctl_table *table;
+ int i;
+
+ table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ for (i = 0; table[i].data; i++)
+ table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+
+ net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table);
+ return 0;
+}
+
+void sctp_sysctl_net_unregister(struct net *net)
+{
+ unregister_net_sysctl_table(net->sctp.sysctl_header);
+}
+
static struct ctl_table_header * sctp_sysctl_header;
/* Sysctl registration. */
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index c97472b248a2..953c21e4af97 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -59,7 +59,8 @@
/* 1st Level Abstractions. */
/* Initialize a new transport from provided memory. */
-static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
+static struct sctp_transport *sctp_transport_init(struct net *net,
+ struct sctp_transport *peer,
const union sctp_addr *addr,
gfp_t gfp)
{
@@ -76,7 +77,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
* given destination transport address, set RTO to the protocol
* parameter 'RTO.Initial'.
*/
- peer->rto = msecs_to_jiffies(sctp_rto_initial);
+ peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
peer->last_time_heard = jiffies;
peer->last_time_ecne_reduced = jiffies;
@@ -86,8 +87,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
SPP_SACKDELAY_ENABLE;
/* Initialize the default path max_retrans. */
- peer->pathmaxrxt = sctp_max_retrans_path;
- peer->pf_retrans = sctp_pf_retrans;
+ peer->pathmaxrxt = net->sctp.max_retrans_path;
+ peer->pf_retrans = net->sctp.pf_retrans;
INIT_LIST_HEAD(&peer->transmitted);
INIT_LIST_HEAD(&peer->send_ready);
@@ -109,7 +110,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
}
/* Allocate and initialize a new transport. */
-struct sctp_transport *sctp_transport_new(const union sctp_addr *addr,
+struct sctp_transport *sctp_transport_new(struct net *net,
+ const union sctp_addr *addr,
gfp_t gfp)
{
struct sctp_transport *transport;
@@ -118,7 +120,7 @@ struct sctp_transport *sctp_transport_new(const union sctp_addr *addr,
if (!transport)
goto fail;
- if (!sctp_transport_init(transport, addr, gfp))
+ if (!sctp_transport_init(net, transport, addr, gfp))
goto fail_init;
transport->malloced = 1;
@@ -316,6 +318,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return);
if (tp->rttvar || tp->srtt) {
+ struct net *net = sock_net(tp->asoc->base.sk);
/* 6.3.1 C3) When a new RTT measurement R' is made, set
* RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|
* SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'
@@ -327,10 +330,10 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
* For example, assuming the default value of RTO.Alpha of
* 1/8, rto_alpha would be expressed as 3.
*/
- tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta)
- + ((abs(tp->srtt - rtt)) >> sctp_rto_beta);
- tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha)
- + (rtt >> sctp_rto_alpha);
+ tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta)
+ + ((abs(tp->srtt - rtt)) >> net->sctp.rto_beta);
+ tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha)
+ + (rtt >> net->sctp.rto_alpha);
} else {
/* 6.3.1 C2) When the first RTT measurement R is made, set
* SRTT <- R, RTTVAR <- R/2.
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 33d894776192..10c018a5b9fe 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -702,7 +702,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
if (rx_count >= asoc->base.sk->sk_rcvbuf) {
if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
- (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize)))
+ (!sk_rmem_schedule(asoc->base.sk, chunk->skb,
+ chunk->skb->truesize)))
goto fail;
}
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index f5a6a4f4faf7..360d8697b95c 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -326,7 +326,9 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
* payload was fragmented on the way and ip had to reassemble them.
* We add the rest of skb's to the first skb's fraglist.
*/
-static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag)
+static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net,
+ struct sk_buff_head *queue, struct sk_buff *f_frag,
+ struct sk_buff *l_frag)
{
struct sk_buff *pos;
struct sk_buff *new = NULL;
@@ -394,7 +396,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu
}
event = sctp_skb2event(f_frag);
- SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS);
+ SCTP_INC_STATS(net, SCTP_MIB_REASMUSRMSGS);
return event;
}
@@ -493,7 +495,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul
cevent = sctp_skb2event(pd_first);
pd_point = sctp_sk(asoc->base.sk)->pd_point;
if (pd_point && pd_point <= pd_len) {
- retval = sctp_make_reassembled_event(&ulpq->reasm,
+ retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ &ulpq->reasm,
pd_first,
pd_last);
if (retval)
@@ -503,7 +506,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul
done:
return retval;
found:
- retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos);
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm, first_frag, pos);
if (retval)
retval->msg_flags |= MSG_EOR;
goto done;
@@ -563,7 +567,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq)
* further.
*/
done:
- retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag);
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm, first_frag, last_frag);
if (retval && is_last)
retval->msg_flags |= MSG_EOR;
@@ -655,7 +660,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq)
* further.
*/
done:
- retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag);
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm, first_frag, last_frag);
return retval;
}
diff --git a/net/socket.c b/net/socket.c
index dfe5b66c97e0..a5471f804d99 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2657,6 +2657,7 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
return -EFAULT;
+ memset(&ifc, 0, sizeof(ifc));
if (ifc32.ifcbuf == 0) {
ifc32.ifc_len = 0;
ifc.ifc_len = 0;
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 9fe8857d8d59..03d03e37a7d5 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -21,6 +21,11 @@ config SUNRPC_XPRT_RDMA
If unsure, say N.
+config SUNRPC_SWAP
+ bool
+ depends on SUNRPC
+ select NETVM
+
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism"
depends on SUNRPC && CRYPTO
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 727e506cacda..b5c067bccc45 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -13,6 +13,7 @@
#include <linux/errno.h>
#include <linux/hash.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/gss_api.h>
#include <linux/spinlock.h>
#ifdef RPC_DEBUG
@@ -122,6 +123,59 @@ rpcauth_unregister(const struct rpc_authops *ops)
}
EXPORT_SYMBOL_GPL(rpcauth_unregister);
+/**
+ * rpcauth_list_flavors - discover registered flavors and pseudoflavors
+ * @array: array to fill in
+ * @size: size of "array"
+ *
+ * Returns the number of array items filled in, or a negative errno.
+ *
+ * The returned array is not sorted by any policy. Callers should not
+ * rely on the order of the items in the returned array.
+ */
+int
+rpcauth_list_flavors(rpc_authflavor_t *array, int size)
+{
+ rpc_authflavor_t flavor;
+ int result = 0;
+
+ spin_lock(&rpc_authflavor_lock);
+ for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) {
+ const struct rpc_authops *ops = auth_flavors[flavor];
+ rpc_authflavor_t pseudos[4];
+ int i, len;
+
+ if (result >= size) {
+ result = -ENOMEM;
+ break;
+ }
+
+ if (ops == NULL)
+ continue;
+ if (ops->list_pseudoflavors == NULL) {
+ array[result++] = ops->au_flavor;
+ continue;
+ }
+ len = ops->list_pseudoflavors(pseudos, ARRAY_SIZE(pseudos));
+ if (len < 0) {
+ result = len;
+ break;
+ }
+ for (i = 0; i < len; i++) {
+ if (result >= size) {
+ result = -ENOMEM;
+ break;
+ }
+ array[result++] = pseudos[i];
+ }
+ }
+ spin_unlock(&rpc_authflavor_lock);
+
+ dprintk("RPC: %s returns %d\n", __func__, result);
+ return result;
+}
+EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
+
struct rpc_auth *
rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
{
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index d3ad81f8da5b..34c522021004 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1619,6 +1619,7 @@ static const struct rpc_authops authgss_ops = {
.crcreate = gss_create_cred,
.pipes_create = gss_pipes_dentries_create,
.pipes_destroy = gss_pipes_dentries_destroy,
+ .list_pseudoflavors = gss_mech_list_pseudoflavors,
};
static const struct rpc_credops gss_credops = {
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 782bfe1b6465..b174fcd9ff4c 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -239,14 +239,28 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor);
-int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr)
+/**
+ * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors
+ * @array: array to fill in
+ * @size: size of "array"
+ *
+ * Returns the number of array items filled in, or a negative errno.
+ *
+ * The returned array is not sorted by any policy. Callers should not
+ * rely on the order of the items in the returned array.
+ */
+int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size)
{
struct gss_api_mech *pos = NULL;
int j, i = 0;
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
- for (j=0; j < pos->gm_pf_num; j++) {
+ for (j = 0; j < pos->gm_pf_num; j++) {
+ if (i >= size) {
+ spin_unlock(&registered_mechs_lock);
+ return -ENOMEM;
+ }
array_ptr[i++] = pos->gm_pfs[j].pseudoflavor;
}
}
@@ -254,8 +268,6 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr)
return i;
}
-EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors);
-
u32
gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
{
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 47ad2666fdf6..2afd2a84dc35 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1349,8 +1349,11 @@ static int c_show(struct seq_file *m, void *p)
if (cache_check(cd, cp, NULL))
/* cache_check does a cache_put on failure */
seq_printf(m, "# ");
- else
+ else {
+ if (cache_is_expired(cd, cp))
+ seq_printf(m, "# ");
cache_put(cp, cd);
+ }
return cd->cache_show(m, cd, cp);
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 00eb859b7de5..fa48c60aef23 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -717,6 +717,15 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
atomic_inc(&clnt->cl_count);
if (clnt->cl_softrtry)
task->tk_flags |= RPC_TASK_SOFT;
+ if (sk_memalloc_socks()) {
+ struct rpc_xprt *xprt;
+
+ rcu_read_lock();
+ xprt = rcu_dereference(clnt->cl_xprt);
+ if (xprt->swapper)
+ task->tk_flags |= RPC_TASK_SWAPPER;
+ rcu_read_unlock();
+ }
/* Add to the client's list of all tasks */
spin_lock(&clnt->cl_lock);
list_add_tail(&task->tk_task, &clnt->cl_tasks);
@@ -1844,12 +1853,13 @@ call_timeout(struct rpc_task *task)
return;
}
if (RPC_IS_SOFT(task)) {
- if (clnt->cl_chatty)
+ if (clnt->cl_chatty) {
rcu_read_lock();
printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
clnt->cl_protname,
rcu_dereference(clnt->cl_xprt)->servername);
rcu_read_unlock();
+ }
if (task->tk_flags & RPC_TASK_TIMEOUT)
rpc_exit(task, -ETIMEDOUT);
else
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 92509ffe15fc..a70acae496e4 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -251,7 +251,7 @@ static int rpcb_create_local_unix(struct net *net)
if (IS_ERR(clnt)) {
dprintk("RPC: failed to create AF_LOCAL rpcbind "
"client (errno %ld).\n", PTR_ERR(clnt));
- result = -PTR_ERR(clnt);
+ result = PTR_ERR(clnt);
goto out;
}
@@ -298,7 +298,7 @@ static int rpcb_create_local_net(struct net *net)
if (IS_ERR(clnt)) {
dprintk("RPC: failed to create local rpcbind "
"client (errno %ld).\n", PTR_ERR(clnt));
- result = -PTR_ERR(clnt);
+ result = PTR_ERR(clnt);
goto out;
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 994cfea2bad6..128494ec9a64 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -300,8 +300,9 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
/*
* Make an RPC task runnable.
*
- * Note: If the task is ASYNC, this must be called with
- * the spinlock held to protect the wait queue operation.
+ * Note: If the task is ASYNC, and is being made runnable after sitting on an
+ * rpc_wait_queue, this must be called with the queue spinlock held to protect
+ * the wait queue operation.
*/
static void rpc_make_runnable(struct rpc_task *task)
{
@@ -790,7 +791,9 @@ void rpc_execute(struct rpc_task *task)
static void rpc_async_schedule(struct work_struct *work)
{
+ current->flags |= PF_FSTRANS;
__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
+ current->flags &= ~PF_FSTRANS;
}
/**
@@ -812,7 +815,10 @@ static void rpc_async_schedule(struct work_struct *work)
void *rpc_malloc(struct rpc_task *task, size_t size)
{
struct rpc_buffer *buf;
- gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
+ gfp_t gfp = GFP_NOWAIT;
+
+ if (RPC_IS_SWAPPER(task))
+ gfp |= __GFP_MEMALLOC;
size += sizeof(struct rpc_buffer);
if (size <= RPC_BUFFER_MAXSIZE)
@@ -886,7 +892,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
static struct rpc_task *
rpc_alloc_task(void)
{
- return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
+ return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
}
/*
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 0cf165580d8d..0afba1b4b656 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -129,34 +129,6 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len)
EXPORT_SYMBOL_GPL(xdr_terminate_string);
void
-xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
- unsigned int len)
-{
- struct kvec *tail = xdr->tail;
- u32 *p;
-
- xdr->pages = pages;
- xdr->page_base = base;
- xdr->page_len = len;
-
- p = (u32 *)xdr->head[0].iov_base + XDR_QUADLEN(xdr->head[0].iov_len);
- tail->iov_base = p;
- tail->iov_len = 0;
-
- if (len & 3) {
- unsigned int pad = 4 - (len & 3);
-
- *p = 0;
- tail->iov_base = (char *)p + (len & 3);
- tail->iov_len = pad;
- len += pad;
- }
- xdr->buflen += len;
- xdr->len += len;
-}
-EXPORT_SYMBOL_GPL(xdr_encode_pages);
-
-void
xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
struct page **pages, unsigned int base, unsigned int len)
{
@@ -457,6 +429,16 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len)
EXPORT_SYMBOL_GPL(xdr_shift_buf);
/**
+ * xdr_stream_pos - Return the current offset from the start of the xdr_stream
+ * @xdr: pointer to struct xdr_stream
+ */
+unsigned int xdr_stream_pos(const struct xdr_stream *xdr)
+{
+ return (unsigned int)(XDR_QUADLEN(xdr->buf->len) - xdr->nwords) << 2;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_pos);
+
+/**
* xdr_init_encode - Initialize a struct xdr_stream for sending data.
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer in which to encode data
@@ -556,13 +538,11 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b
EXPORT_SYMBOL_GPL(xdr_write_pages);
static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov,
- __be32 *p, unsigned int len)
+ unsigned int len)
{
if (len > iov->iov_len)
len = iov->iov_len;
- if (p == NULL)
- p = (__be32*)iov->iov_base;
- xdr->p = p;
+ xdr->p = (__be32*)iov->iov_base;
xdr->end = (__be32*)(iov->iov_base + len);
xdr->iov = iov;
xdr->page_ptr = NULL;
@@ -609,7 +589,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr)
newbase -= xdr->buf->page_base;
if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+ xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len);
}
static bool xdr_set_next_buffer(struct xdr_stream *xdr)
@@ -618,7 +598,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr)
xdr_set_next_page(xdr);
else if (xdr->iov == xdr->buf->head) {
if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+ xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len);
}
return xdr->p != xdr->end;
}
@@ -634,10 +614,15 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
xdr->buf = buf;
xdr->scratch.iov_base = NULL;
xdr->scratch.iov_len = 0;
+ xdr->nwords = XDR_QUADLEN(buf->len);
if (buf->head[0].iov_len != 0)
- xdr_set_iov(xdr, buf->head, p, buf->len);
+ xdr_set_iov(xdr, buf->head, buf->len);
else if (buf->page_len != 0)
xdr_set_page_base(xdr, 0, buf->len);
+ if (p != NULL && p > xdr->p && xdr->end >= p) {
+ xdr->nwords -= p - xdr->p;
+ xdr->p = p;
+ }
}
EXPORT_SYMBOL_GPL(xdr_init_decode);
@@ -662,12 +647,14 @@ EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
+ unsigned int nwords = XDR_QUADLEN(nbytes);
__be32 *p = xdr->p;
- __be32 *q = p + XDR_QUADLEN(nbytes);
+ __be32 *q = p + nwords;
- if (unlikely(q > xdr->end || q < p))
+ if (unlikely(nwords > xdr->nwords || q > xdr->end || q < p))
return NULL;
xdr->p = q;
+ xdr->nwords -= nwords;
return p;
}
@@ -734,6 +721,31 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
}
EXPORT_SYMBOL_GPL(xdr_inline_decode);
+static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
+{
+ struct xdr_buf *buf = xdr->buf;
+ struct kvec *iov;
+ unsigned int nwords = XDR_QUADLEN(len);
+ unsigned int cur = xdr_stream_pos(xdr);
+
+ if (xdr->nwords == 0)
+ return 0;
+ if (nwords > xdr->nwords) {
+ nwords = xdr->nwords;
+ len = nwords << 2;
+ }
+ /* Realign pages to current pointer position */
+ iov = buf->head;
+ if (iov->iov_len > cur)
+ xdr_shrink_bufhead(buf, iov->iov_len - cur);
+
+ /* Truncate page data and move it into the tail */
+ if (buf->page_len > len)
+ xdr_shrink_pagelen(buf, buf->page_len - len);
+ xdr->nwords = XDR_QUADLEN(buf->len - cur);
+ return len;
+}
+
/**
* xdr_read_pages - Ensure page-based XDR data to decode is aligned at current pointer position
* @xdr: pointer to xdr_stream struct
@@ -742,39 +754,37 @@ EXPORT_SYMBOL_GPL(xdr_inline_decode);
* Moves data beyond the current pointer position from the XDR head[] buffer
* into the page list. Any data that lies beyond current position + "len"
* bytes is moved into the XDR tail[].
+ *
+ * Returns the number of XDR encoded bytes now contained in the pages
*/
-void xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
+unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
{
struct xdr_buf *buf = xdr->buf;
struct kvec *iov;
- ssize_t shift;
+ unsigned int nwords;
unsigned int end;
- int padding;
+ unsigned int padding;
- /* Realign pages to current pointer position */
- iov = buf->head;
- shift = iov->iov_len + (char *)iov->iov_base - (char *)xdr->p;
- if (shift > 0)
- xdr_shrink_bufhead(buf, shift);
-
- /* Truncate page data and move it into the tail */
- if (buf->page_len > len)
- xdr_shrink_pagelen(buf, buf->page_len - len);
- padding = (XDR_QUADLEN(len) << 2) - len;
+ len = xdr_align_pages(xdr, len);
+ if (len == 0)
+ return 0;
+ nwords = XDR_QUADLEN(len);
+ padding = (nwords << 2) - len;
xdr->iov = iov = buf->tail;
/* Compute remaining message length. */
- end = iov->iov_len;
- shift = buf->buflen - buf->len;
- if (shift < end)
- end -= shift;
- else if (shift > 0)
- end = 0;
+ end = ((xdr->nwords - nwords) << 2) + padding;
+ if (end > iov->iov_len)
+ end = iov->iov_len;
+
/*
* Position current pointer at beginning of tail, and
* set remaining message length.
*/
xdr->p = (__be32 *)((char *)iov->iov_base + padding);
xdr->end = (__be32 *)((char *)iov->iov_base + end);
+ xdr->page_ptr = NULL;
+ xdr->nwords = XDR_QUADLEN(end - padding);
+ return len;
}
EXPORT_SYMBOL_GPL(xdr_read_pages);
@@ -790,12 +800,13 @@ EXPORT_SYMBOL_GPL(xdr_read_pages);
*/
void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
{
- xdr_read_pages(xdr, len);
+ len = xdr_align_pages(xdr, len);
/*
* Position current pointer at beginning of tail, and
* set remaining message length.
*/
- xdr_set_page_base(xdr, 0, len);
+ if (len != 0)
+ xdr_set_page_base(xdr, 0, len);
}
EXPORT_SYMBOL_GPL(xdr_enter_page);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index b446e100286f..06cdbff79e4a 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -200,6 +200,7 @@ xprt_rdma_connect_worker(struct work_struct *work)
int rc = 0;
if (!xprt->shutdown) {
+ current->flags |= PF_FSTRANS;
xprt_clear_connected(xprt);
dprintk("RPC: %s: %sconnect\n", __func__,
@@ -212,10 +213,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
out:
xprt_wake_pending_tasks(xprt, rc);
-
out_clear:
dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt);
+ current->flags &= ~PF_FSTRANS;
}
/*
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 62d0dac8f780..400567243f84 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1892,6 +1892,8 @@ static void xs_local_setup_socket(struct work_struct *work)
if (xprt->shutdown)
goto out;
+ current->flags |= PF_FSTRANS;
+
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
status = __sock_create(xprt->xprt_net, AF_LOCAL,
SOCK_STREAM, 0, &sock, 1);
@@ -1925,7 +1927,47 @@ static void xs_local_setup_socket(struct work_struct *work)
out:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
+ current->flags &= ~PF_FSTRANS;
+}
+
+#ifdef CONFIG_SUNRPC_SWAP
+static void xs_set_memalloc(struct rpc_xprt *xprt)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
+ xprt);
+
+ if (xprt->swapper)
+ sk_set_memalloc(transport->inet);
+}
+
+/**
+ * xs_swapper - Tag this transport as being used for swap.
+ * @xprt: transport to tag
+ * @enable: enable/disable
+ *
+ */
+int xs_swapper(struct rpc_xprt *xprt, int enable)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
+ xprt);
+ int err = 0;
+
+ if (enable) {
+ xprt->swapper++;
+ xs_set_memalloc(xprt);
+ } else if (xprt->swapper) {
+ xprt->swapper--;
+ sk_clear_memalloc(transport->inet);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xs_swapper);
+#else
+static void xs_set_memalloc(struct rpc_xprt *xprt)
+{
}
+#endif
static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
@@ -1951,6 +1993,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
transport->sock = sock;
transport->inet = sk;
+ xs_set_memalloc(xprt);
+
write_unlock_bh(&sk->sk_callback_lock);
}
xs_udp_do_set_buffer_size(xprt);
@@ -1967,6 +2011,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
if (xprt->shutdown)
goto out;
+ current->flags |= PF_FSTRANS;
+
/* Start by resetting any existing state */
xs_reset_transport(transport);
sock = xs_create_sock(xprt, transport,
@@ -1985,6 +2031,7 @@ static void xs_udp_setup_socket(struct work_struct *work)
out:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
+ current->flags &= ~PF_FSTRANS;
}
/*
@@ -2075,6 +2122,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
if (!xprt_bound(xprt))
goto out;
+ xs_set_memalloc(xprt);
+
/* Tell the socket layer to start connecting... */
xprt->stat.connect_count++;
xprt->stat.connect_start = jiffies;
@@ -2110,6 +2159,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
if (xprt->shutdown)
goto out;
+ current->flags |= PF_FSTRANS;
+
if (!sock) {
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
sock = xs_create_sock(xprt, transport,
@@ -2159,6 +2210,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -EINPROGRESS:
case -EALREADY:
xprt_clear_connecting(xprt);
+ current->flags &= ~PF_FSTRANS;
return;
case -EINVAL:
/* Happens, for instance, if the user specified a link
@@ -2171,6 +2223,7 @@ out_eagain:
out:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
+ current->flags &= ~PF_FSTRANS;
}
/**
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 09e71241265d..4ec5c80e8a7c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -49,21 +49,6 @@ struct tipc_bearer tipc_bearers[MAX_BEARERS];
static void bearer_disable(struct tipc_bearer *b_ptr);
/**
- * media_name_valid - validate media name
- *
- * Returns 1 if media name is valid, otherwise 0.
- */
-static int media_name_valid(const char *name)
-{
- u32 len;
-
- len = strlen(name);
- if ((len + 1) > TIPC_MAX_MEDIA_NAME)
- return 0;
- return strspn(name, tipc_alphabet) == len;
-}
-
-/**
* tipc_media_find - locates specified media object by name
*/
struct tipc_media *tipc_media_find(const char *name)
@@ -102,7 +87,7 @@ int tipc_register_media(struct tipc_media *m_ptr)
write_lock_bh(&tipc_net_lock);
- if (!media_name_valid(m_ptr->name))
+ if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME)
goto exit;
if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) ||
!m_ptr->bcast_addr.broadcast)
@@ -206,9 +191,7 @@ static int bearer_name_validate(const char *name,
/* validate component parts of bearer name */
if ((media_len <= 1) || (media_len > TIPC_MAX_MEDIA_NAME) ||
- (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME) ||
- (strspn(media_name, tipc_alphabet) != (media_len - 1)) ||
- (strspn(if_name, tipc_alphabet) != (if_len - 1)))
+ (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME))
return 0;
/* return bearer name components, if necessary */
diff --git a/net/tipc/config.c b/net/tipc/config.c
index a056a3852f71..f67866c765dd 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -2,7 +2,7 @@
* net/tipc/config.c: TIPC configuration management code
*
* Copyright (c) 2002-2006, Ericsson AB
- * Copyright (c) 2004-2007, 2010-2011, Wind River Systems
+ * Copyright (c) 2004-2007, 2010-2012, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -208,36 +208,6 @@ static struct sk_buff *cfg_set_remote_mng(void)
return tipc_cfg_reply_none();
}
-static struct sk_buff *cfg_set_max_publications(void)
-{
- u32 value;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
- value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
- if (value < 1 || value > 65535)
- return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
- " (max publications must be 1-65535)");
- tipc_max_publications = value;
- return tipc_cfg_reply_none();
-}
-
-static struct sk_buff *cfg_set_max_subscriptions(void)
-{
- u32 value;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
- value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
- if (value < 1 || value > 65535)
- return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
- " (max subscriptions must be 1-65535");
- tipc_max_subscriptions = value;
- return tipc_cfg_reply_none();
-}
-
static struct sk_buff *cfg_set_max_ports(void)
{
u32 value;
@@ -357,12 +327,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
case TIPC_CMD_SET_MAX_PORTS:
rep_tlv_buf = cfg_set_max_ports();
break;
- case TIPC_CMD_SET_MAX_PUBL:
- rep_tlv_buf = cfg_set_max_publications();
- break;
- case TIPC_CMD_SET_MAX_SUBSCR:
- rep_tlv_buf = cfg_set_max_subscriptions();
- break;
case TIPC_CMD_SET_NETID:
rep_tlv_buf = cfg_set_netid();
break;
@@ -372,12 +336,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
case TIPC_CMD_GET_MAX_PORTS:
rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports);
break;
- case TIPC_CMD_GET_MAX_PUBL:
- rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_publications);
- break;
- case TIPC_CMD_GET_MAX_SUBSCR:
- rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions);
- break;
case TIPC_CMD_GET_NETID:
rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id);
break;
@@ -393,6 +351,10 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
case TIPC_CMD_GET_MAX_CLUSTERS:
case TIPC_CMD_SET_MAX_NODES:
case TIPC_CMD_GET_MAX_NODES:
+ case TIPC_CMD_SET_MAX_SUBSCR:
+ case TIPC_CMD_GET_MAX_SUBSCR:
+ case TIPC_CMD_SET_MAX_PUBL:
+ case TIPC_CMD_GET_MAX_PUBL:
case TIPC_CMD_SET_LOG_SIZE:
case TIPC_CMD_DUMP_LOG:
rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 6586eac6a50e..bfe8af88469a 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -48,18 +48,13 @@
/* global variables used by multiple sub-systems within TIPC */
-int tipc_random;
-
-const char tipc_alphabet[] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.";
+int tipc_random __read_mostly;
/* configurable TIPC parameters */
-u32 tipc_own_addr;
-int tipc_max_ports;
-int tipc_max_subscriptions;
-int tipc_max_publications;
-int tipc_net_id;
-int tipc_remote_management;
+u32 tipc_own_addr __read_mostly;
+int tipc_max_ports __read_mostly;
+int tipc_net_id __read_mostly;
+int tipc_remote_management __read_mostly;
/**
@@ -101,9 +96,8 @@ int tipc_core_start_net(unsigned long addr)
{
int res;
- res = tipc_net_start(addr);
- if (!res)
- res = tipc_eth_media_start();
+ tipc_net_start(addr);
+ res = tipc_eth_media_start();
if (res)
tipc_core_stop_net();
return res;
@@ -160,8 +154,6 @@ static int __init tipc_init(void)
tipc_own_addr = 0;
tipc_remote_management = 1;
- tipc_max_publications = 10000;
- tipc_max_subscriptions = 2000;
tipc_max_ports = CONFIG_TIPC_PORTS;
tipc_net_id = 4711;
diff --git a/net/tipc/core.h b/net/tipc/core.h
index fd42e106c185..0207db04179a 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -60,7 +60,9 @@
#define TIPC_MOD_VER "2.0.0"
-#define ULTRA_STRING_MAX_LEN 32768
+#define ULTRA_STRING_MAX_LEN 32768
+#define TIPC_MAX_SUBSCRIPTIONS 65535
+#define TIPC_MAX_PUBLICATIONS 65535
struct tipc_msg; /* msg.h */
@@ -74,19 +76,15 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...);
/*
* Global configuration variables
*/
-extern u32 tipc_own_addr;
-extern int tipc_max_ports;
-extern int tipc_max_subscriptions;
-extern int tipc_max_publications;
-extern int tipc_net_id;
-extern int tipc_remote_management;
+extern u32 tipc_own_addr __read_mostly;
+extern int tipc_max_ports __read_mostly;
+extern int tipc_net_id __read_mostly;
+extern int tipc_remote_management __read_mostly;
/*
* Other global variables
*/
-extern int tipc_random;
-extern const char tipc_alphabet[];
-
+extern int tipc_random __read_mostly;
/*
* Routines available to privileged subsystems
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 90ac9bfa7abb..2132c1ef2951 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -46,19 +46,30 @@
* @bearer: ptr to associated "generic" bearer structure
* @dev: ptr to associated Ethernet network device
* @tipc_packet_type: used in binding TIPC to Ethernet driver
+ * @setup: work item used when enabling bearer
* @cleanup: work item used when disabling bearer
*/
struct eth_bearer {
struct tipc_bearer *bearer;
struct net_device *dev;
struct packet_type tipc_packet_type;
+ struct work_struct setup;
struct work_struct cleanup;
};
static struct tipc_media eth_media_info;
static struct eth_bearer eth_bearers[MAX_ETH_BEARERS];
static int eth_started;
-static struct notifier_block notifier;
+
+static int recv_notification(struct notifier_block *nb, unsigned long evt,
+ void *dv);
+/*
+ * Network device notifier info
+ */
+static struct notifier_block notifier = {
+ .notifier_call = recv_notification,
+ .priority = 0
+};
/**
* eth_media_addr_set - initialize Ethernet media address structure
@@ -134,6 +145,17 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
}
/**
+ * setup_bearer - setup association between Ethernet bearer and interface
+ */
+static void setup_bearer(struct work_struct *work)
+{
+ struct eth_bearer *eb_ptr =
+ container_of(work, struct eth_bearer, setup);
+
+ dev_add_pack(&eb_ptr->tipc_packet_type);
+}
+
+/**
* enable_bearer - attach TIPC bearer to an Ethernet interface
*/
static int enable_bearer(struct tipc_bearer *tb_ptr)
@@ -173,7 +195,8 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
eb_ptr->tipc_packet_type.func = recv_msg;
eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list));
- dev_add_pack(&eb_ptr->tipc_packet_type);
+ INIT_WORK(&eb_ptr->setup, setup_bearer);
+ schedule_work(&eb_ptr->setup);
/* Associate TIPC bearer with Ethernet bearer */
eb_ptr->bearer = tb_ptr;
@@ -357,8 +380,6 @@ int tipc_eth_media_start(void)
if (res)
return res;
- notifier.notifier_call = &recv_notification;
- notifier.priority = 0;
res = register_netdevice_notifier(&notifier);
if (!res)
eth_started = 1;
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
index 7a52d3922f3c..111ff8300ae5 100644
--- a/net/tipc/handler.c
+++ b/net/tipc/handler.c
@@ -45,7 +45,7 @@ struct queue_item {
static struct kmem_cache *tipc_queue_item_cache;
static struct list_head signal_queue_head;
static DEFINE_SPINLOCK(qitem_lock);
-static int handler_enabled;
+static int handler_enabled __read_mostly;
static void process_signal_queue(unsigned long dummy);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1c1e6151875e..a79c755cb417 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -210,9 +210,7 @@ static int link_name_validate(const char *name,
(z_local > 255) || (c_local > 4095) || (n_local > 4095) ||
(z_peer > 255) || (c_peer > 4095) || (n_peer > 4095) ||
(if_local_len <= 1) || (if_local_len > TIPC_MAX_IF_NAME) ||
- (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME) ||
- (strspn(if_local, tipc_alphabet) != (if_local_len - 1)) ||
- (strspn(if_peer, tipc_alphabet) != (if_peer_len - 1)))
+ (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME))
return 0;
/* return link name components, if necessary */
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 360c478b0b53..98975e80bb51 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -41,7 +41,7 @@
#include "subscr.h"
#include "port.h"
-static int tipc_nametbl_size = 1024; /* must be a power of 2 */
+#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
/**
* struct name_info - name sequence publication info
@@ -114,7 +114,7 @@ DEFINE_RWLOCK(tipc_nametbl_lock);
static int hash(int x)
{
- return x & (tipc_nametbl_size - 1);
+ return x & (TIPC_NAMETBL_SIZE - 1);
}
/**
@@ -667,9 +667,9 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
{
struct publication *publ;
- if (table.local_publ_count >= tipc_max_publications) {
+ if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) {
pr_warn("Publication failed, local publication limit reached (%u)\n",
- tipc_max_publications);
+ TIPC_MAX_PUBLICATIONS);
return NULL;
}
@@ -871,7 +871,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
ret += nametbl_header(buf, len, depth);
lowbound = 0;
upbound = ~0;
- for (i = 0; i < tipc_nametbl_size; i++) {
+ for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
seq_head = &table.types[i];
hlist_for_each_entry(seq, seq_node, seq_head, ns_list) {
ret += nameseq_list(seq, buf + ret, len - ret,
@@ -935,7 +935,7 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
int tipc_nametbl_init(void)
{
- table.types = kcalloc(tipc_nametbl_size, sizeof(struct hlist_head),
+ table.types = kcalloc(TIPC_NAMETBL_SIZE, sizeof(struct hlist_head),
GFP_ATOMIC);
if (!table.types)
return -ENOMEM;
@@ -953,7 +953,7 @@ void tipc_nametbl_stop(void)
/* Verify name table is empty, then release it */
write_lock_bh(&tipc_nametbl_lock);
- for (i = 0; i < tipc_nametbl_size; i++) {
+ for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
if (hlist_empty(&table.types[i]))
continue;
pr_err("nametbl_stop(): orphaned hash chain detected\n");
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 5b5cea259caf..7d305ecc09c2 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -171,7 +171,7 @@ void tipc_net_route_msg(struct sk_buff *buf)
tipc_link_send(buf, dnode, msg_link_selector(msg));
}
-int tipc_net_start(u32 addr)
+void tipc_net_start(u32 addr)
{
char addr_string[16];
@@ -187,7 +187,6 @@ int tipc_net_start(u32 addr)
pr_info("Started in network mode\n");
pr_info("Own node address %s, network identity %u\n",
tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
- return 0;
}
void tipc_net_stop(void)
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 9eb4b9e220eb..079daadb3f72 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -41,7 +41,7 @@ extern rwlock_t tipc_net_lock;
void tipc_net_route_msg(struct sk_buff *buf);
-int tipc_net_start(u32 addr);
+void tipc_net_start(u32 addr);
void tipc_net_stop(void);
#endif
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 5ed5965eb0be..0f7d0d007e22 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -304,9 +304,9 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s,
}
/* Refuse subscription if global limit exceeded */
- if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) {
+ if (atomic_read(&topsrv.subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
pr_warn("Subscription rejected, limit reached (%u)\n",
- tipc_max_subscriptions);
+ TIPC_MAX_SUBSCRIPTIONS);
subscr_terminate(subscriber);
return NULL;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 79981d97bc9c..c5ee4ff61364 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -823,6 +823,34 @@ fail:
return NULL;
}
+static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+{
+ struct dentry *dentry;
+ struct path path;
+ int err = 0;
+ /*
+ * Get the parent directory, calculate the hash for last
+ * component.
+ */
+ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ return err;
+
+ /*
+ * All right, let's create it.
+ */
+ err = security_path_mknod(&path, dentry, mode, 0);
+ if (!err) {
+ err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
+ if (!err) {
+ res->mnt = mntget(path.mnt);
+ res->dentry = dget(dentry);
+ }
+ }
+ done_path_create(&path, dentry);
+ return err;
+}
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
@@ -831,8 +859,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
- struct dentry *dentry = NULL;
- struct path path;
int err;
unsigned int hash;
struct unix_address *addr;
@@ -869,43 +895,23 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
atomic_set(&addr->refcnt, 1);
if (sun_path[0]) {
- umode_t mode;
- err = 0;
- /*
- * Get the parent directory, calculate the hash for last
- * component.
- */
- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- goto out_mknod_parent;
-
- /*
- * All right, let's create it.
- */
- mode = S_IFSOCK |
+ struct path path;
+ umode_t mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current_umask());
- err = mnt_want_write(path.mnt);
- if (err)
- goto out_mknod_dput;
- err = security_path_mknod(&path, dentry, mode, 0);
- if (err)
- goto out_mknod_drop_write;
- err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
-out_mknod_drop_write:
- mnt_drop_write(path.mnt);
- if (err)
- goto out_mknod_dput;
- mutex_unlock(&path.dentry->d_inode->i_mutex);
- dput(path.dentry);
- path.dentry = dentry;
-
+ err = unix_mknod(sun_path, mode, &path);
+ if (err) {
+ if (err == -EEXIST)
+ err = -EADDRINUSE;
+ unix_release_addr(addr);
+ goto out_up;
+ }
addr->hash = UNIX_HASH_SIZE;
- }
-
- spin_lock(&unix_table_lock);
-
- if (!sun_path[0]) {
+ hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
+ spin_lock(&unix_table_lock);
+ u->path = path;
+ list = &unix_socket_table[hash];
+ } else {
+ spin_lock(&unix_table_lock);
err = -EADDRINUSE;
if (__unix_find_socket_byname(net, sunaddr, addr_len,
sk->sk_type, hash)) {
@@ -914,9 +920,6 @@ out_mknod_drop_write:
}
list = &unix_socket_table[addr->hash];
- } else {
- list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
- u->path = path;
}
err = 0;
@@ -930,16 +933,6 @@ out_up:
mutex_unlock(&u->readlock);
out:
return err;
-
-out_mknod_dput:
- dput(dentry);
- mutex_unlock(&path.dentry->d_inode->i_mutex);
- path_put(&path);
-out_mknod_parent:
- if (err == -EEXIST)
- err = -EADDRINUSE;
- unix_release_addr(addr);
- goto out_up;
}
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
@@ -1457,7 +1450,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
wait_for_unix_gc();
- err = scm_send(sock, msg, siocb->scm);
+ err = scm_send(sock, msg, siocb->scm, false);
if (err < 0)
return err;
@@ -1626,7 +1619,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
wait_for_unix_gc();
- err = scm_send(sock, msg, siocb->scm);
+ err = scm_send(sock, msg, siocb->scm, false);
if (err < 0)
return err;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 31b40cc4a9c3..dcd64d5b07aa 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -952,6 +952,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
*/
synchronize_rcu();
INIT_LIST_HEAD(&wdev->list);
+ /*
+ * Ensure that all events have been processed and
+ * freed.
+ */
+ cfg80211_process_wdev_events(wdev);
break;
case NETDEV_PRE_UP:
if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)))
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 5206c6844fd7..bc7430b54771 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -426,6 +426,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
struct net_device *dev, enum nl80211_iftype ntype,
u32 *flags, struct vif_params *params);
void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
+void cfg80211_process_wdev_events(struct wireless_dev *wdev);
int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2303ee73b50a..2ded3c7fad06 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -680,6 +680,8 @@ static u32 map_regdom_flags(u32 rd_flags)
channel_flags |= IEEE80211_CHAN_NO_IBSS;
if (rd_flags & NL80211_RRF_DFS)
channel_flags |= IEEE80211_CHAN_RADAR;
+ if (rd_flags & NL80211_RRF_NO_OFDM)
+ channel_flags |= IEEE80211_CHAN_NO_OFDM;
return channel_flags;
}
@@ -901,7 +903,21 @@ static void handle_channel(struct wiphy *wiphy,
chan->max_antenna_gain = min(chan->orig_mag,
(int) MBI_TO_DBI(power_rule->max_antenna_gain));
chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp);
- chan->max_power = min(chan->max_power, chan->max_reg_power);
+ if (chan->orig_mpwr) {
+ /*
+ * Devices that have their own custom regulatory domain
+ * but also use WIPHY_FLAG_STRICT_REGULATORY will follow the
+ * passed country IE power settings.
+ */
+ if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE &&
+ wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY &&
+ wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY)
+ chan->max_power = chan->max_reg_power;
+ else
+ chan->max_power = min(chan->orig_mpwr,
+ chan->max_reg_power);
+ } else
+ chan->max_power = chan->max_reg_power;
}
static void handle_band(struct wiphy *wiphy,
@@ -1885,6 +1901,7 @@ static void restore_custom_reg_settings(struct wiphy *wiphy)
chan->flags = chan->orig_flags;
chan->max_antenna_gain = chan->orig_mag;
chan->max_power = chan->orig_mpwr;
+ chan->beacon_found = false;
}
}
}
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 26f8cd30f712..994e2f0cc7a8 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -735,7 +735,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
wdev->connect_keys = NULL;
}
-static void cfg80211_process_wdev_events(struct wireless_dev *wdev)
+void cfg80211_process_wdev_events(struct wireless_dev *wdev)
{
struct cfg80211_event *ev;
unsigned long flags;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c5a5165a5927..2ed698c190b5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -42,13 +42,14 @@ static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
static struct dst_entry *xfrm_policy_sk_bundles;
static DEFINE_RWLOCK(xfrm_policy_lock);
-static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
-static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
+static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
+static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
+ __read_mostly;
static struct kmem_cache *xfrm_dst_cache __read_mostly;
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
-static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
+static inline void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
@@ -1357,6 +1358,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
xdst->flo.ops = &xfrm_bundle_fc_ops;
+ if (afinfo->init_dst)
+ afinfo->init_dst(net, xdst);
} else
xdst = ERR_PTR(-ENOBUFS);
@@ -2418,7 +2421,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
return -EINVAL;
if (unlikely(afinfo->family >= NPROTO))
return -EAFNOSUPPORT;
- write_lock_bh(&xfrm_policy_afinfo_lock);
+ spin_lock_bh(&xfrm_policy_afinfo_lock);
if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
err = -ENOBUFS;
else {
@@ -2439,9 +2442,9 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
dst_ops->neigh_lookup = xfrm_neigh_lookup;
if (likely(afinfo->garbage_collect == NULL))
afinfo->garbage_collect = xfrm_garbage_collect_deferred;
- xfrm_policy_afinfo[afinfo->family] = afinfo;
+ rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
}
- write_unlock_bh(&xfrm_policy_afinfo_lock);
+ spin_unlock_bh(&xfrm_policy_afinfo_lock);
rtnl_lock();
for_each_net(net) {
@@ -2474,13 +2477,14 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
return -EINVAL;
if (unlikely(afinfo->family >= NPROTO))
return -EAFNOSUPPORT;
- write_lock_bh(&xfrm_policy_afinfo_lock);
+ spin_lock_bh(&xfrm_policy_afinfo_lock);
if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
err = -EINVAL;
else {
struct dst_ops *dst_ops = afinfo->dst_ops;
- xfrm_policy_afinfo[afinfo->family] = NULL;
+ rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family],
+ NULL);
dst_ops->kmem_cachep = NULL;
dst_ops->check = NULL;
dst_ops->negative_advice = NULL;
@@ -2488,7 +2492,8 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
afinfo->garbage_collect = NULL;
}
}
- write_unlock_bh(&xfrm_policy_afinfo_lock);
+ spin_unlock_bh(&xfrm_policy_afinfo_lock);
+ synchronize_rcu();
return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
@@ -2497,16 +2502,16 @@ static void __net_init xfrm_dst_ops_init(struct net *net)
{
struct xfrm_policy_afinfo *afinfo;
- read_lock_bh(&xfrm_policy_afinfo_lock);
- afinfo = xfrm_policy_afinfo[AF_INET];
+ rcu_read_lock_bh();
+ afinfo = rcu_dereference_bh(xfrm_policy_afinfo[AF_INET]);
if (afinfo)
net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
#if IS_ENABLED(CONFIG_IPV6)
- afinfo = xfrm_policy_afinfo[AF_INET6];
+ afinfo = rcu_dereference_bh(xfrm_policy_afinfo[AF_INET6]);
if (afinfo)
net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
#endif
- read_unlock_bh(&xfrm_policy_afinfo_lock);
+ rcu_read_unlock_bh();
}
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
@@ -2514,16 +2519,16 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
struct xfrm_policy_afinfo *afinfo;
if (unlikely(family >= NPROTO))
return NULL;
- read_lock(&xfrm_policy_afinfo_lock);
- afinfo = xfrm_policy_afinfo[family];
+ rcu_read_lock();
+ afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
if (unlikely(!afinfo))
- read_unlock(&xfrm_policy_afinfo_lock);
+ rcu_read_unlock();
return afinfo;
}
-static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
+static inline void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
- read_unlock(&xfrm_policy_afinfo_lock);
+ rcu_read_unlock();
}
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5b228f97d4b3..7856c33898fa 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -415,8 +415,17 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)
if (x->lft.hard_add_expires_seconds) {
long tmo = x->lft.hard_add_expires_seconds +
x->curlft.add_time - now;
- if (tmo <= 0)
- goto expired;
+ if (tmo <= 0) {
+ if (x->xflags & XFRM_SOFT_EXPIRE) {
+ /* enter hard expire without soft expire first?!
+ * setting a new date could trigger this.
+ * workarbound: fix x->curflt.add_time by below:
+ */
+ x->curlft.add_time = now - x->saved_tmo - 1;
+ tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
+ } else
+ goto expired;
+ }
if (tmo < next)
next = tmo;
}
@@ -433,10 +442,14 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)
if (x->lft.soft_add_expires_seconds) {
long tmo = x->lft.soft_add_expires_seconds +
x->curlft.add_time - now;
- if (tmo <= 0)
+ if (tmo <= 0) {
warn = 1;
- else if (tmo < next)
+ x->xflags &= ~XFRM_SOFT_EXPIRE;
+ } else if (tmo < next) {
next = tmo;
+ x->xflags |= XFRM_SOFT_EXPIRE;
+ x->saved_tmo = tmo;
+ }
}
if (x->lft.soft_use_expires_seconds) {
long tmo = x->lft.soft_use_expires_seconds +
@@ -1687,7 +1700,7 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
read_lock(&xfrm_km_lock);
list_for_each_entry(km, &xfrm_km_list, list) {
- acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
+ acqret = km->acquire(x, t, pol);
if (!acqret)
err = acqret;
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e75d8e47f35c..ab58034c42d6 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2567,8 +2567,7 @@ static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x,
}
static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
- struct xfrm_tmpl *xt, struct xfrm_policy *xp,
- int dir)
+ struct xfrm_tmpl *xt, struct xfrm_policy *xp)
{
__u32 seq = xfrm_get_acqseq();
struct xfrm_user_acquire *ua;
@@ -2583,7 +2582,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
memcpy(&ua->id, &x->id, sizeof(ua->id));
memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr));
memcpy(&ua->sel, &x->sel, sizeof(ua->sel));
- copy_to_user_policy(xp, &ua->policy, dir);
+ copy_to_user_policy(xp, &ua->policy, XFRM_POLICY_OUT);
ua->aalgos = xt->aalgos;
ua->ealgos = xt->ealgos;
ua->calgos = xt->calgos;
@@ -2605,7 +2604,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
}
static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
- struct xfrm_policy *xp, int dir)
+ struct xfrm_policy *xp)
{
struct net *net = xs_net(x);
struct sk_buff *skb;
@@ -2614,7 +2613,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
if (skb == NULL)
return -ENOMEM;
- if (build_acquire(skb, x, xt, xp, dir) < 0)
+ if (build_acquire(skb, x, xt, xp) < 0)
BUG();
return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);