Diffstat (limited to 'net')
166 files changed, 6056 insertions, 2403 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 73a2a83ee2da..402442402af7 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -137,9 +137,21 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, return rc; } +static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb) +{ +#ifdef CONFIG_NET_POLL_CONTROLLER + if (vlan->netpoll) + netpoll_send_skb(vlan->netpoll, skb); +#else + BUG(); +#endif + return NETDEV_TX_OK; +} + static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); unsigned int len; int ret; @@ -150,29 +162,30 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... */ if (veth->h_vlan_proto != htons(ETH_P_8021Q) || - vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) { + vlan->flags & VLAN_FLAG_REORDER_HDR) { u16 vlan_tci; - vlan_tci = vlan_dev_priv(dev)->vlan_id; + vlan_tci = vlan->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); skb = __vlan_hwaccel_put_tag(skb, vlan_tci); } - skb->dev = vlan_dev_priv(dev)->real_dev; + skb->dev = vlan->real_dev; len = skb->len; - if (netpoll_tx_running(dev)) - return skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev); + if (unlikely(netpoll_tx_running(dev))) + return vlan_netpoll_send_skb(vlan, skb); + ret = dev_queue_xmit(skb); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct vlan_pcpu_stats *stats; - stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats); + stats = this_cpu_ptr(vlan->vlan_pcpu_stats); u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += len; u64_stats_update_end(&stats->syncp); } else { - this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped); + this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped); } return ret; @@ -669,25 +682,26 @@ static void vlan_dev_poll_controller(struct net_device *dev) return; } -static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) +static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo, + gfp_t gfp) { - struct vlan_dev_priv *info = vlan_dev_priv(dev); - struct net_device *real_dev = info->real_dev; + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + struct net_device *real_dev = vlan->real_dev; struct netpoll *netpoll; int err = 0; - netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); + netpoll = kzalloc(sizeof(*netpoll), gfp); err = -ENOMEM; if (!netpoll) goto out; - err = __netpoll_setup(netpoll, real_dev); + err = __netpoll_setup(netpoll, real_dev, gfp); if (err) { kfree(netpoll); goto out; } - info->netpoll = netpoll; + vlan->netpoll = netpoll; out: return err; @@ -695,19 +709,15 @@ out: static void vlan_dev_netpoll_cleanup(struct net_device *dev) { - struct vlan_dev_priv *info = vlan_dev_priv(dev); - struct netpoll *netpoll = info->netpoll; + struct vlan_dev_priv *vlan= vlan_dev_priv(dev); + struct netpoll *netpoll = vlan->netpoll; if (!netpoll) return; - info->netpoll = NULL; - - /* Wait for transmitting packets to finish before freeing. 
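The vlan_dev_netpoll_cleanup() hunk at this point swaps the blocking synchronize_rcu_bh() + __netpoll_cleanup() + kfree() sequence for __netpoll_free_rcu(), which defers the free until after the grace period instead of waiting for it in the caller. A minimal sketch of the underlying call_rcu pattern, with illustrative names rather than the actual netpoll internals:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		struct rcu_head rcu;
		int payload;
	};

	static struct foo __rcu *global_foo;

	static void foo_free_rcu(struct rcu_head *head)
	{
		/* Runs after a grace period: no rcu_read_lock_bh() reader
		 * can still hold a pointer obtained via global_foo. */
		kfree(container_of(head, struct foo, rcu));
	}

	static void foo_release(void)
	{
		struct foo *f = rcu_dereference_protected(global_foo, 1);

		RCU_INIT_POINTER(global_foo, NULL);	/* unpublish first */
		call_rcu_bh(&f->rcu, foo_free_rcu);	/* non-blocking, unlike synchronize_rcu_bh() */
	}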
*/ - synchronize_rcu_bh(); + vlan->netpoll = NULL; - __netpoll_cleanup(netpoll); - kfree(netpoll); + __netpoll_free_rcu(netpoll); } #endif /* CONFIG_NET_POLL_CONTROLLER */ diff --git a/net/atm/common.c b/net/atm/common.c index b4b44dbed645..0c0ad930a632 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -812,6 +812,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname, if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) return -ENOTCONN; + memset(&pvc, 0, sizeof(pvc)); pvc.sap_family = AF_ATMPVC; pvc.sap_addr.itf = vcc->dev->number; pvc.sap_addr.vpi = vcc->vpi; diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 3a734919c36c..ae0324021407 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -95,6 +95,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, return -ENOTCONN; *sockaddr_len = sizeof(struct sockaddr_atmpvc); addr = (struct sockaddr_atmpvc *)sockaddr; + memset(addr, 0, sizeof(*addr)); addr->sap_family = AF_ATMPVC; addr->sap_addr.itf = vcc->dev->number; addr->sap_addr.vpi = vcc->vpi; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index b421cc49d2cd..fc866f2e4528 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -200,11 +200,11 @@ void batadv_gw_election(struct batadv_priv *bat_priv) if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT) goto out; - if (!batadv_atomic_dec_not_zero(&bat_priv->gw_reselect)) - goto out; - curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + if (!batadv_atomic_dec_not_zero(&bat_priv->gw_reselect) && curr_gw) + goto out; + next_gw = batadv_gw_get_best_gw_node(bat_priv); if (curr_gw == next_gw) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index a438f4b582fc..99dd8f75b3ff 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -197,6 +197,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, del: list_del(&entry->list); kfree(entry); + kfree(tt_change_node); event_removed = true; goto unlock; } diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 4ff0bf3ba9a5..0760d1fed6f0 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -316,7 +316,7 @@ send_rsp: static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb, struct a2mp_cmd *hdr) { - BT_DBG("ident %d code %d", hdr->ident, hdr->code); + BT_DBG("ident %d code 0x%2.2x", hdr->ident, hdr->code); skb_pull(skb, le16_to_cpu(hdr->len)); return 0; @@ -325,17 +325,19 @@ static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb, /* Handle A2MP signalling */ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { - struct a2mp_cmd *hdr = (void *) skb->data; + struct a2mp_cmd *hdr; struct amp_mgr *mgr = chan->data; int err = 0; amp_mgr_get(mgr); while (skb->len >= sizeof(*hdr)) { - struct a2mp_cmd *hdr = (void *) skb->data; - u16 len = le16_to_cpu(hdr->len); + u16 len; - BT_DBG("code 0x%02x id %d len %d", hdr->code, hdr->ident, len); + hdr = (void *) skb->data; + len = le16_to_cpu(hdr->len); + + BT_DBG("code 0x%2.2x id %d len %u", hdr->code, hdr->ident, len); skb_pull(skb, sizeof(*hdr)); @@ -393,7 +395,9 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) if (err) { struct a2mp_cmd_rej rej; + rej.reason = __constant_cpu_to_le16(0); + hdr = (void *) skb->data; BT_DBG("Send A2MP Rej: cmd 0x%2.2x err %d", hdr->code, err); @@ -412,7 +416,7 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) static void 
a2mp_chan_close_cb(struct l2cap_chan *chan) { - l2cap_chan_destroy(chan); + l2cap_chan_put(chan); } static void a2mp_chan_state_change_cb(struct l2cap_chan *chan, int state) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f7db5792ec64..58f9762b339a 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -28,6 +28,7 @@ #include <asm/ioctls.h> #include <net/bluetooth/bluetooth.h> +#include <linux/proc_fs.h> #define VERSION "2.16" @@ -532,6 +533,146 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) } EXPORT_SYMBOL(bt_sock_wait_state); +#ifdef CONFIG_PROC_FS +struct bt_seq_state { + struct bt_sock_list *l; +}; + +static void *bt_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(seq->private->l->lock) +{ + struct bt_seq_state *s = seq->private; + struct bt_sock_list *l = s->l; + + read_lock(&l->lock); + return seq_hlist_start_head(&l->head, *pos); +} + +static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bt_seq_state *s = seq->private; + struct bt_sock_list *l = s->l; + + return seq_hlist_next(v, &l->head, pos); +} + +static void bt_seq_stop(struct seq_file *seq, void *v) + __releases(seq->private->l->lock) +{ + struct bt_seq_state *s = seq->private; + struct bt_sock_list *l = s->l; + + read_unlock(&l->lock); +} + +static int bt_seq_show(struct seq_file *seq, void *v) +{ + struct sock *sk; + struct bt_sock *bt; + struct bt_seq_state *s = seq->private; + struct bt_sock_list *l = s->l; + bdaddr_t src_baswapped, dst_baswapped; + + if (v == SEQ_START_TOKEN) { + seq_puts(seq ,"sk RefCnt Rmem Wmem User Inode Src Dst Parent"); + + if (l->custom_seq_show) { + seq_putc(seq, ' '); + l->custom_seq_show(seq, v); + } + + seq_putc(seq, '\n'); + } else { + sk = sk_entry(v); + bt = bt_sk(sk); + baswap(&src_baswapped, &bt->src); + baswap(&dst_baswapped, &bt->dst); + + seq_printf(seq, "%pK %-6d %-6u %-6u %-6u %-6lu %pM %pM %-6lu", + sk, + atomic_read(&sk->sk_refcnt), + sk_rmem_alloc_get(sk), + sk_wmem_alloc_get(sk), + sock_i_uid(sk), + sock_i_ino(sk), + &src_baswapped, + &dst_baswapped, + bt->parent? 
sock_i_ino(bt->parent): 0LU); + + if (l->custom_seq_show) { + seq_putc(seq, ' '); + l->custom_seq_show(seq, v); + } + + seq_putc(seq, '\n'); + } + return 0; +} + +static struct seq_operations bt_seq_ops = { + .start = bt_seq_start, + .next = bt_seq_next, + .stop = bt_seq_stop, + .show = bt_seq_show, +}; + +static int bt_seq_open(struct inode *inode, struct file *file) +{ + struct bt_sock_list *sk_list; + struct bt_seq_state *s; + + sk_list = PDE(inode)->data; + s = __seq_open_private(file, &bt_seq_ops, + sizeof(struct bt_seq_state)); + if (s == NULL) + return -ENOMEM; + + s->l = sk_list; + return 0; +} + +int bt_procfs_init(struct module* module, struct net *net, const char *name, + struct bt_sock_list* sk_list, + int (* seq_show)(struct seq_file *, void *)) +{ + struct proc_dir_entry * pde; + + sk_list->custom_seq_show = seq_show; + + sk_list->fops.owner = module; + sk_list->fops.open = bt_seq_open; + sk_list->fops.read = seq_read; + sk_list->fops.llseek = seq_lseek; + sk_list->fops.release = seq_release_private; + + pde = proc_net_fops_create(net, name, 0, &sk_list->fops); + if (pde == NULL) + return -ENOMEM; + + pde->data = sk_list; + + return 0; +} + +void bt_procfs_cleanup(struct net *net, const char *name) +{ + proc_net_remove(net, name); +} +#else +int bt_procfs_init(struct module* module, struct net *net, const char *name, + struct bt_sock_list* sk_list, + int (* seq_show)(struct seq_file *, void *)) +{ + return 0; +} + +void bt_procfs_cleanup(struct net *net, const char *name) +{ +} +#endif +EXPORT_SYMBOL(bt_procfs_init); +EXPORT_SYMBOL(bt_procfs_cleanup); + static struct net_proto_family bt_sock_family_ops = { .owner = THIS_MODULE, .family = PF_BLUETOOTH, diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 5e5f5b410e0b..5b6cc0bf4dec 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -29,6 +29,10 @@ #include "bnep.h" +static struct bt_sock_list bnep_sk_list = { + .lock = __RW_LOCK_UNLOCKED(bnep_sk_list.lock) +}; + static int bnep_sock_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -38,6 +42,8 @@ static int bnep_sock_release(struct socket *sock) if (!sk) return 0; + bt_sock_unlink(&bnep_sk_list, sk); + sock_orphan(sk); sock_put(sk); return 0; @@ -204,6 +210,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol, sk->sk_protocol = protocol; sk->sk_state = BT_OPEN; + bt_sock_link(&bnep_sk_list, sk); return 0; } @@ -222,19 +229,30 @@ int __init bnep_sock_init(void) return err; err = bt_sock_register(BTPROTO_BNEP, &bnep_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("Can't register BNEP socket"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "bnep", &bnep_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create BNEP proc file"); + bt_sock_unregister(BTPROTO_BNEP); + goto error; + } + + BT_INFO("BNEP socket layer initialized"); return 0; error: - BT_ERR("Can't register BNEP socket"); proto_unregister(&bnep_proto); return err; } void __exit bnep_sock_cleanup(void) { + bt_procfs_cleanup(&init_net, "bnep"); if (bt_sock_unregister(BTPROTO_BNEP) < 0) BT_ERR("Can't unregister BNEP socket"); diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 311668d14571..d5cacef52748 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -42,6 +42,10 @@ #include "cmtp.h" +static struct bt_sock_list cmtp_sk_list = { + .lock = __RW_LOCK_UNLOCKED(cmtp_sk_list.lock) +}; + static int cmtp_sock_release(struct socket *sock) { struct sock *sk = sock->sk; @@ 
-51,6 +55,8 @@ static int cmtp_sock_release(struct socket *sock) if (!sk) return 0; + bt_sock_unlink(&cmtp_sk_list, sk); + sock_orphan(sk); sock_put(sk); @@ -214,6 +220,8 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol, sk->sk_protocol = protocol; sk->sk_state = BT_OPEN; + bt_sock_link(&cmtp_sk_list, sk); + return 0; } @@ -232,19 +240,30 @@ int cmtp_init_sockets(void) return err; err = bt_sock_register(BTPROTO_CMTP, &cmtp_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("Can't register CMTP socket"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "cmtp", &cmtp_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create CMTP proc file"); + bt_sock_unregister(BTPROTO_HIDP); + goto error; + } + + BT_INFO("CMTP socket layer initialized"); return 0; error: - BT_ERR("Can't register CMTP socket"); proto_unregister(&cmtp_proto); return err; } void cmtp_cleanup_sockets(void) { + bt_procfs_cleanup(&init_net, "cmtp"); if (bt_sock_unregister(BTPROTO_CMTP) < 0) BT_ERR("Can't unregister CMTP socket"); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d4de5db18d5a..fa974a19d365 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -696,7 +696,8 @@ int hci_dev_open(__u16 dev) hci_dev_hold(hdev); set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); - if (!test_bit(HCI_SETUP, &hdev->dev_flags)) { + if (!test_bit(HCI_SETUP, &hdev->dev_flags) && + mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); mgmt_powered(hdev, 1); hci_dev_unlock(hdev); @@ -797,7 +798,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) * and no tasks are scheduled. */ hdev->close(hdev); - if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { + if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) && + mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); mgmt_powered(hdev, 0); hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 41ff978a33f9..4fd2cf3bcd05 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -513,7 +513,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev) if (hdev->features[3] & LMP_RSSI_INQ) events[4] |= 0x02; /* Inquiry Result with RSSI */ - if (hdev->features[5] & LMP_SNIFF_SUBR) + if (lmp_sniffsubr_capable(hdev)) events[5] |= 0x20; /* Sniff Subrating */ if (hdev->features[5] & LMP_PAUSE_ENC) @@ -522,13 +522,13 @@ static void hci_setup_event_mask(struct hci_dev *hdev) if (hdev->features[6] & LMP_EXT_INQ) events[5] |= 0x40; /* Extended Inquiry Result */ - if (hdev->features[6] & LMP_NO_FLUSH) + if (lmp_no_flush_capable(hdev)) events[7] |= 0x01; /* Enhanced Flush Complete */ if (hdev->features[7] & LMP_LSTO) events[6] |= 0x80; /* Link Supervision Timeout Changed */ - if (hdev->features[6] & LMP_SIMPLE_PAIR) { + if (lmp_ssp_capable(hdev)) { events[6] |= 0x01; /* IO Capability Request */ events[6] |= 0x02; /* IO Capability Response */ events[6] |= 0x04; /* User Confirmation Request */ @@ -541,7 +541,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev) * Features Notification */ } - if (hdev->features[4] & LMP_LE) + if (lmp_le_capable(hdev)) events[7] |= 0x20; /* LE Meta-Event */ hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events); @@ -623,11 +623,11 @@ static void hci_setup_link_policy(struct hci_dev *hdev) struct hci_cp_write_def_link_policy cp; u16 link_policy = 0; - if (hdev->features[0] & LMP_RSWITCH) + if (lmp_rswitch_capable(hdev)) link_policy |= HCI_LP_RSWITCH; if (hdev->features[0] & LMP_HOLD) link_policy |= HCI_LP_HOLD; - 
if (hdev->features[0] & LMP_SNIFF) + if (lmp_sniff_capable(hdev)) link_policy |= HCI_LP_SNIFF; if (hdev->features[1] & LMP_PARK) link_policy |= HCI_LP_PARK; @@ -686,7 +686,7 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, hdev->esco_type |= (ESCO_HV3); } - if (hdev->features[3] & LMP_ESCO) + if (lmp_esco_capable(hdev)) hdev->esco_type |= (ESCO_EV3); if (hdev->features[4] & LMP_EV4) @@ -746,7 +746,7 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev, break; } - if (test_bit(HCI_INIT, &hdev->flags) && hdev->features[4] & LMP_LE) + if (test_bit(HCI_INIT, &hdev->flags) && lmp_le_capable(hdev)) hci_set_le_support(hdev); done: @@ -1365,6 +1365,9 @@ static bool hci_resolve_next_name(struct hci_dev *hdev) return false; e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_NEEDED); + if (!e) + return false; + if (hci_resolve_name(hdev, e) == 0) { e->name_state = NAME_PENDING; return true; @@ -1393,12 +1396,20 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn, return; e = hci_inquiry_cache_lookup_resolve(hdev, bdaddr, NAME_PENDING); - if (e) { + /* If the device was not found in a list of found devices names of which + * are pending. there is no need to continue resolving a next name as it + * will be done upon receiving another Remote Name Request Complete + * Event */ + if (!e) + return; + + list_del(&e->list); + if (name) { e->name_state = NAME_KNOWN; - list_del(&e->list); - if (name) - mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, - e->data.rssi, name, name_len); + mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, + e->data.rssi, name, name_len); + } else { + e->name_state = NAME_NOT_KNOWN; } if (hci_resolve_next_name(hdev)) @@ -1614,43 +1625,30 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status) { - struct hci_cp_le_create_conn *cp; struct hci_conn *conn; BT_DBG("%s status 0x%2.2x", hdev->name, status); - cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CONN); - if (!cp) - return; + if (status) { + hci_dev_lock(hdev); - hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (!conn) { + hci_dev_unlock(hdev); + return; + } - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr); + BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&conn->dst), + conn); - BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr), - conn); + conn->state = BT_CLOSED; + mgmt_connect_failed(hdev, &conn->dst, conn->type, + conn->dst_type, status); + hci_proto_connect_cfm(conn, status); + hci_conn_del(conn); - if (status) { - if (conn && conn->state == BT_CONNECT) { - conn->state = BT_CLOSED; - mgmt_connect_failed(hdev, &cp->peer_addr, conn->type, - conn->dst_type, status); - hci_proto_connect_cfm(conn, status); - hci_conn_del(conn); - } - } else { - if (!conn) { - conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr); - if (conn) { - conn->dst_type = cp->peer_addr_type; - conn->out = true; - } else { - BT_ERR("No memory for new connection"); - } - } + hci_dev_unlock(hdev); } - - hci_dev_unlock(hdev); } static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status) @@ -1762,7 +1760,12 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (conn->type == ACL_LINK) { conn->state = BT_CONFIG; hci_conn_hold(conn); - conn->disc_timeout = HCI_DISCONN_TIMEOUT; + + if (!conn->out && !hci_conn_ssp_enabled(conn) && + !hci_find_link_key(hdev, &ev->bdaddr)) + conn->disc_timeout = HCI_PAIRING_TIMEOUT; + else + 
conn->disc_timeout = HCI_DISCONN_TIMEOUT; } else conn->state = BT_CONNECTED; @@ -3252,12 +3255,8 @@ static void hci_user_passkey_request_evt(struct hci_dev *hdev, BT_DBG("%s", hdev->name); - hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) mgmt_user_passkey_request(hdev, &ev->bdaddr, ACL_LINK, 0); - - hci_dev_unlock(hdev); } static void hci_simple_pair_complete_evt(struct hci_dev *hdev, @@ -3350,11 +3349,23 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); - if (ev->status) { - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); - if (!conn) + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (!conn) { + conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr); + if (!conn) { + BT_ERR("No memory for new connection"); goto unlock; + } + + conn->dst_type = ev->bdaddr_type; + if (ev->role == LE_CONN_ROLE_MASTER) { + conn->out = true; + conn->link_mode |= HCI_LM_MASTER; + } + } + + if (ev->status) { mgmt_connect_failed(hdev, &conn->dst, conn->type, conn->dst_type, ev->status); hci_proto_connect_cfm(conn, ev->status); @@ -3363,18 +3374,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) goto unlock; } - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr); - if (!conn) { - conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr); - if (!conn) { - BT_ERR("No memory for new connection"); - hci_dev_unlock(hdev); - return; - } - - conn->dst_type = ev->bdaddr_type; - } - if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) mgmt_device_connected(hdev, &ev->bdaddr, conn->type, conn->dst_type, 0, NULL, 0, NULL); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index a7f04de03d79..bb64331db3b7 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -694,6 +694,7 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, *addr_len = sizeof(*haddr); haddr->hci_family = AF_BLUETOOTH; haddr->hci_dev = hdev->id; + haddr->hci_channel= 0; release_sock(sk); return 0; @@ -1009,6 +1010,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, { struct hci_filter *f = &hci_pi(sk)->filter; + memset(&uf, 0, sizeof(uf)); uf.type_mask = f->type_mask; uf.opcode = f->opcode; uf.event_mask[0] = *((u32 *) f->event_mask + 0); @@ -1100,21 +1102,30 @@ int __init hci_sock_init(void) return err; err = bt_sock_register(BTPROTO_HCI, &hci_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("HCI socket registration failed"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "hci", &hci_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create HCI proc file"); + bt_sock_unregister(BTPROTO_HCI); + goto error; + } BT_INFO("HCI socket layer initialized"); return 0; error: - BT_ERR("HCI socket registration failed"); proto_unregister(&hci_sk_proto); return err; } void hci_sock_cleanup(void) { + bt_procfs_cleanup(&init_net, "hci"); if (bt_sock_unregister(BTPROTO_HCI) < 0) BT_ERR("HCI socket unregistration failed"); diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 18b3f6892a36..eca3889371c4 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -25,6 +25,10 @@ #include "hidp.h" +static struct bt_sock_list hidp_sk_list = { + .lock = __RW_LOCK_UNLOCKED(hidp_sk_list.lock) +}; + static int hidp_sock_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -34,6 +38,8 @@ static int hidp_sock_release(struct socket *sock) if (!sk) return 0; + bt_sock_unlink(&hidp_sk_list, sk); + 
sock_orphan(sk); sock_put(sk); @@ -253,6 +259,8 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol, sk->sk_protocol = protocol; sk->sk_state = BT_OPEN; + bt_sock_link(&hidp_sk_list, sk); + return 0; } @@ -271,8 +279,19 @@ int __init hidp_init_sockets(void) return err; err = bt_sock_register(BTPROTO_HIDP, &hidp_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("Can't register HIDP socket"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "hidp", &hidp_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create HIDP proc file"); + bt_sock_unregister(BTPROTO_HIDP); + goto error; + } + + BT_INFO("HIDP socket layer initialized"); return 0; @@ -284,6 +303,7 @@ error: void __exit hidp_cleanup_sockets(void) { + bt_procfs_cleanup(&init_net, "hidp"); if (bt_sock_unregister(BTPROTO_HIDP) < 0) BT_ERR("Can't unregister HIDP socket"); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a8964db04bfb..f0a3ab156ec6 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -416,13 +416,30 @@ struct l2cap_chan *l2cap_chan_create(void) return chan; } -void l2cap_chan_destroy(struct l2cap_chan *chan) +static void l2cap_chan_destroy(struct l2cap_chan *chan) { + BT_DBG("chan %p", chan); + write_lock(&chan_list_lock); list_del(&chan->global_l); write_unlock(&chan_list_lock); - l2cap_chan_put(chan); + kfree(chan); +} + +void l2cap_chan_hold(struct l2cap_chan *c) +{ + BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt)); + + atomic_inc(&c->refcnt); +} + +void l2cap_chan_put(struct l2cap_chan *c) +{ + BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt)); + + if (atomic_dec_and_test(&c->refcnt)) + l2cap_chan_destroy(c); } void l2cap_chan_set_defaults(struct l2cap_chan *chan) @@ -1181,6 +1198,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) sk = chan->sk; hci_conn_hold(conn->hcon); + conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; bacpy(&bt_sk(sk)->src, conn->src); bacpy(&bt_sk(sk)->dst, conn->dst); @@ -5329,7 +5347,7 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) return exact ? 
lm1 : lm2; } -int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) +void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct l2cap_conn *conn; @@ -5342,7 +5360,6 @@ int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) } else l2cap_conn_del(hcon, bt_to_errno(status)); - return 0; } int l2cap_disconn_ind(struct hci_conn *hcon) @@ -5356,12 +5373,11 @@ int l2cap_disconn_ind(struct hci_conn *hcon) return conn->disc_reason; } -int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) +void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); l2cap_conn_del(hcon, bt_to_errno(reason)); - return 0; } static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt) @@ -5404,6 +5420,11 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) BT_DBG("chan %p scid 0x%4.4x state %s", chan, chan->scid, state_to_string(chan->state)); + if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP) { + l2cap_chan_unlock(chan); + continue; + } + if (chan->scid == L2CAP_CID_LE_DATA) { if (!status && encrypt) { chan->sec_level = hcon->sec_level; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a4bb27e8427e..3a6ce73541d9 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -34,6 +34,10 @@ #include <net/bluetooth/l2cap.h> #include <net/bluetooth/smp.h> +static struct bt_sock_list l2cap_sk_list = { + .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock) +}; + static const struct proto_ops l2cap_sock_ops; static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio); @@ -245,6 +249,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l BT_DBG("sock %p, sk %p", sock, sk); + memset(la, 0, sizeof(struct sockaddr_l2)); addr->sa_family = AF_BLUETOOTH; *len = sizeof(struct sockaddr_l2); @@ -823,7 +828,7 @@ static void l2cap_sock_kill(struct sock *sk) /* Kill poor orphan */ - l2cap_chan_destroy(l2cap_pi(sk)->chan); + l2cap_chan_put(l2cap_pi(sk)->chan); sock_set_flag(sk, SOCK_DEAD); sock_put(sk); } @@ -886,6 +891,8 @@ static int l2cap_sock_release(struct socket *sock) if (!sk) return 0; + bt_sock_unlink(&l2cap_sk_list, sk); + err = l2cap_sock_shutdown(sock, 2); sock_orphan(sk); @@ -1174,7 +1181,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p chan = l2cap_chan_create(); if (!chan) { - l2cap_sock_kill(sk); + sk_free(sk); return NULL; } @@ -1210,6 +1217,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, return -ENOMEM; l2cap_sock_init(sk, NULL); + bt_sock_link(&l2cap_sk_list, sk); return 0; } @@ -1248,21 +1256,30 @@ int __init l2cap_init_sockets(void) return err; err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("L2CAP socket registration failed"); goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "l2cap", &l2cap_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create L2CAP proc file"); + bt_sock_unregister(BTPROTO_L2CAP); + goto error; + } BT_INFO("L2CAP socket layer initialized"); return 0; error: - BT_ERR("L2CAP socket registration failed"); proto_unregister(&l2cap_proto); return err; } void l2cap_cleanup_sockets(void) { + bt_procfs_cleanup(&init_net, "l2cap"); if (bt_sock_unregister(BTPROTO_L2CAP) < 0) BT_ERR("L2CAP socket unregistration failed"); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 
ad6613d17ca6..a3329cbd3e4d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -193,6 +193,11 @@ static u8 mgmt_status_table[] = { MGMT_STATUS_CONNECT_FAILED, /* MAC Connection Failed */ }; +bool mgmt_valid_hdev(struct hci_dev *hdev) +{ + return hdev->dev_type == HCI_BREDR; +} + static u8 mgmt_status(u8 hci_status) { if (hci_status < ARRAY_SIZE(mgmt_status_table)) @@ -317,7 +322,6 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { struct mgmt_rp_read_index_list *rp; - struct list_head *p; struct hci_dev *d; size_t rp_len; u16 count; @@ -328,7 +332,10 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, read_lock(&hci_dev_list_lock); count = 0; - list_for_each(p, &hci_dev_list) { + list_for_each_entry(d, &hci_dev_list, list) { + if (!mgmt_valid_hdev(d)) + continue; + count++; } @@ -346,6 +353,9 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, if (test_bit(HCI_SETUP, &d->dev_flags)) continue; + if (!mgmt_valid_hdev(d)) + continue; + rp->index[i++] = cpu_to_le16(d->id); BT_DBG("Added hci%u", d->id); } @@ -370,10 +380,10 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_DISCOVERABLE; settings |= MGMT_SETTING_PAIRABLE; - if (hdev->features[6] & LMP_SIMPLE_PAIR) + if (lmp_ssp_capable(hdev)) settings |= MGMT_SETTING_SSP; - if (!(hdev->features[4] & LMP_NO_BREDR)) { + if (lmp_bredr_capable(hdev)) { settings |= MGMT_SETTING_BREDR; settings |= MGMT_SETTING_LINK_SECURITY; } @@ -381,7 +391,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (enable_hs) settings |= MGMT_SETTING_HS; - if (hdev->features[4] & LMP_LE) + if (lmp_le_capable(hdev)) settings |= MGMT_SETTING_LE; return settings; @@ -403,7 +413,7 @@ static u32 get_current_settings(struct hci_dev *hdev) if (test_bit(HCI_PAIRABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_PAIRABLE; - if (!(hdev->features[4] & LMP_NO_BREDR)) + if (lmp_bredr_capable(hdev)) settings |= MGMT_SETTING_BREDR; if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) @@ -1111,7 +1121,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_dev_lock(hdev); - if (!(hdev->features[6] & LMP_SIMPLE_PAIR)) { + if (!lmp_ssp_capable(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, MGMT_STATUS_NOT_SUPPORTED); goto failed; @@ -1195,7 +1205,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_dev_lock(hdev); - if (!(hdev->features[4] & LMP_LE)) { + if (!lmp_le_capable(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_SET_LE, MGMT_STATUS_NOT_SUPPORTED); goto unlock; @@ -2191,7 +2201,7 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!(hdev->features[6] & LMP_SIMPLE_PAIR)) { + if (!lmp_ssp_capable(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, MGMT_STATUS_NOT_SUPPORTED); goto unlock; @@ -2820,6 +2830,9 @@ static void cmd_status_rsp(struct pending_cmd *cmd, void *data) int mgmt_index_added(struct hci_dev *hdev) { + if (!mgmt_valid_hdev(hdev)) + return -ENOTSUPP; + return mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL); } @@ -2827,6 +2840,9 @@ int mgmt_index_removed(struct hci_dev *hdev) { u8 status = MGMT_STATUS_INVALID_INDEX; + if (!mgmt_valid_hdev(hdev)) + return -ENOTSUPP; + mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); return mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 
7e1e59645c05..b3226f3658cf 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -528,6 +528,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * BT_DBG("sock %p, sk %p", sock, sk); + memset(sa, 0, sizeof(*sa)); sa->rc_family = AF_BLUETOOTH; sa->rc_channel = rfcomm_pi(sk)->channel; if (peer) @@ -822,6 +823,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c } sec.level = rfcomm_pi(sk)->sec_level; + sec.key_size = 0; len = min_t(unsigned int, len, sizeof(sec)); if (copy_to_user(optval, (char *) &sec, len)) @@ -1033,8 +1035,17 @@ int __init rfcomm_init_sockets(void) return err; err = bt_sock_register(BTPROTO_RFCOMM, &rfcomm_sock_family_ops); - if (err < 0) + if (err < 0) { + BT_ERR("RFCOMM socket layer registration failed"); + goto error; + } + + err = bt_procfs_init(THIS_MODULE, &init_net, "rfcomm", &rfcomm_sk_list, NULL); + if (err < 0) { + BT_ERR("Failed to create RFCOMM proc file"); + bt_sock_unregister(BTPROTO_RFCOMM); goto error; + } if (bt_debugfs) { rfcomm_sock_debugfs = debugfs_create_file("rfcomm", 0444, @@ -1048,13 +1059,14 @@ int __init rfcomm_init_sockets(void) return 0; error: - BT_ERR("RFCOMM socket layer registration failed"); proto_unregister(&rfcomm_proto); return err; } void __exit rfcomm_cleanup_sockets(void) { + bt_procfs_cleanup(&init_net, "rfcomm"); + debugfs_remove(rfcomm_sock_debugfs); if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index cb960773c002..56f182393c4c 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -456,7 +456,7 @@ static int rfcomm_get_dev_list(void __user *arg) size = sizeof(*dl) + dev_num * sizeof(*di); - dl = kmalloc(size, GFP_KERNEL); + dl = kzalloc(size, GFP_KERNEL); if (!dl) return -ENOMEM; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 40bbe25dcff7..dc42b917aaaf 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -131,6 +131,15 @@ static int sco_conn_del(struct hci_conn *hcon, int err) sco_sock_clear_timer(sk); sco_chan_del(sk, err); bh_unlock_sock(sk); + + sco_conn_lock(conn); + conn->sk = NULL; + sco_pi(sk)->conn = NULL; + sco_conn_unlock(conn); + + if (conn->hcon) + hci_conn_put(conn->hcon); + sco_sock_kill(sk); } @@ -821,16 +830,6 @@ static void sco_chan_del(struct sock *sk, int err) BT_DBG("sk %p, conn %p, err %d", sk, conn, err); - if (conn) { - sco_conn_lock(conn); - conn->sk = NULL; - sco_pi(sk)->conn = NULL; - sco_conn_unlock(conn); - - if (conn->hcon) - hci_conn_put(conn->hcon); - } - sk->sk_state = BT_CLOSED; sk->sk_err = err; sk->sk_state_change(sk); @@ -913,7 +912,7 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr) return lm; } -int sco_connect_cfm(struct hci_conn *hcon, __u8 status) +void sco_connect_cfm(struct hci_conn *hcon, __u8 status) { BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); if (!status) { @@ -924,16 +923,13 @@ int sco_connect_cfm(struct hci_conn *hcon, __u8 status) sco_conn_ready(conn); } else sco_conn_del(hcon, bt_to_errno(status)); - - return 0; } -int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) +void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); sco_conn_del(hcon, bt_to_errno(reason)); - return 0; } int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) @@ -1026,6 +1022,13 @@ int __init sco_init(void) goto error; } + err = bt_procfs_init(THIS_MODULE, &init_net, "sco", &sco_sk_list, NULL); + if (err < 0) 
{ + BT_ERR("Failed to create SCO proc file"); + bt_sock_unregister(BTPROTO_SCO); + goto error; + } + if (bt_debugfs) { sco_debugfs = debugfs_create_file("sco", 0444, bt_debugfs, NULL, &sco_debugfs_fops); @@ -1044,6 +1047,8 @@ error: void __exit sco_exit(void) { + bt_procfs_cleanup(&init_net, "sco"); + debugfs_remove(sco_debugfs); if (bt_sock_unregister(BTPROTO_SCO) < 0) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 16ef0dc85a0a..901a616c8083 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -579,8 +579,11 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) smp = smp_chan_create(conn); + else + smp = conn->smp_chan; - smp = conn->smp_chan; + if (!smp) + return SMP_UNSPECIFIED; smp->preq[0] = SMP_CMD_PAIRING_REQ; memcpy(&smp->preq[1], req, sizeof(*req)); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 333484537600..070e8a68cfc6 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -31,9 +31,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct net_bridge_mdb_entry *mdst; struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + rcu_read_lock(); #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { br_nf_pre_routing_finish_bridge_slow(skb); + rcu_read_unlock(); return NETDEV_TX_OK; } #endif @@ -48,7 +50,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - rcu_read_lock(); if (is_broadcast_ether_addr(dest)) br_flood_deliver(br, skb); else if (is_multicast_ether_addr(dest)) { @@ -206,24 +207,23 @@ static void br_poll_controller(struct net_device *br_dev) static void br_netpoll_cleanup(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - struct net_bridge_port *p, *n; + struct net_bridge_port *p; - list_for_each_entry_safe(p, n, &br->port_list, list) { + list_for_each_entry(p, &br->port_list, list) br_netpoll_disable(p); - } } -static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) +static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, + gfp_t gfp) { struct net_bridge *br = netdev_priv(dev); - struct net_bridge_port *p, *n; + struct net_bridge_port *p; int err = 0; - list_for_each_entry_safe(p, n, &br->port_list, list) { + list_for_each_entry(p, &br->port_list, list) { if (!p->dev) continue; - - err = br_netpoll_enable(p); + err = br_netpoll_enable(p, gfp); if (err) goto fail; } @@ -236,17 +236,17 @@ fail: goto out; } -int br_netpoll_enable(struct net_bridge_port *p) +int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) { struct netpoll *np; int err = 0; - np = kzalloc(sizeof(*p->np), GFP_KERNEL); + np = kzalloc(sizeof(*p->np), gfp); err = -ENOMEM; if (!np) goto out; - err = __netpoll_setup(np, p->dev); + err = __netpoll_setup(np, p->dev, gfp); if (err) { kfree(np); goto out; @@ -267,11 +267,7 @@ void br_netpoll_disable(struct net_bridge_port *p) p->np = NULL; - /* Wait for transmitting packets to finish before freeing. 
*/ - synchronize_rcu_bh(); - - __netpoll_cleanup(np); - kfree(np); + __netpoll_free_rcu(np); } #endif diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index d21f32383517..9ce430b4657c 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -312,7 +312,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, fe->is_local = f->is_local; if (!f->is_static) - fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated); + fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); ++fe; ++num; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e9466d412707..02015a505d2a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -65,7 +65,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { skb->dev = to->dev; - if (unlikely(netpoll_tx_running(to->dev))) { + if (unlikely(netpoll_tx_running(to->br->dev))) { if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) kfree_skb(skb); else { diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index e1144e1617be..1c8fdc3558cd 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -361,7 +361,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err2; - if (br_netpoll_info(br) && ((err = br_netpoll_enable(p)))) + if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL)))) goto err3; err = netdev_set_master(dev, br->dev); @@ -427,6 +427,10 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) if (!p || p->br != br) return -EINVAL; + /* Since more than one interface can be attached to a bridge, + * there still maybe an alternate path for netconsole to use; + * therefore there is no reason for a NETDEV_RELEASE event. + */ del_nbp(p); spin_lock_bh(&br->lock); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a768b2408edf..f507d2af9646 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -316,7 +316,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, netpoll_send_skb(np, skb); } -extern int br_netpoll_enable(struct net_bridge_port *p); +extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp); extern void br_netpoll_disable(struct net_bridge_port *p); #else static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br) @@ -329,7 +329,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, { } -static inline int br_netpoll_enable(struct net_bridge_port *p) +static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) { return 0; } diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index a6747e673426..c3530a81a33b 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -170,5 +170,5 @@ void br_stp_port_timer_init(struct net_bridge_port *p) unsigned long br_timer_value(const struct timer_list *timer) { return timer_pending(timer) - ? jiffies_to_clock_t(timer->expires - jiffies) : 0; + ? 
jiffies_delta_to_clock_t(timer->expires - jiffies) : 0; } diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 6229b62749e8..13b36bdc76a7 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -27,7 +27,7 @@ struct brport_attribute { }; #define BRPORT_ATTR(_name,_mode,_show,_store) \ -struct brport_attribute brport_attr_##_name = { \ +const struct brport_attribute brport_attr_##_name = { \ .attr = {.name = __stringify(_name), \ .mode = _mode }, \ .show = _show, \ @@ -164,7 +164,7 @@ static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, store_multicast_router); #endif -static struct brport_attribute *brport_attrs[] = { +static const struct brport_attribute *brport_attrs[] = { &brport_attr_path_cost, &brport_attr_priority, &brport_attr_port_id, @@ -241,7 +241,7 @@ const struct sysfs_ops brport_sysfs_ops = { int br_sysfs_addif(struct net_bridge_port *p) { struct net_bridge *br = p->br; - struct brport_attribute **a; + const struct brport_attribute **a; int err; err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj, diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 78f1cdad5b33..095259f83902 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -141,7 +141,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) err = sk_filter(sk, skb); if (err) return err; - if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) { + if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) { set_rx_flow_off(cf_sk); net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n"); caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 69771c04ba8f..e597733affb8 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -94,6 +94,10 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) /* check the version of IP */ ip_version = skb_header_pointer(skb, 0, 1, &buf); + if (!ip_version) { + kfree_skb(skb); + return -EINVAL; + } switch (*ip_version >> 4) { case 4: diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index ba4323bce0e9..69e38db28e5f 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -17,6 +17,7 @@ #include <linux/string.h> +#include <linux/ceph/ceph_features.h> #include <linux/ceph/libceph.h> #include <linux/ceph/debugfs.h> #include <linux/ceph/decode.h> @@ -460,27 +461,23 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, client->auth_err = 0; client->extra_mon_dispatch = NULL; - client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT | + client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT | supported_features; - client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT | + client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT | required_features; /* msgr */ if (ceph_test_opt(client, MYIP)) myaddr = &client->options->my_addr; - client->msgr = ceph_messenger_create(myaddr, - client->supported_features, - client->required_features); - if (IS_ERR(client->msgr)) { - err = PTR_ERR(client->msgr); - goto fail; - } - client->msgr->nocrc = ceph_test_opt(client, NOCRC); + ceph_messenger_init(&client->msgr, myaddr, + client->supported_features, + client->required_features, + ceph_test_opt(client, NOCRC)); /* subsystems */ err = ceph_monc_init(&client->monc, client); if (err < 0) - goto fail_msgr; + goto fail; err = ceph_osdc_init(&client->osdc, client); if (err < 0) goto fail_monc; @@ -489,8 +486,6 @@ struct ceph_client 
*ceph_create_client(struct ceph_options *opt, void *private, fail_monc: ceph_monc_stop(&client->monc); -fail_msgr: - ceph_messenger_destroy(client->msgr); fail: kfree(client); return ERR_PTR(err); @@ -501,6 +496,8 @@ void ceph_destroy_client(struct ceph_client *client) { dout("destroy_client %p\n", client); + atomic_set(&client->msgr.stopping, 1); + /* unmount */ ceph_osdc_stop(&client->osdc); @@ -508,8 +505,6 @@ void ceph_destroy_client(struct ceph_client *client) ceph_debugfs_client_cleanup(client); - ceph_messenger_destroy(client->msgr); - ceph_destroy_options(client->options); kfree(client); diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index d7edc24333b8..35fce755ce10 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -306,7 +306,6 @@ static int crush_choose(const struct crush_map *map, int item = 0; int itemtype; int collide, reject; - const unsigned int orig_tries = 5; /* attempts before we fall back to search */ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", bucket->id, x, outpos, numrep); @@ -351,8 +350,9 @@ static int crush_choose(const struct crush_map *map, reject = 1; goto reject; } - if (flocal >= (in->size>>1) && - flocal > orig_tries) + if (map->choose_local_fallback_tries > 0 && + flocal >= (in->size>>1) && + flocal > map->choose_local_fallback_tries) item = bucket_perm_choose(in, x, r); else item = crush_bucket_choose(in, x, r); @@ -422,13 +422,14 @@ reject: ftotal++; flocal++; - if (collide && flocal < 3) + if (collide && flocal <= map->choose_local_tries) /* retry locally a few times */ retry_bucket = 1; - else if (flocal <= in->size + orig_tries) + else if (map->choose_local_fallback_tries > 0 && + flocal <= in->size + map->choose_local_fallback_tries) /* exhaustive bucket search */ retry_bucket = 1; - else if (ftotal < 20) + else if (ftotal <= map->choose_total_tries) /* then retry descent */ retry_descent = 1; else diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index b780cb7947dd..9da7fdd3cd8a 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -466,6 +466,7 @@ void ceph_key_destroy(struct key *key) { struct ceph_crypto_key *ckey = key->payload.data; ceph_crypto_key_destroy(ckey); + kfree(ckey); } struct key_type key_type_ceph = { diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index 1919d1550d75..3572dc518bc9 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -16,7 +16,8 @@ struct ceph_crypto_key { static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) { - kfree(key->key); + if (key) + kfree(key->key); } extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst, diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 10255e81be79..b9796750034a 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -29,6 +29,74 @@ * the sender. */ +/* + * We track the state of the socket on a given connection using + * values defined below. The transition to a new socket state is + * handled by a function which verifies we aren't coming from an + * unexpected state. 
+ * + * -------- + * | NEW* | transient initial state + * -------- + * | con_sock_state_init() + * v + * ---------- + * | CLOSED | initialized, but no socket (and no + * ---------- TCP connection) + * ^ \ + * | \ con_sock_state_connecting() + * | ---------------------- + * | \ + * + con_sock_state_closed() \ + * |+--------------------------- \ + * | \ \ \ + * | ----------- \ \ + * | | CLOSING | socket event; \ \ + * | ----------- await close \ \ + * | ^ \ | + * | | \ | + * | + con_sock_state_closing() \ | + * | / \ | | + * | / --------------- | | + * | / \ v v + * | / -------------- + * | / -----------------| CONNECTING | socket created, TCP + * | | / -------------- connect initiated + * | | | con_sock_state_connected() + * | | v + * ------------- + * | CONNECTED | TCP connection established + * ------------- + * + * State values for ceph_connection->sock_state; NEW is assumed to be 0. + */ + +#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */ +#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */ +#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */ +#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */ +#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */ + +/* + * connection states + */ +#define CON_STATE_CLOSED 1 /* -> PREOPEN */ +#define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */ +#define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */ +#define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */ +#define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */ +#define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */ + +/* + * ceph_connection flag bits + */ +#define CON_FLAG_LOSSYTX 0 /* we can close channel or drop + * messages on errors */ +#define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */ +#define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */ +#define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */ +#define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */ + /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; @@ -147,72 +215,130 @@ void ceph_msgr_flush(void) } EXPORT_SYMBOL(ceph_msgr_flush); +/* Connection socket state transition functions */ + +static void con_sock_state_init(struct ceph_connection *con) +{ + int old_state; + + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); + if (WARN_ON(old_state != CON_SOCK_STATE_NEW)) + printk("%s: unexpected old state %d\n", __func__, old_state); + dout("%s con %p sock %d -> %d\n", __func__, con, old_state, + CON_SOCK_STATE_CLOSED); +} + +static void con_sock_state_connecting(struct ceph_connection *con) +{ + int old_state; + + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING); + if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED)) + printk("%s: unexpected old state %d\n", __func__, old_state); + dout("%s con %p sock %d -> %d\n", __func__, con, old_state, + CON_SOCK_STATE_CONNECTING); +} + +static void con_sock_state_connected(struct ceph_connection *con) +{ + int old_state; + + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED); + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING)) + printk("%s: unexpected old state %d\n", __func__, old_state); + dout("%s con %p sock %d -> %d\n", __func__, con, old_state, + CON_SOCK_STATE_CONNECTED); +} + +static void con_sock_state_closing(struct ceph_connection *con) +{ + int old_state; + + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING); + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING && + old_state 
!= CON_SOCK_STATE_CONNECTED && + old_state != CON_SOCK_STATE_CLOSING)) + printk("%s: unexpected old state %d\n", __func__, old_state); + dout("%s con %p sock %d -> %d\n", __func__, con, old_state, + CON_SOCK_STATE_CLOSING); +} + +static void con_sock_state_closed(struct ceph_connection *con) +{ + int old_state; + + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED && + old_state != CON_SOCK_STATE_CLOSING && + old_state != CON_SOCK_STATE_CONNECTING && + old_state != CON_SOCK_STATE_CLOSED)) + printk("%s: unexpected old state %d\n", __func__, old_state); + dout("%s con %p sock %d -> %d\n", __func__, con, old_state, + CON_SOCK_STATE_CLOSED); +} /* * socket callback functions */ /* data available on socket, or listen socket received a connect */ -static void ceph_data_ready(struct sock *sk, int count_unused) +static void ceph_sock_data_ready(struct sock *sk, int count_unused) { struct ceph_connection *con = sk->sk_user_data; + if (atomic_read(&con->msgr->stopping)) { + return; + } if (sk->sk_state != TCP_CLOSE_WAIT) { - dout("ceph_data_ready on %p state = %lu, queueing work\n", + dout("%s on %p state = %lu, queueing work\n", __func__, con, con->state); queue_con(con); } } /* socket has buffer space for writing */ -static void ceph_write_space(struct sock *sk) +static void ceph_sock_write_space(struct sock *sk) { struct ceph_connection *con = sk->sk_user_data; /* only queue to workqueue if there is data we want to write, * and there is sufficient space in the socket buffer to accept - * more data. clear SOCK_NOSPACE so that ceph_write_space() + * more data. clear SOCK_NOSPACE so that ceph_sock_write_space() * doesn't get called again until try_write() fills the socket * buffer. See net/ipv4/tcp_input.c:tcp_check_space() * and net/core/stream.c:sk_stream_write_space(). 
*/ - if (test_bit(WRITE_PENDING, &con->state)) { + if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) { if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { - dout("ceph_write_space %p queueing write work\n", con); + dout("%s %p queueing write work\n", __func__, con); clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); queue_con(con); } } else { - dout("ceph_write_space %p nothing to write\n", con); + dout("%s %p nothing to write\n", __func__, con); } } /* socket's state has changed */ -static void ceph_state_change(struct sock *sk) +static void ceph_sock_state_change(struct sock *sk) { struct ceph_connection *con = sk->sk_user_data; - dout("ceph_state_change %p state = %lu sk_state = %u\n", + dout("%s %p state = %lu sk_state = %u\n", __func__, con, con->state, sk->sk_state); - if (test_bit(CLOSED, &con->state)) - return; - switch (sk->sk_state) { case TCP_CLOSE: - dout("ceph_state_change TCP_CLOSE\n"); + dout("%s TCP_CLOSE\n", __func__); case TCP_CLOSE_WAIT: - dout("ceph_state_change TCP_CLOSE_WAIT\n"); - if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) { - if (test_bit(CONNECTING, &con->state)) - con->error_msg = "connection failed"; - else - con->error_msg = "socket closed"; - queue_con(con); - } + dout("%s TCP_CLOSE_WAIT\n", __func__); + con_sock_state_closing(con); + set_bit(CON_FLAG_SOCK_CLOSED, &con->flags); + queue_con(con); break; case TCP_ESTABLISHED: - dout("ceph_state_change TCP_ESTABLISHED\n"); + dout("%s TCP_ESTABLISHED\n", __func__); + con_sock_state_connected(con); queue_con(con); break; default: /* Everything else is uninteresting */ @@ -228,9 +354,9 @@ static void set_sock_callbacks(struct socket *sock, { struct sock *sk = sock->sk; sk->sk_user_data = con; - sk->sk_data_ready = ceph_data_ready; - sk->sk_write_space = ceph_write_space; - sk->sk_state_change = ceph_state_change; + sk->sk_data_ready = ceph_sock_data_ready; + sk->sk_write_space = ceph_sock_write_space; + sk->sk_state_change = ceph_sock_state_change; } @@ -262,6 +388,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); + con_sock_state_connecting(con); ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), O_NONBLOCK); if (ret == -EINPROGRESS) { @@ -277,7 +404,6 @@ static int ceph_tcp_connect(struct ceph_connection *con) return ret; } con->sock = sock; - return 0; } @@ -333,16 +459,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, */ static int con_close_socket(struct ceph_connection *con) { - int rc; + int rc = 0; dout("con_close_socket on %p sock %p\n", con, con->sock); - if (!con->sock) - return 0; - set_bit(SOCK_CLOSED, &con->state); - rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); - sock_release(con->sock); - con->sock = NULL; - clear_bit(SOCK_CLOSED, &con->state); + if (con->sock) { + rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); + sock_release(con->sock); + con->sock = NULL; + } + + /* + * Forcibly clear the SOCK_CLOSED flag. It gets set + * independent of the connection mutex, and we could have + * received a socket close event before we had the chance to + * shut the socket down. 
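Taken together, these messenger hunks split what used to be a single bitmap into two fields: con->state holds exactly one CON_STATE_* value and changes only under con->mutex, while con->flags is a bitmap of CON_FLAG_* bits that socket callbacks may poke without the mutex. A sketch of the resulting idiom, not verbatim kernel code:

	/* single-valued state, mutated only with the mutex held */
	mutex_lock(&con->mutex);
	con->state = CON_STATE_CONNECTING;
	mutex_unlock(&con->mutex);

	/* independent flag bits, safe to set from softirq context */
	set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
	if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags))
		queue_con(con);	/* pick up a close that raced with us */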
+ */ + clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags); + + con_sock_state_closed(con); return rc; } @@ -353,6 +487,10 @@ static int con_close_socket(struct ceph_connection *con) static void ceph_msg_remove(struct ceph_msg *msg) { list_del_init(&msg->list_head); + BUG_ON(msg->con == NULL); + msg->con->ops->put(msg->con); + msg->con = NULL; + ceph_msg_put(msg); } static void ceph_msg_remove_list(struct list_head *head) @@ -372,8 +510,11 @@ static void reset_connection(struct ceph_connection *con) ceph_msg_remove_list(&con->out_sent); if (con->in_msg) { + BUG_ON(con->in_msg->con != con); + con->in_msg->con = NULL; ceph_msg_put(con->in_msg); con->in_msg = NULL; + con->ops->put(con); } con->connect_seq = 0; @@ -391,32 +532,44 @@ static void reset_connection(struct ceph_connection *con) */ void ceph_con_close(struct ceph_connection *con) { + mutex_lock(&con->mutex); dout("con_close %p peer %s\n", con, ceph_pr_addr(&con->peer_addr.in_addr)); - set_bit(CLOSED, &con->state); /* in case there's queued work */ - clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ - clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ - clear_bit(KEEPALIVE_PENDING, &con->state); - clear_bit(WRITE_PENDING, &con->state); - mutex_lock(&con->mutex); + con->state = CON_STATE_CLOSED; + + clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */ + clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); + clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); + clear_bit(CON_FLAG_BACKOFF, &con->flags); + reset_connection(con); con->peer_global_seq = 0; cancel_delayed_work(&con->work); + con_close_socket(con); mutex_unlock(&con->mutex); - queue_con(con); } EXPORT_SYMBOL(ceph_con_close); /* * Reopen a closed connection, with a new peer address. */ -void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) +void ceph_con_open(struct ceph_connection *con, + __u8 entity_type, __u64 entity_num, + struct ceph_entity_addr *addr) { + mutex_lock(&con->mutex); dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); - set_bit(OPENING, &con->state); - clear_bit(CLOSED, &con->state); + + BUG_ON(con->state != CON_STATE_CLOSED); + con->state = CON_STATE_PREOPEN; + + con->peer_name.type = (__u8) entity_type; + con->peer_name.num = cpu_to_le64(entity_num); + memcpy(&con->peer_addr, addr, sizeof(*addr)); con->delay = 0; /* reset backoff memory */ + mutex_unlock(&con->mutex); queue_con(con); } EXPORT_SYMBOL(ceph_con_open); @@ -430,42 +583,26 @@ bool ceph_con_opened(struct ceph_connection *con) } /* - * generic get/put - */ -struct ceph_connection *ceph_con_get(struct ceph_connection *con) -{ - int nref = __atomic_add_unless(&con->nref, 1, 0); - - dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1); - - return nref ? con : NULL; -} - -void ceph_con_put(struct ceph_connection *con) -{ - int nref = atomic_dec_return(&con->nref); - - BUG_ON(nref < 0); - if (nref == 0) { - BUG_ON(con->sock); - kfree(con); - } - dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref); -} - -/* * initialize a new connection. 
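Read together with the try_read()/try_write() hunks later in this file, the exclusive con->state values introduced here form a small state machine. A comment-style summary of the transitions visible in this patch:

	/*
	 * CLOSED      -> PREOPEN      ceph_con_open()
	 * PREOPEN     -> CONNECTING   try_write() creates the socket
	 * CONNECTING  -> NEGOTIATING  try_read(), after the banner exchange
	 * NEGOTIATING -> OPEN         process_connect(), on a READY reply
	 * OPEN        -> STANDBY      ceph_fault() with nothing queued
	 * STANDBY     -> PREOPEN      clear_standby() when new work arrives
	 * any         -> CLOSED       ceph_con_close(), or a fault on a
	 *                             lossy channel
	 */
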
*/ -void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) +void ceph_con_init(struct ceph_connection *con, void *private, + const struct ceph_connection_operations *ops, + struct ceph_messenger *msgr) { dout("con_init %p\n", con); memset(con, 0, sizeof(*con)); - atomic_set(&con->nref, 1); + con->private = private; + con->ops = ops; con->msgr = msgr; + + con_sock_state_init(con); + mutex_init(&con->mutex); INIT_LIST_HEAD(&con->out_queue); INIT_LIST_HEAD(&con->out_sent); INIT_DELAYED_WORK(&con->work, con_work); + + con->state = CON_STATE_CLOSED; } EXPORT_SYMBOL(ceph_con_init); @@ -486,14 +623,14 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) return ret; } -static void ceph_con_out_kvec_reset(struct ceph_connection *con) +static void con_out_kvec_reset(struct ceph_connection *con) { con->out_kvec_left = 0; con->out_kvec_bytes = 0; con->out_kvec_cur = &con->out_kvec[0]; } -static void ceph_con_out_kvec_add(struct ceph_connection *con, +static void con_out_kvec_add(struct ceph_connection *con, size_t size, void *data) { int index; @@ -507,6 +644,53 @@ static void ceph_con_out_kvec_add(struct ceph_connection *con, con->out_kvec_bytes += size; } +#ifdef CONFIG_BLOCK +static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) +{ + if (!bio) { + *iter = NULL; + *seg = 0; + return; + } + *iter = bio; + *seg = bio->bi_idx; +} + +static void iter_bio_next(struct bio **bio_iter, int *seg) +{ + if (*bio_iter == NULL) + return; + + BUG_ON(*seg >= (*bio_iter)->bi_vcnt); + + (*seg)++; + if (*seg == (*bio_iter)->bi_vcnt) + init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); +} +#endif + +static void prepare_write_message_data(struct ceph_connection *con) +{ + struct ceph_msg *msg = con->out_msg; + + BUG_ON(!msg); + BUG_ON(!msg->hdr.data_len); + + /* initialize page iterator */ + con->out_msg_pos.page = 0; + if (msg->pages) + con->out_msg_pos.page_pos = msg->page_alignment; + else + con->out_msg_pos.page_pos = 0; +#ifdef CONFIG_BLOCK + if (msg->bio) + init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); +#endif + con->out_msg_pos.data_pos = 0; + con->out_msg_pos.did_page_crc = false; + con->out_more = 1; /* data + footer will follow */ +} + /* * Prepare footer for currently outgoing message, and finish things * off. Assumes out_kvec* are already valid.. we just add on to the end. @@ -516,6 +700,8 @@ static void prepare_write_message_footer(struct ceph_connection *con) struct ceph_msg *m = con->out_msg; int v = con->out_kvec_left; + m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; + dout("prepare_write_message_footer %p\n", con); con->out_kvec_is_msg = true; con->out_kvec[v].iov_base = &m->footer; @@ -534,7 +720,7 @@ static void prepare_write_message(struct ceph_connection *con) struct ceph_msg *m; u32 crc; - ceph_con_out_kvec_reset(con); + con_out_kvec_reset(con); con->out_kvec_is_msg = true; con->out_msg_done = false; @@ -542,14 +728,16 @@ static void prepare_write_message(struct ceph_connection *con) * TCP packet that's a good thing. 
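For reference, the bio iterator pair above is consumed like this (an illustrative loop; bio_iovec_idx() is assumed to be the bio_vec accessor of this kernel generation):

	struct bio *iter;
	int seg;

	init_bio_iter(msg->bio, &iter, &seg);	/* head of the chain */
	while (iter) {
		struct bio_vec *bv = bio_iovec_idx(iter, seg);

		/* ... consume bv->bv_page, bv->bv_offset, bv->bv_len ... */
		iter_bio_next(&iter, &seg);	/* next segment, hopping to
						 * bi_next at the end of
						 * each bio */
	}
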
*/ if (con->in_seq > con->in_seq_acked) { con->in_seq_acked = con->in_seq; - ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); + con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); con->out_temp_ack = cpu_to_le64(con->in_seq_acked); - ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), + con_out_kvec_add(con, sizeof (con->out_temp_ack), &con->out_temp_ack); } + BUG_ON(list_empty(&con->out_queue)); m = list_first_entry(&con->out_queue, struct ceph_msg, list_head); con->out_msg = m; + BUG_ON(m->con != con); /* put message on sent list */ ceph_msg_get(m); @@ -576,18 +764,18 @@ static void prepare_write_message(struct ceph_connection *con) BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); /* tag + hdr + front + middle */ - ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); - ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); - ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); + con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); + con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); + con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); if (m->middle) - ceph_con_out_kvec_add(con, m->middle->vec.iov_len, + con_out_kvec_add(con, m->middle->vec.iov_len, m->middle->vec.iov_base); /* fill in crc (except data pages), footer */ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); con->out_msg->hdr.crc = cpu_to_le32(crc); - con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; + con->out_msg->footer.flags = 0; crc = crc32c(0, m->front.iov_base, m->front.iov_len); con->out_msg->footer.front_crc = cpu_to_le32(crc); @@ -597,28 +785,19 @@ static void prepare_write_message(struct ceph_connection *con) con->out_msg->footer.middle_crc = cpu_to_le32(crc); } else con->out_msg->footer.middle_crc = 0; - con->out_msg->footer.data_crc = 0; - dout("prepare_write_message front_crc %u data_crc %u\n", + dout("%s front_crc %u middle_crc %u\n", __func__, le32_to_cpu(con->out_msg->footer.front_crc), le32_to_cpu(con->out_msg->footer.middle_crc)); /* is there a data payload? */ - if (le32_to_cpu(m->hdr.data_len) > 0) { - /* initialize page iterator */ - con->out_msg_pos.page = 0; - if (m->pages) - con->out_msg_pos.page_pos = m->page_alignment; - else - con->out_msg_pos.page_pos = 0; - con->out_msg_pos.data_pos = 0; - con->out_msg_pos.did_page_crc = false; - con->out_more = 1; /* data + footer will follow */ - } else { + con->out_msg->footer.data_crc = 0; + if (m->hdr.data_len) + prepare_write_message_data(con); + else /* no, queue up footer too and be done */ prepare_write_message_footer(con); - } - set_bit(WRITE_PENDING, &con->state); + set_bit(CON_FLAG_WRITE_PENDING, &con->flags); } /* @@ -630,16 +809,16 @@ static void prepare_write_ack(struct ceph_connection *con) con->in_seq_acked, con->in_seq); con->in_seq_acked = con->in_seq; - ceph_con_out_kvec_reset(con); + con_out_kvec_reset(con); - ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); + con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); con->out_temp_ack = cpu_to_le64(con->in_seq_acked); - ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), + con_out_kvec_add(con, sizeof (con->out_temp_ack), &con->out_temp_ack); con->out_more = 1; /* more will follow.. eventually.. 
*/ - set_bit(WRITE_PENDING, &con->state); + set_bit(CON_FLAG_WRITE_PENDING, &con->flags); } /* @@ -648,9 +827,9 @@ static void prepare_write_ack(struct ceph_connection *con) static void prepare_write_keepalive(struct ceph_connection *con) { dout("prepare_write_keepalive %p\n", con); - ceph_con_out_kvec_reset(con); - ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); - set_bit(WRITE_PENDING, &con->state); + con_out_kvec_reset(con); + con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); + set_bit(CON_FLAG_WRITE_PENDING, &con->flags); } /* @@ -665,27 +844,21 @@ static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection if (!con->ops->get_authorizer) { con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; con->out_connect.authorizer_len = 0; - return NULL; } /* Can't hold the mutex while getting authorizer */ - mutex_unlock(&con->mutex); - auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry); - mutex_lock(&con->mutex); if (IS_ERR(auth)) return auth; - if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->state)) + if (con->state != CON_STATE_NEGOTIATING) return ERR_PTR(-EAGAIN); con->auth_reply_buf = auth->authorizer_reply_buf; con->auth_reply_buf_len = auth->authorizer_reply_buf_len; - - return auth; } @@ -694,12 +867,12 @@ static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection */ static void prepare_write_banner(struct ceph_connection *con) { - ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); - ceph_con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), + con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); + con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), &con->msgr->my_enc_addr); con->out_more = 0; - set_bit(WRITE_PENDING, &con->state); + set_bit(CON_FLAG_WRITE_PENDING, &con->flags); } static int prepare_write_connect(struct ceph_connection *con) @@ -742,14 +915,15 @@ static int prepare_write_connect(struct ceph_connection *con) con->out_connect.authorizer_len = auth ? cpu_to_le32(auth->authorizer_buf_len) : 0; - ceph_con_out_kvec_add(con, sizeof (con->out_connect), + con_out_kvec_reset(con); + con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect); if (auth && auth->authorizer_buf_len) - ceph_con_out_kvec_add(con, auth->authorizer_buf_len, + con_out_kvec_add(con, auth->authorizer_buf_len, auth->authorizer_buf); con->out_more = 0; - set_bit(WRITE_PENDING, &con->state); + set_bit(CON_FLAG_WRITE_PENDING, &con->flags); return 0; } @@ -797,30 +971,34 @@ out: return ret; /* done! 
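The get_connect_authorizer() rework above encodes a general locking rule: a mutex dropped around a blocking upcall buys nothing unless the guarded state is revalidated once the lock is retaken. Schematically (demo_obj, blocking_upcall and DEMO_STATE_EXPECTED are hypothetical names):

	static int demo_guarded_upcall(struct demo_obj *obj)
	{
		int ret;

		mutex_unlock(&obj->lock);
		ret = blocking_upcall(obj);	/* may sleep; obj may change */
		mutex_lock(&obj->lock);

		if (obj->state != DEMO_STATE_EXPECTED)
			return -EAGAIN;		/* raced with close/reopen */
		return ret;
	}
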
*/ } -#ifdef CONFIG_BLOCK -static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) +static void out_msg_pos_next(struct ceph_connection *con, struct page *page, + size_t len, size_t sent, bool in_trail) { - if (!bio) { - *iter = NULL; - *seg = 0; - return; - } - *iter = bio; - *seg = bio->bi_idx; -} + struct ceph_msg *msg = con->out_msg; -static void iter_bio_next(struct bio **bio_iter, int *seg) -{ - if (*bio_iter == NULL) - return; + BUG_ON(!msg); + BUG_ON(!sent); - BUG_ON(*seg >= (*bio_iter)->bi_vcnt); + con->out_msg_pos.data_pos += sent; + con->out_msg_pos.page_pos += sent; + if (sent < len) + return; - (*seg)++; - if (*seg == (*bio_iter)->bi_vcnt) - init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); -} + BUG_ON(sent != len); + con->out_msg_pos.page_pos = 0; + con->out_msg_pos.page++; + con->out_msg_pos.did_page_crc = false; + if (in_trail) + list_move_tail(&page->lru, + &msg->trail->head); + else if (msg->pagelist) + list_move_tail(&page->lru, + &msg->pagelist->head); +#ifdef CONFIG_BLOCK + else if (msg->bio) + iter_bio_next(&msg->bio_iter, &msg->bio_seg); #endif +} /* * Write as much message data payload as we can. If we finish, queue @@ -837,41 +1015,36 @@ static int write_partial_msg_pages(struct ceph_connection *con) bool do_datacrc = !con->msgr->nocrc; int ret; int total_max_write; - int in_trail = 0; - size_t trail_len = (msg->trail ? msg->trail->length : 0); + bool in_trail = false; + const size_t trail_len = (msg->trail ? msg->trail->length : 0); + const size_t trail_off = data_len - trail_len; dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", - con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, + con, msg, con->out_msg_pos.page, msg->nr_pages, con->out_msg_pos.page_pos); -#ifdef CONFIG_BLOCK - if (msg->bio && !msg->bio_iter) - init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); -#endif - + /* + * Iterate through each page that contains data to be + * written, and send as much as possible for each. + * + * If we are calculating the data crc (the default), we will + * need to map the page. If we have no pages, they have + * been revoked, so use the zero page. + */ while (data_len > con->out_msg_pos.data_pos) { struct page *page = NULL; int max_write = PAGE_SIZE; int bio_offset = 0; - total_max_write = data_len - trail_len - - con->out_msg_pos.data_pos; - - /* - * if we are calculating the data crc (the default), we need - * to map the page. if our pages[] has been revoked, use the - * zero page. - */ - - /* have we reached the trail part of the data? 
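One detail of the payload loop that follows: the data CRC is folded incrementally, one mapped page at a time, with the running value parked in footer.data_crc between calls. The fold step, extracted (kaddr, page_off and len stand in for the local variables used below):

	u32 crc = le32_to_cpu(msg->footer.data_crc);	/* running value */

	crc = crc32c(crc, kaddr + page_off, len);	/* fold this page in */
	msg->footer.data_crc = cpu_to_le32(crc);
	con->out_msg_pos.did_page_crc = true;		/* a short send must
							 * not re-fold it */
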
*/ - if (con->out_msg_pos.data_pos >= data_len - trail_len) { - in_trail = 1; + in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off; + if (!in_trail) + total_max_write = trail_off - con->out_msg_pos.data_pos; + if (in_trail) { total_max_write = data_len - con->out_msg_pos.data_pos; page = list_first_entry(&msg->trail->head, struct page, lru); - max_write = PAGE_SIZE; } else if (msg->pages) { page = msg->pages[con->out_msg_pos.page]; } else if (msg->pagelist) { @@ -894,15 +1067,14 @@ static int write_partial_msg_pages(struct ceph_connection *con) if (do_datacrc && !con->out_msg_pos.did_page_crc) { void *base; - u32 crc; - u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); + u32 crc = le32_to_cpu(msg->footer.data_crc); char *kaddr; kaddr = kmap(page); BUG_ON(kaddr == NULL); base = kaddr + con->out_msg_pos.page_pos + bio_offset; - crc = crc32c(tmpcrc, base, len); - con->out_msg->footer.data_crc = cpu_to_le32(crc); + crc = crc32c(crc, base, len); + msg->footer.data_crc = cpu_to_le32(crc); con->out_msg_pos.did_page_crc = true; } ret = ceph_tcp_sendpage(con->sock, page, @@ -915,31 +1087,15 @@ static int write_partial_msg_pages(struct ceph_connection *con) if (ret <= 0) goto out; - con->out_msg_pos.data_pos += ret; - con->out_msg_pos.page_pos += ret; - if (ret == len) { - con->out_msg_pos.page_pos = 0; - con->out_msg_pos.page++; - con->out_msg_pos.did_page_crc = false; - if (in_trail) - list_move_tail(&page->lru, - &msg->trail->head); - else if (msg->pagelist) - list_move_tail(&page->lru, - &msg->pagelist->head); -#ifdef CONFIG_BLOCK - else if (msg->bio) - iter_bio_next(&msg->bio_iter, &msg->bio_seg); -#endif - } + out_msg_pos_next(con, page, len, (size_t) ret, in_trail); } dout("write_partial_msg_pages %p msg %p done\n", con, msg); /* prepare and queue up footer, too */ if (!do_datacrc) - con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; - ceph_con_out_kvec_reset(con); + msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; + con_out_kvec_reset(con); prepare_write_message_footer(con); ret = 1; out: @@ -1351,20 +1507,14 @@ static int process_banner(struct ceph_connection *con) ceph_pr_addr(&con->msgr->inst.addr.in_addr)); } - set_bit(NEGOTIATING, &con->state); - prepare_read_connect(con); return 0; } static void fail_protocol(struct ceph_connection *con) { reset_connection(con); - set_bit(CLOSED, &con->state); /* in case there's queued work */ - - mutex_unlock(&con->mutex); - if (con->ops->bad_proto) - con->ops->bad_proto(con); - mutex_lock(&con->mutex); + BUG_ON(con->state != CON_STATE_NEGOTIATING); + con->state = CON_STATE_CLOSED; } static int process_connect(struct ceph_connection *con) @@ -1407,7 +1557,6 @@ static int process_connect(struct ceph_connection *con) return -1; } con->auth_retry = 1; - ceph_con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1428,7 +1577,6 @@ static int process_connect(struct ceph_connection *con) ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr)); reset_connection(con); - ceph_con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1440,8 +1588,7 @@ static int process_connect(struct ceph_connection *con) if (con->ops->peer_reset) con->ops->peer_reset(con); mutex_lock(&con->mutex); - if (test_bit(CLOSED, &con->state) || - test_bit(OPENING, &con->state)) + if (con->state != CON_STATE_NEGOTIATING) return -EAGAIN; break; @@ -1454,7 +1601,6 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->out_connect.connect_seq), 
le32_to_cpu(con->in_reply.connect_seq)); con->connect_seq = le32_to_cpu(con->in_reply.connect_seq); - ceph_con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1471,7 +1617,6 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.global_seq)); get_global_seq(con->msgr, le32_to_cpu(con->in_reply.global_seq)); - ceph_con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) return ret; @@ -1489,7 +1634,10 @@ static int process_connect(struct ceph_connection *con) fail_protocol(con); return -1; } - clear_bit(CONNECTING, &con->state); + + BUG_ON(con->state != CON_STATE_NEGOTIATING); + con->state = CON_STATE_OPEN; + con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); con->connect_seq++; con->peer_features = server_feat; @@ -1501,7 +1649,9 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.connect_seq)); if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY) - set_bit(LOSSYTX, &con->state); + set_bit(CON_FLAG_LOSSYTX, &con->flags); + + con->delay = 0; /* reset backoff memory */ prepare_read_tag(con); break; @@ -1587,10 +1737,7 @@ static int read_partial_message_section(struct ceph_connection *con, return 1; } -static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, - struct ceph_msg_header *hdr, - int *skip); - +static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip); static int read_partial_message_pages(struct ceph_connection *con, struct page **pages, @@ -1633,9 +1780,6 @@ static int read_partial_message_bio(struct ceph_connection *con, void *p; int ret, left; - if (IS_ERR(bv)) - return PTR_ERR(bv); - left = min((int)(data_len - con->in_msg_pos.data_pos), (int)(bv->bv_len - con->in_msg_pos.page_pos)); @@ -1672,7 +1816,6 @@ static int read_partial_message(struct ceph_connection *con) int ret; unsigned int front_len, middle_len, data_len; bool do_datacrc = !con->msgr->nocrc; - int skip; u64 seq; u32 crc; @@ -1723,10 +1866,13 @@ static int read_partial_message(struct ceph_connection *con) /* allocate message? 
*/ if (!con->in_msg) { + int skip = 0; + dout("got hdr type %d front %d data %d\n", con->in_hdr.type, con->in_hdr.front_len, con->in_hdr.data_len); - skip = 0; - con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); + ret = ceph_con_in_msg_alloc(con, &skip); + if (ret < 0) + return ret; if (skip) { /* skip this message */ dout("alloc_msg said skip message\n"); @@ -1737,11 +1883,9 @@ static int read_partial_message(struct ceph_connection *con) con->in_seq++; return 0; } - if (!con->in_msg) { - con->error_msg = - "error allocating memory for incoming message"; - return -ENOMEM; - } + + BUG_ON(!con->in_msg); + BUG_ON(con->in_msg->con != con); m = con->in_msg; m->front.iov_len = 0; /* haven't read it yet */ if (m->middle) @@ -1753,6 +1897,11 @@ static int read_partial_message(struct ceph_connection *con) else con->in_msg_pos.page_pos = 0; con->in_msg_pos.data_pos = 0; + +#ifdef CONFIG_BLOCK + if (m->bio) + init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); +#endif } /* front */ @@ -1769,10 +1918,6 @@ static int read_partial_message(struct ceph_connection *con) if (ret <= 0) return ret; } -#ifdef CONFIG_BLOCK - if (m->bio && !m->bio_iter) - init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); -#endif /* (page) data */ while (con->in_msg_pos.data_pos < data_len) { @@ -1783,7 +1928,7 @@ static int read_partial_message(struct ceph_connection *con) return ret; #ifdef CONFIG_BLOCK } else if (m->bio) { - + BUG_ON(!m->bio_iter); ret = read_partial_message_bio(con, &m->bio_iter, &m->bio_seg, data_len, do_datacrc); @@ -1837,8 +1982,11 @@ static void process_message(struct ceph_connection *con) { struct ceph_msg *msg; + BUG_ON(con->in_msg->con != con); + con->in_msg->con = NULL; msg = con->in_msg; con->in_msg = NULL; + con->ops->put(con); /* if first message, set peer_name */ if (con->peer_name.type == 0) @@ -1858,7 +2006,6 @@ static void process_message(struct ceph_connection *con) con->ops->dispatch(con, msg); mutex_lock(&con->mutex); - prepare_read_tag(con); } @@ -1870,22 +2017,19 @@ static int try_write(struct ceph_connection *con) { int ret = 1; - dout("try_write start %p state %lu nref %d\n", con, con->state, - atomic_read(&con->nref)); + dout("try_write start %p state %lu\n", con, con->state); more: dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); /* open the socket first? */ - if (con->sock == NULL) { - ceph_con_out_kvec_reset(con); + if (con->state == CON_STATE_PREOPEN) { + BUG_ON(con->sock); + con->state = CON_STATE_CONNECTING; + + con_out_kvec_reset(con); prepare_write_banner(con); - ret = prepare_write_connect(con); - if (ret < 0) - goto out; prepare_read_banner(con); - set_bit(CONNECTING, &con->state); - clear_bit(NEGOTIATING, &con->state); BUG_ON(con->in_msg); con->in_tag = CEPH_MSGR_TAG_READY; @@ -1932,7 +2076,7 @@ more_kvec: } do_next: - if (!test_bit(CONNECTING, &con->state)) { + if (con->state == CON_STATE_OPEN) { /* is anything else pending? */ if (!list_empty(&con->out_queue)) { prepare_write_message(con); @@ -1942,14 +2086,15 @@ do_next: prepare_write_ack(con); goto more; } - if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) { + if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING, + &con->flags)) { prepare_write_keepalive(con); goto more; } } /* Nothing to do! 
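A note on the allocation path above: read_partial_message() now leaves message allocation to ceph_con_in_msg_alloc() and, through it, to the connection's alloc_msg op. A hypothetical minimal op obeying the *skip contract (return a message with *skip = 0 to receive into it, or NULL with *skip = 1 to have the messenger discard the incoming bytes; demo_type_wanted() is a made-up filter):

	static struct ceph_msg *demo_alloc_msg(struct ceph_connection *con,
					       struct ceph_msg_header *hdr,
					       int *skip)
	{
		int type = le16_to_cpu(hdr->type);
		int front_len = le32_to_cpu(hdr->front_len);

		if (!demo_type_wanted(type)) {
			*skip = 1;
			return NULL;
		}
		*skip = 0;
		return ceph_msg_new(type, front_len, GFP_NOFS, false);
	}
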
*/ - clear_bit(WRITE_PENDING, &con->state); + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); dout("try_write nothing else to write.\n"); ret = 0; out: @@ -1966,38 +2111,42 @@ static int try_read(struct ceph_connection *con) { int ret = -1; - if (!con->sock) - return 0; - - if (test_bit(STANDBY, &con->state)) +more: + dout("try_read start on %p state %lu\n", con, con->state); + if (con->state != CON_STATE_CONNECTING && + con->state != CON_STATE_NEGOTIATING && + con->state != CON_STATE_OPEN) return 0; - dout("try_read start on %p\n", con); + BUG_ON(!con->sock); -more: dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, con->in_base_pos); - /* - * process_connect and process_message drop and re-take - * con->mutex. make sure we handle a racing close or reopen. - */ - if (test_bit(CLOSED, &con->state) || - test_bit(OPENING, &con->state)) { - ret = -EAGAIN; + if (con->state == CON_STATE_CONNECTING) { + dout("try_read connecting\n"); + ret = read_partial_banner(con); + if (ret <= 0) + goto out; + ret = process_banner(con); + if (ret < 0) + goto out; + + BUG_ON(con->state != CON_STATE_CONNECTING); + con->state = CON_STATE_NEGOTIATING; + + /* Banner is good, exchange connection info */ + ret = prepare_write_connect(con); + if (ret < 0) + goto out; + prepare_read_connect(con); + + /* Send connection info before awaiting response */ goto out; } - if (test_bit(CONNECTING, &con->state)) { - if (!test_bit(NEGOTIATING, &con->state)) { - dout("try_read connecting\n"); - ret = read_partial_banner(con); - if (ret <= 0) - goto out; - ret = process_banner(con); - if (ret < 0) - goto out; - } + if (con->state == CON_STATE_NEGOTIATING) { + dout("try_read negotiating\n"); ret = read_partial_connect(con); if (ret <= 0) goto out; @@ -2007,6 +2156,8 @@ more: goto more; } + BUG_ON(con->state != CON_STATE_OPEN); + if (con->in_base_pos < 0) { /* * skipping + discarding content. 
@@ -2040,7 +2191,8 @@ more: prepare_read_ack(con); break; case CEPH_MSGR_TAG_CLOSE: - set_bit(CLOSED, &con->state); /* fixme */ + con_close_socket(con); + con->state = CON_STATE_CLOSED; goto out; default: goto bad_tag; @@ -2063,6 +2215,8 @@ more: if (con->in_tag == CEPH_MSGR_TAG_READY) goto more; process_message(con); + if (con->state == CON_STATE_OPEN) + prepare_read_tag(con); goto more; } if (con->in_tag == CEPH_MSGR_TAG_ACK) { @@ -2091,12 +2245,6 @@ bad_tag: */ static void queue_con(struct ceph_connection *con) { - if (test_bit(DEAD, &con->state)) { - dout("queue_con %p ignoring: DEAD\n", - con); - return; - } - if (!con->ops->get(con)) { dout("queue_con %p ref count 0\n", con); return; @@ -2121,7 +2269,26 @@ static void con_work(struct work_struct *work) mutex_lock(&con->mutex); restart: - if (test_and_clear_bit(BACKOFF, &con->state)) { + if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) { + switch (con->state) { + case CON_STATE_CONNECTING: + con->error_msg = "connection failed"; + break; + case CON_STATE_NEGOTIATING: + con->error_msg = "negotiation failed"; + break; + case CON_STATE_OPEN: + con->error_msg = "socket closed"; + break; + default: + dout("unrecognized con state %d\n", (int)con->state); + con->error_msg = "unrecognized con state"; + BUG(); + } + goto fault; + } + + if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { dout("con_work %p backing off\n", con); if (queue_delayed_work(ceph_msgr_wq, &con->work, round_jiffies_relative(con->delay))) { @@ -2135,35 +2302,35 @@ restart: } } - if (test_bit(STANDBY, &con->state)) { + if (con->state == CON_STATE_STANDBY) { dout("con_work %p STANDBY\n", con); goto done; } - if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ - dout("con_work CLOSED\n"); - con_close_socket(con); + if (con->state == CON_STATE_CLOSED) { + dout("con_work %p CLOSED\n", con); + BUG_ON(con->sock); goto done; } - if (test_and_clear_bit(OPENING, &con->state)) { - /* reopen w/ new peer */ + if (con->state == CON_STATE_PREOPEN) { dout("con_work OPENING\n"); - con_close_socket(con); + BUG_ON(con->sock); } - if (test_and_clear_bit(SOCK_CLOSED, &con->state)) - goto fault; - ret = try_read(con); if (ret == -EAGAIN) goto restart; - if (ret < 0) + if (ret < 0) { + con->error_msg = "socket error on read"; goto fault; + } ret = try_write(con); if (ret == -EAGAIN) goto restart; - if (ret < 0) + if (ret < 0) { + con->error_msg = "socket error on write"; goto fault; + } done: mutex_unlock(&con->mutex); @@ -2172,7 +2339,6 @@ done_unlocked: return; fault: - mutex_unlock(&con->mutex); ceph_fault(con); /* error/fault path */ goto done_unlocked; } @@ -2183,26 +2349,31 @@ fault: * exponential backoff */ static void ceph_fault(struct ceph_connection *con) + __releases(con->mutex) { pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); dout("fault %p state %lu to peer %s\n", con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); - if (test_bit(LOSSYTX, &con->state)) { - dout("fault on LOSSYTX channel\n"); - goto out; - } - - mutex_lock(&con->mutex); - if (test_bit(CLOSED, &con->state)) - goto out_unlock; + BUG_ON(con->state != CON_STATE_CONNECTING && + con->state != CON_STATE_NEGOTIATING && + con->state != CON_STATE_OPEN); con_close_socket(con); + if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) { + dout("fault on LOSSYTX channel, marking CLOSED\n"); + con->state = CON_STATE_CLOSED; + goto out_unlock; + } + if (con->in_msg) { + BUG_ON(con->in_msg->con != con); + con->in_msg->con = NULL; 
ceph_msg_put(con->in_msg); con->in_msg = NULL; + con->ops->put(con); } /* Requeue anything that hasn't been acked */ @@ -2211,12 +2382,13 @@ static void ceph_fault(struct ceph_connection *con) /* If there are no messages queued or keepalive pending, place * the connection in a STANDBY state */ if (list_empty(&con->out_queue) && - !test_bit(KEEPALIVE_PENDING, &con->state)) { + !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) { dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); - clear_bit(WRITE_PENDING, &con->state); - set_bit(STANDBY, &con->state); + clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con->state = CON_STATE_STANDBY; } else { /* retry after a delay. */ + con->state = CON_STATE_PREOPEN; if (con->delay == 0) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) @@ -2237,13 +2409,12 @@ static void ceph_fault(struct ceph_connection *con) * that when con_work restarts we schedule the * delay then. */ - set_bit(BACKOFF, &con->state); + set_bit(CON_FLAG_BACKOFF, &con->flags); } } out_unlock: mutex_unlock(&con->mutex); -out: /* * in case we faulted due to authentication, invalidate our * current tickets so that we can get new ones. @@ -2260,18 +2431,14 @@ out: /* - * create a new messenger instance + * initialize a new messenger instance */ -struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, - u32 supported_features, - u32 required_features) +void ceph_messenger_init(struct ceph_messenger *msgr, + struct ceph_entity_addr *myaddr, + u32 supported_features, + u32 required_features, + bool nocrc) { - struct ceph_messenger *msgr; - - msgr = kzalloc(sizeof(*msgr), GFP_KERNEL); - if (msgr == NULL) - return ERR_PTR(-ENOMEM); - msgr->supported_features = supported_features; msgr->required_features = required_features; @@ -2284,30 +2451,23 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, msgr->inst.addr.type = 0; get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); encode_my_addr(msgr); + msgr->nocrc = nocrc; - dout("messenger_create %p\n", msgr); - return msgr; -} -EXPORT_SYMBOL(ceph_messenger_create); + atomic_set(&msgr->stopping, 0); -void ceph_messenger_destroy(struct ceph_messenger *msgr) -{ - dout("destroy %p\n", msgr); - kfree(msgr); - dout("destroyed messenger %p\n", msgr); + dout("%s %p\n", __func__, msgr); } -EXPORT_SYMBOL(ceph_messenger_destroy); +EXPORT_SYMBOL(ceph_messenger_init); static void clear_standby(struct ceph_connection *con) { /* come back from STANDBY? 
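The retry arm above is cut off by the hunk boundary; the pattern it implements is ordinary capped exponential backoff before the delayed requeue. A sketch consistent with the constants shown (the doubling line itself falls outside this hunk):

	if (con->delay == 0)
		con->delay = BASE_DELAY_INTERVAL;	/* first fault */
	else if (con->delay < MAX_DELAY_INTERVAL)
		con->delay *= 2;			/* double, capped */
	queue_delayed_work(ceph_msgr_wq, &con->work,
			   round_jiffies_relative(con->delay));
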
*/ - if (test_and_clear_bit(STANDBY, &con->state)) { - mutex_lock(&con->mutex); + if (con->state == CON_STATE_STANDBY) { dout("clear_standby %p and ++connect_seq\n", con); + con->state = CON_STATE_PREOPEN; con->connect_seq++; - WARN_ON(test_bit(WRITE_PENDING, &con->state)); - WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state)); - mutex_unlock(&con->mutex); + WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags)); + WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)); } } @@ -2316,21 +2476,24 @@ static void clear_standby(struct ceph_connection *con) */ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) { - if (test_bit(CLOSED, &con->state)) { - dout("con_send %p closed, dropping %p\n", con, msg); - ceph_msg_put(msg); - return; - } - /* set src+dst */ msg->hdr.src = con->msgr->inst.name; - BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); - msg->needs_out_seq = true; - /* queue */ mutex_lock(&con->mutex); + + if (con->state == CON_STATE_CLOSED) { + dout("con_send %p closed, dropping %p\n", con, msg); + ceph_msg_put(msg); + mutex_unlock(&con->mutex); + return; + } + + BUG_ON(msg->con != NULL); + msg->con = con->ops->get(con); + BUG_ON(msg->con == NULL); + BUG_ON(!list_empty(&msg->list_head)); list_add_tail(&msg->list_head, &con->out_queue); dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg, @@ -2339,12 +2502,13 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) le32_to_cpu(msg->hdr.front_len), le32_to_cpu(msg->hdr.middle_len), le32_to_cpu(msg->hdr.data_len)); + + clear_standby(con); mutex_unlock(&con->mutex); /* if there wasn't anything waiting to send before, queue * new work */ - clear_standby(con); - if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) + if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_send); @@ -2352,24 +2516,34 @@ EXPORT_SYMBOL(ceph_con_send); /* * Revoke a message that was previously queued for send */ -void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg) +void ceph_msg_revoke(struct ceph_msg *msg) { + struct ceph_connection *con = msg->con; + + if (!con) + return; /* Message not in our possession */ + mutex_lock(&con->mutex); if (!list_empty(&msg->list_head)) { - dout("con_revoke %p msg %p - was on queue\n", con, msg); + dout("%s %p msg %p - was on queue\n", __func__, con, msg); list_del_init(&msg->list_head); - ceph_msg_put(msg); + BUG_ON(msg->con == NULL); + msg->con->ops->put(msg->con); + msg->con = NULL; msg->hdr.seq = 0; + + ceph_msg_put(msg); } if (con->out_msg == msg) { - dout("con_revoke %p msg %p - was sending\n", con, msg); + dout("%s %p msg %p - was sending\n", __func__, con, msg); con->out_msg = NULL; if (con->out_kvec_is_msg) { con->out_skip = con->out_kvec_bytes; con->out_kvec_is_msg = false; } - ceph_msg_put(msg); msg->hdr.seq = 0; + + ceph_msg_put(msg); } mutex_unlock(&con->mutex); } @@ -2377,17 +2551,27 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg) /* * Revoke a message that we may be reading data into */ -void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) +void ceph_msg_revoke_incoming(struct ceph_msg *msg) { + struct ceph_connection *con; + + BUG_ON(msg == NULL); + if (!msg->con) { + dout("%s msg %p null con\n", __func__, msg); + + return; /* Message not in our possession */ + } + + con = msg->con; mutex_lock(&con->mutex); - if (con->in_msg && con->in_msg == msg) { + if (con->in_msg == msg) { unsigned int front_len = le32_to_cpu(con->in_hdr.front_len); 
unsigned int middle_len = le32_to_cpu(con->in_hdr.middle_len); unsigned int data_len = le32_to_cpu(con->in_hdr.data_len); /* skip rest of message */ - dout("con_revoke_pages %p msg %p revoked\n", con, msg); - con->in_base_pos = con->in_base_pos - + dout("%s %p msg %p revoked\n", __func__, con, msg); + con->in_base_pos = con->in_base_pos - sizeof(struct ceph_msg_header) - front_len - middle_len - @@ -2398,8 +2582,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) con->in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; } else { - dout("con_revoke_pages %p msg %p pages %p no-op\n", - con, con->in_msg, msg); + dout("%s %p in_msg %p msg %p no-op\n", + __func__, con, con->in_msg, msg); } mutex_unlock(&con->mutex); } @@ -2410,9 +2594,11 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) void ceph_con_keepalive(struct ceph_connection *con) { dout("con_keepalive %p\n", con); + mutex_lock(&con->mutex); clear_standby(con); - if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && - test_and_set_bit(WRITE_PENDING, &con->state) == 0) + mutex_unlock(&con->mutex); + if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 && + test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_keepalive); @@ -2431,6 +2617,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, if (m == NULL) goto out; kref_init(&m->kref); + + m->con = NULL; INIT_LIST_HEAD(&m->list_head); m->hdr.tid = 0; @@ -2526,46 +2714,77 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg) } /* - * Generic message allocator, for incoming messages. + * Allocate a message for receiving an incoming message on a + * connection, and save the result in con->in_msg. Uses the + * connection's private alloc_msg op if available. + * + * Returns 0 on success, or a negative error code. + * + * On success, if we set *skip = 1: + * - the next message should be skipped and ignored. + * - con->in_msg == NULL + * or if we set *skip = 0: + * - con->in_msg is non-null. 
+ * On error (ENOMEM, EAGAIN, ...), + * - con->in_msg == NULL */ -static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, - struct ceph_msg_header *hdr, - int *skip) +static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) { + struct ceph_msg_header *hdr = &con->in_hdr; int type = le16_to_cpu(hdr->type); int front_len = le32_to_cpu(hdr->front_len); int middle_len = le32_to_cpu(hdr->middle_len); - struct ceph_msg *msg = NULL; - int ret; + int ret = 0; + + BUG_ON(con->in_msg != NULL); if (con->ops->alloc_msg) { + struct ceph_msg *msg; + mutex_unlock(&con->mutex); msg = con->ops->alloc_msg(con, hdr, skip); mutex_lock(&con->mutex); - if (!msg || *skip) - return NULL; + if (con->state != CON_STATE_OPEN) { + ceph_msg_put(msg); + return -EAGAIN; + } + con->in_msg = msg; + if (con->in_msg) { + con->in_msg->con = con->ops->get(con); + BUG_ON(con->in_msg->con == NULL); + } + if (*skip) { + con->in_msg = NULL; + return 0; + } + if (!con->in_msg) { + con->error_msg = + "error allocating memory for incoming message"; + return -ENOMEM; + } } - if (!msg) { - *skip = 0; - msg = ceph_msg_new(type, front_len, GFP_NOFS, false); - if (!msg) { + if (!con->in_msg) { + con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false); + if (!con->in_msg) { pr_err("unable to allocate msg type %d len %d\n", type, front_len); - return NULL; + return -ENOMEM; } - msg->page_alignment = le16_to_cpu(hdr->data_off); + con->in_msg->con = con->ops->get(con); + BUG_ON(con->in_msg->con == NULL); + con->in_msg->page_alignment = le16_to_cpu(hdr->data_off); } - memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); + memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); - if (middle_len && !msg->middle) { - ret = ceph_alloc_middle(con, msg); + if (middle_len && !con->in_msg->middle) { + ret = ceph_alloc_middle(con, con->in_msg); if (ret < 0) { - ceph_msg_put(msg); - return NULL; + ceph_msg_put(con->in_msg); + con->in_msg = NULL; } } - return msg; + return ret; } diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index d0649a9655be..105d533b55f3 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -106,9 +106,9 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) monc->pending_auth = 1; monc->m_auth->front.iov_len = len; monc->m_auth->hdr.front_len = cpu_to_le32(len); - ceph_con_revoke(monc->con, monc->m_auth); + ceph_msg_revoke(monc->m_auth); ceph_msg_get(monc->m_auth); /* keep our ref */ - ceph_con_send(monc->con, monc->m_auth); + ceph_con_send(&monc->con, monc->m_auth); } /* @@ -117,8 +117,11 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) static void __close_session(struct ceph_mon_client *monc) { dout("__close_session closing mon%d\n", monc->cur_mon); - ceph_con_revoke(monc->con, monc->m_auth); - ceph_con_close(monc->con); + ceph_msg_revoke(monc->m_auth); + ceph_msg_revoke_incoming(monc->m_auth_reply); + ceph_msg_revoke(monc->m_subscribe); + ceph_msg_revoke_incoming(monc->m_subscribe_ack); + ceph_con_close(&monc->con); monc->cur_mon = -1; monc->pending_auth = 0; ceph_auth_reset(monc->auth); @@ -142,9 +145,8 @@ static int __open_session(struct ceph_mon_client *monc) monc->want_next_osdmap = !!monc->want_next_osdmap; dout("open_session mon%d opening\n", monc->cur_mon); - monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON; - monc->con->peer_name.num = cpu_to_le64(monc->cur_mon); - ceph_con_open(monc->con, + ceph_con_open(&monc->con, + CEPH_ENTITY_TYPE_MON, monc->cur_mon, 
&monc->monmap->mon_inst[monc->cur_mon].addr); /* initiatiate authentication handshake */ @@ -226,8 +228,8 @@ static void __send_subscribe(struct ceph_mon_client *monc) msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - ceph_con_revoke(monc->con, msg); - ceph_con_send(monc->con, ceph_msg_get(msg)); + ceph_msg_revoke(msg); + ceph_con_send(&monc->con, ceph_msg_get(msg)); monc->sub_sent = jiffies | 1; /* never 0 */ } @@ -247,7 +249,7 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc, if (monc->hunting) { pr_info("mon%d %s session established\n", monc->cur_mon, - ceph_pr_addr(&monc->con->peer_addr.in_addr)); + ceph_pr_addr(&monc->con.peer_addr.in_addr)); monc->hunting = false; } dout("handle_subscribe_ack after %d seconds\n", seconds); @@ -439,6 +441,7 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con, m = NULL; } else { dout("get_generic_reply %lld got %p\n", tid, req->reply); + *skip = 0; m = ceph_msg_get(req->reply); /* * we don't need to track the connection reading into @@ -461,7 +464,7 @@ static int do_generic_request(struct ceph_mon_client *monc, req->request->hdr.tid = cpu_to_le64(req->tid); __insert_generic_request(monc, req); monc->num_generic_requests++; - ceph_con_send(monc->con, ceph_msg_get(req->request)); + ceph_con_send(&monc->con, ceph_msg_get(req->request)); mutex_unlock(&monc->mutex); err = wait_for_completion_interruptible(&req->completion); @@ -684,8 +687,9 @@ static void __resend_generic_request(struct ceph_mon_client *monc) for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { req = rb_entry(p, struct ceph_mon_generic_request, node); - ceph_con_revoke(monc->con, req->request); - ceph_con_send(monc->con, ceph_msg_get(req->request)); + ceph_msg_revoke(req->request); + ceph_msg_revoke_incoming(req->reply); + ceph_con_send(&monc->con, ceph_msg_get(req->request)); } } @@ -705,7 +709,7 @@ static void delayed_work(struct work_struct *work) __close_session(monc); __open_session(monc); /* continue hunting */ } else { - ceph_con_keepalive(monc->con); + ceph_con_keepalive(&monc->con); __validate_auth(monc); @@ -760,19 +764,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) goto out; /* connection */ - monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL); - if (!monc->con) - goto out_monmap; - ceph_con_init(monc->client->msgr, monc->con); - monc->con->private = monc; - monc->con->ops = &mon_con_ops; - /* authentication */ monc->auth = ceph_auth_init(cl->options->name, cl->options->key); if (IS_ERR(monc->auth)) { err = PTR_ERR(monc->auth); - goto out_con; + goto out_monmap; } monc->auth->want_keys = CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | @@ -801,6 +798,9 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) if (!monc->m_auth) goto out_auth_reply; + ceph_con_init(&monc->con, monc, &mon_con_ops, + &monc->client->msgr); + monc->cur_mon = -1; monc->hunting = true; monc->sub_renew_after = jiffies; @@ -824,8 +824,6 @@ out_subscribe_ack: ceph_msg_put(monc->m_subscribe_ack); out_auth: ceph_auth_destroy(monc->auth); -out_con: - monc->con->ops->put(monc->con); out_monmap: kfree(monc->monmap); out: @@ -841,10 +839,6 @@ void ceph_monc_stop(struct ceph_mon_client *monc) mutex_lock(&monc->mutex); __close_session(monc); - monc->con->private = NULL; - monc->con->ops->put(monc->con); - monc->con = NULL; - mutex_unlock(&monc->mutex); /* @@ -888,8 +882,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc, } else if (!was_auth && 
monc->auth->ops->is_authenticated(monc->auth)) { dout("authenticated, starting session\n"); - monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; - monc->client->msgr->inst.name.num = + monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT; + monc->client->msgr.inst.name.num = cpu_to_le64(monc->auth->global_id); __send_subscribe(monc); @@ -1000,6 +994,8 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, case CEPH_MSG_MDS_MAP: case CEPH_MSG_OSD_MAP: m = ceph_msg_new(type, front_len, GFP_NOFS, false); + if (!m) + return NULL; /* ENOMEM--return skip == 0 */ break; } @@ -1029,7 +1025,7 @@ static void mon_fault(struct ceph_connection *con) if (!monc->hunting) pr_info("mon%d %s session lost, " "hunting for new mon\n", monc->cur_mon, - ceph_pr_addr(&monc->con->peer_addr.in_addr)); + ceph_pr_addr(&monc->con.peer_addr.in_addr)); __close_session(monc); if (!monc->hunting) { @@ -1044,9 +1040,23 @@ out: mutex_unlock(&monc->mutex); } +/* + * We can ignore refcounting on the connection struct, as all references + * will come from the messenger workqueue, which is drained prior to + * mon_client destruction. + */ +static struct ceph_connection *con_get(struct ceph_connection *con) +{ + return con; +} + +static void con_put(struct ceph_connection *con) +{ +} + static const struct ceph_connection_operations mon_con_ops = { - .get = ceph_con_get, - .put = ceph_con_put, + .get = con_get, + .put = con_put, .dispatch = dispatch, .fault = mon_fault, .alloc_msg = mon_alloc_msg, diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index 11d5f4196a73..ddec1c10ac80 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg) struct ceph_msgpool *pool = arg; struct ceph_msg *msg; - msg = ceph_msg_new(0, pool->front_len, gfp_mask, true); + msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true); if (!msg) { dout("msgpool_alloc %s failed\n", pool->name); } else { @@ -32,10 +32,11 @@ static void msgpool_free(void *element, void *arg) ceph_msg_put(msg); } -int ceph_msgpool_init(struct ceph_msgpool *pool, +int ceph_msgpool_init(struct ceph_msgpool *pool, int type, int front_len, int size, bool blocking, const char *name) { dout("msgpool %s init\n", name); + pool->type = type; pool->front_len = front_len; pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool); if (!pool->pool) @@ -61,7 +62,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, WARN_ON(1); /* try to alloc a fresh message */ - return ceph_msg_new(0, front_len, GFP_NOFS, false); + return ceph_msg_new(pool->type, front_len, GFP_NOFS, false); } msg = mempool_alloc(pool->pool, GFP_NOFS); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ca59e66c9787..42119c05e82c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -140,10 +140,9 @@ void ceph_osdc_release_request(struct kref *kref) if (req->r_request) ceph_msg_put(req->r_request); if (req->r_con_filling_msg) { - dout("release_request revoking pages %p from con %p\n", + dout("%s revoking pages %p from con %p\n", __func__, req->r_pages, req->r_con_filling_msg); - ceph_con_revoke_message(req->r_con_filling_msg, - req->r_reply); + ceph_msg_revoke_incoming(req->r_reply); req->r_con_filling_msg->ops->put(req->r_con_filling_msg); } if (req->r_reply) @@ -214,10 +213,13 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, kref_init(&req->r_kref); init_completion(&req->r_completion); init_completion(&req->r_safe_completion); + 
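The msgpool_alloc() change above is the reason ceph_msgpool_init() grows a type argument below: messages drawn from the pool now come back with hdr.type already set, rather than 0 that every caller had to patch up afterwards. A usage sketch, assuming a pool initialized with CEPH_MSG_OSD_OP:

	struct ceph_msg *msg;

	msg = ceph_msgpool_get(&osdc->msgpool_op, 0);	/* 0: the pool's own
							 * front size suffices */
	if (msg) {
		/* ... fill msg->front; hdr.type needs no fixup ... */
		ceph_msgpool_put(&osdc->msgpool_op, msg);
	}
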
rb_init_node(&req->r_node); INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); INIT_LIST_HEAD(&req->r_req_lru_item); + INIT_LIST_HEAD(&req->r_osd_item); + req->r_flags = flags; WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); @@ -243,6 +245,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, } ceph_pagelist_init(req->r_trail); } + /* create request message; allow space for oid */ msg_size += MAX_OBJ_NAME_SIZE; if (snapc) @@ -256,7 +259,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, return NULL; } - msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); memset(msg->front.iov_base, 0, msg->front.iov_len); req->r_request = msg; @@ -624,7 +626,7 @@ static void osd_reset(struct ceph_connection *con) /* * Track open sessions with osds. */ -static struct ceph_osd *create_osd(struct ceph_osd_client *osdc) +static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) { struct ceph_osd *osd; @@ -634,15 +636,13 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc) atomic_set(&osd->o_ref, 1); osd->o_osdc = osdc; + osd->o_osd = onum; INIT_LIST_HEAD(&osd->o_requests); INIT_LIST_HEAD(&osd->o_linger_requests); INIT_LIST_HEAD(&osd->o_osd_lru); osd->o_incarnation = 1; - ceph_con_init(osdc->client->msgr, &osd->o_con); - osd->o_con.private = osd; - osd->o_con.ops = &osd_con_ops; - osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD; + ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr); INIT_LIST_HEAD(&osd->o_keepalive_item); return osd; @@ -688,7 +688,7 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) static void remove_all_osds(struct ceph_osd_client *osdc) { - dout("__remove_old_osds %p\n", osdc); + dout("%s %p\n", __func__, osdc); mutex_lock(&osdc->request_mutex); while (!RB_EMPTY_ROOT(&osdc->osds)) { struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), @@ -752,7 +752,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) ret = -EAGAIN; } else { ceph_con_close(&osd->o_con); - ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); + ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, + &osdc->osdmap->osd_addr[osd->o_osd]); osd->o_incarnation++; } return ret; @@ -853,7 +854,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, if (req->r_osd) { /* make sure the original request isn't in flight. 
*/ - ceph_con_revoke(&req->r_osd->o_con, req->r_request); + ceph_msg_revoke(req->r_request); list_del_init(&req->r_osd_item); if (list_empty(&req->r_osd->o_requests) && @@ -880,7 +881,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, static void __cancel_request(struct ceph_osd_request *req) { if (req->r_sent && req->r_osd) { - ceph_con_revoke(&req->r_osd->o_con, req->r_request); + ceph_msg_revoke(req->r_request); req->r_sent = 0; } } @@ -890,7 +891,9 @@ static void __register_linger_request(struct ceph_osd_client *osdc, { dout("__register_linger_request %p\n", req); list_add_tail(&req->r_linger_item, &osdc->req_linger); - list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests); + if (req->r_osd) + list_add_tail(&req->r_linger_osd, + &req->r_osd->o_linger_requests); } static void __unregister_linger_request(struct ceph_osd_client *osdc, @@ -998,18 +1001,18 @@ static int __map_request(struct ceph_osd_client *osdc, req->r_osd = __lookup_osd(osdc, o); if (!req->r_osd && o >= 0) { err = -ENOMEM; - req->r_osd = create_osd(osdc); + req->r_osd = create_osd(osdc, o); if (!req->r_osd) { list_move(&req->r_req_lru_item, &osdc->req_notarget); goto out; } dout("map_request osd %p is osd%d\n", req->r_osd, o); - req->r_osd->o_osd = o; - req->r_osd->o_con.peer_name.num = cpu_to_le64(o); __insert_osd(osdc, req->r_osd); - ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]); + ceph_con_open(&req->r_osd->o_con, + CEPH_ENTITY_TYPE_OSD, o, + &osdc->osdmap->osd_addr[o]); } if (req->r_osd) { @@ -1304,8 +1307,9 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("kick_requests %s\n", force_resend ? " (force resend)" : ""); mutex_lock(&osdc->request_mutex); - for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { + for (p = rb_first(&osdc->requests); p; ) { req = rb_entry(p, struct ceph_osd_request, r_node); + p = rb_next(p); err = __map_request(osdc, req, force_resend); if (err < 0) continue; /* error */ @@ -1313,10 +1317,23 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("%p tid %llu maps to no osd\n", req, req->r_tid); needmap++; /* request a newer map */ } else if (err > 0) { - dout("%p tid %llu requeued on osd%d\n", req, req->r_tid, - req->r_osd ? req->r_osd->o_osd : -1); - if (!req->r_linger) + if (!req->r_linger) { + dout("%p tid %llu requeued on osd%d\n", req, + req->r_tid, + req->r_osd ? req->r_osd->o_osd : -1); req->r_flags |= CEPH_OSD_FLAG_RETRY; + } + } + if (req->r_linger && list_empty(&req->r_linger_item)) { + /* + * register as a linger so that we will + * re-submit below and get a new tid + */ + dout("%p tid %llu restart on osd%d\n", + req, req->r_tid, + req->r_osd ? 
req->r_osd->o_osd : -1);
+			__register_linger_request(osdc, req);
+			__unregister_request(osdc, req);
 		}
 	}
 
@@ -1391,7 +1408,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 			 epoch, maplen);
 		newmap = osdmap_apply_incremental(&p, next, osdc->osdmap,
-						  osdc->client->msgr);
+						  &osdc->client->msgr);
 		if (IS_ERR(newmap)) {
 			err = PTR_ERR(newmap);
 			goto bad;
 		}
@@ -1839,11 +1856,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 	if (!osdc->req_mempool)
 		goto out;
 
-	err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true,
+	err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
+				OSD_OP_FRONT_LEN, 10, true,
 				"osd_op");
 	if (err < 0)
 		goto out_mempool;
-	err = ceph_msgpool_init(&osdc->msgpool_op_reply,
+	err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
 				OSD_OPREPLY_FRONT_LEN, 10, true,
 				"osd_op_reply");
 	if (err < 0)
@@ -2019,15 +2037,15 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
 	if (!req) {
 		*skip = 1;
 		m = NULL;
-		pr_info("get_reply unknown tid %llu from osd%d\n", tid,
-			osd->o_osd);
+		dout("get_reply unknown tid %llu from osd%d\n", tid,
+		     osd->o_osd);
 		goto out;
 	}
 
 	if (req->r_con_filling_msg) {
-		dout("get_reply revoking msg %p from old con %p\n",
+		dout("%s revoking msg %p from old con %p\n", __func__,
 		     req->r_reply, req->r_con_filling_msg);
-		ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply);
+		ceph_msg_revoke_incoming(req->r_reply);
 		req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
 		req->r_con_filling_msg = NULL;
 	}
@@ -2080,6 +2098,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
 	int type = le16_to_cpu(hdr->type);
 	int front = le32_to_cpu(hdr->front_len);
 
+	*skip = 0;
 	switch (type) {
 	case CEPH_MSG_OSD_MAP:
 	case CEPH_MSG_WATCH_NOTIFY:
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 81e3b84a77ef..3124b71a8883 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -135,6 +135,21 @@ bad:
 	return -EINVAL;
 }
 
+static int skip_name_map(void **p, void *end)
+{
+	int len;
+	ceph_decode_32_safe(p, end, len, bad);
+	while (len--) {
+		int strlen;
+		*p += sizeof(u32);
+		ceph_decode_32_safe(p, end, strlen, bad);
+		*p += strlen;
+	}
+	return 0;
+bad:
+	return -EINVAL;
+}
+
 static struct crush_map *crush_decode(void *pbyval, void *end)
 {
 	struct crush_map *c;
@@ -143,6 +158,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	void **p = &pbyval;
 	void *start = pbyval;
 	u32 magic;
+	u32 num_name_maps;
 
 	dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));
 
 	if (c == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	/* set tunables to default values */
+	c->choose_local_tries = 2;
+	c->choose_local_fallback_tries = 5;
+	c->choose_total_tries = 19;
+
 	ceph_decode_need(p, end, 4*sizeof(u32), bad);
 	magic = ceph_decode_32(p);
 	if (magic != CRUSH_MAGIC) {
@@ -297,7 +318,25 @@
 	}
 
 	/* ignore trailing name maps. 
*/ + for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) { + err = skip_name_map(p, end); + if (err < 0) + goto done; + } + + /* tunables */ + ceph_decode_need(p, end, 3*sizeof(u32), done); + c->choose_local_tries = ceph_decode_32(p); + c->choose_local_fallback_tries = ceph_decode_32(p); + c->choose_total_tries = ceph_decode_32(p); + dout("crush decode tunable choose_local_tries = %d", + c->choose_local_tries); + dout("crush decode tunable choose_local_fallback_tries = %d", + c->choose_local_fallback_tries); + dout("crush decode tunable choose_total_tries = %d", + c->choose_total_tries); +done: dout("crush_decode success\n"); return c; @@ -488,15 +527,16 @@ static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) ceph_decode_32_safe(p, end, pool, bad); ceph_decode_32_safe(p, end, len, bad); dout(" pool %d len %d\n", pool, len); + ceph_decode_need(p, end, len, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) { + char *name = kstrndup(*p, len, GFP_NOFS); + + if (!name) + return -ENOMEM; kfree(pi->name); - pi->name = kmalloc(len + 1, GFP_NOFS); - if (pi->name) { - memcpy(pi->name, *p, len); - pi->name[len] = '\0'; - dout(" name is %s\n", pi->name); - } + pi->name = name; + dout(" name is %s\n", pi->name); } *p += len; } @@ -666,6 +706,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); ceph_decode_copy(p, &pgid, sizeof(pgid)); n = ceph_decode_32(p); + err = -EINVAL; + if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) + goto bad; ceph_decode_need(p, end, n * sizeof(u32), bad); err = -ENOMEM; pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS); @@ -889,6 +932,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, (void) __remove_pg_mapping(&map->pg_temp, pgid); /* insert */ + if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) { + err = -EINVAL; + goto bad; + } pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS); if (!pg) { err = -ENOMEM; diff --git a/net/core/dev.c b/net/core/dev.c index 0ebaea16632f..088923fe4066 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1055,6 +1055,8 @@ rollback: */ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) { + char *new_ifalias; + ASSERT_RTNL(); if (len >= IFALIASZ) @@ -1068,9 +1070,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return 0; } - dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); - if (!dev->ifalias) + new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); + if (!new_ifalias) return -ENOMEM; + dev->ifalias = new_ifalias; strlcpy(dev->ifalias, alias, len+1); return len; @@ -1106,11 +1109,23 @@ void netdev_state_change(struct net_device *dev) } EXPORT_SYMBOL(netdev_state_change); -int netdev_bonding_change(struct net_device *dev, unsigned long event) +/** + * netdev_notify_peers - notify network peers about existence of @dev + * @dev: network device + * + * Generate traffic such that interested network peers are aware of + * @dev, such as by generating a gratuitous ARP. This may be used when + * a device wants to inform the rest of the network about some sort of + * reconfiguration such as a failover event or virtual machine + * migration. 
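Both osdmap checks added above guard a length-dependent allocation against 32-bit overflow, not merely against large inputs: with a hostile n, sizeof(*pg) + n * sizeof(u32) can wrap to a small value, kmalloc() then succeeds, and the decode loop writes past the buffer. The reusable shape of the guard:

	/* accept n only if sizeof(*pg) + n * sizeof(u32) cannot wrap */
	if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
		return -EINVAL;

	pg = kmalloc(sizeof(*pg) + n * sizeof(u32), GFP_NOFS);
	if (!pg)
		return -ENOMEM;
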
+ */ +void netdev_notify_peers(struct net_device *dev) { - return call_netdevice_notifiers(event, dev); + rtnl_lock(); + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); + rtnl_unlock(); } -EXPORT_SYMBOL(netdev_bonding_change); +EXPORT_SYMBOL(netdev_notify_peers); /** * dev_load - load a network module @@ -1172,6 +1187,7 @@ static int __dev_open(struct net_device *dev) net_dmaengine_get(); dev_set_rx_mode(dev); dev_activate(dev); + add_device_randomness(dev->dev_addr, dev->addr_len); } return ret; @@ -1638,6 +1654,19 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } +static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) +{ + if (ptype->af_packet_priv == NULL) + return false; + + if (ptype->id_match) + return ptype->id_match(ptype, skb->sk); + else if ((struct sock *)ptype->af_packet_priv == skb->sk) + return true; + + return false; +} + /* * Support routine. Sends outgoing frames to any network * taps currently in use. @@ -1655,8 +1684,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) * they originated from - MvS (miquels@drinkel.ow.org) */ if ((ptype->dev == dev || !ptype->dev) && - (ptype->af_packet_priv == NULL || - (struct sock *)ptype->af_packet_priv != skb->sk)) { + (!skb_loop_sk(ptype, skb))) { if (pt_prev) { deliver_skb(skb2, pt_prev, skb->dev); pt_prev = ptype; @@ -2133,6 +2161,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) __be16 protocol = skb->protocol; netdev_features_t features = skb->dev->features; + if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) + features &= ~NETIF_F_GSO_MASK; + if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; @@ -3155,6 +3186,23 @@ void netdev_rx_handler_unregister(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); +/* + * Limit the use of PFMEMALLOC reserves to those protocols that implement + * the special handling of PFMEMALLOC skbs. + */ +static bool skb_pfmemalloc_protocol(struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_ARP): + case __constant_htons(ETH_P_IP): + case __constant_htons(ETH_P_IPV6): + case __constant_htons(ETH_P_8021Q): + return true; + default: + return false; + } +} + static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -3164,14 +3212,27 @@ static int __netif_receive_skb(struct sk_buff *skb) bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; + unsigned long pflags = current->flags; net_timestamp_check(!netdev_tstamp_prequeue, skb); trace_netif_receive_skb(skb); + /* + * PFMEMALLOC skbs are special, they should + * - be delivered to SOCK_MEMALLOC sockets only + * - stay away from userspace + * - have bounded memory usage + * + * Use PF_MEMALLOC as this saves us from propagating the allocation + * context down to all allocation sites. 
+ */ + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + current->flags |= PF_MEMALLOC; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) - return NET_RX_DROP; + goto out; orig_dev = skb->dev; @@ -3191,7 +3252,7 @@ another_round: if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { skb = vlan_untag(skb); if (unlikely(!skb)) - goto out; + goto unlock; } #ifdef CONFIG_NET_CLS_ACT @@ -3201,6 +3262,9 @@ another_round: } #endif + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + goto skip_taps; + list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) @@ -3209,13 +3273,18 @@ another_round: } } +skip_taps: #ifdef CONFIG_NET_CLS_ACT skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto out; + goto unlock; ncls: #endif + if (sk_memalloc_socks() && skb_pfmemalloc(skb) + && !skb_pfmemalloc_protocol(skb)) + goto drop; + rx_handler = rcu_dereference(skb->dev->rx_handler); if (vlan_tx_tag_present(skb)) { if (pt_prev) { @@ -3225,7 +3294,7 @@ ncls: if (vlan_do_receive(&skb, !rx_handler)) goto another_round; else if (unlikely(!skb)) - goto out; + goto unlock; } if (rx_handler) { @@ -3235,7 +3304,7 @@ ncls: } switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: - goto out; + goto unlock; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3268,6 +3337,7 @@ ncls: else ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { +drop: atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); /* Jamal, now you will not able to escape explaining @@ -3276,8 +3346,10 @@ ncls: ret = NET_RX_DROP; } -out: +unlock: rcu_read_unlock(); +out: + tsk_restore_flags(current, pflags, PF_MEMALLOC); return ret; } @@ -4801,6 +4873,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) err = ops->ndo_set_mac_address(dev, sa); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + add_device_randomness(dev->dev_addr, dev->addr_len); return err; } EXPORT_SYMBOL(dev_set_mac_address); @@ -5175,12 +5248,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) */ static int dev_new_index(struct net *net) { - static int ifindex; + int ifindex = net->ifindex; for (;;) { if (++ifindex <= 0) ifindex = 1; if (!__dev_get_by_index(net, ifindex)) - return ifindex; + return net->ifindex = ifindex; } } @@ -5533,7 +5606,12 @@ int register_netdevice(struct net_device *dev) } } - dev->ifindex = dev_new_index(net); + ret = -EBUSY; + if (!dev->ifindex) + dev->ifindex = dev_new_index(net); + else if (__dev_get_by_index(net, dev->ifindex)) + goto err_uninit; + if (dev->iflink == -1) dev->iflink = dev->ifindex; @@ -5579,6 +5657,7 @@ int register_netdevice(struct net_device *dev) dev_init_scheduler(dev); dev_hold(dev); list_netdevice(dev); + add_device_randomness(dev->dev_addr, dev->addr_len); /* Notify protocols, that a new device appeared. */ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); @@ -5682,6 +5761,7 @@ EXPORT_SYMBOL(netdev_refcnt_read); /** * netdev_wait_allrefs - wait until all references are gone. + * @dev: target net_device * * This is called when unregistering network devices. 
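
The PF_MEMALLOC bracketing added to __netif_receive_skb() depends on restoring only that one flag bit on every exit path, so a task that already had PF_MEMALLOC set keeps it afterwards. A small standalone model of the save/restore idiom (the flag value and helper names are illustrative, not the kernel's):

    #include <assert.h>

    #define PF_MEMALLOC 0x00000800UL        /* illustrative bit */

    static unsigned long task_flags;

    /* Mirror of tsk_restore_flags(): put @bits back to their saved
     * state while leaving every other flag untouched. */
    static void restore_flags(unsigned long *flags, unsigned long saved,
                              unsigned long bits)
    {
        *flags = (*flags & ~bits) | (saved & bits);
    }

    static void receive_skb(int pfmemalloc)
    {
        unsigned long pflags = task_flags;      /* save on entry */

        if (pfmemalloc)
            task_flags |= PF_MEMALLOC;

        /* ... taps skipped, protocol handlers run, early exits ... */

        restore_flags(&task_flags, pflags, PF_MEMALLOC); /* single exit */
    }

    int main(void)
    {
        receive_skb(1);
        assert(task_flags == 0);        /* the bit did not leak out */
        return 0;
    }
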
* @@ -5942,6 +6022,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); dev->gso_max_size = GSO_MAX_SIZE; + dev->gso_max_segs = GSO_MAX_SEGS; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); diff --git a/net/core/dst.c b/net/core/dst.c index 069d51d29414..56d63612e1e4 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -149,7 +149,15 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); -const u32 dst_default_metrics[RTAX_MAX]; +const u32 dst_default_metrics[RTAX_MAX + 1] = { + /* This initializer is needed to force the linker to place this variable + * into the const section. Otherwise it might end up in the bss section. + * We really want to avoid false sharing on this variable, and catch + * any writes on it. + */ + [RTAX_MAX] = 0xdeadbeef, +}; + void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags) diff --git a/net/core/filter.c b/net/core/filter.c index d4ce2dc712e3..907efd27ec77 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -83,6 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) int err; struct sk_filter *filter; + /* + * If the skb was allocated from pfmemalloc reserves, only + * allow SOCK_MEMALLOC sockets to use it as this socket is + * helping free memory + */ + if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) + return -ENOMEM; + err = security_sock_rcv_skb(sk, skb); if (err) return err; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b4c90e42b443..346b1eb83a1f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -26,6 +26,7 @@ #include <linux/workqueue.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/if_vlan.h> #include <net/tcp.h> #include <net/udp.h> #include <asm/unaligned.h> @@ -54,7 +55,7 @@ static atomic_t trapped; MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void arp_reply(struct sk_buff *skb); +static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); @@ -167,15 +168,24 @@ static void poll_napi(struct net_device *dev) struct napi_struct *napi; int budget = 16; + WARN_ON_ONCE(!irqs_disabled()); + list_for_each_entry(napi, &dev->napi_list, dev_list) { + local_irq_enable(); if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - budget = poll_one_napi(dev->npinfo, napi, budget); + rcu_read_lock_bh(); + budget = poll_one_napi(rcu_dereference_bh(dev->npinfo), + napi, budget); + rcu_read_unlock_bh(); spin_unlock(&napi->poll_lock); - if (!budget) + if (!budget) { + local_irq_disable(); break; + } } + local_irq_disable(); } } @@ -185,13 +195,14 @@ static void service_arp_queue(struct netpoll_info *npi) struct sk_buff *skb; while ((skb = skb_dequeue(&npi->arp_tx))) - arp_reply(skb); + netpoll_arp_reply(skb, npi); } } static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; + struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); if (!dev || !netif_running(dev)) return; @@ -206,17 +217,18 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); if (dev->flags & IFF_SLAVE) { - if (dev->npinfo) { + if (ni) { struct net_device *bond_dev = dev->master; struct sk_buff *skb; - while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) { + struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo); + while ((skb = skb_dequeue(&ni->arp_tx))) { skb->dev = bond_dev; -
skb_queue_tail(&bond_dev->npinfo->arp_tx, skb); + skb_queue_tail(&bond_ni->arp_tx, skb); } } } - service_arp_queue(dev->npinfo); + service_arp_queue(ni); zap_completion_queue(); } @@ -302,6 +314,7 @@ static int netpoll_owner_active(struct net_device *dev) return 0; } +/* call with IRQ disabled */ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, struct net_device *dev) { @@ -309,8 +322,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, unsigned long tries; const struct net_device_ops *ops = dev->netdev_ops; /* It is up to the caller to keep npinfo alive. */ - struct netpoll_info *npinfo = np->dev->npinfo; + struct netpoll_info *npinfo; + + WARN_ON_ONCE(!irqs_disabled()); + npinfo = rcu_dereference_bh(np->dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { __kfree_skb(skb); return; @@ -319,16 +335,22 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { struct netdev_queue *txq; - unsigned long flags; txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - local_irq_save(flags); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_xmit_stopped(txq)) { + if (vlan_tx_tag_present(skb) && + !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) { + skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); + if (unlikely(!skb)) + break; + skb->vlan_tci = 0; + } + status = ops->ndo_start_xmit(skb, dev); if (status == NETDEV_TX_OK) txq_trans_update(txq); @@ -347,10 +369,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, } WARN_ONCE(!irqs_disabled(), - "netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n", + "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n", dev->name, ops->ndo_start_xmit); - local_irq_restore(flags); } if (status != NETDEV_TX_OK) { @@ -423,9 +444,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -static void arp_reply(struct sk_buff *skb) +static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) { - struct netpoll_info *npinfo = skb->dev->npinfo; struct arphdr *arp; unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; @@ -543,13 +563,12 @@ static void arp_reply(struct sk_buff *skb) spin_unlock_irqrestore(&npinfo->rx_lock, flags); } -int __netpoll_rx(struct sk_buff *skb) +int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) { int proto, len, ulen; int hits = 0; const struct iphdr *iph; struct udphdr *uh; - struct netpoll_info *npinfo = skb->dev->npinfo; struct netpoll *np, *tmp; if (list_empty(&npinfo->rx_np)) @@ -565,6 +584,12 @@ int __netpoll_rx(struct sk_buff *skb) return 1; } + if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + goto out; + } + proto = ntohs(eth_hdr(skb)->h_proto); if (proto != ETH_P_IP) goto out; @@ -715,7 +740,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) } EXPORT_SYMBOL(netpoll_parse_options); -int __netpoll_setup(struct netpoll *np, struct net_device *ndev) +int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) { struct netpoll_info *npinfo; const struct net_device_ops *ops; @@ -734,7 +759,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) } if (!ndev->npinfo) { - npinfo = kmalloc(sizeof(*npinfo), 
GFP_KERNEL); + npinfo = kmalloc(sizeof(*npinfo), gfp); if (!npinfo) { err = -ENOMEM; goto out; @@ -752,7 +777,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) ops = np->dev->netdev_ops; if (ops->ndo_netpoll_setup) { - err = ops->ndo_netpoll_setup(ndev, npinfo); + err = ops->ndo_netpoll_setup(ndev, npinfo, gfp); if (err) goto free_npinfo; } @@ -857,7 +882,7 @@ int netpoll_setup(struct netpoll *np) refill_skbs(); rtnl_lock(); - err = __netpoll_setup(np, ndev); + err = __netpoll_setup(np, ndev, GFP_KERNEL); rtnl_unlock(); if (err) @@ -878,6 +903,24 @@ static int __init netpoll_init(void) } core_initcall(netpoll_init); +static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) +{ + struct netpoll_info *npinfo = + container_of(rcu_head, struct netpoll_info, rcu); + + skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->txq); + + /* we can't call cancel_delayed_work_sync here, as we are in softirq */ + cancel_delayed_work(&npinfo->tx_work); + + /* clean after last, unfinished work */ + __skb_queue_purge(&npinfo->txq); + /* now cancel it again */ + cancel_delayed_work(&npinfo->tx_work); + kfree(npinfo); +} + void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; @@ -903,20 +946,24 @@ void __netpoll_cleanup(struct netpoll *np) ops->ndo_netpoll_cleanup(np->dev); RCU_INIT_POINTER(np->dev->npinfo, NULL); + call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); + } +} +EXPORT_SYMBOL_GPL(__netpoll_cleanup); - /* avoid racing with NAPI reading npinfo */ - synchronize_rcu_bh(); +static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) +{ + struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); - skb_queue_purge(&npinfo->arp_tx); - skb_queue_purge(&npinfo->txq); - cancel_delayed_work_sync(&npinfo->tx_work); + __netpoll_cleanup(np); + kfree(np); +} - /* clean after last, unfinished work */ - __skb_queue_purge(&npinfo->txq); - kfree(npinfo); - } +void __netpoll_free_rcu(struct netpoll *np) +{ + call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); } -EXPORT_SYMBOL_GPL(__netpoll_cleanup); +EXPORT_SYMBOL_GPL(__netpoll_free_rcu); void netpoll_cleanup(struct netpoll *np) { diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index ed0c0431fcd8..c75e3f9d060f 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -101,12 +101,10 @@ static int write_update_netdev_table(struct net_device *dev) u32 max_len; struct netprio_map *map; - rtnl_lock(); max_len = atomic_read(&max_prioidx) + 1; map = rtnl_dereference(dev->priomap); if (!map || map->priomap_len < max_len) ret = extend_netdev_table(dev, max_len); - rtnl_unlock(); return ret; } @@ -256,17 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft, if (!dev) goto out_free_devname; + rtnl_lock(); ret = write_update_netdev_table(dev); if (ret < 0) goto out_put_dev; - rcu_read_lock(); - map = rcu_dereference(dev->priomap); + map = rtnl_dereference(dev->priomap); if (map) map->priomap[prioidx] = priority; - rcu_read_unlock(); out_put_dev: + rtnl_unlock(); dev_put(dev); out_free_devname: @@ -277,12 +275,6 @@ out_free_devname: void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; - char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); - - if (!tmp) { - pr_warn("Unable to attach cgrp due to alloc failure!\n"); - return; - } cgroup_taskset_for_each(p, cgrp, tset) { unsigned int fd; @@ -296,32 +288,24 @@ void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) continue; } - rcu_read_lock(); + 
spin_lock(&files->file_lock); fdt = files_fdtable(files); for (fd = 0; fd < fdt->max_fds; fd++) { - char *path; struct file *file; struct socket *sock; - unsigned long s; - int rv, err = 0; + int err; file = fcheck_files(files, fd); if (!file) continue; - path = d_path(&file->f_path, tmp, PAGE_SIZE); - rv = sscanf(path, "socket:[%lu]", &s); - if (rv <= 0) - continue; - sock = sock_from_file(file, &err); - if (!err) + if (sock) sock_update_netprioidx(sock->sk, p); } - rcu_read_unlock(); + spin_unlock(&files->file_lock); task_unlock(p); } - kfree(tmp); } static struct cftype ss_files[] = { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 334b930e0de3..34d975b0f277 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -618,16 +618,20 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error) { struct rta_cacheinfo ci = { - .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse), + .rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse), .rta_used = dst->__use, .rta_clntref = atomic_read(&(dst->__refcnt)), .rta_error = error, .rta_id = id, }; - if (expires) - ci.rta_expires = jiffies_to_clock_t(expires); + if (expires) { + unsigned long clock; + clock = jiffies_to_clock_t(abs(expires)); + clock = min_t(unsigned long, clock, INT_MAX); + ci.rta_expires = (expires > 0) ? clock : -clock; + } return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); } EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); @@ -659,6 +663,12 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } } +static unsigned int rtnl_dev_get_flags(const struct net_device *dev) +{ + return (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI)) | + (dev->gflags & (IFF_PROMISC | IFF_ALLMULTI)); +} + static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, const struct ifinfomsg *ifm) { @@ -667,7 +677,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ if (ifm->ifi_change) flags = (flags & ifm->ifi_change) | - (dev->flags & ~ifm->ifi_change); + (rtnl_dev_get_flags(dev) & ~ifm->ifi_change); return flags; } @@ -1371,6 +1381,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; send_addr_notify = 1; modified = 1; + add_device_randomness(dev->dev_addr, dev->addr_len); } if (tb[IFLA_MTU]) { @@ -1801,8 +1812,6 @@ replay: return -ENODEV; } - if (ifm->ifi_index) - return -EOPNOTSUPP; if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) return -EOPNOTSUPP; @@ -1828,10 +1837,14 @@ replay: return PTR_ERR(dest_net); dev = rtnl_create_link(net, dest_net, ifname, ops, tb); - - if (IS_ERR(dev)) + if (IS_ERR(dev)) { err = PTR_ERR(dev); - else if (ops->newlink) + goto out; + } + + dev->ifindex = ifm->ifi_index; + + if (ops->newlink) err = ops->newlink(net, dev, tb, data); else err = register_netdevice(dev); diff --git a/net/core/scm.c b/net/core/scm.c index 8f6ccfd68ef4..040cebeed45b 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -265,6 +265,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++) { + struct socket *sock; int new_fd; err = security_file_receive(fp[i]); if (err) @@ -281,6 +282,9 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) } /* Bump the usage count and install the file. 
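
The rtnl_put_cacheinfo() change a few hunks up packs a signed jiffies delta into the 32-bit rta_expires field: convert the magnitude, clamp it to INT_MAX, then reapply the sign, so an already-expired entry is reported as a negative value rather than overflowing. The arithmetic in isolation (HZ and USER_HZ values are illustrative):

    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define HZ 1000
    #define USER_HZ 100

    static unsigned long jiffies_to_clock_t(unsigned long j)
    {
        return j / (HZ / USER_HZ);
    }

    /* expires is a jiffies delta and may be negative (already expired) */
    static int encode_expires(long expires)
    {
        unsigned long clock;

        if (!expires)
            return 0;
        clock = jiffies_to_clock_t(labs(expires));
        if (clock > INT_MAX)
            clock = INT_MAX;        /* avoid signed overflow */
        return expires > 0 ? (int)clock : -(int)clock;
    }

    int main(void)
    {
        printf("%d %d\n", encode_expires(5000), encode_expires(-5000));
        return 0;
    }
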
*/ get_file(fp[i]); + sock = sock_from_file(fp[i], &err); + if (sock) + sock_update_netprioidx(sock->sk, current); fd_install(new_fd, fp[i]); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 368f65c15e4f..fe00d1208167 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) BUG(); } + +/* + * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells + * the caller if emergency pfmemalloc reserves are being used. If it is and + * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves + * may be used. Otherwise, the packet data may be discarded until enough + * memory is free + */ +#define kmalloc_reserve(size, gfp, node, pfmemalloc) \ + __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) +void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip, + bool *pfmemalloc) +{ + void *obj; + bool ret_pfmemalloc = false; + + /* + * Try a regular allocation, when that fails and we're not entitled + * to the reserves, fail. + */ + obj = kmalloc_node_track_caller(size, + flags | __GFP_NOMEMALLOC | __GFP_NOWARN, + node); + if (obj || !(gfp_pfmemalloc_allowed(flags))) + goto out; + + /* Try again but now we are using pfmemalloc reserves */ + ret_pfmemalloc = true; + obj = kmalloc_node_track_caller(size, flags, node); + +out: + if (pfmemalloc) + *pfmemalloc = ret_pfmemalloc; + + return obj; +} + /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. @@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask - * @fclone: allocate from fclone cache instead of head cache - * and allocate a cloned (child) skb + * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache + * instead of head cache and allocate a cloned (child) skb. + * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for + * allocations in case the data is required for writeback * @node: numa node to allocate memory on * * Allocate a new &sk_buff. The returned buffer has no headroom and a @@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, - int fclone, int node) + int flags, int node) { struct kmem_cache *cache; struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; + bool pfmemalloc; - cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; + cache = (flags & SKB_ALLOC_FCLONE) + ? skbuff_fclone_cache : skbuff_head_cache; + + if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) + gfp_mask |= __GFP_MEMALLOC; /* Get the HEAD */ skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); @@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, */ size = SKB_DATA_ALIGN(size); size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - data = kmalloc_node_track_caller(size, gfp_mask, node); + data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc); if (!data) goto nodata; /* kmalloc(size) might give us more room than requested. 
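
__kmalloc_reserve() above separates "try ordinary memory" from "dip into the emergency reserves", and reports through *pfmemalloc which pool satisfied the request so the resulting skb can be tainted accordingly. A userspace model of that policy, with stub allocators standing in for the slab calls and a plain boolean for gfp_pfmemalloc_allowed():

    #include <stdbool.h>
    #include <stdlib.h>

    /* Stubs: a real implementation would draw from two distinct pools. */
    static void *alloc_ordinary(size_t size)  { return malloc(size); }
    static void *alloc_emergency(size_t size) { return malloc(size); }

    static void *alloc_with_reserve(size_t size, bool entitled,
                                    bool *pfmemalloc)
    {
        /* First pass: ordinary memory only (~ __GFP_NOMEMALLOC). */
        void *obj = alloc_ordinary(size);

        *pfmemalloc = false;
        if (obj || !entitled)
            return obj;     /* success, or no right to the reserves */

        /* Second pass: eat into the emergency pool, and remember
         * that we did (the kernel records this in skb->pfmemalloc). */
        *pfmemalloc = true;
        return alloc_emergency(size);
    }
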
@@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, memset(skb, 0, offsetof(struct sk_buff, tail)); /* Account for allocated memory : skb + skb->head */ skb->truesize = SKB_TRUESIZE(size); + skb->pfmemalloc = pfmemalloc; atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, atomic_set(&shinfo->dataref, 1); kmemcheck_annotate_variable(shinfo->destructor_arg); - if (fclone) { + if (flags & SKB_ALLOC_FCLONE) { struct sk_buff *child = skb + 1; atomic_t *fclone_ref = (atomic_t *) (child + 1); @@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, atomic_set(fclone_ref, 1); child->fclone = SKB_FCLONE_UNAVAILABLE; + child->pfmemalloc = pfmemalloc; } out: return skb; @@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); #define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES) -/** - * netdev_alloc_frag - allocate a page fragment - * @fragsz: fragment size - * - * Allocates a frag from a page for receive buffer. - * Uses GFP_ATOMIC allocations. - */ -void *netdev_alloc_frag(unsigned int fragsz) +static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { struct netdev_alloc_cache *nc; void *data = NULL; @@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz) nc = &__get_cpu_var(netdev_alloc_cache); if (unlikely(!nc->page)) { refill: - nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD); + nc->page = alloc_page(gfp_mask); if (unlikely(!nc->page)) goto end; recycle: @@ -343,6 +382,18 @@ end: local_irq_restore(flags); return data; } + +/** + * netdev_alloc_frag - allocate a page fragment + * @fragsz: fragment size + * + * Allocates a frag from a page for receive buffer. + * Uses GFP_ATOMIC allocations. 
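
__netdev_alloc_frag() carves receive buffers out of a cached per-CPU page and only returns to the page allocator once the page is used up; the refactor here merely threads a caller-supplied gfp mask into it. A simplified single-page carver in the same spirit (per-thread instead of per-CPU, no recycling or refcount bias, sizes illustrative):

    #include <stddef.h>
    #include <stdlib.h>

    #define PAGE_SZ 4096

    static _Thread_local struct {
        char  *page;
        size_t offset;
    } frag_cache;

    /* Hand out fragsz bytes from the cached page, refilling on demand. */
    static void *alloc_frag(size_t fragsz)
    {
        if (fragsz > PAGE_SZ)
            return NULL;

        if (!frag_cache.page || frag_cache.offset + fragsz > PAGE_SZ) {
            frag_cache.page = malloc(PAGE_SZ);  /* ~ alloc_page() */
            if (!frag_cache.page)
                return NULL;
            frag_cache.offset = 0;
        }

        void *p = frag_cache.page + frag_cache.offset;
        frag_cache.offset += fragsz;
        return p;
    }
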
+ */ +void *netdev_alloc_frag(unsigned int fragsz) +{ + return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); +} EXPORT_SYMBOL(netdev_alloc_frag); /** @@ -366,7 +417,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { - void *data = netdev_alloc_frag(fragsz); + void *data; + + if (sk_memalloc_socks()) + gfp_mask |= __GFP_MEMALLOC; + + data = __netdev_alloc_frag(fragsz, gfp_mask); if (likely(data)) { skb = build_skb(data, fragsz); @@ -374,7 +430,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, put_page(virt_to_head_page(data)); } } else { - skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE); + skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, + SKB_ALLOC_RX, NUMA_NO_NODE); } if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); @@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #if IS_ENABLED(CONFIG_IP_VS) new->ipvs_property = old->ipvs_property; #endif + new->pfmemalloc = old->pfmemalloc; new->protocol = old->protocol; new->mark = old->mark; new->skb_iif = old->skb_iif; @@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->fclone = SKB_FCLONE_CLONE; atomic_inc(fclone_ref); } else { + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); if (!n) return NULL; @@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } +static inline int skb_alloc_rx_flag(const struct sk_buff *skb) +{ + if (skb_pfmemalloc(skb)) + return SKB_ALLOC_RX; + return 0; +} + /** * skb_copy - create private copy of an sk_buff * @skb: buffer to copy @@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { int headerlen = skb_headroom(skb); unsigned int size = skb_end_offset(skb) + skb->data_len; - struct sk_buff *n = alloc_skb(size, gfp_mask); + struct sk_buff *n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; @@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy); struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) { unsigned int size = skb_headlen(skb) + headroom; - struct sk_buff *n = alloc_skb(size, gfp_mask); + struct sk_buff *n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) goto out; @@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, size = SKB_DATA_ALIGN(size); - data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), - gfp_mask); + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), + gfp_mask, NUMA_NO_NODE, NULL); if (!data) goto nodata; size = SKB_WITH_OVERHEAD(ksize(data)); @@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, /* * Allocate the copy buffer */ - struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask); + struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, + gfp_mask, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; int off; @@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_release_head_state(nskb); __skb_push(nskb, doffset); } else { - nskb = alloc_skb(hsize + doffset + 
headroom, - GFP_ATOMIC); + nskb = __alloc_skb(hsize + doffset + headroom, + GFP_ATOMIC, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); if (unlikely(!nskb)) goto err; diff --git a/net/core/sock.c b/net/core/sock.c index 2676a88f533e..8f67ced8d6a8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -142,7 +142,7 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { struct proto *proto; @@ -271,6 +271,61 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); +struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE; +EXPORT_SYMBOL_GPL(memalloc_socks); + +/** + * sk_set_memalloc - sets %SOCK_MEMALLOC + * @sk: socket to set it on + * + * Set %SOCK_MEMALLOC on a socket for access to emergency reserves. + * It's the responsibility of the admin to adjust min_free_kbytes + * to meet the requirements + */ +void sk_set_memalloc(struct sock *sk) +{ + sock_set_flag(sk, SOCK_MEMALLOC); + sk->sk_allocation |= __GFP_MEMALLOC; + static_key_slow_inc(&memalloc_socks); +} +EXPORT_SYMBOL_GPL(sk_set_memalloc); + +void sk_clear_memalloc(struct sock *sk) +{ + sock_reset_flag(sk, SOCK_MEMALLOC); + sk->sk_allocation &= ~__GFP_MEMALLOC; + static_key_slow_dec(&memalloc_socks); + + /* + * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward + * progress of swapping. However, if SOCK_MEMALLOC is cleared while + * it has rmem allocations there is a risk that the user of the + * socket cannot make forward progress due to exceeding the rmem + * limits. By rights, sk_clear_memalloc() should only be called + * on sockets being torn down but warn and reset the accounting if + * that assumption breaks. 
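
sk_set_memalloc() pairs the per-socket flag with a global static_key (memalloc_socks) so that hot-path checks like sk_memalloc_socks() cost a patched-out branch while no SOCK_MEMALLOC socket exists. Outside the kernel there is no runtime code patching; an atomic counter gives the same semantics (the names mirror the patch, the mechanism does not):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_int memalloc_socks;   /* ~ struct static_key */

    /* Hot-path check: with a real static_key this is a no-op branch
     * until the first SOCK_MEMALLOC socket flips it. */
    static inline bool sk_memalloc_socks_like(void)
    {
        return atomic_load_explicit(&memalloc_socks,
                                    memory_order_relaxed) > 0;
    }

    static void sk_set_memalloc_like(void)
    {
        atomic_fetch_add(&memalloc_socks, 1);   /* static_key_slow_inc */
    }

    static void sk_clear_memalloc_like(void)
    {
        atomic_fetch_sub(&memalloc_socks, 1);   /* static_key_slow_dec */
    }
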
+ */ + if (WARN_ON(sk->sk_forward_alloc)) + sk_mem_reclaim(sk); +} +EXPORT_SYMBOL_GPL(sk_clear_memalloc); + +int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + unsigned long pflags = current->flags; + + /* these should have been dropped before queueing */ + BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); + + current->flags |= PF_MEMALLOC; + ret = sk->sk_backlog_rcv(sk, skb); + tsk_restore_flags(current, pflags, PF_MEMALLOC); + + return ret; +} +EXPORT_SYMBOL(__sk_backlog_rcv); + #if defined(CONFIG_CGROUPS) #if !defined(CONFIG_NET_CLS_CGROUP) int net_cls_subsys_id = -1; @@ -353,7 +408,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) return err; - if (!sk_rmem_schedule(sk, skb->truesize)) { + if (!sk_rmem_schedule(sk, skb, skb->truesize)) { atomic_inc(&sk->sk_drops); return -ENOBUFS; } @@ -1403,6 +1458,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; sk->sk_gso_max_size = dst->dev->gso_max_size; + sk->sk_gso_max_segs = dst->dev->gso_max_segs; } } } diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 75c3582a7678..fb85d371a8de 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -246,7 +246,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, optval, optlen); return rc; @@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, optval, optlen); return rc; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index d65e98798eca..119c04317d48 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -535,6 +535,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, case DCCP_SOCKOPT_CCID_TX_INFO: if (len < sizeof(tfrc)) return -EINVAL; + memset(&tfrc, 0, sizeof(tfrc)); tfrc.tfrctx_x = hc->tx_x; tfrc.tfrctx_x_recv = hc->tx_x_recv; tfrc.tfrctx_x_calc = hc->tx_x_calc; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 85a3604c87c8..c855e8d0738f 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -961,7 +961,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o .saddr = oldflp->saddr, .flowidn_scope = RT_SCOPE_UNIVERSE, .flowidn_mark = oldflp->flowidn_mark, - .flowidn_iif = init_net.loopback_dev->ifindex, + .flowidn_iif = LOOPBACK_IFINDEX, .flowidn_oif = oldflp->flowidn_oif, }; struct dn_route *rt = NULL; @@ -979,7 +979,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o "dn_route_output_slow: dst=%04x src=%04x mark=%d" " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr), le16_to_cpu(oldflp->saddr), - oldflp->flowidn_mark, init_net.loopback_dev->ifindex, + oldflp->flowidn_mark, LOOPBACK_IFINDEX, oldflp->flowidn_oif); /* If we have an output interface, verify its a DECnet device */ @@ -1042,7 +1042,7 @@ source_ok: if (!fld.daddr) goto out; } - fld.flowidn_oif = init_net.loopback_dev->ifindex; + fld.flowidn_oif = LOOPBACK_IFINDEX; res.type = RTN_LOCAL; goto make_route; } diff --git a/net/ipv4/Makefile 
b/net/ipv4/Makefile index ae2ccf2890e4..15ca63ec604e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o -obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o +obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fe4582ca969a..6681ccf5c3ee 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1364,7 +1364,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (*(u8 *)iph != 0x45) goto out_unlock; - if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + if (unlikely(ip_fast_csum((u8 *)iph, 5))) goto out_unlock; id = ntohl(*(__be32 *)&iph->id); @@ -1380,7 +1380,6 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, iph2 = ip_hdr(p); if ((iph->protocol ^ iph2->protocol) | - (iph->tos ^ iph2->tos) | ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { NAPI_GRO_CB(p)->same_flow = 0; @@ -1390,6 +1389,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, /* All fields must match except length and checksum. */ NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | + (iph->tos ^ iph2->tos) | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 44bf82e3aef7..adf273f8ad2e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -94,25 +94,22 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, }; -/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE - * value. So if you change this define, make appropriate changes to - * inet_addr_hash as well. 
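
The inet_gro_receive() hunk above moves tos out of the same-flow comparison and into the flush mask: a packet differing only in tos still matches the held flow entry, but its arrival flushes the aggregate out instead of silently merging headers with different TOS values. The flush computation in isolation (struct trimmed to the fields involved; host byte order for brevity):

    #include <stdint.h>

    struct iphdr_lite {
        uint8_t  ttl;
        uint8_t  tos;
        uint16_t id;
    };

    /* Nonzero result => deliver the aggregated skb before merging. */
    static uint32_t gro_flush_mask(const struct iphdr_lite *incoming,
                                   const struct iphdr_lite *held,
                                   uint16_t held_pkt_count)
    {
        return (uint32_t)(incoming->ttl ^ held->ttl) |
               (uint32_t)(incoming->tos ^ held->tos) |
               (uint32_t)(uint16_t)((held->id + held_pkt_count) ^
                                    incoming->id);
    }
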
- */ -#define IN4_ADDR_HSIZE 256 +#define IN4_ADDR_HSIZE_SHIFT 8 +#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) + static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; static DEFINE_SPINLOCK(inet_addr_hash_lock); -static inline unsigned int inet_addr_hash(struct net *net, __be32 addr) +static u32 inet_addr_hash(struct net *net, __be32 addr) { - u32 val = (__force u32) addr ^ hash_ptr(net, 8); + u32 val = (__force u32) addr ^ net_hash_mix(net); - return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) & - (IN4_ADDR_HSIZE - 1)); + return hash_32(val, IN4_ADDR_HSIZE_SHIFT); } static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) { - unsigned int hash = inet_addr_hash(net, ifa->ifa_local); + u32 hash = inet_addr_hash(net, ifa->ifa_local); spin_lock(&inet_addr_hash_lock); hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); @@ -136,18 +133,18 @@ static void inet_hash_remove(struct in_ifaddr *ifa) */ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) { - unsigned int hash = inet_addr_hash(net, addr); + u32 hash = inet_addr_hash(net, addr); struct net_device *result = NULL; struct in_ifaddr *ifa; struct hlist_node *node; rcu_read_lock(); hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { - struct net_device *dev = ifa->ifa_dev->dev; - - if (!net_eq(dev_net(dev), net)) - continue; if (ifa->ifa_local == addr) { + struct net_device *dev = ifa->ifa_dev->dev; + + if (!net_eq(dev_net(dev), net)) + continue; result = dev; break; } @@ -182,10 +179,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, static void devinet_sysctl_register(struct in_device *idev); static void devinet_sysctl_unregister(struct in_device *idev); #else -static inline void devinet_sysctl_register(struct in_device *idev) +static void devinet_sysctl_register(struct in_device *idev) { } -static inline void devinet_sysctl_unregister(struct in_device *idev) +static void devinet_sysctl_unregister(struct in_device *idev) { } #endif @@ -205,7 +202,7 @@ static void inet_rcu_free_ifa(struct rcu_head *head) kfree(ifa); } -static inline void inet_free_ifa(struct in_ifaddr *ifa) +static void inet_free_ifa(struct in_ifaddr *ifa) { call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); } @@ -659,7 +656,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg * Determine a default network mask, based on the IP address. */ -static inline int inet_abc_len(__be32 addr) +static int inet_abc_len(__be32 addr) { int rc = -1; /* Something else, probably a multicast. 
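
The devinet rework replaces hand-rolled byte folding with hash_32() over the address xor a per-namespace mix, and derives the table size from IN4_ADDR_HSIZE_SHIFT so hash width and table size cannot drift apart. A self-contained version of the computation (the multiplier plays the role of the kernel's golden-ratio constant; its exact value is incidental here):

    #include <stdint.h>

    #define IN4_ADDR_HSIZE_SHIFT 8
    #define IN4_ADDR_HSIZE       (1U << IN4_ADDR_HSIZE_SHIFT)

    /* Multiplicative hash: multiply, keep the top 'bits' bits. */
    static uint32_t hash_32_like(uint32_t val, unsigned int bits)
    {
        return (val * 0x9E370001u) >> (32 - bits);
    }

    static uint32_t inet_addr_hash_like(uint32_t net_mix, uint32_t addr_be)
    {
        return hash_32_like(addr_be ^ net_mix, IN4_ADDR_HSIZE_SHIFT);
    }
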
*/ @@ -1124,7 +1121,7 @@ skip: } } -static inline bool inetdev_valid_mtu(unsigned int mtu) +static bool inetdev_valid_mtu(unsigned int mtu) { return mtu >= 68; } @@ -1239,7 +1236,7 @@ static struct notifier_block ip_netdev_notifier = { .notifier_call = inetdev_event, }; -static inline size_t inet_nlmsg_size(void) +static size_t inet_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + nla_total_size(4) /* IFA_ADDRESS */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8732cc7920ed..7f073a38c87d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -218,7 +218,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { fl4.flowi4_oif = 0; - fl4.flowi4_iif = net->loopback_dev->ifindex; + fl4.flowi4_iif = LOOPBACK_IFINDEX; fl4.daddr = ip_hdr(skb)->saddr; fl4.saddr = 0; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); @@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2, -1); + rt_flush_dev(dev); return NOTIFY_DONE; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index da0cc2e6b250..da80dc14cc76 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }, }; +static void rt_fibinfo_free(struct rtable __rcu **rtp) +{ + struct rtable *rt = rcu_dereference_protected(*rtp, 1); + + if (!rt) + return; + + /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); + * because we waited an RCU grace period before calling + * free_fib_info_rcu() + */ + + dst_free(&rt->dst); +} + static void free_nh_exceptions(struct fib_nh *nh) { struct fnhe_hash_bucket *hash = nh->nh_exceptions; @@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh) struct fib_nh_exception *next; next = rcu_dereference_protected(fnhe->fnhe_next, 1); + + rt_fibinfo_free(&fnhe->fnhe_rth); + kfree(fnhe); fnhe = next; @@ -161,6 +179,23 @@ static void free_nh_exceptions(struct fib_nh *nh) kfree(hash); } +static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) +{ + int cpu; + + if (!rtp) + return; + + for_each_possible_cpu(cpu) { + struct rtable *rt; + + rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); + if (rt) + dst_free(&rt->dst); + } + free_percpu(rtp); +} + /* Release a nexthop info record */ static void free_fib_info_rcu(struct rcu_head *head) { @@ -171,10 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head) dev_put(nexthop_nh->nh_dev); if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); - if (nexthop_nh->nh_rth_output) - dst_free(&nexthop_nh->nh_rth_output->dst); - if (nexthop_nh->nh_rth_input) - dst_free(&nexthop_nh->nh_rth_input->dst); + rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); + rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); release_net(fi->fib_net); @@ -804,6 +837,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_nhs = nhs; change_nexthops(fi) { nexthop_nh->nh_parent = fi; + nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); } endfor_nexthops(fi) if (cfg->fc_mx) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 18cbc15b20d5..3c820dae235e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -159,7 +159,6 @@ struct trie { #endif }; -static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n); static void tnode_put_child_reorg(struct tnode *tn, int i, 
struct rt_trie_node *n, int wasfull); static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); @@ -368,7 +367,7 @@ static void __leaf_free_rcu(struct rcu_head *head) static inline void free_leaf(struct leaf *l) { - call_rcu_bh(&l->rcu, __leaf_free_rcu); + call_rcu(&l->rcu, __leaf_free_rcu); } static inline void free_leaf_info(struct leaf_info *leaf) @@ -473,7 +472,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) } pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), - sizeof(struct rt_trie_node) << bits); + sizeof(struct rt_trie_node *) << bits); return tn; } @@ -490,7 +489,7 @@ static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node * return ((struct tnode *) n)->pos == tn->pos + tn->bits; } -static inline void put_child(struct trie *t, struct tnode *tn, int i, +static inline void put_child(struct tnode *tn, int i, struct rt_trie_node *n) { tnode_put_child_reorg(tn, i, n, -1); @@ -754,8 +753,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) goto nomem; } - put_child(t, tn, 2*i, (struct rt_trie_node *) left); - put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); + put_child(tn, 2*i, (struct rt_trie_node *) left); + put_child(tn, 2*i+1, (struct rt_trie_node *) right); } } @@ -776,9 +775,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits, 1) == 0) - put_child(t, tn, 2*i, node); + put_child(tn, 2*i, node); else - put_child(t, tn, 2*i+1, node); + put_child(tn, 2*i+1, node); continue; } @@ -786,8 +785,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) inode = (struct tnode *) node; if (inode->bits == 1) { - put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); - put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); + put_child(tn, 2*i, rtnl_dereference(inode->child[0])); + put_child(tn, 2*i+1, rtnl_dereference(inode->child[1])); tnode_free_safe(inode); continue; @@ -817,22 +816,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) */ left = (struct tnode *) tnode_get_child(tn, 2*i); - put_child(t, tn, 2*i, NULL); + put_child(tn, 2*i, NULL); BUG_ON(!left); right = (struct tnode *) tnode_get_child(tn, 2*i+1); - put_child(t, tn, 2*i+1, NULL); + put_child(tn, 2*i+1, NULL); BUG_ON(!right); size = tnode_child_length(left); for (j = 0; j < size; j++) { - put_child(t, left, j, rtnl_dereference(inode->child[j])); - put_child(t, right, j, rtnl_dereference(inode->child[j + size])); + put_child(left, j, rtnl_dereference(inode->child[j])); + put_child(right, j, rtnl_dereference(inode->child[j + size])); } - put_child(t, tn, 2*i, resize(t, left)); - put_child(t, tn, 2*i+1, resize(t, right)); + put_child(tn, 2*i, resize(t, left)); + put_child(tn, 2*i+1, resize(t, right)); tnode_free_safe(inode); } @@ -877,7 +876,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) if (!newn) goto nomem; - put_child(t, tn, i/2, (struct rt_trie_node *)newn); + put_child(tn, i/2, (struct rt_trie_node *)newn); } } @@ -892,21 +891,21 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) if (left == NULL) { if (right == NULL) /* Both are empty */ continue; - put_child(t, tn, i/2, right); + put_child(tn, i/2, right); continue; } if (right == NULL) { - put_child(t, tn, i/2, left); + put_child(tn, i/2, left); continue; } /* Two nonempty children */ newBinNode = (struct tnode *) tnode_get_child(tn, i/2); - put_child(t, tn, i/2, NULL); - put_child(t, newBinNode, 0, left); - put_child(t, newBinNode, 1, right); - 
put_child(t, tn, i/2, resize(t, newBinNode)); + put_child(tn, i/2, NULL); + put_child(newBinNode, 0, left); + put_child(newBinNode, 1, right); + put_child(tn, i/2, resize(t, newBinNode)); } tnode_free_safe(oldtnode); return tn; @@ -1125,7 +1124,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) node_set_parent((struct rt_trie_node *)l, tp); cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(t, tp, cindex, (struct rt_trie_node *)l); + put_child(tp, cindex, (struct rt_trie_node *)l); } else { /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ /* @@ -1155,12 +1154,12 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) node_set_parent((struct rt_trie_node *)tn, tp); missbit = tkey_extract_bits(key, newpos, 1); - put_child(t, tn, missbit, (struct rt_trie_node *)l); - put_child(t, tn, 1-missbit, n); + put_child(tn, missbit, (struct rt_trie_node *)l); + put_child(tn, 1-missbit, n); if (tp) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(t, tp, cindex, (struct rt_trie_node *)tn); + put_child(tp, cindex, (struct rt_trie_node *)tn); } else { rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tp = tn; @@ -1551,7 +1550,8 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, * state.directly. */ if (pref_mismatch) { - int mp = KEYLENGTH - fls(pref_mismatch); + /* fls(x) = __fls(x) + 1 */ + int mp = KEYLENGTH - __fls(pref_mismatch) - 1; if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0) goto backtrace; @@ -1619,7 +1619,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) if (tp) { t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); - put_child(t, tp, cindex, NULL); + put_child(tp, cindex, NULL); trie_rebalance(t, tp); } else RCU_INIT_POINTER(t->trie, NULL); @@ -1656,7 +1656,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if (!l) return -ESRCH; - fa_head = get_fa_head(l, plen); + li = find_leaf_info(l, plen); + + if (!li) + return -ESRCH; + + fa_head = &li->falh; fa = fib_find_alias(fa_head, tos, 0); if (!fa) @@ -1692,9 +1697,6 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); - l = fib_find_node(t, key); - li = find_leaf_info(l, plen); - list_del_rcu(&fa->fa_list); if (!plen) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6699f23e6f55..0b5580c69f2d 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2435,6 +2435,8 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) struct ip_mc_list *im = (struct ip_mc_list *)v; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); char *querier; + long delta; + #ifdef CONFIG_IP_MULTICAST querier = IGMP_V1_SEEN(state->in_dev) ? "V1" : IGMP_V2_SEEN(state->in_dev) ? "V2" : @@ -2448,11 +2450,12 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } + delta = im->timer.expires - jiffies; seq_printf(seq, "\t\t\t\t%08X %5d %d:%08lX\t\t%d\n", im->multiaddr, im->users, - im->tm_running, im->tm_running ? - jiffies_to_clock_t(im->timer.expires-jiffies) : 0, + im->tm_running, + im->tm_running ? 
jiffies_delta_to_clock_t(delta) : 0, im->reporter); } return 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index db0cf17c00f7..7f75f21d7b83 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -404,12 +404,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, { const struct inet_request_sock *ireq = inet_rsk(req); struct inet_sock *newinet = inet_sk(newsk); - struct ip_options_rcu *opt = ireq->opt; + struct ip_options_rcu *opt; struct net *net = sock_net(sk); struct flowi4 *fl4; struct rtable *rt; fl4 = &newinet->cork.fl.u.ip4; + + rcu_read_lock(); + opt = rcu_dereference(newinet->inet_opt); flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -421,11 +424,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_gateway) goto route_err; + rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: + rcu_read_unlock(); IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 981ff1eef28c..f1395a6fb35f 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -325,14 +325,12 @@ static int ip_rcv_finish(struct sk_buff *skb) const struct net_protocol *ipprot; int protocol = iph->protocol; - rcu_read_lock(); ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->early_demux) { ipprot->early_demux(skb); /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } - rcu_read_unlock(); } /* diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ba39a52d18c1..c196d749daf2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -197,7 +197,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); - if (neigh) { + if (!IS_ERR(neigh)) { int res = dst_neigh_output(dst, neigh, skb); rcu_read_unlock_bh(); @@ -1338,10 +1338,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ihl = 5; iph->tos = inet->tos; iph->frag_off = df; - ip_select_ident(iph, &rt->dst, sk); iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); + ip_select_ident(iph, &rt->dst, sk); if (opt) { iph->ihl += opt->optlen>>2; @@ -1366,9 +1366,8 @@ out: return skb; } -int ip_send_skb(struct sk_buff *skb) +int ip_send_skb(struct net *net, struct sk_buff *skb) { - struct net *net = sock_net(skb->sk); int err; err = ip_local_out(skb); @@ -1391,7 +1390,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4) return 0; /* Netfilter gets whole the not fragmented skb. */ - return ip_send_skb(skb); + return ip_send_skb(sock_net(sk), skb); } /* @@ -1536,6 +1535,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; + skb_orphan(nskb); skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8eec8f4a0536..3a57570c8ee5 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1798,7 +1798,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) .flowi4_oif = (rt_is_output_route(rt) ? skb->dev->ifindex : 0), .flowi4_iif = (rt_is_output_route(rt) ? 
- net->loopback_dev->ifindex : + LOOPBACK_IFINDEX : skb->dev->ifindex), .flowi4_mark = skb->mark, }; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 31371be8174b..c30130062cd6 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) return ipv4_is_local_multicast(iph->daddr) ^ invert; flow.flowi4_iif = 0; } else { - flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex; + flow.flowi4_iif = LOOPBACK_IFINDEX; } flow.daddr = iph->saddr; diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index ea4a23813d26..4ad9cf173992 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, hdr, NULL, &matchoff, &matchlen, &addr, &port) > 0) { - unsigned int matchend, poff, plen, buflen, n; + unsigned int olen, matchend, poff, plen, buflen, n; char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; /* We're only interested in headers related to this @@ -163,17 +163,18 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, goto next; } + olen = *datalen; if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; - matchend = matchoff + matchlen; + matchend = matchoff + matchlen + *datalen - olen; /* The maddr= parameter (RFC 2361) specifies where to send * the reply. */ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, "maddr=", &poff, &plen, - &addr) > 0 && + &addr, true) > 0 && addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { buflen = sprintf(buffer, "%pI4", @@ -187,7 +188,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, * from which the server received the request. 
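
The nf_nat_sip fix above records the buffer length before map_addr() rewrites the address (olen) and then slides matchend by the size delta, because the rewritten text may be shorter or longer than the original and later parses are relative to the mangled buffer. The bookkeeping reduced to its core, with a hypothetical splice_replace() helper (unsigned wrap-around makes the delta arithmetic work for shrinking replacements too):

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    /* Replace buf[off .. off+oldlen) with rep, returning the new total
     * length. Assumes the buffer has room; purely illustrative. */
    static size_t splice_replace(char *buf, size_t len, size_t off,
                                 size_t oldlen, const char *rep)
    {
        size_t replen = strlen(rep);

        memmove(buf + off + replen, buf + off + oldlen,
                len - off - oldlen);
        memcpy(buf + off, rep, replen);
        return len - oldlen + replen;
    }

    int main(void)
    {
        char buf[64] = "Contact: 10.0.0.1;maddr=10.0.0.1";
        size_t len = strlen(buf), olen = len;
        size_t matchoff = 9, matchlen = 8;      /* first "10.0.0.1" */

        len = splice_replace(buf, len, matchoff, matchlen,
                             "192.168.0.100");
        /* as in the patch: shift the end of the match by the delta */
        size_t matchend = matchoff + matchlen + len - olen;
        printf("%s | matchend=%zu\n", buf, matchend);
        return 0;
    }
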
*/ if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, "received=", &poff, &plen, - &addr) > 0 && + &addr, false) > 0 && addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { buflen = sprintf(buffer, "%pI4", diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fc1a81ca79a7..50f6d3adb474 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -70,7 +70,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/bootmem.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/sockios.h> @@ -80,7 +79,6 @@ #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/init.h> -#include <linux/workqueue.h> #include <linux/skbuff.h> #include <linux/inetdevice.h> #include <linux/igmp.h> @@ -88,11 +86,9 @@ #include <linux/mroute.h> #include <linux/netfilter_ipv4.h> #include <linux/random.h> -#include <linux/jhash.h> #include <linux/rcupdate.h> #include <linux/times.h> #include <linux/slab.h> -#include <linux/prefetch.h> #include <net/dst.h> #include <net/net_namespace.h> #include <net/protocol.h> @@ -147,6 +143,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static void ipv4_dst_destroy(struct dst_entry *dst); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) @@ -170,6 +167,7 @@ static struct dst_ops ipv4_dst_ops = { .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, + .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, @@ -587,11 +585,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, build_sk_flow_key(fl4, sk); } -static DEFINE_SEQLOCK(fnhe_seqlock); +static inline void rt_free(struct rtable *rt) +{ + call_rcu(&rt->dst.rcu_head, dst_rcu_free); +} + +static DEFINE_SPINLOCK(fnhe_lock); static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) { struct fib_nh_exception *fnhe, *oldest; + struct rtable *orig; oldest = rcu_dereference(hash->chain); for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; @@ -599,6 +603,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) oldest = fnhe; } + orig = rcu_dereference(oldest->fnhe_rth); + if (orig) { + RCU_INIT_POINTER(oldest->fnhe_rth, NULL); + rt_free(orig); + } return oldest; } @@ -620,7 +629,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, int depth; u32 hval = fnhe_hashfun(daddr); - write_seqlock_bh(&fnhe_seqlock); + spin_lock_bh(&fnhe_lock); hash = nh->nh_exceptions; if (!hash) { @@ -667,7 +676,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_stamp = jiffies; out_unlock: - write_sequnlock_bh(&fnhe_seqlock); + spin_unlock_bh(&fnhe_lock); return; } @@ -1164,53 +1173,62 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) return NULL; } -static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, +static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, __be32 daddr) { - __be32 fnhe_daddr, gw; - unsigned long expires; - unsigned int seq; - u32 pmtu; - -restart: - seq = read_seqbegin(&fnhe_seqlock); - fnhe_daddr = fnhe->fnhe_daddr; - gw = fnhe->fnhe_gw; - pmtu = 
fnhe->fnhe_pmtu; - expires = fnhe->fnhe_expires; - if (read_seqretry(&fnhe_seqlock, seq)) - goto restart; - - if (daddr != fnhe_daddr) - return; + bool ret = false; + + spin_lock_bh(&fnhe_lock); + + if (daddr == fnhe->fnhe_daddr) { + struct rtable *orig; - if (pmtu) { - unsigned long diff = expires - jiffies; + if (fnhe->fnhe_pmtu) { + unsigned long expires = fnhe->fnhe_expires; + unsigned long diff = expires - jiffies; - if (time_before(jiffies, expires)) { - rt->rt_pmtu = pmtu; - dst_set_expires(&rt->dst, diff); + if (time_before(jiffies, expires)) { + rt->rt_pmtu = fnhe->fnhe_pmtu; + dst_set_expires(&rt->dst, diff); + } } + if (fnhe->fnhe_gw) { + rt->rt_flags |= RTCF_REDIRECTED; + rt->rt_gateway = fnhe->fnhe_gw; + } + + orig = rcu_dereference(fnhe->fnhe_rth); + rcu_assign_pointer(fnhe->fnhe_rth, rt); + if (orig) + rt_free(orig); + + fnhe->fnhe_stamp = jiffies; + ret = true; + } else { + /* Routes we intend to cache in nexthop exception have + * the DST_NOCACHE bit clear. However, if we are + * unsuccessful at storing this route into the cache + * we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; } - if (gw) { - rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = gw; - } - fnhe->fnhe_stamp = jiffies; -} + spin_unlock_bh(&fnhe_lock); -static inline void rt_free(struct rtable *rt) -{ - call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); + return ret; } -static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) +static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) { - struct rtable *orig, *prev, **p = &nh->nh_rth_output; - - if (rt_is_input_route(rt)) - p = &nh->nh_rth_input; + struct rtable *orig, *prev, **p; + bool ret = true; + if (rt_is_input_route(rt)) { + p = (struct rtable **)&nh->nh_rth_input; + } else { + if (!nh->nh_pcpu_rth_output) + goto nocache; + p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output); + } orig = *p; prev = cmpxchg(p, orig, rt); @@ -1223,7 +1241,50 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) * unsuccessful at storing this route into the cache * we really need to set it. 
*/ +nocache: rt->dst.flags |= DST_NOCACHE; + ret = false; + } + + return ret; +} + +static DEFINE_SPINLOCK(rt_uncached_lock); +static LIST_HEAD(rt_uncached_list); + +static void rt_add_uncached_list(struct rtable *rt) +{ + spin_lock_bh(&rt_uncached_lock); + list_add_tail(&rt->rt_uncached, &rt_uncached_list); + spin_unlock_bh(&rt_uncached_lock); +} + +static void ipv4_dst_destroy(struct dst_entry *dst) +{ + struct rtable *rt = (struct rtable *) dst; + + if (dst->flags & DST_NOCACHE) { + spin_lock_bh(&rt_uncached_lock); + list_del(&rt->rt_uncached); + spin_unlock_bh(&rt_uncached_lock); + } +} + +void rt_flush_dev(struct net_device *dev) +{ + if (!list_empty(&rt_uncached_list)) { + struct net *net = dev_net(dev); + struct rtable *rt; + + spin_lock_bh(&rt_uncached_lock); + list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { + if (rt->dst.dev != dev) + continue; + rt->dst.dev = net->loopback_dev; + dev_hold(rt->dst.dev); + dev_put(dev); + } + spin_unlock_bh(&rt_uncached_lock); } } @@ -1239,20 +1300,24 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, struct fib_nh_exception *fnhe, struct fib_info *fi, u16 type, u32 itag) { + bool cached = false; + if (fi) { struct fib_nh *nh = &FIB_RES_NH(*res); if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) rt->rt_gateway = nh->nh_gw; - if (unlikely(fnhe)) - rt_bind_exception(rt, fnhe, daddr); dst_init_metrics(&rt->dst, fi->fib_metrics, true); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif - if (!(rt->dst.flags & DST_NOCACHE)) - rt_cache_route(nh, rt); + if (unlikely(fnhe)) + cached = rt_bind_exception(rt, fnhe, daddr); + else if (!(rt->dst.flags & DST_NOCACHE)) + cached = rt_cache_route(nh, rt); } + if (unlikely(!cached)) + rt_add_uncached_list(rt); #ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -1319,6 +1384,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); if (our) { rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -1420,7 +1486,7 @@ static int __mkroute_input(struct sk_buff *skb, do_cache = false; if (res->fi) { if (!itag) { - rth = FIB_RES_NH(*res).nh_rth_input; + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -1444,6 +1510,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); rth->dst.input = ip_forward; rth->dst.output = ip_output; @@ -1520,11 +1587,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (ipv4_is_zeronet(daddr)) goto martian_destination; - if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) { - if (ipv4_is_loopback(daddr)) + /* The following code tries to avoid calling IN_DEV_NET_ROUTE_LOCALNET(), + * and calls it at most once when daddr and/or saddr is a loopback address */ + if (ipv4_is_loopback(daddr)) { + if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) goto martian_destination; - - if (ipv4_is_loopback(saddr)) + } else if (ipv4_is_loopback(saddr)) { + if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) goto martian_source; } @@ -1549,7 +1619,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, - net->loopback_dev->ifindex, + LOOPBACK_IFINDEX, dev, in_dev, &itag); if (err < 0) goto martian_source_keep_err; @@ -1582,7 +1652,7 @@ local_input: 
do_cache = false; if (res.fi) { if (!itag) { - rth = FIB_RES_NH(res).nh_rth_input; + rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); err = 0; @@ -1610,6 +1680,7 @@ local_input: rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; @@ -1748,19 +1819,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = NULL; if (fi) { + struct rtable __rcu **prth; + fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); - if (!fnhe) { - rth = FIB_RES_NH(*res).nh_rth_output; - if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); - return rth; - } + if (fnhe) + prth = &fnhe->fnhe_rth; + else + prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); + rth = rcu_dereference(*prth); + if (rt_cache_valid(rth)) { + dst_hold(&rth->dst); + return rth; } } rth = rt_dst_alloc(dev_out, IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOXFRM), - fi && !fnhe); + fi); if (!rth) return ERR_PTR(-ENOBUFS); @@ -1773,6 +1848,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); RT_CACHE_STAT_INC(out_slow_tot); @@ -1819,7 +1895,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) orig_oif = fl4->flowi4_oif; - fl4->flowi4_iif = net->loopback_dev->ifindex; + fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); @@ -1908,7 +1984,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) if (!fl4->daddr) fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; - fl4->flowi4_oif = net->loopback_dev->ifindex; + fl4->flowi4_oif = LOOPBACK_IFINDEX; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; @@ -1955,7 +2031,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) } dev_out = net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; - res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; } @@ -2052,6 +2127,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_type = ort->rt_type; rt->rt_gateway = ort->rt_gateway; + INIT_LIST_HEAD(&rt->rt_uncached); + dst_free(new); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5840c3255721..1b5ce96707a3 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -184,7 +184,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, int ret; unsigned long vec[3]; struct net *net = current->nsproxy->net_ns; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM struct mem_cgroup *memcg; #endif @@ -203,7 +203,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, if (ret) return ret; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM rcu_read_lock(); memcg = mem_cgroup_from_task(current); @@ -784,13 +784,6 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .procname = "rt_cache_rebuild_count", - .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "ping_group_range", .data = &init_net.ipv4.sysctl_ping_group_range, .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range), @@ -829,8 +822,6 @@ static __net_init int 
ipv4_sysctl_init_net(struct net *net) table[5].data = &net->ipv4.sysctl_icmp_ratemask; table[6].data = - &net->ipv4.sysctl_rt_cache_rebuild_count; - table[7].data = &net->ipv4.sysctl_ping_group_range; } @@ -842,8 +833,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_ping_group_range[0] = 1; net->ipv4.sysctl_ping_group_range[1] = 0; - net->ipv4.sysctl_rt_cache_rebuild_count = 4; - tcp_init_mem(net); net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 581ecf02c6b5..2109ff4a1daf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -811,7 +811,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, old_size_goal + mss_now > xmit_size_goal)) { xmit_size_goal = old_size_goal; } else { - tp->xmit_size_goal_segs = xmit_size_goal / mss_now; + tp->xmit_size_goal_segs = + min_t(u16, xmit_size_goal / mss_now, + sk->sk_gso_max_segs); xmit_size_goal = tp->xmit_size_goal_segs * mss_now; } } @@ -2681,7 +2683,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, /* Cap the max timeout in ms TCP will retry/retrans * before giving up and aborting (ETIMEDOUT) a connection. */ - icsk->icsk_user_timeout = msecs_to_jiffies(val); + if (val < 0) + err = -EINVAL; + else + icsk->icsk_user_timeout = msecs_to_jiffies(val); break; default: err = -ENOPROTOOPT; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4d4db16e336e..1432cdb0644c 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) left = tp->snd_cwnd - in_flight; if (sk_can_gso(sk) && left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && - left * tp->mss_cache < sk->sk_gso_max_size) + left * tp->mss_cache < sk->sk_gso_max_size && + left < sk->sk_gso_max_segs) return true; return left <= tcp_max_tso_deferred_mss(tp); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3e07a64ca44e..bcfccc5cb8d0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -237,7 +237,11 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s tcp_enter_quickack_mode((struct sock *)tp); break; case INET_ECN_CE: - tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { + /* Better not delay ACKs, sender can have a very low cwnd */ + tcp_enter_quickack_mode((struct sock *)tp); + tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + } /* fall through */ default: tp->ecn_flags |= TCP_ECN_SEEN; @@ -4351,19 +4355,20 @@ static void tcp_ofo_queue(struct sock *sk) static bool tcp_prune_ofo_queue(struct sock *sk); static int tcp_prune_queue(struct sock *sk); -static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) +static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, + unsigned int size) { if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - !sk_rmem_schedule(sk, size)) { + !sk_rmem_schedule(sk, skb, size)) { if (tcp_prune_queue(sk) < 0) return -1; - if (!sk_rmem_schedule(sk, size)) { + if (!sk_rmem_schedule(sk, skb, size)) { if (!tcp_prune_ofo_queue(sk)) return -1; - if (!sk_rmem_schedule(sk, size)) + if (!sk_rmem_schedule(sk, skb, size)) return -1; } } @@ -4418,7 +4423,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) TCP_ECN_check_ce(tp, skb); - if (unlikely(tcp_try_rmem_schedule(sk, skb->truesize))) { + if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); __kfree_skb(skb); return; @@ -4552,17 
+4557,17 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct tcphdr *th; bool fragstolen; - if (tcp_try_rmem_schedule(sk, size + sizeof(*th))) - goto err; - skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); if (!skb) goto err; + if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th))) + goto err_free; + th = (struct tcphdr *)skb_put(skb, sizeof(*th)); skb_reset_transport_header(skb); memset(th, 0, sizeof(*th)); @@ -4633,7 +4638,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (eaten <= 0) { queue_and_out: if (eaten < 0 && - tcp_try_rmem_schedule(sk, skb->truesize)) + tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto drop; eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); @@ -5391,6 +5396,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); + if (unlikely(sk->sk_rx_dst == NULL)) + inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* * Header prediction. * The code loosely follows the one in the famous @@ -5475,7 +5482,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if (tp->copied_seq == tp->rcv_nxt && len - tcp_header_len <= tp->ucopy.len) { #ifdef CONFIG_NET_DMA - if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { + if (tp->ucopy.task == current && + sock_owned_by_user(sk) && + tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { copied_early = 1; eaten = 1; } @@ -5602,7 +5611,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_ESTABLISHED); if (skb != NULL) { - sk->sk_rx_dst = dst_clone(skb_dst(skb)); + icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); security_inet_conn_established(sk, skb); } @@ -5737,7 +5746,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, TCP_ECN_rcv_synack(tp, th); - tp->snd_wl1 = TCP_SKB_CB(skb)->seq; + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); tcp_ack(sk, skb, FLAG_SLOWPATH); /* Ok.. it's good. Set up sequence numbers and @@ -5750,7 +5759,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * never scaled. 
*/ tp->snd_wnd = ntohs(th->window); - tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); if (!tp->rx_opt.wscale_ok) { tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b6b07c93924c..1e15c5be04e7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -417,10 +417,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ tp->mtu_info = info; - if (!sock_owned_by_user(sk)) + if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); - else - set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); + } else { + if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) + sock_hold(sk); + } goto out; } @@ -1462,6 +1464,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto exit_nonewsk; newsk->sk_gso_type = SKB_GSO_TCPV4; + inet_sk_rx_dst_set(newsk, skb); newtp = tcp_sk(newsk); newinet = inet_sk(newsk); @@ -1617,21 +1620,16 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + struct dst_entry *dst = sk->sk_rx_dst; + sock_rps_save_rxhash(sk, skb); - if (sk->sk_rx_dst) { - struct dst_entry *dst = sk->sk_rx_dst; - if (dst->ops->check(dst, 0) == NULL) { + if (dst) { + if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || + dst->ops->check(dst, 0) == NULL) { dst_release(dst); sk->sk_rx_dst = NULL; } } - if (unlikely(sk->sk_rx_dst == NULL)) { - struct inet_sock *icsk = inet_sk(sk); - struct rtable *rt = skb_rtable(skb); - - sk->sk_rx_dst = dst_clone(&rt->dst); - icsk->rx_dst_ifindex = inet_iif(skb); - } if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; @@ -1709,11 +1707,11 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { struct dst_entry *dst = sk->sk_rx_dst; - struct inet_sock *icsk = inet_sk(sk); + if (dst) dst = dst_check(dst, 0); if (dst && - icsk->rx_dst_ifindex == skb->skb_iif) + inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } @@ -1874,10 +1872,21 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; +void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; +} +EXPORT_SYMBOL(inet_sk_rx_dst_set); + const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, + .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, .net_header_len = sizeof(struct iphdr), @@ -2387,7 +2396,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, struct seq_file *f, int i, int uid, int *len) { const struct inet_request_sock *ireq = inet_rsk(req); - int ttd = req->expires - jiffies; + long delta = req->expires - jiffies; seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", @@ -2399,7 +2408,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, TCP_SYN_RECV, 0, 0, /* could print option size, but that is af dependent. 
*/ 1, /* timers active (only the expire timer) */ - jiffies_to_clock_t(ttd), + jiffies_delta_to_clock_t(delta), req->retrans, uid, 0, /* non standard timer */ @@ -2450,7 +2459,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) tp->write_seq - tp->snd_una, rx_queue, timer_active, - jiffies_to_clock_t(timer_expires - jiffies), + jiffies_delta_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, sock_i_uid(sk), icsk->icsk_probes_out, @@ -2469,10 +2478,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, { __be32 dest, src; __u16 destp, srcp; - int ttd = tw->tw_ttd - jiffies; - - if (ttd < 0) - ttd = 0; + long delta = tw->tw_ttd - jiffies; dest = tw->tw_daddr; src = tw->tw_rcv_saddr; @@ -2482,7 +2488,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n", i, src, srcp, dest, destp, tw->tw_substate, 0, 0, - 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, atomic_read(&tw->tw_refcnt), tw, len); } @@ -2635,7 +2641,7 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM .init_cgroup = tcp_init_cgroup, .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 2288a6399e1e..0abe67bb4d3a 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -731,6 +731,18 @@ static int __net_init tcp_net_metrics_init(struct net *net) static void __net_exit tcp_net_metrics_exit(struct net *net) { + unsigned int i; + + for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) { + struct tcp_metrics_block *tm, *next; + + tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1); + while (tm) { + next = rcu_dereference_protected(tm->tcpm_next, 1); + kfree(tm); + tm = next; + } + } kfree(net->ipv4.tcp_metrics_hash); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 5912ac3fd240..6ff7f10dce9d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -387,8 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - newsk->sk_rx_dst = dst_clone(skb_dst(skb)); - /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to * copy any s_data_payload stored at the original socket. 
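The tcp_net_metrics_exit() hunk above plugs a leak: the old code freed only the bucket array, leaving every tcp_metrics_block chained off the buckets allocated. Because the network namespace is being torn down and no RCU readers can still hold references, the chains may be walked and freed directly, which is why the lookups use rcu_dereference_protected(..., 1) instead of an rcu_read_lock() section. A minimal user-space analogue of that teardown pattern, with hypothetical node and bucket types standing in for tcp_metrics_block and the per-netns hash, might look like:

#include <stdlib.h>

struct node {                     /* stand-in for tcp_metrics_block */
	struct node *next;
};

struct bucket {                   /* stand-in for one hash chain head */
	struct node *chain;
};

/* Free every chain, then the bucket array itself. */
static void hash_exit(struct bucket *hash, unsigned int hash_log)
{
	unsigned int i;

	for (i = 0; i < (1U << hash_log); i++) {
		struct node *n = hash[i].chain;

		while (n) {
			struct node *next = n->next;

			free(n);
			n = next;
		}
	}
	free(hash);
}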
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 33cd065cfbd8..d04632673a9e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -910,14 +910,18 @@ void tcp_release_cb(struct sock *sk) if (flags & (1UL << TCP_TSQ_DEFERRED)) tcp_tsq_handler(sk); - if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { tcp_write_timer_handler(sk); - - if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) + __sock_put(sk); + } + if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) { tcp_delack_timer_handler(sk); - - if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) + __sock_put(sk); + } + if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { sk->sk_prot->mtu_reduced(sk); + __sock_put(sk); + } } EXPORT_SYMBOL(tcp_release_cb); @@ -940,7 +944,7 @@ void __init tcp_tasklet_init(void) * We cant xmit new skbs from this context, as we might already * hold qdisc lock. */ -void tcp_wfree(struct sk_buff *skb) +static void tcp_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); @@ -1522,21 +1526,21 @@ static void tcp_cwnd_validate(struct sock *sk) * when we would be allowed to send the split-due-to-Nagle skb fully. */ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, - unsigned int mss_now, unsigned int cwnd) + unsigned int mss_now, unsigned int max_segs) { const struct tcp_sock *tp = tcp_sk(sk); - u32 needed, window, cwnd_len; + u32 needed, window, max_len; window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; - cwnd_len = mss_now * cwnd; + max_len = mss_now * max_segs; - if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk))) - return cwnd_len; + if (likely(max_len <= window && skb != tcp_write_queue_tail(sk))) + return max_len; needed = min(skb->len, window); - if (cwnd_len <= needed) - return cwnd_len; + if (max_len <= needed) + return max_len; return needed - needed % mss_now; } @@ -1765,7 +1769,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) limit = min(send_win, cong_win); /* If a full-sized TSO skb can be sent, do it. */ - if (limit >= sk->sk_gso_max_size) + if (limit >= min_t(unsigned int, sk->sk_gso_max_size, + sk->sk_gso_max_segs * tp->mss_cache)) goto send_now; /* Middle in queue won't get any more data, full sendable already? */ @@ -1999,7 +2004,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, - cwnd_quota); + min_t(unsigned int, + cwnd_quota, + sk->sk_gso_max_segs)); if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) @@ -2045,7 +2052,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, if (unlikely(sk->sk_state == TCP_CLOSE)) return; - if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC)) + if (tcp_write_xmit(sk, cur_mss, nonagle, 0, + sk_gfp_atomic(sk, GFP_ATOMIC))) tcp_check_probe_timer(sk); } @@ -2666,7 +2674,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) s_data_desired = cvp->s_data_desired; - skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC); + skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, + sk_gfp_atomic(sk, GFP_ATOMIC)); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -3064,7 +3073,7 @@ void tcp_send_ack(struct sock *sk) * tcp_transmit_skb() will set the ownership to this * sock. 
*/ - buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); + buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); if (buff == NULL) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; @@ -3079,7 +3088,7 @@ void tcp_send_ack(struct sock *sk) /* Send it off, this clears delayed acks for us. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; - tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); + tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); } /* This routine sends a packet with an out of date sequence @@ -3099,7 +3108,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) struct sk_buff *skb; /* We don't queue it, tcp_transmit_skb() sets ownership. */ - skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); + skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); if (skb == NULL) return -1; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6df36ad55a38..b774a03bd1dc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data) inet_csk(sk)->icsk_ack.blocked = 1; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* delegate our work to tcp_release_cb() */ - set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + sock_hold(sk); } bh_unlock_sock(sk); sock_put(sk); @@ -481,7 +482,8 @@ static void tcp_write_timer(unsigned long data) tcp_write_timer_handler(sk); } else { /* delegate our work to tcp_release_cb() */ - set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) + sock_hold(sk); } bh_unlock_sock(sk); sock_put(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b4c3582a991f..6f6d1aca3c3d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) uh->check = CSUM_MANGLED_0; send: - err = ip_send_skb(skb); + err = ip_send_skb(sock_net(sk), skb); if (err) { if (err == -ENOBUFS && !inet->recverr) { UDP_INC_STATS_USER(sock_net(sk), diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c6281847f16a..681ea2f413e2 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_type = rt->rt_type; xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; + INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); return 0; } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 5728695b5449..4f7fe7270e37 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -201,6 +201,22 @@ config IPV6_TUNNEL If unsure, say N. +config IPV6_GRE + tristate "IPv6: GRE tunnel" + select IPV6_TUNNEL + ---help--- + Tunneling means encapsulating data of one protocol type within + another protocol and sending it over a channel that understands the + encapsulating protocol. This particular tunneling driver implements + GRE (Generic Routing Encapsulation) and at this time allows + encapsulation of IPv4 or IPv6 over existing IPv6 infrastructure. + This driver is useful if the other endpoint is a Cisco router: Cisco + likes GRE much better than the other Linux tunneling driver ("IP + tunneling" above). In addition, GRE allows multicast redistribution + through the tunnel. + + Saying M here will produce a module called ip6_gre. If unsure, say N. 
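The driver added below sizes its headers from this encapsulation: a base of sizeof(struct ipv6hdr) + 4 bytes for the mandatory GRE flags/protocol words, plus 4 more bytes for each of GRE_CSUM, GRE_KEY and GRE_SEQ when enabled (see the addend arithmetic in ip6gre_tnl_link_config() further down). The struct below is a hedged, illustrative sketch of that on-the-wire framing only; it is not a type defined by this patch:

#include <linux/types.h>
#include <linux/ipv6.h>

/* Hypothetical view of a GRE-over-IPv6 packet (RFC 2784/2890 framing). */
struct ip6gre_wire {
	struct ipv6hdr outer;	/* nexthdr == NEXTHDR_GRE */
	__be16 flags;		/* GRE_CSUM | GRE_KEY | GRE_SEQ, version bits 0 */
	__be16 protocol;	/* e.g. ETH_P_IP, ETH_P_IPV6 or ETH_P_TEB */
	/* Followed, in order and only when the matching flag is set, by:
	 *   __be16 csum; __be16 reserved;   (GRE_CSUM, 4 bytes)
	 *   __be32 key;                     (GRE_KEY,  4 bytes)
	 *   __be32 seq;                     (GRE_SEQ,  4 bytes)
	 * and then the encapsulated payload.
	 */
};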
+ config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" depends on EXPERIMENTAL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 686934acfac1..b6d3f79151e2 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o +obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-y += addrconf_core.o exthdrs_core.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 79181819a24f..6bc85f7c31e3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -494,8 +494,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); @@ -504,7 +503,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf) dev_forward_change(idev); } } - rcu_read_unlock(); } static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c new file mode 100644 index 000000000000..a84ad5dc4fcf --- /dev/null +++ b/net/ipv6/ip6_gre.c @@ -0,0 +1,1790 @@ +/* + * GRE over IPv6 protocol decoder. + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_arp.h> +#include <linux/mroute.h> +#include <linux/init.h> +#include <linux/in6.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/netfilter_ipv4.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/hash.h> +#include <linux/if_tunnel.h> +#include <linux/ip6_tunnel.h> + +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/addrconf.h> +#include <net/arp.h> +#include <net/checksum.h> +#include <net/dsfield.h> +#include <net/inet_ecn.h> +#include <net/xfrm.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/rtnetlink.h> + +#include <net/ipv6.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> +#include <net/ip6_tunnel.h> + + +#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) +#define IPV6_TCLASS_SHIFT 20 + +#define HASH_SIZE_SHIFT 5 +#define HASH_SIZE (1 << HASH_SIZE_SHIFT) + +static int ip6gre_net_id __read_mostly; +struct ip6gre_net { + struct ip6_tnl __rcu *tunnels[4][HASH_SIZE]; + + struct net_device *fb_tunnel_dev; +}; + +static struct rtnl_link_ops ip6gre_link_ops __read_mostly; +static int ip6gre_tunnel_init(struct net_device *dev); +static void ip6gre_tunnel_setup(struct net_device *dev); +static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); +static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); + +/* Tunnel hash table */ + +/* + 4 hash tables: + + 3: (remote,local) + 2: (remote,*) + 1: (*,local) + 0: (*,*) + + We require exact key match i.e. 
if a key is present in the packet + it will match only a tunnel with the same key; if it is not present, + it will match only a keyless tunnel. + + All keyless packets that do not match a configured keyless tunnel + will match the fallback tunnel. + */ + +#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1)) +static u32 HASH_ADDR(const struct in6_addr *addr) +{ + u32 hash = ipv6_addr_hash(addr); + + return hash_32(hash, HASH_SIZE_SHIFT); +} + +#define tunnels_r_l tunnels[3] +#define tunnels_r tunnels[2] +#define tunnels_l tunnels[1] +#define tunnels_wc tunnels[0] +/* + * Locking: hash tables are protected by RCU and RTNL + */ + +#define for_each_ip_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) + +/* Often-modified stats are per-cpu; others are shared (netdev->stats) */ +struct pcpu_tstats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; +}; + +static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; + } + + tot->multicast = dev->stats.multicast; + tot->rx_crc_errors = dev->stats.rx_crc_errors; + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; + tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_errors = dev->stats.rx_errors; + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + return tot; +} + +/* Given src, dst and key, find the appropriate tunnel for the incoming packet. */ + +static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, + const struct in6_addr *remote, const struct in6_addr *local, + __be32 key, __be16 gre_proto) +{ + struct net *net = dev_net(dev); + int link = dev->ifindex; + unsigned int h0 = HASH_ADDR(remote); + unsigned int h1 = HASH_KEY(key); + struct ip6_tnl *t, *cand = NULL; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 
+ ARPHRD_ETHER : ARPHRD_IP6GRE; + int score, cand_score = 4; + + for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { + if (!ipv6_addr_equal(local, &t->parms.laddr) || + !ipv6_addr_equal(remote, &t->parms.raddr) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { + if (!ipv6_addr_equal(remote, &t->parms.raddr) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { + if ((!ipv6_addr_equal(local, &t->parms.laddr) && + (!ipv6_addr_equal(local, &t->parms.raddr) || + !ipv6_addr_is_multicast(local))) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { + if (t->parms.i_key != key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + if (cand != NULL) + return cand; + + dev = ign->fb_tunnel_dev; + if (dev->flags & IFF_UP) + return netdev_priv(dev); + + return NULL; +} + +static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign, + const struct __ip6_tnl_parm *p) +{ + const struct in6_addr *remote = &p->raddr; + const struct in6_addr *local = &p->laddr; + unsigned int h = HASH_KEY(p->i_key); + int prio = 0; + + if (!ipv6_addr_any(local)) + prio |= 1; + if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) { + prio |= 2; + h ^= HASH_ADDR(remote); + } + + return &ign->tunnels[prio][h]; +} + +static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign, + const struct ip6_tnl *t) +{ + return __ip6gre_bucket(ign, &t->parms); +} + +static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); + + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); + rcu_assign_pointer(*tp, t); +} + +static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp; + struct ip6_tnl *iter; + + for (tp = ip6gre_bucket(ign, t); + (iter = rtnl_dereference(*tp)) != NULL; + tp = &iter->next) { + if (t == iter) { + rcu_assign_pointer(*tp, t->next); + break; + } + } +} + +static struct ip6_tnl *ip6gre_tunnel_find(struct net *net, + const struct __ip6_tnl_parm *parms, + int type) +{ + const struct in6_addr *remote = &parms->raddr; + const struct in6_addr *local = &parms->laddr; + __be32 key = parms->i_key; + int link = parms->link; + struct ip6_tnl *t; + struct 
ip6_tnl __rcu **tp; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + for (tp = __ip6gre_bucket(ign, parms); + (t = rtnl_dereference(*tp)) != NULL; + tp = &t->next) + if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_equal(remote, &t->parms.raddr) && + key == t->parms.i_key && + link == t->parms.link && + type == t->dev->type) + break; + + return t; +} + +static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, + const struct __ip6_tnl_parm *parms, int create) +{ + struct ip6_tnl *t, *nt; + struct net_device *dev; + char name[IFNAMSIZ]; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE); + if (t || !create) + return t; + + if (parms->name[0]) + strlcpy(name, parms->name, IFNAMSIZ); + else + strcpy(name, "ip6gre%d"); + + dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup); + if (!dev) + return NULL; + + dev_net_set(dev, net); + + nt = netdev_priv(dev); + nt->parms = *parms; + dev->rtnl_link_ops = &ip6gre_link_ops; + + nt->dev = dev; + ip6gre_tnl_link_config(nt, 1); + + if (register_netdevice(dev) < 0) + goto failed_free; + + /* Can use a lockless transmit, unless we generate output sequences */ + if (!(nt->parms.o_flags & GRE_SEQ)) + dev->features |= NETIF_F_LLTX; + + dev_hold(dev); + ip6gre_tunnel_link(ign, nt); + return nt; + +failed_free: + free_netdev(dev); + return NULL; +} + +static void ip6gre_tunnel_uninit(struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + ip6gre_tunnel_unlink(ign, netdev_priv(dev)); + dev_put(dev); +} + + +static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; + __be16 *p = (__be16 *)(ipv6h + 1); + int grehlen = sizeof(ipv6h) + 4; + struct ip6_tnl *t; + __be16 flags; + + flags = p[0]; + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { + if (flags&(GRE_VERSION|GRE_ROUTING)) + return; + if (flags&GRE_KEY) { + grehlen += 4; + if (flags&GRE_CSUM) + grehlen += 4; + } + } + + /* If only 8 bytes returned, keyed message will be dropped here */ + if (skb_headlen(skb) < grehlen) + return; + + rcu_read_lock(); + + t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, + flags & GRE_KEY ? 
+ *(((__be32 *)p) + (grehlen / 4) - 1) : 0, + p[1]); + if (t == NULL) + goto out; + + switch (type) { + __u32 teli; + struct ipv6_tlv_tnl_enc_lim *tel; + __u32 mtu; + case ICMPV6_DEST_UNREACH: + net_warn_ratelimited("%s: Path to destination invalid or inactive!\n", + t->parms.name); + break; + case ICMPV6_TIME_EXCEED: + if (code == ICMPV6_EXC_HOPLIMIT) { + net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", + t->parms.name); + } + break; + case ICMPV6_PARAMPROB: + teli = 0; + if (code == ICMPV6_HDR_FIELD) + teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); + + if (teli && teli == info - 2) { + tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; + if (tel->encap_limit == 0) { + net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", + t->parms.name); + } + } else { + net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n", + t->parms.name); + } + break; + case ICMPV6_PKT_TOOBIG: + mtu = info - offset; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + t->dev->mtu = mtu; + break; + } + + if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) + t->err_count++; + else + t->err_count = 1; + t->err_time = jiffies; +out: + rcu_read_unlock(); +} + +static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, struct sk_buff *skb) +{ + __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; + + if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) + ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); + + if (INET_ECN_is_ce(dsfield)) + IP_ECN_set_ce(ip_hdr(skb)); +} + +static inline void ip6gre_ecn_decapsulate_ipv6(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, struct sk_buff *skb) +{ + if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) + ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); + + if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) + IP6_ECN_set_ce(ipv6_hdr(skb)); +} + +static int ip6gre_rcv(struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h; + u8 *h; + __be16 flags; + __sum16 csum = 0; + __be32 key = 0; + u32 seqno = 0; + struct ip6_tnl *tunnel; + int offset = 4; + __be16 gre_proto; + + if (!pskb_may_pull(skb, sizeof(struct in6_addr))) + goto drop_nolock; + + ipv6h = ipv6_hdr(skb); + h = skb->data; + flags = *(__be16 *)h; + + if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { + /* - Version must be 0. + - We do not support routing headers. + */ + if (flags&(GRE_VERSION|GRE_ROUTING)) + goto drop_nolock; + + if (flags&GRE_CSUM) { + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); + if (!csum) + break; + /* fall through */ + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + } + offset += 4; + } + if (flags&GRE_KEY) { + key = *(__be32 *)(h + offset); + offset += 4; + } + if (flags&GRE_SEQ) { + seqno = ntohl(*(__be32 *)(h + offset)); + offset += 4; + } + } + + gre_proto = *(__be16 *)(h + 2); + + rcu_read_lock(); + tunnel = ip6gre_tunnel_lookup(skb->dev, + &ipv6h->saddr, &ipv6h->daddr, key, + gre_proto); + if (tunnel) { + struct pcpu_tstats *tstats; + + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) { + tunnel->dev->stats.rx_dropped++; + goto drop; + } + + secpath_reset(skb); + + skb->protocol = gre_proto; + /* WCCP version 1 and 2 protocol decoding. 
+ * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { + skb->protocol = htons(ETH_P_IP); + if ((*(h + offset) & 0xF0) != 0x40) + offset += 4; + } + + skb->mac_header = skb->network_header; + __pskb_pull(skb, offset); + skb_postpull_rcsum(skb, skb_transport_header(skb), offset); + skb->pkt_type = PACKET_HOST; + + if (((flags&GRE_CSUM) && csum) || + (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { + tunnel->dev->stats.rx_crc_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + if (tunnel->parms.i_flags&GRE_SEQ) { + if (!(flags&GRE_SEQ) || + (tunnel->i_seqno && + (s32)(seqno - tunnel->i_seqno) < 0)) { + tunnel->dev->stats.rx_fifo_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + tunnel->i_seqno = seqno + 1; + } + + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + ipv6h = ipv6_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } + + tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + if (skb->protocol == htons(ETH_P_IP)) + ip6gre_ecn_decapsulate_ipv4(tunnel, ipv6h, skb); + else if (skb->protocol == htons(ETH_P_IPV6)) + ip6gre_ecn_decapsulate_ipv6(tunnel, ipv6h, skb); + + netif_rx(skb); + + rcu_read_unlock(); + return 0; + } + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +drop: + rcu_read_unlock(); +drop_nolock: + kfree_skb(skb); + return 0; +} + +struct ipv6_tel_txoption { + struct ipv6_txoptions ops; + __u8 dst_opt[8]; +}; + +static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) +{ + memset(opt, 0, sizeof(struct ipv6_tel_txoption)); + + opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; + opt->dst_opt[3] = 1; + opt->dst_opt[4] = encap_limit; + opt->dst_opt[5] = IPV6_TLV_PADN; + opt->dst_opt[6] = 1; + + opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt; + opt->ops.opt_nflen = 8; +} + +static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, + struct net_device *dev, + __u8 dsfield, + struct flowi6 *fl6, + int encap_limit, + __u32 *pmtu) +{ + struct net *net = dev_net(dev); + struct ip6_tnl *tunnel = netdev_priv(dev); + struct net_device *tdev; /* Device to other host */ + struct ipv6hdr *ipv6h; /* Our new IP header */ + unsigned int max_headroom; /* The extra header space needed */ + int gre_hlen; + struct ipv6_tel_txoption opt; + int mtu; + struct dst_entry *dst = NULL, *ndst = NULL; + struct net_device_stats *stats = &tunnel->dev->stats; + int err = -1; + u8 proto; + int pkt_len; + struct sk_buff *new_skb; + + if (dev->type == ARPHRD_ETHER) + IPCB(skb)->flags = 0; + + if (dev->header_ops && dev->type == ARPHRD_IP6GRE) { + gre_hlen = 0; + ipv6h = (struct ipv6hdr *)skb->data; + fl6->daddr = ipv6h->daddr; + } else { + gre_hlen = tunnel->hlen; + fl6->daddr = tunnel->parms.raddr; + } + + if (!fl6->flowi6_mark) + dst = ip6_tnl_dst_check(tunnel); + + if (!dst) { + ndst = ip6_route_output(net, NULL, fl6); + + if (ndst->error) + goto tx_err_link_failure; + ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(ndst)) { + err = PTR_ERR(ndst); + ndst = NULL; + goto 
tx_err_link_failure; + } + dst = ndst; + } + + tdev = dst->dev; + + if (tdev == dev) { + stats->collisions++; + net_warn_ratelimited("%s: Local routing loop detected!\n", + tunnel->parms.name); + goto tx_err_dst_release; + } + + mtu = dst_mtu(dst) - sizeof(*ipv6h); + if (encap_limit >= 0) { + max_headroom += 8; + mtu -= 8; + } + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + if (skb->len > mtu) { + *pmtu = mtu; + err = -EMSGSIZE; + goto tx_err_dst_release; + } + + if (tunnel->err_count > 0) { + if (time_before(jiffies, + tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) { + tunnel->err_count--; + + dst_link_failure(skb); + } else + tunnel->err_count = 0; + } + + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; + + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { + new_skb = skb_realloc_headroom(skb, max_headroom); + if (max_headroom > dev->needed_headroom) + dev->needed_headroom = max_headroom; + if (!new_skb) + goto tx_err_dst_release; + + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + consume_skb(skb); + skb = new_skb; + } + + skb_dst_drop(skb); + + if (fl6->flowi6_mark) { + skb_dst_set(skb, dst); + ndst = NULL; + } else { + skb_dst_set_noref(skb, dst); + } + + skb->transport_header = skb->network_header; + + proto = NEXTHDR_GRE; + if (encap_limit >= 0) { + init_tel_txopt(&opt, encap_limit); + ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); + } + + skb_push(skb, gre_hlen); + skb_reset_network_header(skb); + + /* + * Push down and install the IP header. + */ + ipv6h = ipv6_hdr(skb); + *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000); + dsfield = INET_ECN_encapsulate(0, dsfield); + ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); + ipv6h->hop_limit = tunnel->parms.hop_limit; + ipv6h->nexthdr = proto; + ipv6h->saddr = fl6->saddr; + ipv6h->daddr = fl6->daddr; + + ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags; + ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ? 
+ htons(ETH_P_TEB) : skb->protocol; + + if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4); + + if (tunnel->parms.o_flags&GRE_SEQ) { + ++tunnel->o_seqno; + *ptr = htonl(tunnel->o_seqno); + ptr--; + } + if (tunnel->parms.o_flags&GRE_KEY) { + *ptr = tunnel->parms.o_key; + ptr--; + } + if (tunnel->parms.o_flags&GRE_CSUM) { + *ptr = 0; + *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1), + skb->len - sizeof(struct ipv6hdr)); + } + } + + nf_reset(skb); + pkt_len = skb->len; + err = ip6_local_out(skb); + + if (net_xmit_eval(err) == 0) { + struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats); + + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + } else { + stats->tx_errors++; + stats->tx_aborted_errors++; + } + + if (ndst) + ip6_tnl_dst_store(tunnel, ndst); + + return 0; +tx_err_link_failure: + stats->tx_carrier_errors++; + dst_link_failure(skb); +tx_err_dst_release: + dst_release(ndst); + return err; +} + +static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + const struct iphdr *iph = ip_hdr(skb); + int encap_limit = -1; + struct flowi6 fl6; + __u8 dsfield; + __u32 mtu; + int err; + + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + + dsfield = ipv4_get_dsfield(iph); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) + & IPV6_TCLASS_MASK; + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + + err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + if (err != 0) { + /* XXX: send ICMP error even if DF is not set. 
*/ + if (err == -EMSGSIZE) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + return -1; + } + + return 0; +} + +static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + int encap_limit = -1; + __u16 offset; + struct flowi6 fl6; + __u8 dsfield; + __u32 mtu; + int err; + + if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) + return -1; + + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; + tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; + + dsfield = ipv6_get_dsfield(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + + err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + if (err != 0) { + if (err == -EMSGSIZE) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + return -1; + } + + return 0; +} + +/** + * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own + * @t: the outgoing tunnel device + * @hdr: IPv6 header from the incoming packet + * + * Description: + * Avoid trivial tunneling loop by checking that tunnel exit-point + * doesn't match source of incoming packet. 
+ * + * Return: + * 1 if conflict, + * 0 else + **/ + +static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t, + const struct ipv6hdr *hdr) +{ + return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); +} + +static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + int encap_limit = -1; + struct flowi6 fl6; + __u32 mtu; + int err; + + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = skb->protocol; + + err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu); + + return err; +} + +static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net_device_stats *stats = &t->dev->stats; + int ret; + + if (!ip6_tnl_xmit_ctl(t)) + return -1; + + switch (skb->protocol) { + case htons(ETH_P_IP): + ret = ip6gre_xmit_ipv4(skb, dev); + break; + case htons(ETH_P_IPV6): + ret = ip6gre_xmit_ipv6(skb, dev); + break; + default: + ret = ip6gre_xmit_other(skb, dev); + break; + } + + if (ret < 0) + goto tx_err; + + return NETDEV_TX_OK; + +tx_err: + stats->tx_errors++; + stats->tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) +{ + struct net_device *dev = t->dev; + struct __ip6_tnl_parm *p = &t->parms; + struct flowi6 *fl6 = &t->fl.u.ip6; + int addend = sizeof(struct ipv6hdr) + 4; + + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); + } + + /* Set up flowi template */ + fl6->saddr = p->laddr; + fl6->daddr = p->raddr; + fl6->flowi6_oif = p->link; + fl6->flowlabel = 0; + + if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) + fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; + if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) + fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; + + p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); + p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); + + if (p->flags&IP6_TNL_F_CAP_XMIT && + p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; + else + dev->flags &= ~IFF_POINTOPOINT; + + dev->iflink = p->link; + + /* Precalculate GRE options length */ + if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { + if (t->parms.o_flags&GRE_CSUM) + addend += 4; + if (t->parms.o_flags&GRE_KEY) + addend += 4; + if (t->parms.o_flags&GRE_SEQ) + addend += 4; + } + + if (p->flags & IP6_TNL_F_CAP_XMIT) { + int strict = (ipv6_addr_type(&p->raddr) & + (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); + + struct rt6_info *rt = rt6_lookup(dev_net(dev), + &p->raddr, &p->laddr, + p->link, strict); + + if (rt == NULL) + return; + + if (rt->dst.dev) { + dev->hard_header_len = rt->dst.dev->hard_header_len + addend; + + if (set_mtu) { + dev->mtu = rt->dst.dev->mtu - addend; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + + if (dev->mtu < IPV6_MIN_MTU) + dev->mtu = IPV6_MIN_MTU; + } + } + dst_release(&rt->dst); + } + + t->hlen = addend; +} + +static int ip6gre_tnl_change(struct ip6_tnl *t, + const struct __ip6_tnl_parm *p, int set_mtu) +{ + t->parms.laddr = p->laddr; + t->parms.raddr = p->raddr; + t->parms.flags = p->flags; + t->parms.hop_limit = p->hop_limit; + t->parms.encap_limit = p->encap_limit; + t->parms.flowinfo = p->flowinfo; + t->parms.link = p->link; + t->parms.proto = p->proto; + t->parms.i_key 
= p->i_key; + t->parms.o_key = p->o_key; + t->parms.i_flags = p->i_flags; + t->parms.o_flags = p->o_flags; + ip6_tnl_dst_reset(t); + ip6gre_tnl_link_config(t, set_mtu); + return 0; +} + +static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p, + const struct ip6_tnl_parm2 *u) +{ + p->laddr = u->laddr; + p->raddr = u->raddr; + p->flags = u->flags; + p->hop_limit = u->hop_limit; + p->encap_limit = u->encap_limit; + p->flowinfo = u->flowinfo; + p->link = u->link; + p->i_key = u->i_key; + p->o_key = u->o_key; + p->i_flags = u->i_flags; + p->o_flags = u->o_flags; + memcpy(p->name, u->name, sizeof(u->name)); +} + +static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u, + const struct __ip6_tnl_parm *p) +{ + u->proto = IPPROTO_GRE; + u->laddr = p->laddr; + u->raddr = p->raddr; + u->flags = p->flags; + u->hop_limit = p->hop_limit; + u->encap_limit = p->encap_limit; + u->flowinfo = p->flowinfo; + u->link = p->link; + u->i_key = p->i_key; + u->o_key = p->o_key; + u->i_flags = p->i_flags; + u->o_flags = p->o_flags; + memcpy(u->name, p->name, sizeof(u->name)); +} + +static int ip6gre_tunnel_ioctl(struct net_device *dev, + struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip6_tnl_parm2 p; + struct __ip6_tnl_parm p1; + struct ip6_tnl *t; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == ign->fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, 0); + } + if (t == NULL) + t = netdev_priv(dev); + ip6gre_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + + err = -EINVAL; + if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)) + goto done; + + if (!(p.i_flags&GRE_KEY)) + p.i_key = 0; + if (!(p.o_flags&GRE_KEY)) + p.o_key = 0; + + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL); + + if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else { + t = netdev_priv(dev); + + ip6gre_tunnel_unlink(ign, t); + synchronize_net(); + ip6gre_tnl_change(t, &p1, 1); + ip6gre_tunnel_link(ign, t); + netdev_state_change(dev); + } + } + + if (t) { + err = 0; + + ip6gre_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + } else + err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + if (dev == ign->fb_tunnel_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + err = -ENOENT; + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, 0); + if (t == NULL) + goto done; + err = -EPERM; + if (t == netdev_priv(ign->fb_tunnel_dev)) + goto done; + dev = t->dev; + } + unregister_netdevice(dev); + err = 0; + break; + + default: + err = -EINVAL; + } + +done: + return err; +} + +static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + if (new_mtu < 68 || + new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, unsigned int len) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); + __be16 *p = (__be16 *)(ipv6h+1); + + *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000); + ipv6h->hop_limit = t->parms.hop_limit; + ipv6h->nexthdr = NEXTHDR_GRE; + ipv6h->saddr = t->parms.laddr; + ipv6h->daddr = t->parms.raddr; + + p[0] = t->parms.o_flags; + p[1] = htons(type); + + /* + * Set the source hardware address. + */ + + if (saddr) + memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr)); + if (daddr) + memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr)); + if (!ipv6_addr_any(&ipv6h->daddr)) + return t->hlen; + + return -t->hlen; +} + +static int ip6gre_header_parse(const struct sk_buff *skb, unsigned char *haddr) +{ + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb_mac_header(skb); + memcpy(haddr, &ipv6h->saddr, sizeof(struct in6_addr)); + return sizeof(struct in6_addr); +} + +static const struct header_ops ip6gre_header_ops = { + .create = ip6gre_header, + .parse = ip6gre_header_parse, +}; + +static const struct net_device_ops ip6gre_netdev_ops = { + .ndo_init = ip6gre_tunnel_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6gre_tunnel_xmit, + .ndo_do_ioctl = ip6gre_tunnel_ioctl, + .ndo_change_mtu = ip6gre_tunnel_change_mtu, + .ndo_get_stats64 = ip6gre_get_stats64, +}; + +static void ip6gre_dev_free(struct net_device *dev) +{ + free_percpu(dev->tstats); + free_netdev(dev); +} + +static void ip6gre_tunnel_setup(struct net_device *dev) +{ + struct ip6_tnl *t; + + dev->netdev_ops = &ip6gre_netdev_ops; + dev->destructor = ip6gre_dev_free; + + dev->type = ARPHRD_IP6GRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4; + dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4; + t = netdev_priv(dev); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + dev->flags |= IFF_NOARP; + dev->iflink = 0; + dev->addr_len = sizeof(struct in6_addr); + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; +} + +static int ip6gre_tunnel_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); + + if (ipv6_addr_any(&tunnel->parms.raddr)) + dev->header_ops = &ip6gre_header_ops; + + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + 
+static void ip6gre_fb_tunnel_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + tunnel->hlen = sizeof(struct ipv6hdr) + 4; + + dev_hold(dev); +} + + +static struct inet6_protocol ip6gre_protocol __read_mostly = { + .handler = ip6gre_rcv, + .err_handler = ip6gre_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + +static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, + struct list_head *head) +{ + int prio; + + for (prio = 0; prio < 4; prio++) { + int h; + for (h = 0; h < HASH_SIZE; h++) { + struct ip6_tnl *t; + + t = rtnl_dereference(ign->tunnels[prio][h]); + + while (t != NULL) { + unregister_netdevice_queue(t->dev, head); + t = rtnl_dereference(t->next); + } + } + } +} + +static int __net_init ip6gre_init_net(struct net *net) +{ + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int err; + + ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", + ip6gre_tunnel_setup); + if (!ign->fb_tunnel_dev) { + err = -ENOMEM; + goto err_alloc_dev; + } + dev_net_set(ign->fb_tunnel_dev, net); + + ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); + ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; + + err = register_netdev(ign->fb_tunnel_dev); + if (err) + goto err_reg_dev; + + rcu_assign_pointer(ign->tunnels_wc[0], + netdev_priv(ign->fb_tunnel_dev)); + return 0; + +err_reg_dev: + ip6gre_dev_free(ign->fb_tunnel_dev); +err_alloc_dev: + return err; +} + +static void __net_exit ip6gre_exit_net(struct net *net) +{ + struct ip6gre_net *ign; + LIST_HEAD(list); + + ign = net_generic(net, ip6gre_net_id); + rtnl_lock(); + ip6gre_destroy_tunnels(ign, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +static struct pernet_operations ip6gre_net_ops = { + .init = ip6gre_init_net, + .exit = ip6gre_exit_net, + .id = &ip6gre_net_id, + .size = sizeof(struct ip6gre_net), +}; + +static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + __be16 flags; + + if (!data) + return 0; + + flags = 0; + if (data[IFLA_GRE_IFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); + if (data[IFLA_GRE_OFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); + if (flags & (GRE_VERSION|GRE_ROUTING)) + return -EINVAL; + + return 0; +} + +static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + struct in6_addr daddr; + + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + if (!data) + goto out; + + if (data[IFLA_GRE_REMOTE]) { + nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + if (ipv6_addr_any(&daddr)) + return -EINVAL; + } + +out: + return ip6gre_tunnel_validate(tb, data); +} + + +static void ip6gre_netlink_parms(struct nlattr *data[], + struct __ip6_tnl_parm *parms) +{ + memset(parms, 0, sizeof(*parms)); + + if (!data) + return; + + if (data[IFLA_GRE_LINK]) + parms->link = nla_get_u32(data[IFLA_GRE_LINK]); + + if (data[IFLA_GRE_IFLAGS]) + parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + + if (data[IFLA_GRE_OFLAGS]) + parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + + if (data[IFLA_GRE_IKEY]) + parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); + + if (data[IFLA_GRE_OKEY]) + parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); + + if (data[IFLA_GRE_LOCAL]) + nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr)); + + if (data[IFLA_GRE_REMOTE]) + nla_memcpy(&parms->raddr, 
data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + + if (data[IFLA_GRE_TTL]) + parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]); + + if (data[IFLA_GRE_ENCAP_LIMIT]) + parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]); + + if (data[IFLA_GRE_FLOWINFO]) + parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]); + + if (data[IFLA_GRE_FLAGS]) + parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]); +} + +static int ip6gre_tap_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + ip6gre_tnl_link_config(tunnel, 1); + + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static const struct net_device_ops ip6gre_tap_netdev_ops = { + .ndo_init = ip6gre_tap_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6gre_tunnel_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ip6gre_tunnel_change_mtu, + .ndo_get_stats64 = ip6gre_get_stats64, +}; + +static void ip6gre_tap_setup(struct net_device *dev) +{ + + ether_setup(dev); + + dev->netdev_ops = &ip6gre_tap_netdev_ops; + dev->destructor = ip6gre_dev_free; + + dev->iflink = 0; + dev->features |= NETIF_F_NETNS_LOCAL; +} + +static int ip6gre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct ip6_tnl *nt; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int err; + + nt = netdev_priv(dev); + ip6gre_netlink_parms(data, &nt->parms); + + if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + eth_hw_addr_random(dev); + + nt->dev = dev; + ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); + + /* Can use a lockless transmit, unless we generate output sequences */ + if (!(nt->parms.o_flags & GRE_SEQ)) + dev->features |= NETIF_F_LLTX; + + err = register_netdevice(dev); + if (err) + goto out; + + dev_hold(dev); + ip6gre_tunnel_link(ign, nt); + +out: + return err; +} + +static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip6_tnl *t, *nt; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct __ip6_tnl_parm p; + + if (dev == ign->fb_tunnel_dev) + return -EINVAL; + + nt = netdev_priv(dev); + ip6gre_netlink_parms(data, &p); + + t = ip6gre_tunnel_locate(net, &p, 0); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else { + t = nt; + + ip6gre_tunnel_unlink(ign, t); + ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6gre_tunnel_link(ign, t); + netdev_state_change(dev); + } + + return 0; +} + +static size_t ip6gre_get_size(const struct net_device *dev) +{ + return + /* IFLA_GRE_LINK */ + nla_total_size(4) + + /* IFLA_GRE_IFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_OFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_IKEY */ + nla_total_size(4) + + /* IFLA_GRE_OKEY */ + nla_total_size(4) + + /* IFLA_GRE_LOCAL (16-byte IPv6 address, not 4) */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_GRE_REMOTE (16-byte IPv6 address, not 4) */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_GRE_TTL */ + nla_total_size(1) + + /* IFLA_GRE_TOS */ + nla_total_size(1) + + /* IFLA_GRE_ENCAP_LIMIT */ + nla_total_size(1) + + /* IFLA_GRE_FLOWINFO */ + nla_total_size(4) + + /* IFLA_GRE_FLAGS */ + nla_total_size(4) + + 0; +} + +static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct __ip6_tnl_parm *p = &t->parms; + + 
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || + nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || + nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || + nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) || + nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) || + nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || + /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/ + nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || + nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || + nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { + [IFLA_GRE_LINK] = { .type = NLA_U32 }, + [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_IKEY] = { .type = NLA_U32 }, + [IFLA_GRE_OKEY] = { .type = NLA_U32 }, + [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) }, + [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) }, + [IFLA_GRE_TTL] = { .type = NLA_U8 }, + [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 }, + [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 }, + [IFLA_GRE_FLAGS] = { .type = NLA_U32 }, +}; + +static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { + .kind = "ip6gre", + .maxtype = IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6gre_tunnel_setup, + .validate = ip6gre_tunnel_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, +}; + +static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { + .kind = "ip6gretap", + .maxtype = IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6gre_tap_setup, + .validate = ip6gre_tap_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, +}; + +/* + * And now the modules code and kernel interface. + */ + +static int __init ip6gre_init(void) +{ + int err; + + pr_info("GRE over IPv6 tunneling driver\n"); + + err = register_pernet_device(&ip6gre_net_ops); + if (err < 0) + return err; + + err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE); + if (err < 0) { + pr_info("%s: can't add protocol\n", __func__); + goto add_proto_failed; + } + + err = rtnl_link_register(&ip6gre_link_ops); + if (err < 0) + goto rtnl_link_failed; + + err = rtnl_link_register(&ip6gre_tap_ops); + if (err < 0) + goto tap_ops_failed; + +out: + return err; + +tap_ops_failed: + rtnl_link_unregister(&ip6gre_link_ops); +rtnl_link_failed: + inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); +add_proto_failed: + unregister_pernet_device(&ip6gre_net_ops); + goto out; +} + +static void __exit ip6gre_fini(void) +{ + rtnl_link_unregister(&ip6gre_tap_ops); + rtnl_link_unregister(&ip6gre_link_ops); + inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); + unregister_pernet_device(&ip6gre_net_ops); +} + +module_init(ip6gre_init); +module_exit(ip6gre_fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("D. 
Kozlov (xeb@mail.ru)"); +MODULE_DESCRIPTION("GRE over IPv6 tunneling device"); +MODULE_ALIAS_RTNL_LINK("ip6gre"); +MODULE_ALIAS_NETDEV("ip6gre0"); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 47975e363fcd..a52d864d562b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -52,11 +52,9 @@ int ip6_rcv_finish(struct sk_buff *skb) if (sysctl_ip_early_demux && !skb_dst(skb)) { const struct inet6_protocol *ipprot; - rcu_read_lock(); ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); if (ipprot && ipprot->early_demux) ipprot->early_demux(skb); - rcu_read_unlock(); } if (!skb_dst(skb)) ip6_route_input(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9a1d5fe6aef8..cb7e2ded6f08 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -126,7 +126,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) * Locking : hash tables are protected by RCU and RTNL */ -static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) +struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) { struct dst_entry *dst = t->dst_cache; @@ -139,20 +139,23 @@ static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) return dst; } +EXPORT_SYMBOL_GPL(ip6_tnl_dst_check); -static inline void ip6_tnl_dst_reset(struct ip6_tnl *t) +void ip6_tnl_dst_reset(struct ip6_tnl *t) { dst_release(t->dst_cache); t->dst_cache = NULL; } +EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); -static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) +void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *) dst; t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; dst_release(t->dst_cache); t->dst_cache = dst; } +EXPORT_SYMBOL_GPL(ip6_tnl_dst_store); /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses @@ -200,7 +203,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ **/ static struct ip6_tnl __rcu ** -ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p) +ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; @@ -267,7 +270,7 @@ static void ip6_dev_free(struct net_device *dev) * created tunnel or NULL **/ -static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) +static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) { struct net_device *dev; struct ip6_tnl *t; @@ -322,7 +325,7 @@ failed: **/ static struct ip6_tnl *ip6_tnl_locate(struct net *net, - struct ip6_tnl_parm *p, int create) + struct __ip6_tnl_parm *p, int create) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; @@ -374,8 +377,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) * else index to encapsulation limit **/ -static __u16 -parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) +__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; __u8 nexthdr = ipv6h->nexthdr; @@ -425,6 +427,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) } return 0; } +EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim); /** * ip6_tnl_err - tunnel error handler @@ -480,7 +483,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, case ICMPV6_PARAMPROB: teli = 0; if ((*code) == ICMPV6_HDR_FIELD) - teli = parse_tlv_tnl_enc_lim(skb, skb->data); + teli = ip6_tnl_parse_tlv_enc_lim(skb, 
skb->data); if (teli && teli == *info - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; @@ -693,11 +696,11 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, IP6_ECN_set_ce(ipv6_hdr(skb)); } -static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, +__u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) { - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; int ltype = ipv6_addr_type(laddr); int rtype = ipv6_addr_type(raddr); __u32 flags = 0; @@ -715,13 +718,14 @@ static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, } return flags; } +EXPORT_SYMBOL(ip6_tnl_get_cap); /* called with rcu_read_lock() */ -static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, +int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) { - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = dev_net(t->dev); @@ -740,6 +744,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, } return ret; } +EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl); /** * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally @@ -859,9 +864,9 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } -static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) +int ip6_tnl_xmit_ctl(struct ip6_tnl *t) { - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = dev_net(t->dev); @@ -885,6 +890,8 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) } return ret; } +EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl); + /** * ip6_tnl_xmit2 - encapsulate packet and send * @skb: the outgoing socket buffer @@ -1085,7 +1092,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) return -1; - offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb)); + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; @@ -1152,7 +1159,7 @@ tx_err: static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); @@ -1215,7 +1222,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) **/ static int -ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) +ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) { t->parms.laddr = p->laddr; t->parms.raddr = p->raddr; @@ -1230,6 +1237,34 @@ ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) return 0; } +static void +ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) +{ + p->laddr = u->laddr; + p->raddr = u->raddr; + p->flags = u->flags; + p->hop_limit = u->hop_limit; + p->encap_limit = u->encap_limit; + p->flowinfo = u->flowinfo; + p->link = u->link; + p->proto = u->proto; + memcpy(p->name, u->name, sizeof(u->name)); +} + +static void +ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) +{ + u->laddr = p->laddr; + u->raddr = p->raddr; + u->flags = p->flags; + u->hop_limit = p->hop_limit; + u->encap_limit = p->encap_limit; + u->flowinfo = p->flowinfo; + u->link = p->link; + u->proto = p->proto; + memcpy(u->name, p->name, sizeof(u->name)); +} + /** * ip6_tnl_ioctl - 
configure ipv6 tunnels from userspace * @dev: virtual device associated with tunnel @@ -1263,6 +1298,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; struct ip6_tnl_parm p; + struct __ip6_tnl_parm p1; struct ip6_tnl *t = NULL; struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); @@ -1274,11 +1310,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = -EFAULT; break; } - t = ip6_tnl_locate(net, &p, 0); + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, 0); + } else { + memset(&p, 0, sizeof(p)); } if (t == NULL) t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof (p)); + ip6_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { err = -EFAULT; } @@ -1295,7 +1334,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && p.proto != 0) break; - t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL); + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { if (t != NULL) { if (t->dev != dev) { @@ -1307,13 +1347,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) ip6_tnl_unlink(ip6n, t); synchronize_net(); - err = ip6_tnl_change(t, &p); + err = ip6_tnl_change(t, &p1); ip6_tnl_link(ip6n, t); netdev_state_change(dev); } if (t) { err = 0; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p))) + ip6_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; } else @@ -1329,7 +1370,9 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) break; err = -ENOENT; - if ((t = ip6_tnl_locate(net, &p, 0)) == NULL) + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, 0); + if (t == NULL) break; err = -EPERM; if (t->dev == ip6n->fb_tnl_dev) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index da2e92d05c15..745a32042950 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -307,10 +307,10 @@ static int __net_init ipv6_proc_init_net(struct net *net) goto proc_dev_snmp6_fail; return 0; +proc_dev_snmp6_fail: + proc_net_remove(net, "snmp6"); proc_snmp6_fail: proc_net_remove(net, "sockstat6"); -proc_dev_snmp6_fail: - proc_net_remove(net, "dev_snmp6"); return -ENOMEM; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cf02cb97bbdd..0ddf2d132e7f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -965,7 +965,7 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, { int flags = 0; - fl6->flowi6_iif = net->loopback_dev->ifindex; + fl6->flowi6_iif = LOOPBACK_IFINDEX; if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) flags |= RT6_LOOKUP_F_IFACE; @@ -2480,12 +2480,8 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) goto nla_put_failure; - if (!(rt->rt6i_flags & RTF_EXPIRES)) - expires = 0; - else if (rt->dst.expires - jiffies < INT_MAX) - expires = rt->dst.expires - jiffies; - else - expires = INT_MAX; + + expires = (rt->rt6i_flags & RTF_EXPIRES) ? 
rt->dst.expires - jiffies : 0; if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto nla_put_failure; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 221224e72507..cd49de3678fb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -94,6 +94,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, } #endif +static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + const struct rt6_info *rt = (const struct rt6_info *)dst; + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + if (rt->rt6i_node) + inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; +} + static void tcp_v6_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { @@ -1270,6 +1282,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(newsk, dst, NULL, NULL); + inet6_sk_rx_dst_set(newsk, skb); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; @@ -1299,7 +1312,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; if (treq->pktopts != NULL) { - newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC); + newnp->pktoptions = skb_clone(treq->pktopts, + sk_gfp_atomic(sk, GFP_ATOMIC)); consume_skb(treq->pktopts); treq->pktopts = NULL; if (newnp->pktoptions) @@ -1349,7 +1363,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * across. Shucks. */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, - AF_INET6, key->key, key->keylen, GFP_ATOMIC); + AF_INET6, key->key, key->keylen, + sk_gfp_atomic(sk, GFP_ATOMIC)); } #endif @@ -1442,10 +1457,20 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, GFP_ATOMIC); + opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + struct dst_entry *dst = sk->sk_rx_dst; + sock_rps_save_rxhash(sk, skb); + if (dst) { + if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || + dst->ops->check(dst, np->rx_dst_cookie) == NULL) { + dst_release(dst); + sk->sk_rx_dst = NULL; + } + } + if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; if (opt_skb) @@ -1703,9 +1728,9 @@ static void tcp_v6_early_demux(struct sk_buff *skb) struct dst_entry *dst = sk->sk_rx_dst; struct inet_sock *icsk = inet_sk(sk); if (dst) - dst = dst_check(dst, 0); + dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); if (dst && - icsk->rx_dst_ifindex == inet6_iif(skb)) + icsk->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } @@ -1721,6 +1746,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, + .sk_rx_dst_set = inet6_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct ipv6hdr), @@ -1752,6 +1778,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, + .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct iphdr), @@ -1873,7 +1900,7 @@ static void get_tcp6_sock(struct seq_file *seq, 
struct sock *sp, int i) tp->write_seq-tp->snd_una, (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), timer_active, - jiffies_to_clock_t(timer_expires - jiffies), + jiffies_delta_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, sock_i_uid(sp), icsk->icsk_probes_out, @@ -1893,10 +1920,7 @@ static void get_timewait6_sock(struct seq_file *seq, const struct in6_addr *dest, *src; __u16 destp, srcp; const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); - int ttd = tw->tw_ttd - jiffies; - - if (ttd < 0) - ttd = 0; + long delta = tw->tw_ttd - jiffies; dest = &tw6->tw_v6_daddr; src = &tw6->tw_v6_rcv_saddr; @@ -1912,7 +1936,7 @@ static void get_timewait6_sock(struct seq_file *seq, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, tw->tw_substate, 0, 0, - 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, atomic_read(&tw->tw_refcnt), tw); } @@ -2015,7 +2039,7 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM .proto_cgroup = tcp_proto_cgroup, #endif }; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ef39812107b1..f8c4c08ffb60 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -73,6 +73,13 @@ static int xfrm6_get_tos(const struct flowi *fl) return 0; } +static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst) +{ + struct rt6_info *rt = (struct rt6_info *)xdst; + + rt6_init_peer(rt, net->ipv6.peers); +} + static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { @@ -286,6 +293,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, + .init_dst = xfrm6_init_dst, .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, diff --git a/net/key/af_key.c b/net/key/af_key.c index 34e418508a67..ec7d161c129b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3024,7 +3024,7 @@ static u32 get_acqseq(void) return res; } -static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp, int dir) +static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp) { struct sk_buff *skb; struct sadb_msg *hdr; @@ -3105,7 +3105,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t); pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; - pol->sadb_x_policy_dir = dir+1; + pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; pol->sadb_x_policy_id = xp->index; /* Set sadb_comb's. 
*/ diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 35e1e4bde587..927547171bc7 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -410,6 +410,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, lsa->l2tp_family = AF_INET6; lsa->l2tp_flowinfo = 0; lsa->l2tp_scope_id = 0; + lsa->l2tp_unused = 0; if (peer) { if (!lsk->peer_conn_id) return -ENOTCONN; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index f6fe4d400502..c2190005a114 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -969,14 +969,13 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, struct sockaddr_llc sllc; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - int rc = 0; + int rc = -EBADF; memset(&sllc, 0, sizeof(sllc)); lock_sock(sk); if (sock_flag(sk, SOCK_ZAPPED)) goto out; *uaddrlen = sizeof(sllc); - memset(uaddr, 0, *uaddrlen); if (peer) { rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) @@ -1206,7 +1205,7 @@ static int __init llc2_init(void) rc = llc_proc_init(); if (rc != 0) { printk(llc_proc_err_msg); - goto out_unregister_llc_proto; + goto out_station; } rc = llc_sysctl_init(); if (rc) { @@ -1226,7 +1225,8 @@ out_sysctl: llc_sysctl_exit(); out_proc: llc_proc_exit(); -out_unregister_llc_proto: +out_station: + llc_station_exit(); proto_unregister(&llc_proto); goto out; } diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index e32cab44ea95..dd3e83328ad5 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -42,6 +42,7 @@ static void (*llc_type_handlers[2])(struct llc_sap *sap, void llc_add_pack(int type, void (*handler)(struct llc_sap *sap, struct sk_buff *skb)) { + smp_wmb(); /* ensure initialisation is complete before it's called */ if (type == LLC_DEST_SAP || type == LLC_DEST_CONN) llc_type_handlers[type - 1] = handler; } @@ -50,11 +51,19 @@ void llc_remove_pack(int type) { if (type == LLC_DEST_SAP || type == LLC_DEST_CONN) llc_type_handlers[type - 1] = NULL; + synchronize_net(); } void llc_set_station_handler(void (*handler)(struct sk_buff *skb)) { + /* Ensure initialisation is complete before it's called */ + if (handler) + smp_wmb(); + llc_station_handler = handler; + + if (!handler) + synchronize_net(); } /** @@ -150,6 +159,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, int dest; int (*rcv)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); + void (*sta_handler)(struct sk_buff *skb); + void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb); if (!net_eq(dev_net(dev), &init_net)) goto drop; @@ -182,7 +193,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, */ rcv = rcu_dereference(sap->rcv_func); dest = llc_pdu_type(skb); - if (unlikely(!dest || !llc_type_handlers[dest - 1])) { + sap_handler = dest ? 
ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL; + if (unlikely(!sap_handler)) { if (rcv) rcv(skb, dev, pt, orig_dev); else @@ -193,7 +205,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, if (cskb) rcv(cskb, dev, pt, orig_dev); } - llc_type_handlers[dest - 1](sap, skb); + sap_handler(sap, skb); } llc_sap_put(sap); out: @@ -202,9 +214,10 @@ drop: kfree_skb(skb); goto out; handle_station: - if (!llc_station_handler) + sta_handler = ACCESS_ONCE(llc_station_handler); + if (!sta_handler) goto drop; - llc_station_handler(skb); + sta_handler(skb); goto out; } diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 39a8d8924b9c..b2f2bac2c2a2 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -268,7 +268,7 @@ static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) out: return rc; free: - kfree_skb(skb); + kfree_skb(nskb); goto out; } @@ -293,7 +293,7 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) out: return rc; free: - kfree_skb(skb); + kfree_skb(nskb); goto out; } @@ -322,7 +322,7 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) out: return rc; free: - kfree_skb(skb); + kfree_skb(nskb); goto out; } @@ -687,12 +687,8 @@ static void llc_station_rcv(struct sk_buff *skb) llc_station_state_process(skb); } -int __init llc_station_init(void) +void __init llc_station_init(void) { - int rc = -ENOBUFS; - struct sk_buff *skb; - struct llc_station_state_ev *ev; - skb_queue_head_init(&llc_main_station.mac_pdu_q); skb_queue_head_init(&llc_main_station.ev_q.list); spin_lock_init(&llc_main_station.ev_q.lock); @@ -700,23 +696,12 @@ int __init llc_station_init(void) (unsigned long)&llc_main_station); llc_main_station.ack_timer.expires = jiffies + sysctl_llc_station_ack_timeout; - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) - goto out; - rc = 0; - llc_set_station_handler(llc_station_rcv); - ev = llc_station_ev(skb); - memset(ev, 0, sizeof(*ev)); llc_main_station.maximum_retry = 1; - llc_main_station.state = LLC_STATION_STATE_DOWN; - ev->type = LLC_STATION_EV_TYPE_SIMPLE; - ev->prim_type = LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK; - rc = llc_station_next_state(skb); -out: - return rc; + llc_main_station.state = LLC_STATION_STATE_UP; + llc_set_station_handler(llc_station_rcv); } -void __exit llc_station_exit(void) +void llc_station_exit(void) { llc_set_station_handler(NULL); } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6fac18c0423f..0e2f83e71277 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -136,10 +136,13 @@ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) * mesh_accept_plinks_update - update accepting_plink in local mesh beacons * * @sdata: mesh interface in which mesh beacons are going to be updated + * + * Returns: beacon changed flag if the beacon content changed. 
*/ -void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) +u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) { bool free_plinks; + u32 changed = 0; /* In case mesh_plink_free_count > 0 and mesh_plinktbl_capacity == 0, * the mesh interface might be able to establish plinks with peers that @@ -149,8 +152,12 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) */ free_plinks = mesh_plink_availables(sdata); - if (free_plinks != sdata->u.mesh.accepting_plinks) - ieee80211_mesh_housekeeping_timer((unsigned long) sdata); + if (free_plinks != sdata->u.mesh.accepting_plinks) { + sdata->u.mesh.accepting_plinks = free_plinks; + changed = BSS_CHANGED_BEACON; + } + + return changed; } int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) @@ -262,7 +269,6 @@ mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) neighbors = (neighbors > 15) ? 15 : neighbors; *pos++ = neighbors << 1; /* Mesh capability */ - ifmsh->accepting_plinks = mesh_plink_availables(sdata); *pos = MESHCONF_CAPAB_FORWARDING; *pos |= ifmsh->accepting_plinks ? MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; @@ -521,14 +527,13 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_mesh *ifmsh) { - bool free_plinks; + u32 changed; ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); mesh_path_expire(sdata); - free_plinks = mesh_plink_availables(sdata); - if (free_plinks != sdata->u.mesh.accepting_plinks) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + changed = mesh_accept_plinks_update(sdata); + ieee80211_bss_info_change_notify(sdata, changed); mod_timer(&ifmsh->housekeeping_timer, round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); @@ -622,6 +627,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); + del_timer_sync(&sdata->u.mesh.mesh_path_timer); /* * If the timer fired while we waited for it, it will have * requeued the work. 
Now the work will be running again @@ -634,6 +640,8 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) local->fif_other_bss--; atomic_dec(&local->iff_allmultis); ieee80211_configure_filter(local); + + sdata->u.mesh.timers_running = 0; } static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index faaa39bcfd10..13fd5b5fdb0a 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -282,7 +282,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, u8 *hw_addr, struct ieee802_11_elems *ie); bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); -void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); +u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); void mesh_plink_broken(struct sta_info *sta); void mesh_plink_deactivate(struct sta_info *sta); int mesh_plink_open(struct sta_info *sta); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index af671b984df3..f20e9f26d137 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -48,17 +48,17 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, u8 *da, __le16 llid, __le16 plid, __le16 reason); static inline -void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) +u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_inc(&sdata->u.mesh.mshstats.estab_plinks); - mesh_accept_plinks_update(sdata); + return mesh_accept_plinks_update(sdata); } static inline -void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) +u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_dec(&sdata->u.mesh.mshstats.estab_plinks); - mesh_accept_plinks_update(sdata); + return mesh_accept_plinks_update(sdata); } /** @@ -170,22 +170,21 @@ out: * @sta: mesh peer link to deactivate * * All mesh paths with this peer as next hop will be flushed + * Returns beacon changed flag if the beacon content changed. 
* * Locking: the caller must hold sta->lock */ -static bool __mesh_plink_deactivate(struct sta_info *sta) +static u32 __mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - bool deactivated = false; + u32 changed = 0; - if (sta->plink_state == NL80211_PLINK_ESTAB) { - mesh_plink_dec_estab_count(sdata); - deactivated = true; - } + if (sta->plink_state == NL80211_PLINK_ESTAB) + changed = mesh_plink_dec_estab_count(sdata); sta->plink_state = NL80211_PLINK_BLOCKED; mesh_path_flush_by_nexthop(sta); - return deactivated; + return changed; } /** @@ -198,18 +197,17 @@ static bool __mesh_plink_deactivate(struct sta_info *sta) void mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - bool deactivated; + u32 changed; spin_lock_bh(&sta->lock); - deactivated = __mesh_plink_deactivate(sta); + changed = __mesh_plink_deactivate(sta); sta->reason = cpu_to_le16(WLAN_REASON_MESH_PEER_CANCELED); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, sta->llid, sta->plid, sta->reason); spin_unlock_bh(&sta->lock); - if (deactivated) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + ieee80211_bss_info_change_notify(sdata, changed); } static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, @@ -541,15 +539,14 @@ int mesh_plink_open(struct sta_info *sta) void mesh_plink_block(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - bool deactivated; + u32 changed; spin_lock_bh(&sta->lock); - deactivated = __mesh_plink_deactivate(sta); + changed = __mesh_plink_deactivate(sta); sta->plink_state = NL80211_PLINK_BLOCKED; spin_unlock_bh(&sta->lock); - if (deactivated) - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); + ieee80211_bss_info_change_notify(sdata, changed); } @@ -852,9 +849,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m del_timer(&sta->plink_timer); sta->plink_state = NL80211_PLINK_ESTAB; spin_unlock_bh(&sta->lock); - mesh_plink_inc_estab_count(sdata); + changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); - changed |= BSS_CHANGED_BEACON; mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); break; @@ -888,9 +884,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m del_timer(&sta->plink_timer); sta->plink_state = NL80211_PLINK_ESTAB; spin_unlock_bh(&sta->lock); - mesh_plink_inc_estab_count(sdata); + changed |= mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); - changed |= BSS_CHANGED_BEACON; mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); mesh_plink_frame_tx(sdata, @@ -908,13 +903,12 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m case CLS_ACPT: reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); sta->reason = reason; - __mesh_plink_deactivate(sta); + changed |= __mesh_plink_deactivate(sta); sta->plink_state = NL80211_PLINK_HOLDING; llid = sta->llid; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); changed |= mesh_set_ht_prot_mode(sdata); - changed |= BSS_CHANGED_BEACON; mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, llid, plid, reason); break; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cef0c9e79aba..a4a5acdbaa4d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1430,6 +1430,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, 
del_timer_sync(&sdata->u.mgd.bcn_mon_timer); del_timer_sync(&sdata->u.mgd.timer); del_timer_sync(&sdata->u.mgd.chswitch_timer); + + sdata->u.mgd.timers_running = 0; } void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index bcaee5d12839..839dd9737989 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -299,7 +299,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, if (local->scan_req != local->int_scan_req) cfg80211_scan_done(local->scan_req, aborted); local->scan_req = NULL; - local->scan_sdata = NULL; + rcu_assign_pointer(local->scan_sdata, NULL); local->scanning = 0; local->scan_channel = NULL; @@ -984,7 +984,6 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) kfree(local->sched_scan_ies.ie[i]); drv_sched_scan_stop(local, sdata); - rcu_assign_pointer(local->sched_scan_sdata, NULL); } out: mutex_unlock(&local->mtx); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 03d3fc6d9d64..3c601378d27e 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2765,6 +2765,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_timeout_user t; + memset(&t, 0, sizeof(t)); __ip_vs_get_timeouts(net, &t); if (copy_to_user(user, &t, sizeof(t)) != 0) ret = -EFAULT; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 45cf602a76bc..527651a53a45 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -361,23 +361,6 @@ static void evict_oldest_expect(struct nf_conn *master, } } -static inline int refresh_timer(struct nf_conntrack_expect *i) -{ - struct nf_conn_help *master_help = nfct_help(i->master); - const struct nf_conntrack_expect_policy *p; - - if (!del_timer(&i->timeout)) - return 0; - - p = &rcu_dereference_protected( - master_help->helper, - lockdep_is_held(&nf_conntrack_lock) - )->expect_policy[i->class]; - i->timeout.expires = jiffies + p->timeout * HZ; - add_timer(&i->timeout); - return 1; -} - static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) { const struct nf_conntrack_expect_policy *p; @@ -386,7 +369,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(expect); - struct hlist_node *n; + struct hlist_node *n, *next; unsigned int h; int ret = 1; @@ -395,12 +378,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { - /* Refresh timer: if it's dying, ignore.. 
*/ - if (refresh_timer(i)) { - ret = 0; - goto out; + if (del_timer(&i->timeout)) { + nf_ct_unlink_expect(i); + nf_ct_expect_put(i); + break; } } else if (expect_clash(i, expect)) { ret = -EBUSY; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 14f67a2cbcb5..da4fc37a8578 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1896,10 +1896,15 @@ static int ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) { struct nlattr *cda[CTA_MAX+1]; + int ret; nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); - return ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); + spin_lock_bh(&nf_conntrack_lock); + ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); + spin_unlock_bh(&nf_conntrack_lock); + + return ret; } static struct nfq_ct_hook ctnetlink_nfqueue_hook = { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 758a1bacc126..5c0a112aeee6 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -183,12 +183,12 @@ static int media_len(const struct nf_conn *ct, const char *dptr, return len + digits_len(ct, dptr, limit, shift); } -static int parse_addr(const struct nf_conn *ct, const char *cp, - const char **endp, union nf_inet_addr *addr, - const char *limit) +static int sip_parse_addr(const struct nf_conn *ct, const char *cp, + const char **endp, union nf_inet_addr *addr, + const char *limit, bool delim) { const char *end; - int ret = 0; + int ret; if (!ct) return 0; @@ -197,16 +197,28 @@ static int parse_addr(const struct nf_conn *ct, const char *cp, switch (nf_ct_l3num(ct)) { case AF_INET: ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end); + if (ret == 0) + return 0; break; case AF_INET6: + if (cp < limit && *cp == '[') + cp++; + else if (delim) + return 0; + ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end); + if (ret == 0) + return 0; + + if (end < limit && *end == ']') + end++; + else if (delim) + return 0; break; default: BUG(); } - if (ret == 0 || end == cp) - return 0; if (endp) *endp = end; return 1; @@ -219,7 +231,7 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr, union nf_inet_addr addr; const char *aux = dptr; - if (!parse_addr(ct, dptr, &dptr, &addr, limit)) { + if (!sip_parse_addr(ct, dptr, &dptr, &addr, limit, true)) { pr_debug("ip: %s parse failed.!\n", dptr); return 0; } @@ -296,7 +308,7 @@ int ct_sip_parse_request(const struct nf_conn *ct, return 0; dptr += shift; - if (!parse_addr(ct, dptr, &end, addr, limit)) + if (!sip_parse_addr(ct, dptr, &end, addr, limit, true)) return -1; if (end < limit && *end == ':') { end++; @@ -550,7 +562,7 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, if (ret == 0) return ret; - if (!parse_addr(ct, dptr + *matchoff, &c, addr, limit)) + if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true)) return -1; if (*c == ':') { c++; @@ -599,7 +611,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, const char *name, unsigned int *matchoff, unsigned int *matchlen, - union nf_inet_addr *addr) + union nf_inet_addr *addr, bool delim) { const char *limit = dptr + datalen; const char *start, *end; @@ -613,7 +625,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, return 0; start += strlen(name); - if (!parse_addr(ct, start, &end, addr, limit)) + if (!sip_parse_addr(ct, start, &end, addr, limit, 
delim)) return 0; *matchoff = start - dptr; *matchlen = end - start; @@ -675,6 +687,47 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, return 1; } +static int sdp_parse_addr(const struct nf_conn *ct, const char *cp, + const char **endp, union nf_inet_addr *addr, + const char *limit) +{ + const char *end; + int ret; + + memset(addr, 0, sizeof(*addr)); + switch (nf_ct_l3num(ct)) { + case AF_INET: + ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end); + break; + case AF_INET6: + ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end); + break; + default: + BUG(); + } + + if (ret == 0) + return 0; + if (endp) + *endp = end; + return 1; +} + +/* skip ip address. returns its length. */ +static int sdp_addr_len(const struct nf_conn *ct, const char *dptr, + const char *limit, int *shift) +{ + union nf_inet_addr addr; + const char *aux = dptr; + + if (!sdp_parse_addr(ct, dptr, &dptr, &addr, limit)) { + pr_debug("ip: %s parse failed.!\n", dptr); + return 0; + } + + return dptr - aux; +} + /* SDP header parsing: a SDP session description contains an ordered set of * headers, starting with a section containing general session parameters, * optionally followed by multiple media descriptions. @@ -686,10 +739,10 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, */ static const struct sip_header ct_sdp_hdrs[] = { [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len), - [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", epaddr_len), - [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", epaddr_len), - [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", epaddr_len), - [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", epaddr_len), + [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", sdp_addr_len), + [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", sdp_addr_len), + [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", sdp_addr_len), + [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", sdp_addr_len), [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len), }; @@ -775,8 +828,8 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr, if (ret <= 0) return ret; - if (!parse_addr(ct, dptr + *matchoff, NULL, addr, - dptr + *matchoff + *matchlen)) + if (!sdp_parse_addr(ct, dptr + *matchoff, NULL, addr, + dptr + *matchoff + *matchlen)) return -1; return 1; } @@ -1515,7 +1568,6 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly; -static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { @@ -1585,9 +1637,9 @@ static int __init nf_conntrack_sip_init(void) sip[i][j].me = THIS_MODULE; if (ports[i] == SIP_PORT) - sprintf(sip_names[i][j], "sip"); + sprintf(sip[i][j].name, "sip"); else - sprintf(sip_names[i][j], "sip-%u", i); + sprintf(sip[i][j].name, "sip-%u", i); pr_debug("port #%u: %u\n", i, ports[i]); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5463969da45b..1445d73533ed 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1362,7 +1362,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &scm; - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, true); if (err < 0) return err; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 320fa0e6951a..f3f96badf5aa 100644 --- a/net/openvswitch/actions.c +++ 
b/net/openvswitch/actions.c @@ -325,9 +325,6 @@ static int sample(struct datapath *dp, struct sk_buff *skb, } } - if (!acts_list) - return 0; - return do_execute_actions(dp, skb, nla_data(acts_list), nla_len(acts_list), true); } diff --git a/net/packet/Kconfig b/net/packet/Kconfig index 0060e3b396b7..cc55b35f80e5 100644 --- a/net/packet/Kconfig +++ b/net/packet/Kconfig @@ -14,3 +14,11 @@ config PACKET be called af_packet. If unsure, say Y. + +config PACKET_DIAG + tristate "Packet: sockets monitoring interface" + depends on PACKET + default n + ---help--- + Support for PF_PACKET sockets monitoring interface used by the ss tool. + If unsure, say Y. diff --git a/net/packet/Makefile b/net/packet/Makefile index 81183eabfdec..9df61347a3c3 100644 --- a/net/packet/Makefile +++ b/net/packet/Makefile @@ -3,3 +3,5 @@ # obj-$(CONFIG_PACKET) += af_packet.o +obj-$(CONFIG_PACKET_DIAG) += af_packet_diag.o +af_packet_diag-y += diag.o diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ceaca7c134a0..fe0912f161ce 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -93,6 +93,8 @@ #include <net/inet_common.h> #endif +#include "internal.h" + /* Assumptions: - if device has no dev->hard_header routine, it adds and removes ll header @@ -146,14 +148,6 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it) /* Private packet socket structures. */ -struct packet_mclist { - struct packet_mclist *next; - int ifindex; - int count; - unsigned short type; - unsigned short alen; - unsigned char addr[MAX_ADDR_LEN]; -}; /* identical to struct packet_mreq except it has * a longer address field. */ @@ -175,63 +169,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, #define BLK_PLUS_PRIV(sz_of_priv) \ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) -/* kbdq - kernel block descriptor queue */ -struct tpacket_kbdq_core { - struct pgv *pkbdq; - unsigned int feature_req_word; - unsigned int hdrlen; - unsigned char reset_pending_on_curr_blk; - unsigned char delete_blk_timer; - unsigned short kactive_blk_num; - unsigned short blk_sizeof_priv; - - /* last_kactive_blk_num: - * trick to see if user-space has caught up - * in order to avoid refreshing timer when every single pkt arrives. 
- */ - unsigned short last_kactive_blk_num; - - char *pkblk_start; - char *pkblk_end; - int kblk_size; - unsigned int knum_blocks; - uint64_t knxt_seq_num; - char *prev; - char *nxt_offset; - struct sk_buff *skb; - - atomic_t blk_fill_in_prog; - - /* Default is set to 8ms */ -#define DEFAULT_PRB_RETIRE_TOV (8) - - unsigned short retire_blk_tov; - unsigned short version; - unsigned long tov_in_jiffies; - - /* timer to retire an outstanding block */ - struct timer_list retire_blk_timer; -}; - #define PGV_FROM_VMALLOC 1 -struct pgv { - char *buffer; -}; - -struct packet_ring_buffer { - struct pgv *pg_vec; - unsigned int head; - unsigned int frames_per_block; - unsigned int frame_size; - unsigned int frame_max; - - unsigned int pg_vec_order; - unsigned int pg_vec_pages; - unsigned int pg_vec_len; - - struct tpacket_kbdq_core prb_bdqc; - atomic_t pending; -}; #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) @@ -269,52 +207,6 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); -struct packet_fanout; -struct packet_sock { - /* struct sock has to be the first member of packet_sock */ - struct sock sk; - struct packet_fanout *fanout; - struct tpacket_stats stats; - union tpacket_stats_u stats_u; - struct packet_ring_buffer rx_ring; - struct packet_ring_buffer tx_ring; - int copy_thresh; - spinlock_t bind_lock; - struct mutex pg_vec_lock; - unsigned int running:1, /* prot_hook is attached*/ - auxdata:1, - origdev:1, - has_vnet_hdr:1; - int ifindex; /* bound device */ - __be16 num; - struct packet_mclist *mclist; - atomic_t mapped; - enum tpacket_versions tp_version; - unsigned int tp_hdrlen; - unsigned int tp_reserve; - unsigned int tp_loss:1; - unsigned int tp_tstamp; - struct packet_type prot_hook ____cacheline_aligned_in_smp; -}; - -#define PACKET_FANOUT_MAX 256 - -struct packet_fanout { -#ifdef CONFIG_NET_NS - struct net *net; -#endif - unsigned int num_members; - u16 id; - u8 type; - u8 defrag; - atomic_t rr_cur; - struct list_head list; - struct sock *arr[PACKET_FANOUT_MAX]; - spinlock_t lock; - atomic_t sk_ref; - struct packet_type prot_hook ____cacheline_aligned_in_smp; -}; - struct packet_skb_cb { unsigned int origlen; union { @@ -334,11 +226,6 @@ struct packet_skb_cb { (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? 
\ ((x)->kactive_blk_num+1) : 0) -static struct packet_sock *pkt_sk(struct sock *sk) -{ - return (struct packet_sock *)sk; -} - static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); @@ -1079,7 +966,7 @@ static void *packet_current_rx_frame(struct packet_sock *po, default: WARN(1, "TPACKET version not supported\n"); BUG(); - return 0; + return NULL; } } @@ -1243,7 +1130,8 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } -static DEFINE_MUTEX(fanout_mutex); +DEFINE_MUTEX(fanout_mutex); +EXPORT_SYMBOL_GPL(fanout_mutex); static LIST_HEAD(fanout_list); static void __fanout_link(struct sock *sk, struct packet_sock *po) @@ -1273,6 +1161,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_unlock(&f->lock); } +bool match_fanout_group(struct packet_type *ptype, struct sock * sk) +{ + if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) + return true; + + return false; +} + static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { struct packet_sock *po = pkt_sk(sk); @@ -1325,6 +1221,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.id_match = match_fanout_group; dev_add_pack(&match->prot_hook); list_add(&match->list, &fanout_list); } @@ -1355,9 +1252,9 @@ static void fanout_release(struct sock *sk) if (!f) return; + mutex_lock(&fanout_mutex); po->fanout = NULL; - mutex_lock(&fanout_mutex); if (atomic_dec_and_test(&f->sk_ref)) { list_del(&f->list); dev_remove_pack(&f->prot_hook); @@ -1936,7 +1833,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) if (likely(po->tx_ring.pg_vec)) { ph = skb_shinfo(skb)->destructor_arg; - BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); BUG_ON(atomic_read(&po->tx_ring.pending) == 0); atomic_dec(&po->tx_ring.pending); __packet_set_status(po, ph, TP_STATUS_AVAILABLE); diff --git a/net/packet/diag.c b/net/packet/diag.c new file mode 100644 index 000000000000..bc33fbe8a5ef --- /dev/null +++ b/net/packet/diag.c @@ -0,0 +1,242 @@ +#include <linux/module.h> +#include <linux/sock_diag.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/packet_diag.h> +#include <net/net_namespace.h> +#include <net/sock.h> + +#include "internal.h" + +static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) +{ + struct packet_diag_info pinfo; + + pinfo.pdi_index = po->ifindex; + pinfo.pdi_version = po->tp_version; + pinfo.pdi_reserve = po->tp_reserve; + pinfo.pdi_copy_thresh = po->copy_thresh; + pinfo.pdi_tstamp = po->tp_tstamp; + + pinfo.pdi_flags = 0; + if (po->running) + pinfo.pdi_flags |= PDI_RUNNING; + if (po->auxdata) + pinfo.pdi_flags |= PDI_AUXDATA; + if (po->origdev) + pinfo.pdi_flags |= PDI_ORIGDEV; + if (po->has_vnet_hdr) + pinfo.pdi_flags |= PDI_VNETHDR; + if (po->tp_loss) + pinfo.pdi_flags |= PDI_LOSS; + + return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo); +} + +static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb) +{ + struct nlattr *mca; + struct packet_mclist *ml; + + mca = nla_nest_start(nlskb, PACKET_DIAG_MCLIST); + if (!mca) + return -EMSGSIZE; + + rtnl_lock(); + for (ml = po->mclist; ml; ml = ml->next) { + struct packet_diag_mclist *dml; + + dml = nla_reserve_nohdr(nlskb, sizeof(*dml)); + if 
(!dml) { + rtnl_unlock(); + nla_nest_cancel(nlskb, mca); + return -EMSGSIZE; + } + + dml->pdmc_index = ml->ifindex; + dml->pdmc_type = ml->type; + dml->pdmc_alen = ml->alen; + dml->pdmc_count = ml->count; + BUILD_BUG_ON(sizeof(dml->pdmc_addr) != sizeof(ml->addr)); + memcpy(dml->pdmc_addr, ml->addr, sizeof(ml->addr)); + } + + rtnl_unlock(); + nla_nest_end(nlskb, mca); + + return 0; +} + +static int pdiag_put_ring(struct packet_ring_buffer *ring, int ver, int nl_type, + struct sk_buff *nlskb) +{ + struct packet_diag_ring pdr; + + if (!ring->pg_vec || ((ver > TPACKET_V2) && + (nl_type == PACKET_DIAG_TX_RING))) + return 0; + + pdr.pdr_block_size = ring->pg_vec_pages << PAGE_SHIFT; + pdr.pdr_block_nr = ring->pg_vec_len; + pdr.pdr_frame_size = ring->frame_size; + pdr.pdr_frame_nr = ring->frame_max + 1; + + if (ver > TPACKET_V2) { + pdr.pdr_retire_tmo = ring->prb_bdqc.retire_blk_tov; + pdr.pdr_sizeof_priv = ring->prb_bdqc.blk_sizeof_priv; + pdr.pdr_features = ring->prb_bdqc.feature_req_word; + } else { + pdr.pdr_retire_tmo = 0; + pdr.pdr_sizeof_priv = 0; + pdr.pdr_features = 0; + } + + return nla_put(nlskb, nl_type, sizeof(pdr), &pdr); +} + +static int pdiag_put_rings_cfg(struct packet_sock *po, struct sk_buff *skb) +{ + int ret; + + mutex_lock(&po->pg_vec_lock); + ret = pdiag_put_ring(&po->rx_ring, po->tp_version, + PACKET_DIAG_RX_RING, skb); + if (!ret) + ret = pdiag_put_ring(&po->tx_ring, po->tp_version, + PACKET_DIAG_TX_RING, skb); + mutex_unlock(&po->pg_vec_lock); + + return ret; +} + +static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) +{ + int ret = 0; + + mutex_lock(&fanout_mutex); + if (po->fanout) { + u32 val; + + val = (u32)po->fanout->id | ((u32)po->fanout->type << 16); + ret = nla_put_u32(nlskb, PACKET_DIAG_FANOUT, val); + } + mutex_unlock(&fanout_mutex); + + return ret; +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, + u32 pid, u32 seq, u32 flags, int sk_ino) +{ + struct nlmsghdr *nlh; + struct packet_diag_msg *rp; + struct packet_sock *po = pkt_sk(sk); + + nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); + if (!nlh) + return -EMSGSIZE; + + rp = nlmsg_data(nlh); + rp->pdiag_family = AF_PACKET; + rp->pdiag_type = sk->sk_type; + rp->pdiag_num = ntohs(po->num); + rp->pdiag_ino = sk_ino; + sock_diag_save_cookie(sk, rp->pdiag_cookie); + + if ((req->pdiag_show & PACKET_SHOW_INFO) && + pdiag_put_info(po, skb)) + goto out_nlmsg_trim; + + if ((req->pdiag_show & PACKET_SHOW_MCLIST) && + pdiag_put_mclist(po, skb)) + goto out_nlmsg_trim; + + if ((req->pdiag_show & PACKET_SHOW_RING_CFG) && + pdiag_put_rings_cfg(po, skb)) + goto out_nlmsg_trim; + + if ((req->pdiag_show & PACKET_SHOW_FANOUT) && + pdiag_put_fanout(po, skb)) + goto out_nlmsg_trim; + + return nlmsg_end(skb, nlh); + +out_nlmsg_trim: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int num = 0, s_num = cb->args[0]; + struct packet_diag_req *req; + struct net *net; + struct sock *sk; + struct hlist_node *node; + + net = sock_net(skb->sk); + req = nlmsg_data(cb->nlh); + + rcu_read_lock(); + sk_for_each_rcu(sk, node, &net->packet.sklist) { + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) + goto next; + + if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + sock_i_ino(sk)) < 0) + goto done; +next: + num++; + } +done: + rcu_read_unlock(); + cb->args[0] = num; + + return skb->len; +} + +static int 
packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct packet_diag_req); + struct net *net = sock_net(skb->sk); + struct packet_diag_req *req; + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + req = nlmsg_data(h); + /* Make it possible to support protocol filtering later */ + if (req->sdiag_protocol) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = packet_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); + } else + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler packet_diag_handler = { + .family = AF_PACKET, + .dump = packet_diag_handler_dump, +}; + +static int __init packet_diag_init(void) +{ + return sock_diag_register(&packet_diag_handler); +} + +static void __exit packet_diag_exit(void) +{ + sock_diag_unregister(&packet_diag_handler); +} + +module_init(packet_diag_init); +module_exit(packet_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */); diff --git a/net/packet/internal.h b/net/packet/internal.h new file mode 100644 index 000000000000..44945f6b7252 --- /dev/null +++ b/net/packet/internal.h @@ -0,0 +1,121 @@ +#ifndef __PACKET_INTERNAL_H__ +#define __PACKET_INTERNAL_H__ + +struct packet_mclist { + struct packet_mclist *next; + int ifindex; + int count; + unsigned short type; + unsigned short alen; + unsigned char addr[MAX_ADDR_LEN]; +}; + +/* kbdq - kernel block descriptor queue */ +struct tpacket_kbdq_core { + struct pgv *pkbdq; + unsigned int feature_req_word; + unsigned int hdrlen; + unsigned char reset_pending_on_curr_blk; + unsigned char delete_blk_timer; + unsigned short kactive_blk_num; + unsigned short blk_sizeof_priv; + + /* last_kactive_blk_num: + * trick to see if user-space has caught up + * in order to avoid refreshing timer when every single pkt arrives. 
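pdiag_put_fanout() above reports a socket's fanout group as id | (type << 16); that is the same encoding a socket passes to the PACKET_FANOUT socket option when joining a group, so the diag value round-trips directly. A hedged user-space sketch (error handling trimmed, group id 42 arbitrary):

#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <sys/socket.h>
#include <arpa/inet.h>

/* join fanout group 42 with hash steering; sockets calling this with
 * the same id end up sharing one struct packet_fanout */
static int join_fanout_group(void)
{
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	int val = 42 | (PACKET_FANOUT_HASH << 16);

	if (fd < 0)
		return -1;
	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val)) < 0)
		return -1;
	return fd;
}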
+ */ + unsigned short last_kactive_blk_num; + + char *pkblk_start; + char *pkblk_end; + int kblk_size; + unsigned int knum_blocks; + uint64_t knxt_seq_num; + char *prev; + char *nxt_offset; + struct sk_buff *skb; + + atomic_t blk_fill_in_prog; + + /* Default is set to 8ms */ +#define DEFAULT_PRB_RETIRE_TOV (8) + + unsigned short retire_blk_tov; + unsigned short version; + unsigned long tov_in_jiffies; + + /* timer to retire an outstanding block */ + struct timer_list retire_blk_timer; +}; + +struct pgv { + char *buffer; +}; + +struct packet_ring_buffer { + struct pgv *pg_vec; + unsigned int head; + unsigned int frames_per_block; + unsigned int frame_size; + unsigned int frame_max; + + unsigned int pg_vec_order; + unsigned int pg_vec_pages; + unsigned int pg_vec_len; + + struct tpacket_kbdq_core prb_bdqc; + atomic_t pending; +}; + +extern struct mutex fanout_mutex; +#define PACKET_FANOUT_MAX 256 + +struct packet_fanout { +#ifdef CONFIG_NET_NS + struct net *net; +#endif + unsigned int num_members; + u16 id; + u8 type; + u8 defrag; + atomic_t rr_cur; + struct list_head list; + struct sock *arr[PACKET_FANOUT_MAX]; + spinlock_t lock; + atomic_t sk_ref; + struct packet_type prot_hook ____cacheline_aligned_in_smp; +}; + +struct packet_sock { + /* struct sock has to be the first member of packet_sock */ + struct sock sk; + struct packet_fanout *fanout; + struct tpacket_stats stats; + union tpacket_stats_u stats_u; + struct packet_ring_buffer rx_ring; + struct packet_ring_buffer tx_ring; + int copy_thresh; + spinlock_t bind_lock; + struct mutex pg_vec_lock; + unsigned int running:1, /* prot_hook is attached*/ + auxdata:1, + origdev:1, + has_vnet_hdr:1; + int ifindex; /* bound device */ + __be16 num; + struct packet_mclist *mclist; + atomic_t mapped; + enum tpacket_versions tp_version; + unsigned int tp_hdrlen; + unsigned int tp_reserve; + unsigned int tp_loss:1; + unsigned int tp_tstamp; + struct packet_type prot_hook ____cacheline_aligned_in_smp; +}; + +static struct packet_sock *pkt_sk(struct sock *sk) +{ + return (struct packet_sock *)sk; +} + +#endif diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index f10fb8256442..05d60859d8e3 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -67,6 +67,9 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, struct tcf_common *pc; int ret = 0; int err; +#ifdef CONFIG_GACT_PROB + struct tc_gact_p *p_parm = NULL; +#endif if (nla == NULL) return -EINVAL; @@ -82,6 +85,12 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, #ifndef CONFIG_GACT_PROB if (tb[TCA_GACT_PROB] != NULL) return -EOPNOTSUPP; +#else + if (tb[TCA_GACT_PROB]) { + p_parm = nla_data(tb[TCA_GACT_PROB]); + if (p_parm->ptype >= MAX_RAND) + return -EINVAL; + } #endif pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); @@ -103,8 +112,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, spin_lock_bh(&gact->tcf_lock); gact->tcf_action = parm->action; #ifdef CONFIG_GACT_PROB - if (tb[TCA_GACT_PROB] != NULL) { - struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]); + if (p_parm) { gact->tcfg_paction = p_parm->paction; gact->tcfg_pval = p_parm->pval; gact->tcfg_ptype = p_parm->ptype; @@ -133,7 +141,7 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, spin_lock(&gact->tcf_lock); #ifdef CONFIG_GACT_PROB - if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) + if (gact->tcfg_ptype) action = gact_rand[gact->tcfg_ptype](gact); else action = gact->tcf_action; diff --git a/net/sched/act_ipt.c 
b/net/sched/act_ipt.c index 60e281ad0f07..58fb3c7aab9e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -185,7 +185,12 @@ err3: err2: kfree(tname); err1: - kfree(pc); + if (ret == ACT_P_CREATED) { + if (est) + gen_kill_estimator(&pc->tcfc_bstats, + &pc->tcfc_rate_est); + kfree_rcu(pc, tcfc_rcu); + } return err; } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index fe81cc18e9e0..9c0fd0c78814 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -200,13 +200,12 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, out: if (err) { m->tcf_qstats.overlimits++; - /* should we be asking for packet to be dropped? - * may make sense for redirect case only - */ - retval = TC_ACT_SHOT; - } else { + if (m->tcfm_eaction != TCA_EGRESS_MIRROR) + retval = TC_ACT_SHOT; + else + retval = m->tcf_action; + } else retval = m->tcf_action; - } spin_unlock(&m->tcf_lock); return retval; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 26aa2f6ce257..45c53ab067a6 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -74,7 +74,10 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, p = to_pedit(pc); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - kfree(pc); + if (est) + gen_kill_estimator(&pc->tcfc_bstats, + &pc->tcfc_rate_est); + kfree_rcu(pc, tcfc_rcu); return -ENOMEM; } ret = ACT_P_CREATED; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 3922f2a2821b..3714f60f0b3c 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -131,7 +131,10 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, d = to_defact(pc); ret = alloc_defdata(d, defdata); if (ret < 0) { - kfree(pc); + if (est) + gen_kill_estimator(&pc->tcfc_bstats, + &pc->tcfc_rate_est); + kfree_rcu(pc, tcfc_rcu); return ret; } d->tcf_action = parm->action; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 511323e89cec..6c4d5fe53ce8 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -324,24 +324,6 @@ void netif_carrier_off(struct net_device *dev) } EXPORT_SYMBOL(netif_carrier_off); -/** - * netif_notify_peers - notify network peers about existence of @dev - * @dev: network device - * - * Generate traffic such that interested network peers are aware of - * @dev, such as by generating a gratuitous ARP. This may be used when - * a device wants to inform the rest of the network about some sort of - * reconfiguration such as a failover event or virtual machine - * migration. - */ -void netif_notify_peers(struct net_device *dev) -{ - rtnl_lock(); - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); - rtnl_unlock(); -} -EXPORT_SYMBOL(netif_notify_peers); - /* "NOOP" scheduler: the best scheduler, recommended for all interfaces under all circumstances. It is difficult to invent anything faster or cheaper. diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 9af01f3df18c..e4723d31fdd5 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -203,6 +203,34 @@ out: return index; } +/* Length of the next packet (0 if the queue is empty). */ +static unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + + skb = sch->ops->peek(sch); + return skb ? 
qdisc_pkt_len(skb) : 0; +} + +static void qfq_deactivate_class(struct qfq_sched *, struct qfq_class *); +static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl, + unsigned int len); + +static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl, + u32 lmax, u32 inv_w, int delta_w) +{ + int i; + + /* update qfq-specific data */ + cl->lmax = lmax; + cl->inv_w = inv_w; + i = qfq_calc_index(cl->inv_w, cl->lmax); + + cl->grp = &q->groups[i]; + + q->wsum += delta_w; +} + static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, unsigned long *arg) { @@ -250,6 +278,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, lmax = 1UL << QFQ_MTU_SHIFT; if (cl != NULL) { + bool need_reactivation = false; + if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, &cl->rate_est, qdisc_root_sleeping_lock(sch), @@ -258,12 +288,29 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return err; } - if (inv_w != cl->inv_w) { - sch_tree_lock(sch); - q->wsum += delta_w; - cl->inv_w = inv_w; - sch_tree_unlock(sch); + if (lmax == cl->lmax && inv_w == cl->inv_w) + return 0; /* nothing to update */ + + i = qfq_calc_index(inv_w, lmax); + sch_tree_lock(sch); + if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) { + /* + * shift cl->F back, to not charge the + * class for the not-yet-served head + * packet + */ + cl->F = cl->S; + /* remove class from its slot in the old group */ + qfq_deactivate_class(q, cl); + need_reactivation = true; } + + qfq_update_class_params(q, cl, lmax, inv_w, delta_w); + + if (need_reactivation) /* activate in new group */ + qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc)); + sch_tree_unlock(sch); + return 0; } @@ -273,11 +320,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->refcnt = 1; cl->common.classid = classid; - cl->lmax = lmax; - cl->inv_w = inv_w; - i = qfq_calc_index(cl->inv_w, cl->lmax); - cl->grp = &q->groups[i]; + qfq_update_class_params(q, cl, lmax, inv_w, delta_w); cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); @@ -294,7 +338,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return err; } } - q->wsum += weight; sch_tree_lock(sch); qdisc_class_hash_insert(&q->clhash, &cl->common); @@ -711,15 +754,6 @@ static void qfq_update_eligible(struct qfq_sched *q, u64 old_V) } } -/* What is length of next packet in queue (0 if queue is empty) */ -static unsigned int qdisc_peek_len(struct Qdisc *sch) -{ - struct sk_buff *skb; - - skb = sch->ops->peek(sch); - return skb ? qdisc_pkt_len(skb) : 0; -} - /* * Updates the class, returns true if also the group needs to be updated. */ @@ -843,11 +877,8 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); - struct qfq_group *grp; struct qfq_class *cl; int err; - u64 roundedS; - int s; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { @@ -876,11 +907,25 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; /* If reach this point, queue q was idle */ - grp = cl->grp; + qfq_activate_class(q, cl, qdisc_pkt_len(skb)); + + return err; +} + +/* + * Handle class switch from idle to backlogged. 
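The hunks above move the timestamp update out of qfq_enqueue() into qfq_activate_class(), whose definition follows just below, without changing the arithmetic: a newly backlogged class is charged for its head packet in inverse-weight units, F = S + len * inv_w. A toy user-space illustration (simplified types, purely for the numbers) of that charge, and of why qfq_change_class() above first pulls F back to S before moving a still-backlogged class to a new group:

#include <stdint.h>
#include <stdio.h>

struct toy_class { uint64_t S, F; uint32_t inv_w; };

/* the same update qfq_activate_class() performs */
static void toy_activate(struct toy_class *cl, unsigned int pkt_len)
{
	cl->F = cl->S + (uint64_t)pkt_len * cl->inv_w;
}

int main(void)
{
	struct toy_class cl = { .S = 1000, .F = 1000, .inv_w = 8 };

	toy_activate(&cl, 1500);	/* F = 1000 + 1500 * 8 = 13000 */

	/* weight change while backlogged: undo the charge for the
	 * not-yet-served head packet, then recharge at the new rate */
	cl.F = cl.S;			/* back to 1000 */
	cl.inv_w = 4;
	toy_activate(&cl, 1500);	/* F = 1000 + 1500 * 4 = 7000 */

	printf("F = %llu\n", (unsigned long long)cl.F);
	return 0;
}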
+ */ +static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl, + unsigned int pkt_len) +{ + struct qfq_group *grp = cl->grp; + u64 roundedS; + int s; + qfq_update_start(q, cl); /* compute new finish time and rounded start. */ - cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w; + cl->F = cl->S + (u64)pkt_len * cl->inv_w; roundedS = qfq_round_down(cl->S, grp->slot_shift); /* @@ -917,8 +962,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) skip_update: qfq_slot_insert(grp, cl, roundedS); - - return err; } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index ebaef3ed6065..b1ef3bc301a5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -82,6 +82,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a sctp_scope_t scope, gfp_t gfp) { + struct net *net = sock_net(sk); struct sctp_sock *sp; int i; sctp_paramhdr_t *p; @@ -124,7 +125,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * socket values. */ asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt; - asoc->pf_retrans = sctp_pf_retrans; + asoc->pf_retrans = net->sctp.pf_retrans; asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial); asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max); @@ -175,7 +176,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ; + min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) @@ -281,7 +282,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * and will revert old behavior. */ asoc->peer.asconf_capable = 0; - if (sctp_addip_noauth) + if (net->sctp.addip_noauth) asoc->peer.asconf_capable = 1; asoc->asconf_addr_del_pending = NULL; asoc->src_out_of_asoc_ok = 0; @@ -641,6 +642,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, const gfp_t gfp, const int peer_state) { + struct net *net = sock_net(asoc->base.sk); struct sctp_transport *peer; struct sctp_sock *sp; unsigned short port; @@ -674,7 +676,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, return peer; } - peer = sctp_transport_new(addr, gfp); + peer = sctp_transport_new(net, addr, gfp); if (!peer) return NULL; @@ -1089,13 +1091,15 @@ out: /* Is this the association we are looking for? 
*/ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc, + struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr) { struct sctp_transport *transport; if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) && - (htons(asoc->peer.port) == paddr->v4.sin_port)) { + (htons(asoc->peer.port) == paddr->v4.sin_port) && + net_eq(sock_net(asoc->base.sk), net)) { transport = sctp_assoc_lookup_paddr(asoc, paddr); if (!transport) goto out; @@ -1116,6 +1120,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) struct sctp_association *asoc = container_of(work, struct sctp_association, base.inqueue.immediate); + struct net *net = sock_net(asoc->base.sk); struct sctp_endpoint *ep; struct sctp_chunk *chunk; struct sctp_inq *inqueue; @@ -1148,13 +1153,13 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) if (sctp_chunk_is_data(chunk)) asoc->peer.last_data_from = chunk->transport; else - SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_INCTRLCHUNKS); if (chunk->transport) chunk->transport->last_time_heard = jiffies; /* Run through the state machine. */ - error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype, + error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state, ep, asoc, chunk, GFP_ATOMIC); /* Check to see if the association is freed in response to @@ -1414,6 +1419,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) /* Should we send a SACK to update our peer? */ static inline int sctp_peer_needs_update(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); switch (asoc->state) { case SCTP_STATE_ESTABLISHED: case SCTP_STATE_SHUTDOWN_PENDING: @@ -1421,7 +1427,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32, - (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift), + (asoc->base.sk->sk_rcvbuf >> net->sctp.rwnd_upd_shift), asoc->pathmtu))) return 1; break; @@ -1542,7 +1548,8 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, if (asoc->peer.ipv6_address) flags |= SCTP_ADDR6_PEERSUPP; - return sctp_bind_addr_copy(&asoc->base.bind_addr, + return sctp_bind_addr_copy(sock_net(asoc->base.sk), + &asoc->base.bind_addr, &asoc->ep->base.bind_addr, scope, gfp, flags); } diff --git a/net/sctp/auth.c b/net/sctp/auth.c index bf812048cf6f..159b9bc5d633 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -392,13 +392,14 @@ nomem: */ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) { + struct net *net = sock_net(asoc->base.sk); struct sctp_auth_bytes *secret; struct sctp_shared_key *ep_key; /* If we don't support AUTH, or peer is not capable * we don't need to do anything. 
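Nearly every sctp hunk in this series repeats one mechanical transformation, seen just above in sctp_association_init() and again in the auth changes below: a module-global tunable (sctp_pf_retrans, sctp_max_autoclose, sctp_addip_noauth, sctp_auth_enable, ...) becomes a field of the owning namespace, reached through whatever socket is at hand. A condensed sketch of the pattern, with the sctp kernel types assumed from context:

/* was:	asoc->pf_retrans = sctp_pf_retrans;
 * now:	resolve the namespace from the owning socket, then read the
 *	knob from its per-net sctp block */
static void example_apply_pernet_tunables(struct sctp_association *asoc)
{
	struct net *net = sock_net(asoc->base.sk);

	asoc->pf_retrans = net->sctp.pf_retrans;
	asoc->peer.asconf_capable = net->sctp.addip_noauth ? 1 : 0;
}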
*/ - if (!sctp_auth_enable || !asoc->peer.auth_capable) + if (!net->sctp.auth_enable || !asoc->peer.auth_capable) return 0; /* If the key_id is non-zero and we couldn't find an @@ -445,11 +446,12 @@ struct sctp_shared_key *sctp_auth_get_shkey( */ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) { + struct net *net = sock_net(ep->base.sk); struct crypto_hash *tfm = NULL; __u16 id; /* if the transforms are already allocted, we are done */ - if (!sctp_auth_enable) { + if (!net->sctp.auth_enable) { ep->auth_hmacs = NULL; return 0; } @@ -674,7 +676,12 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) /* Check if peer requested that this chunk is authenticated */ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - if (!sctp_auth_enable || !asoc || !asoc->peer.auth_capable) + struct net *net; + if (!asoc) + return 0; + + net = sock_net(asoc->base.sk); + if (!net->sctp.auth_enable || !asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, asoc->peer.peer_chunks); @@ -683,7 +690,12 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) /* Check if we requested that peer authenticate this chunk. */ int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - if (!sctp_auth_enable || !asoc) + struct net *net; + if (!asoc) + return 0; + + net = sock_net(asoc->base.sk); + if (!net->sctp.auth_enable) return 0; return __sctp_auth_cid(chunk, diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 4ece451c8d27..d886b3bf84f5 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -52,8 +52,8 @@ #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ -static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *, - sctp_scope_t scope, gfp_t gfp, +static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *, + union sctp_addr *, sctp_scope_t scope, gfp_t gfp, int flags); static void sctp_bind_addr_clean(struct sctp_bind_addr *); @@ -62,7 +62,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *); /* Copy 'src' to 'dest' taking 'scope' into account. Omit addresses * in 'src' which have a broader scope than 'scope'. */ -int sctp_bind_addr_copy(struct sctp_bind_addr *dest, +int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, sctp_scope_t scope, gfp_t gfp, int flags) @@ -75,7 +75,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, /* Extract the addresses which are relevant for this scope. */ list_for_each_entry(addr, &src->address_list, list) { - error = sctp_copy_one_addr(dest, &addr->a, scope, + error = sctp_copy_one_addr(net, dest, &addr->a, scope, gfp, flags); if (error < 0) goto out; @@ -87,7 +87,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, */ if (list_empty(&dest->address_list) && (SCTP_SCOPE_GLOBAL == scope)) { list_for_each_entry(addr, &src->address_list, list) { - error = sctp_copy_one_addr(dest, &addr->a, + error = sctp_copy_one_addr(net, dest, &addr->a, SCTP_SCOPE_LINK, gfp, flags); if (error < 0) @@ -448,7 +448,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, } /* Copy out addresses from the global local address list. 
*/ -static int sctp_copy_one_addr(struct sctp_bind_addr *dest, +static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, union sctp_addr *addr, sctp_scope_t scope, gfp_t gfp, int flags) @@ -456,8 +456,8 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, int error = 0; if (sctp_is_any(NULL, addr)) { - error = sctp_copy_local_addr_list(dest, scope, gfp, flags); - } else if (sctp_in_scope(addr, scope)) { + error = sctp_copy_local_addr_list(net, dest, scope, gfp, flags); + } else if (sctp_in_scope(net, addr, scope)) { /* Now that the address is in scope, check to see if * the address type is supported by local sock as * well as the remote peer. @@ -494,7 +494,7 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr) } /* Is 'addr' valid for 'scope'? */ -int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope) +int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope) { sctp_scope_t addr_scope = sctp_scope(addr); @@ -512,7 +512,7 @@ int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope) * Address scoping can be selectively controlled via sysctl * option */ - switch (sctp_scope_policy) { + switch (net->sctp.scope_policy) { case SCTP_SCOPE_POLICY_DISABLE: return 1; case SCTP_SCOPE_POLICY_ENABLE: diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 6c8556459a75..7c2df9c33df3 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -257,7 +257,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, offset = 0; if ((whole > 1) || (whole && over)) - SCTP_INC_STATS_USER(SCTP_MIB_FRAGUSRMSGS); + SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); /* Create chunks for all the full sized DATA chunks. */ for (i=0, len=first_len; i < whole; i++) { diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 68a385d7c3bd..1859e2bc83d1 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -65,6 +65,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sock *sk, gfp_t gfp) { + struct net *net = sock_net(sk); struct sctp_hmac_algo_param *auth_hmacs = NULL; struct sctp_chunks_param *auth_chunks = NULL; struct sctp_shared_key *null_key; @@ -74,7 +75,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, if (!ep->digest) return NULL; - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { /* Allocate space for HMACS and CHUNKS authentication * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. @@ -106,7 +107,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* If the Add-IP functionality is enabled, we must * authenticate, ASCONF and ASCONF-ACK chunks */ - if (sctp_addip_enable) { + if (net->sctp.addip_enable) { auth_chunks->chunks[0] = SCTP_CID_ASCONF; auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; auth_chunks->param_hdr.length = @@ -140,14 +141,14 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, INIT_LIST_HEAD(&ep->asocs); /* Use SCTP specific send buffer space queues. */ - ep->sndbuf_policy = sctp_sndbuf_policy; + ep->sndbuf_policy = net->sctp.sndbuf_policy; sk->sk_data_ready = sctp_data_ready; sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); /* Get the receive buffer policy for this endpoint */ - ep->rcvbuf_policy = sctp_rcvbuf_policy; + ep->rcvbuf_policy = net->sctp.rcvbuf_policy; /* Initialize the secret key used with cookie. 
*/ get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE); @@ -302,11 +303,13 @@ void sctp_endpoint_put(struct sctp_endpoint *ep) /* Is this the endpoint we are looking for? */ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, + struct net *net, const union sctp_addr *laddr) { struct sctp_endpoint *retval = NULL; - if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) { + if ((htons(ep->base.bind_addr.port) == laddr->v4.sin_port) && + net_eq(sock_net(ep->base.sk), net)) { if (sctp_bind_addr_match(&ep->base.bind_addr, laddr, sctp_sk(ep->base.sk))) retval = ep; @@ -343,7 +346,8 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( rport = ntohs(paddr->v4.sin_port); - hash = sctp_assoc_hashfn(ep->base.bind_addr.port, rport); + hash = sctp_assoc_hashfn(sock_net(ep->base.sk), ep->base.bind_addr.port, + rport); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { @@ -386,13 +390,14 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, { struct sctp_sockaddr_entry *addr; struct sctp_bind_addr *bp; + struct net *net = sock_net(ep->base.sk); bp = &ep->base.bind_addr; /* This function is called with the socket lock held, * so the address_list can not change. */ list_for_each_entry(addr, &bp->address_list, list) { - if (sctp_has_association(&addr->a, paddr)) + if (sctp_has_association(net, &addr->a, paddr)) return 1; } @@ -409,6 +414,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) base.inqueue.immediate); struct sctp_association *asoc; struct sock *sk; + struct net *net; struct sctp_transport *transport; struct sctp_chunk *chunk; struct sctp_inq *inqueue; @@ -423,6 +429,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) asoc = NULL; inqueue = &ep->base.inqueue; sk = ep->base.sk; + net = sock_net(sk); while (NULL != (chunk = sctp_inq_pop(inqueue))) { subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type); @@ -474,12 +481,12 @@ normal: if (asoc && sctp_chunk_is_data(chunk)) asoc->peer.last_data_from = chunk->transport; else - SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS); + SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_INCTRLCHUNKS); if (chunk->transport) chunk->transport->last_time_heard = jiffies; - error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype, state, + error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state, ep, asoc, chunk, GFP_ATOMIC); if (error && chunk) diff --git a/net/sctp/input.c b/net/sctp/input.c index e64d5210ed13..25dfe7380479 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -66,12 +66,15 @@ /* Forward declarations for internal helpers. */ static int sctp_rcv_ootb(struct sk_buff *); -static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_lookup(struct net *net, + struct sk_buff *skb, const union sctp_addr *laddr, const union sctp_addr *paddr, struct sctp_transport **transportp); -static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr); +static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, + const union sctp_addr *laddr); static struct sctp_association *__sctp_lookup_association( + struct net *net, const union sctp_addr *local, const union sctp_addr *peer, struct sctp_transport **pt); @@ -80,7 +83,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb); /* Calculate the SCTP checksum of an SCTP packet. 
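One wrinkle in input.c below: on the receive path there is no socket to hang the namespace off yet, so it is taken from the ingress device instead and threaded through every lookup and MIB bump. A sketch of that convention, with sctp_rx_net() a hypothetical name used only here:

static inline struct net *sctp_rx_net(const struct sk_buff *skb)
{
	/* sctp_rcv() and sctp_v4_err() below both start this way */
	return dev_net(skb->dev);
}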
*/ -static inline int sctp_rcv_checksum(struct sk_buff *skb) +static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb) { struct sctphdr *sh = sctp_hdr(skb); __le32 cmp = sh->checksum; @@ -96,7 +99,7 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb) if (val != cmp) { /* CRC failure, dump it. */ - SCTP_INC_STATS_BH(SCTP_MIB_CHECKSUMERRORS); + SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS); return -1; } return 0; @@ -129,11 +132,12 @@ int sctp_rcv(struct sk_buff *skb) union sctp_addr dest; int family; struct sctp_af *af; + struct net *net = dev_net(skb->dev); if (skb->pkt_type!=PACKET_HOST) goto discard_it; - SCTP_INC_STATS_BH(SCTP_MIB_INSCTPPACKS); + SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS); if (skb_linearize(skb)) goto discard_it; @@ -145,7 +149,7 @@ int sctp_rcv(struct sk_buff *skb) if (skb->len < sizeof(struct sctphdr)) goto discard_it; if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) && - sctp_rcv_checksum(skb) < 0) + sctp_rcv_checksum(net, skb) < 0) goto discard_it; skb_pull(skb, sizeof(struct sctphdr)); @@ -178,10 +182,10 @@ int sctp_rcv(struct sk_buff *skb) !af->addr_valid(&dest, NULL, skb)) goto discard_it; - asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport); + asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport); if (!asoc) - ep = __sctp_rcv_lookup_endpoint(&dest); + ep = __sctp_rcv_lookup_endpoint(net, &dest); /* Retrieve the common input handling substructure. */ rcvr = asoc ? &asoc->base : &ep->base; @@ -200,7 +204,7 @@ int sctp_rcv(struct sk_buff *skb) sctp_endpoint_put(ep); ep = NULL; } - sk = sctp_get_ctl_sock(); + sk = net->sctp.ctl_sock; ep = sctp_sk(sk)->ep; sctp_endpoint_hold(ep); rcvr = &ep->base; @@ -216,7 +220,7 @@ int sctp_rcv(struct sk_buff *skb) */ if (!asoc) { if (sctp_rcv_ootb(skb)) { - SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES); goto discard_release; } } @@ -272,9 +276,9 @@ int sctp_rcv(struct sk_buff *skb) skb = NULL; /* sctp_chunk_free already freed the skb */ goto discard_release; } - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); + SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG); } else { - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); + SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); } @@ -289,7 +293,7 @@ int sctp_rcv(struct sk_buff *skb) return 0; discard_it: - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS); + SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; @@ -462,11 +466,13 @@ void sctp_icmp_proto_unreachable(struct sock *sk, } } else { + struct net *net = sock_net(sk); + if (timer_pending(&t->proto_unreach_timer) && del_timer(&t->proto_unreach_timer)) sctp_association_put(asoc); - sctp_do_sm(SCTP_EVENT_T_OTHER, + sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), asoc->state, asoc->ep, asoc, t, GFP_ATOMIC); @@ -474,7 +480,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, } /* Common lookup code for icmp/icmpv6 error handler. */ -struct sock *sctp_err_lookup(int family, struct sk_buff *skb, +struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, struct sctphdr *sctphdr, struct sctp_association **app, struct sctp_transport **tpp) @@ -503,7 +509,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, /* Look for an association that matches the incoming ICMP error * packet. 
*/ - asoc = __sctp_lookup_association(&saddr, &daddr, &transport); + asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport); if (!asoc) return NULL; @@ -539,7 +545,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(&init_net, LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); *app = asoc; *tpp = transport; @@ -586,9 +592,10 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct inet_sock *inet; sk_buff_data_t saveip, savesctp; int err; + struct net *net = dev_net(skb->dev); if (skb->len < ihlen + 8) { - ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } @@ -597,12 +604,12 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) savesctp = skb->transport_header; skb_reset_network_header(skb); skb_set_transport_header(skb, ihlen); - sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport); + sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport); /* Put back, the original values. */ skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } /* Warning: The sock lock is held. Remember to call @@ -723,12 +730,13 @@ discard: /* Insert endpoint into the hash table. */ static void __sctp_hash_endpoint(struct sctp_endpoint *ep) { + struct net *net = sock_net(ep->base.sk); struct sctp_ep_common *epb; struct sctp_hashbucket *head; epb = &ep->base; - epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); + epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; sctp_write_lock(&head->lock); @@ -747,12 +755,13 @@ void sctp_hash_endpoint(struct sctp_endpoint *ep) /* Remove endpoint from the hash table. */ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) { + struct net *net = sock_net(ep->base.sk); struct sctp_hashbucket *head; struct sctp_ep_common *epb; epb = &ep->base; - epb->hashent = sctp_ep_hashfn(epb->bind_addr.port); + epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; @@ -770,7 +779,8 @@ void sctp_unhash_endpoint(struct sctp_endpoint *ep) } /* Look up an endpoint. */ -static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr) +static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, + const union sctp_addr *laddr) { struct sctp_hashbucket *head; struct sctp_ep_common *epb; @@ -778,16 +788,16 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l struct hlist_node *node; int hash; - hash = sctp_ep_hashfn(ntohs(laddr->v4.sin_port)); + hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port)); head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { ep = sctp_ep(epb); - if (sctp_endpoint_is_match(ep, laddr)) + if (sctp_endpoint_is_match(ep, net, laddr)) goto hit; } - ep = sctp_sk((sctp_get_ctl_sock()))->ep; + ep = sctp_sk(net->sctp.ctl_sock)->ep; hit: sctp_endpoint_hold(ep); @@ -798,13 +808,15 @@ hit: /* Insert association into the hash table. */ static void __sctp_hash_established(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); struct sctp_ep_common *epb; struct sctp_hashbucket *head; epb = &asoc->base; /* Calculate which chain this entry will belong to. 
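Note that the endpoint and association hash tables themselves stay global in this series; entries from all namespaces share the chains (the hash functions merely gain a net argument, presumably so the namespace can be mixed into the hash), so every match helper also grows an explicit net_eq() test, as in sctp_endpoint_is_match() and sctp_assoc_is_match() earlier. A stripped-down sketch of that discipline, kernel types assumed:

/* port equality alone no longer identifies an entry; the owner's
 * namespace has to match as well */
static bool toy_epb_matches(struct sctp_ep_common *epb, struct net *net,
			    __be16 port)
{
	return htons(epb->bind_addr.port) == port &&
	       net_eq(sock_net(epb->sk), net);
}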
*/ - epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port, asoc->peer.port); + epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port, + asoc->peer.port); head = &sctp_assoc_hashtable[epb->hashent]; @@ -827,12 +839,13 @@ void sctp_hash_established(struct sctp_association *asoc) /* Remove association from the hash table. */ static void __sctp_unhash_established(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); struct sctp_hashbucket *head; struct sctp_ep_common *epb; epb = &asoc->base; - epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port, + epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port, asoc->peer.port); head = &sctp_assoc_hashtable[epb->hashent]; @@ -855,6 +868,7 @@ void sctp_unhash_established(struct sctp_association *asoc) /* Look up an association. */ static struct sctp_association *__sctp_lookup_association( + struct net *net, const union sctp_addr *local, const union sctp_addr *peer, struct sctp_transport **pt) @@ -869,12 +883,13 @@ static struct sctp_association *__sctp_lookup_association( /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - hash = sctp_assoc_hashfn(ntohs(local->v4.sin_port), ntohs(peer->v4.sin_port)); + hash = sctp_assoc_hashfn(net, ntohs(local->v4.sin_port), + ntohs(peer->v4.sin_port)); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { asoc = sctp_assoc(epb); - transport = sctp_assoc_is_match(asoc, local, peer); + transport = sctp_assoc_is_match(asoc, net, local, peer); if (transport) goto hit; } @@ -892,27 +907,29 @@ hit: /* Look up an association. BH-safe. */ SCTP_STATIC -struct sctp_association *sctp_lookup_association(const union sctp_addr *laddr, +struct sctp_association *sctp_lookup_association(struct net *net, + const union sctp_addr *laddr, const union sctp_addr *paddr, struct sctp_transport **transportp) { struct sctp_association *asoc; sctp_local_bh_disable(); - asoc = __sctp_lookup_association(laddr, paddr, transportp); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp); sctp_local_bh_enable(); return asoc; } /* Is there an association matching the given local and peer addresses? */ -int sctp_has_association(const union sctp_addr *laddr, +int sctp_has_association(struct net *net, + const union sctp_addr *laddr, const union sctp_addr *paddr) { struct sctp_association *asoc; struct sctp_transport *transport; - if ((asoc = sctp_lookup_association(laddr, paddr, &transport))) { + if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) { sctp_association_put(asoc); return 1; } @@ -938,7 +955,8 @@ int sctp_has_association(const union sctp_addr *laddr, * in certain circumstances. * */ -static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, + struct sk_buff *skb, const union sctp_addr *laddr, struct sctp_transport **transportp) { struct sctp_association *asoc; @@ -978,7 +996,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, af->from_addr_param(paddr, params.addr, sh->source, 0); - asoc = __sctp_lookup_association(laddr, paddr, &transport); + asoc = __sctp_lookup_association(net, laddr, paddr, &transport); if (asoc) return asoc; } @@ -1001,6 +1019,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, * subsequent ASCONF Chunks. If found, proceed to rule D4. 
*/ static struct sctp_association *__sctp_rcv_asconf_lookup( + struct net *net, sctp_chunkhdr_t *ch, const union sctp_addr *laddr, __be16 peer_port, @@ -1020,7 +1039,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( af->from_addr_param(&paddr, param, peer_port, 0); - return __sctp_lookup_association(laddr, &paddr, transportp); + return __sctp_lookup_association(net, laddr, &paddr, transportp); } @@ -1033,7 +1052,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( * This means that any chunks that can help us identify the association need * to be looked at to find this association. */ -static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, + struct sk_buff *skb, const union sctp_addr *laddr, struct sctp_transport **transportp) { @@ -1074,8 +1094,9 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb, break; case SCTP_CID_ASCONF: - if (have_auth || sctp_addip_noauth) - asoc = __sctp_rcv_asconf_lookup(ch, laddr, + if (have_auth || net->sctp.addip_noauth) + asoc = __sctp_rcv_asconf_lookup( + net, ch, laddr, sctp_hdr(skb)->source, transportp); default: @@ -1098,7 +1119,8 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb, * include looking inside of INIT/INIT-ACK chunks or after the AUTH * chunks. */ -static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, + struct sk_buff *skb, const union sctp_addr *laddr, struct sctp_transport **transportp) { @@ -1118,11 +1140,11 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, switch (ch->type) { case SCTP_CID_INIT: case SCTP_CID_INIT_ACK: - return __sctp_rcv_init_lookup(skb, laddr, transportp); + return __sctp_rcv_init_lookup(net, skb, laddr, transportp); break; default: - return __sctp_rcv_walk_lookup(skb, laddr, transportp); + return __sctp_rcv_walk_lookup(net, skb, laddr, transportp); break; } @@ -1131,21 +1153,22 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb, } /* Lookup an association for an inbound skb. */ -static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb, +static struct sctp_association *__sctp_rcv_lookup(struct net *net, + struct sk_buff *skb, const union sctp_addr *paddr, const union sctp_addr *laddr, struct sctp_transport **transportp) { struct sctp_association *asoc; - asoc = __sctp_lookup_association(laddr, paddr, transportp); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp); /* Further lookup for INIT/INIT-ACK packets. * SCTP Implementors Guide, 2.18 Handling of address * parameters within the INIT or INIT-ACK. 
*/ if (!asoc) - asoc = __sctp_rcv_lookup_harder(skb, laddr, transportp); + asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp); return asoc; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ed7139ea7978..ea14cb445295 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -99,6 +99,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; + struct net *net = dev_net(ifa->idev->dev); int found = 0; switch (ev) { @@ -110,27 +111,27 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; - spin_lock_bh(&sctp_local_addr_lock); - list_add_tail_rcu(&addr->list, &sctp_local_addr_list); - sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); - spin_unlock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); + list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW); + spin_unlock_bh(&net->sctp.local_addr_lock); } break; case NETDEV_DOWN: - spin_lock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); list_for_each_entry_safe(addr, temp, - &sctp_local_addr_list, list) { + &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET6 && ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { - sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); break; } } - spin_unlock_bh(&sctp_local_addr_lock); + spin_unlock_bh(&net->sctp.local_addr_lock); if (found) kfree_rcu(addr, rcu); break; @@ -154,6 +155,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct ipv6_pinfo *np; sk_buff_data_t saveip, savesctp; int err; + struct net *net = dev_net(skb->dev); idev = in6_dev_get(skb->dev); @@ -162,12 +164,12 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, savesctp = skb->transport_header; skb_reset_network_header(skb); skb_set_transport_header(skb, offset); - sk = sctp_err_lookup(AF_INET6, skb, sctp_hdr(skb), &asoc, &transport); + sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport); /* Put back, the original pointers. 
*/ skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP6_INC_STATS_BH(dev_net(skb->dev), idev, ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_INERRORS); goto out; } @@ -241,7 +243,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) __func__, skb, skb->len, &fl6.saddr, &fl6.daddr); - SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); + SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; @@ -580,7 +582,7 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) if (!(type & IPV6_ADDR_UNICAST)) return 0; - return ipv6_chk_addr(&init_net, in6, NULL, 0); + return ipv6_chk_addr(sock_net(&sp->inet.sk), in6, NULL, 0); } /* This function checks if the address is a valid address to be used for @@ -857,14 +859,14 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) struct net_device *dev; if (type & IPV6_ADDR_LINKLOCAL) { + struct net *net; if (!addr->v6.sin6_scope_id) return 0; + net = sock_net(&opt->inet.sk); rcu_read_lock(); - dev = dev_get_by_index_rcu(&init_net, - addr->v6.sin6_scope_id); + dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id); if (!dev || - !ipv6_chk_addr(&init_net, &addr->v6.sin6_addr, - dev, 0)) { + !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) { rcu_read_unlock(); return 0; } @@ -897,7 +899,7 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) if (!addr->v6.sin6_scope_id) return 0; rcu_read_lock(); - dev = dev_get_by_index_rcu(&init_net, + dev = dev_get_by_index_rcu(sock_net(&opt->inet.sk), addr->v6.sin6_scope_id); rcu_read_unlock(); if (!dev) diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index 8ef8e7d9eb61..fe012c44f8df 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -129,20 +129,20 @@ static const struct file_operations sctp_objcnt_ops = { }; /* Initialize the objcount in the proc filesystem. */ -void sctp_dbg_objcnt_init(void) +void sctp_dbg_objcnt_init(struct net *net) { struct proc_dir_entry *ent; ent = proc_create("sctp_dbg_objcnt", 0, - proc_net_sctp, &sctp_objcnt_ops); + net->sctp.proc_net_sctp, &sctp_objcnt_ops); if (!ent) pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n"); } /* Cleanup the objcount entry in the proc filesystem. */ -void sctp_dbg_objcnt_exit(void) +void sctp_dbg_objcnt_exit(struct net *net) { - remove_proc_entry("sctp_dbg_objcnt", proc_net_sctp); + remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp); } diff --git a/net/sctp/output.c b/net/sctp/output.c index 838e18b4d7ea..0c6359bb973c 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -597,7 +597,7 @@ out: return err; no_route: kfree_skb(nskb); - IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e7aa177c9522..072bf6ae3c26 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -299,6 +299,7 @@ void sctp_outq_free(struct sctp_outq *q) /* Put a new chunk in an sctp_outq. 
*/ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) { + struct net *net = sock_net(q->asoc->base.sk); int error = 0; SCTP_DEBUG_PRINTK("sctp_outq_tail(%p, %p[%s])\n", @@ -337,15 +338,15 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) sctp_outq_tail_data(q, chunk); if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) - SCTP_INC_STATS(SCTP_MIB_OUTUNORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); else - SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); q->empty = 0; break; } } else { list_add_tail(&chunk->list, &q->control_chunk_list); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } if (error < 0) @@ -478,11 +479,12 @@ void sctp_retransmit_mark(struct sctp_outq *q, void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_retransmit_reason_t reason) { + struct net *net = sock_net(q->asoc->base.sk); int error = 0; switch(reason) { case SCTP_RTXR_T3_RTX: - SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); /* Update the retran path if the T3-rtx timer has expired for * the current retran path. @@ -493,15 +495,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, transport->asoc->unack_data; break; case SCTP_RTXR_FAST_RTX: - SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); q->fast_rtx = 1; break; case SCTP_RTXR_PMTUD: - SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_PMTUD_RETRANSMITS); break; case SCTP_RTXR_T1_RTX: - SCTP_INC_STATS(SCTP_MIB_T1_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_T1_RETRANSMITS); transport->asoc->init_retries++; break; default: @@ -1914,6 +1916,6 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn) if (ftsn_chunk) { list_add_tail(&ftsn_chunk->list, &q->control_chunk_list); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS); } } diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c index 534c7eae9d15..794bb14decde 100644 --- a/net/sctp/primitive.c +++ b/net/sctp/primitive.c @@ -57,7 +57,7 @@ #define DECLARE_PRIMITIVE(name) \ /* This is called in the code as sctp_primitive_ ## name. */ \ -int sctp_primitive_ ## name(struct sctp_association *asoc, \ +int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \ void *arg) { \ int error = 0; \ sctp_event_t event_type; sctp_subtype_t subtype; \ @@ -69,7 +69,7 @@ int sctp_primitive_ ## name(struct sctp_association *asoc, \ state = asoc ? asoc->state : SCTP_STATE_CLOSED; \ ep = asoc ? asoc->ep : NULL; \ \ - error = sctp_do_sm(event_type, subtype, state, ep, asoc, \ + error = sctp_do_sm(net, event_type, subtype, state, ep, asoc, \ arg, GFP_KERNEL); \ return error; \ } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 1e2eee88c3ea..d9cb2ab149fe 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -80,11 +80,12 @@ static const struct snmp_mib sctp_snmp_list[] = { /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). 
*/ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq->private; int i; for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field((void __percpu **)sctp_statistics, + snmp_fold_field((void __percpu **)net->sctp.sctp_statistics, sctp_snmp_list[i].entry)); return 0; @@ -93,7 +94,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) /* Initialize the seq file operations for 'snmp' object. */ static int sctp_snmp_seq_open(struct inode *inode, struct file *file) { - return single_open(file, sctp_snmp_seq_show, NULL); + return single_open_net(inode, file, sctp_snmp_seq_show); } static const struct file_operations sctp_snmp_seq_fops = { @@ -105,11 +106,12 @@ static const struct file_operations sctp_snmp_seq_fops = { }; /* Set up the proc fs entry for 'snmp' object. */ -int __init sctp_snmp_proc_init(void) +int __net_init sctp_snmp_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("snmp", S_IRUGO, proc_net_sctp, &sctp_snmp_seq_fops); + p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_snmp_seq_fops); if (!p) return -ENOMEM; @@ -117,9 +119,9 @@ int __init sctp_snmp_proc_init(void) } /* Cleanup the proc fs entry for 'snmp' object. */ -void sctp_snmp_proc_exit(void) +void sctp_snmp_proc_exit(struct net *net) { - remove_proc_entry("snmp", proc_net_sctp); + remove_proc_entry("snmp", net->sctp.proc_net_sctp); } /* Dump local addresses of an association/endpoint. */ @@ -213,6 +215,8 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) sctp_for_each_hentry(epb, node, &head->chain) { ep = sctp_ep(epb); sk = epb->sk; + if (!net_eq(sock_net(sk), seq_file_net(seq))) + continue; seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk, sctp_sk(sk)->type, sk->sk_state, hash, epb->bind_addr.port, @@ -238,7 +242,8 @@ static const struct seq_operations sctp_eps_ops = { /* Initialize the seq file operations for 'eps' object. */ static int sctp_eps_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &sctp_eps_ops); + return seq_open_net(inode, file, &sctp_eps_ops, + sizeof(struct seq_net_private)); } static const struct file_operations sctp_eps_seq_fops = { @@ -249,11 +254,12 @@ static const struct file_operations sctp_eps_seq_fops = { }; /* Set up the proc fs entry for 'eps' object. */ -int __init sctp_eps_proc_init(void) +int __net_init sctp_eps_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("eps", S_IRUGO, proc_net_sctp, &sctp_eps_seq_fops); + p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_eps_seq_fops); if (!p) return -ENOMEM; @@ -261,9 +267,9 @@ int __init sctp_eps_proc_init(void) } /* Cleanup the proc fs entry for 'eps' object. */ -void sctp_eps_proc_exit(void) +void sctp_eps_proc_exit(struct net *net) { - remove_proc_entry("eps", proc_net_sctp); + remove_proc_entry("eps", net->sctp.proc_net_sctp); } @@ -316,6 +322,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_for_each_hentry(epb, node, &head->chain) { assoc = sctp_assoc(epb); sk = epb->sk; + if (!net_eq(sock_net(sk), seq_file_net(seq))) + continue; seq_printf(seq, "%8pK %8pK %-3d %-3d %-2d %-4d " "%4d %8d %8d %7d %5lu %-5d %5d ", @@ -354,7 +362,8 @@ static const struct seq_operations sctp_assoc_ops = { /* Initialize the seq file operations for 'assocs' object. 
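The proc conversions in this file pair two helpers: seq_open_net() sizes the seq_file private area so that seq_file_net() can recover the namespace inside ->show, which then both scopes the data and filters the shared hash chains with net_eq(). A self-contained sketch of that pairing, all foo_* names hypothetical:

static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
{
	return *pos ? NULL : SEQ_START_TOKEN;
}

static void *foo_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void foo_seq_stop(struct seq_file *seq, void *v)
{
}

static int foo_seq_show(struct seq_file *seq, void *v)
{
	/* valid only because of the private sizing in foo_seq_open() */
	struct net *net = seq_file_net(seq);

	seq_printf(seq, "netns %p\n", net);
	return 0;
}

static const struct seq_operations foo_seq_ops = {
	.start = foo_seq_start,
	.next  = foo_seq_next,
	.stop  = foo_seq_stop,
	.show  = foo_seq_show,
};

static int foo_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &foo_seq_ops,
			    sizeof(struct seq_net_private));
}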
*/ static int sctp_assocs_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &sctp_assoc_ops); + return seq_open_net(inode, file, &sctp_assoc_ops, + sizeof(struct seq_net_private)); } static const struct file_operations sctp_assocs_seq_fops = { @@ -365,11 +374,11 @@ static const struct file_operations sctp_assocs_seq_fops = { }; /* Set up the proc fs entry for 'assocs' object. */ -int __init sctp_assocs_proc_init(void) +int __net_init sctp_assocs_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("assocs", S_IRUGO, proc_net_sctp, + p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp, &sctp_assocs_seq_fops); if (!p) return -ENOMEM; @@ -378,9 +387,9 @@ int __init sctp_assocs_proc_init(void) } /* Cleanup the proc fs entry for 'assocs' object. */ -void sctp_assocs_proc_exit(void) +void sctp_assocs_proc_exit(struct net *net) { - remove_proc_entry("assocs", proc_net_sctp); + remove_proc_entry("assocs", net->sctp.proc_net_sctp); } static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos) @@ -426,6 +435,8 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) sctp_local_bh_disable(); read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { + if (!net_eq(sock_net(epb->sk), seq_file_net(seq))) + continue; assoc = sctp_assoc(epb); list_for_each_entry(tsp, &assoc->peer.transport_addr_list, transports) { @@ -489,14 +500,15 @@ static const struct seq_operations sctp_remaddr_ops = { }; /* Cleanup the proc fs entry for 'remaddr' object. */ -void sctp_remaddr_proc_exit(void) +void sctp_remaddr_proc_exit(struct net *net) { - remove_proc_entry("remaddr", proc_net_sctp); + remove_proc_entry("remaddr", net->sctp.proc_net_sctp); } static int sctp_remaddr_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &sctp_remaddr_ops); + return seq_open_net(inode, file, &sctp_remaddr_ops, + sizeof(struct seq_net_private)); } static const struct file_operations sctp_remaddr_seq_fops = { @@ -506,11 +518,12 @@ static const struct file_operations sctp_remaddr_seq_fops = { .release = seq_release, }; -int __init sctp_remaddr_proc_init(void) +int __net_init sctp_remaddr_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("remaddr", S_IRUGO, proc_net_sctp, &sctp_remaddr_seq_fops); + p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_remaddr_seq_fops); if (!p) return -ENOMEM; return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1f89c4e69645..2d518425d598 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -69,21 +69,10 @@ /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; -DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; - -#ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_net_sctp; -#endif struct idr sctp_assocs_id; DEFINE_SPINLOCK(sctp_assocs_id_lock); -/* This is the global socket data structure used for responding to - * the Out-of-the-blue (OOTB) packets. A control sock will be created - * for this socket at the initialization time. - */ -static struct sock *sctp_ctl_sock; - static struct sctp_pf *sctp_pf_inet6_specific; static struct sctp_pf *sctp_pf_inet_specific; static struct sctp_af *sctp_af_v4_specific; @@ -96,74 +85,54 @@ long sysctl_sctp_mem[3]; int sysctl_sctp_rmem[3]; int sysctl_sctp_wmem[3]; -/* Return the address of the control sock. */ -struct sock *sctp_get_ctl_sock(void) -{ - return sctp_ctl_sock; -} - /* Set up the proc fs entry for the SCTP protocol. 
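
The endpoint, association and remote-address hash tables stay global and shared between namespaces; isolation comes from the net_eq(sock_net(sk), seq_file_net(seq)) filter added to each walk above, which skips entries owned by a different namespace. A small model of that filtered dump (types and table contents are invented for illustration):

#include <stdio.h>

struct net { int id; };
struct sock { struct net *net; int port; };

static struct net net_a = { 1 }, net_b = { 2 };

/* One shared table holding endpoints from every namespace, as in the
 * global SCTP hash tables. */
static struct sock table[] = {
        { &net_a, 7000 }, { &net_b, 7001 }, { &net_a, 7002 },
};

static int net_eq(const struct net *x, const struct net *y)
{
        return x == y;
}

/* seq_show-style walk: dump only the viewer's namespace. */
static void dump_endpoints(const struct net *viewer)
{
        for (unsigned i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
                if (!net_eq(table[i].net, viewer))
                        continue;       /* someone else's endpoint */
                printf("  port %d\n", table[i].port);
        }
}

int main(void)
{
        printf("as net_a:\n"); dump_endpoints(&net_a);
        printf("as net_b:\n"); dump_endpoints(&net_b);
        return 0;
}
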
*/ -static __init int sctp_proc_init(void) +static __net_init int sctp_proc_init(struct net *net) { - if (percpu_counter_init(&sctp_sockets_allocated, 0)) - goto out_nomem; #ifdef CONFIG_PROC_FS - if (!proc_net_sctp) { - proc_net_sctp = proc_mkdir("sctp", init_net.proc_net); - if (!proc_net_sctp) - goto out_free_percpu; - } - - if (sctp_snmp_proc_init()) + net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net); + if (!net->sctp.proc_net_sctp) + goto out_proc_net_sctp; + if (sctp_snmp_proc_init(net)) goto out_snmp_proc_init; - if (sctp_eps_proc_init()) + if (sctp_eps_proc_init(net)) goto out_eps_proc_init; - if (sctp_assocs_proc_init()) + if (sctp_assocs_proc_init(net)) goto out_assocs_proc_init; - if (sctp_remaddr_proc_init()) + if (sctp_remaddr_proc_init(net)) goto out_remaddr_proc_init; return 0; out_remaddr_proc_init: - sctp_assocs_proc_exit(); + sctp_assocs_proc_exit(net); out_assocs_proc_init: - sctp_eps_proc_exit(); + sctp_eps_proc_exit(net); out_eps_proc_init: - sctp_snmp_proc_exit(); + sctp_snmp_proc_exit(net); out_snmp_proc_init: - if (proc_net_sctp) { - proc_net_sctp = NULL; - remove_proc_entry("sctp", init_net.proc_net); - } -out_free_percpu: - percpu_counter_destroy(&sctp_sockets_allocated); -#else - return 0; -#endif /* CONFIG_PROC_FS */ - -out_nomem: + remove_proc_entry("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; +out_proc_net_sctp: return -ENOMEM; +#endif /* CONFIG_PROC_FS */ + return 0; } /* Clean up the proc fs entry for the SCTP protocol. * Note: Do not make this __exit as it is used in the init error * path. */ -static void sctp_proc_exit(void) +static void sctp_proc_exit(struct net *net) { #ifdef CONFIG_PROC_FS - sctp_snmp_proc_exit(); - sctp_eps_proc_exit(); - sctp_assocs_proc_exit(); - sctp_remaddr_proc_exit(); - - if (proc_net_sctp) { - proc_net_sctp = NULL; - remove_proc_entry("sctp", init_net.proc_net); - } + sctp_snmp_proc_exit(net); + sctp_eps_proc_exit(net); + sctp_assocs_proc_exit(net); + sctp_remaddr_proc_exit(net); + + remove_proc_entry("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; #endif - percpu_counter_destroy(&sctp_sockets_allocated); } /* Private helper to extract ipv4 address and stash them in @@ -201,29 +170,29 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, /* Extract our IP addresses from the system and stash them in the * protocol structure. */ -static void sctp_get_local_addr_list(void) +static void sctp_get_local_addr_list(struct net *net) { struct net_device *dev; struct list_head *pos; struct sctp_af *af; rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); - af->copy_addrlist(&sctp_local_addr_list, dev); + af->copy_addrlist(&net->sctp.local_addr_list, dev); } } rcu_read_unlock(); } /* Free the existing local addresses. */ -static void sctp_free_local_addr_list(void) +static void sctp_free_local_addr_list(struct net *net) { struct sctp_sockaddr_entry *addr; struct list_head *pos, *temp; - list_for_each_safe(pos, temp, &sctp_local_addr_list) { + list_for_each_safe(pos, temp, &net->sctp.local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); list_del(pos); kfree(addr); @@ -231,17 +200,17 @@ static void sctp_free_local_addr_list(void) } /* Copy the local addresses which are valid for 'scope' into 'bp'. 
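
sctp_proc_init() above is a textbook goto unwind ladder: each proc entry that registered successfully is removed, in reverse order, when a later registration fails, and the error label names the step that failed. The shape in isolation, with stub init/exit steps and a simulated mid-ladder failure:

#include <stdio.h>

/* Stub subsystems; each init can fail. FAIL_AT simulates a failure
 * part-way through so the unwind path actually runs. */
#define FAIL_AT 3

static int init_step(int n)  { printf("init %d\n", n); return n == FAIL_AT ? -1 : 0; }
static void exit_step(int n) { printf("exit %d\n", n); }

static int proc_init(void)
{
        if (init_step(1)) goto out1;
        if (init_step(2)) goto out2;
        if (init_step(3)) goto out3;
        if (init_step(4)) goto out4;
        return 0;

        /* Unwind strictly in reverse registration order; the step
         * that failed never gets torn down because it never
         * succeeded. */
out4:   exit_step(3);
out3:   exit_step(2);
out2:   exit_step(1);
out1:   return -1;
}

int main(void)
{
        return proc_init() ? 1 : 0;
}
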
*/ -int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, - gfp_t gfp, int copy_flags) +int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, + sctp_scope_t scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; rcu_read_lock(); - list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { if (!addr->valid) continue; - if (sctp_in_scope(&addr->a, scope)) { + if (sctp_in_scope(net, &addr->a, scope)) { /* Now that the address is in scope, check to see if * the address type is really supported by the local * sock as well as the remote peer. @@ -397,7 +366,8 @@ static int sctp_v4_addr_valid(union sctp_addr *addr, /* Should this be available for binding? */ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) { - int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr); + struct net *net = sock_net(&sp->inet.sk); + int ret = inet_addr_type(net, addr->v4.sin_addr.s_addr); if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && @@ -484,7 +454,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", __func__, &fl4->daddr, &fl4->saddr); - rt = ip_route_output_key(&init_net, fl4); + rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) dst = &rt->dst; @@ -530,7 +500,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, (AF_INET == laddr->a.sa.sa_family)) { fl4->saddr = laddr->a.v4.sin_addr.s_addr; fl4->fl4_sport = laddr->a.v4.sin_port; - rt = ip_route_output_key(&init_net, fl4); + rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) { dst = &rt->dst; goto out_unlock; @@ -627,14 +597,15 @@ static void sctp_v4_ecn_capable(struct sock *sk) void sctp_addr_wq_timeout_handler(unsigned long arg) { + struct net *net = (struct net *)arg; struct sctp_sockaddr_entry *addrw, *temp; struct sctp_sock *sp; - spin_lock_bh(&sctp_addr_wq_lock); + spin_lock_bh(&net->sctp.addr_wq_lock); - list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) { SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ", - " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state, + " for cmd %d at entry %p\n", &net->sctp.addr_waitq, &addrw->a, addrw->state, addrw); #if IS_ENABLED(CONFIG_IPV6) @@ -648,7 +619,7 @@ void sctp_addr_wq_timeout_handler(unsigned long arg) goto free_next; in6 = (struct in6_addr *)&addrw->a.v6.sin6_addr; - if (ipv6_chk_addr(&init_net, in6, NULL, 0) == 0 && + if (ipv6_chk_addr(net, in6, NULL, 0) == 0 && addrw->state == SCTP_ADDR_NEW) { unsigned long timeo_val; @@ -656,12 +627,12 @@ void sctp_addr_wq_timeout_handler(unsigned long arg) SCTP_ADDRESS_TICK_DELAY); timeo_val = jiffies; timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); - mod_timer(&sctp_addr_wq_timer, timeo_val); + mod_timer(&net->sctp.addr_wq_timer, timeo_val); break; } } #endif - list_for_each_entry(sp, &sctp_auto_asconf_splist, auto_asconf_list) { + list_for_each_entry(sp, &net->sctp.auto_asconf_splist, auto_asconf_list) { struct sock *sk; sk = sctp_opt2sk(sp); @@ -679,31 +650,32 @@ free_next: list_del(&addrw->list); kfree(addrw); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); } -static void sctp_free_addr_wq(void) +static void sctp_free_addr_wq(struct net *net) { struct sctp_sockaddr_entry *addrw; struct 
sctp_sockaddr_entry *temp; - spin_lock_bh(&sctp_addr_wq_lock); - del_timer(&sctp_addr_wq_timer); - list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + spin_lock_bh(&net->sctp.addr_wq_lock); + del_timer(&net->sctp.addr_wq_timer); + list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) { list_del(&addrw->list); kfree(addrw); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); } /* lookup the entry for the same address in the addr_waitq * sctp_addr_wq MUST be locked */ -static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entry *addr) +static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct net *net, + struct sctp_sockaddr_entry *addr) { struct sctp_sockaddr_entry *addrw; - list_for_each_entry(addrw, &sctp_addr_waitq, list) { + list_for_each_entry(addrw, &net->sctp.addr_waitq, list) { if (addrw->a.sa.sa_family != addr->a.sa.sa_family) continue; if (addrw->a.sa.sa_family == AF_INET) { @@ -719,7 +691,7 @@ static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entr return NULL; } -void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd) +void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cmd) { struct sctp_sockaddr_entry *addrw; unsigned long timeo_val; @@ -730,38 +702,38 @@ void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd) * new address after a couple of addition and deletion of that address */ - spin_lock_bh(&sctp_addr_wq_lock); + spin_lock_bh(&net->sctp.addr_wq_lock); /* Offsets existing events in addr_wq */ - addrw = sctp_addr_wq_lookup(addr); + addrw = sctp_addr_wq_lookup(net, addr); if (addrw) { if (addrw->state != cmd) { SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ", " in wq %p\n", addrw->state, &addrw->a, - &sctp_addr_waitq); + &net->sctp.addr_waitq); list_del(&addrw->list); kfree(addrw); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); return; } /* OK, we have to add the new address to the wait queue */ addrw = kmemdup(addr, sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); if (addrw == NULL) { - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); return; } addrw->state = cmd; - list_add_tail(&addrw->list, &sctp_addr_waitq); + list_add_tail(&addrw->list, &net->sctp.addr_waitq); SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ", - " in wq %p\n", addrw->state, &addrw->a, &sctp_addr_waitq); + " in wq %p\n", addrw->state, &addrw->a, &net->sctp.addr_waitq); - if (!timer_pending(&sctp_addr_wq_timer)) { + if (!timer_pending(&net->sctp.addr_wq_timer)) { timeo_val = jiffies; timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); - mod_timer(&sctp_addr_wq_timer, timeo_val); + mod_timer(&net->sctp.addr_wq_timer, timeo_val); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); } /* Event handler for inet address addition/deletion events. 
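
sctp_addr_wq_mgmt() above implements event coalescing: if an address is removed while its ADD event is still queued (or re-added while its DEL is queued), the pending entry is deleted instead of queueing a contradicting second event, and the timer is armed only when one is not already pending. A self-contained model of the coalescing rule, simplified to string addresses and a singly linked list:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { ADDR_NEW, ADDR_DEL };

struct waitq_entry {
        char addr[16];
        int state;              /* ADDR_NEW or ADDR_DEL */
        struct waitq_entry *next;
};

static struct waitq_entry *waitq;

static struct waitq_entry *lookup(const char *addr)
{
        for (struct waitq_entry *e = waitq; e; e = e->next)
                if (!strcmp(e->addr, addr))
                        return e;
        return NULL;
}

/* Modeled on sctp_addr_wq_mgmt(): an event that contradicts a
 * still-pending one cancels it, so a quick add+del (or del+add) of
 * the same address generates no deferred work at all. */
static void addr_wq_mgmt(const char *addr, int state)
{
        struct waitq_entry *e = lookup(addr);

        if (e) {
                if (e->state != state) {        /* opposite event pending */
                        struct waitq_entry **p = &waitq;
                        while (*p != e)
                                p = &(*p)->next;
                        *p = e->next;
                        free(e);
                }
                return;                 /* same-state duplicate: ignore */
        }
        e = malloc(sizeof(*e));
        if (!e)
                return;
        strncpy(e->addr, addr, sizeof(e->addr) - 1);
        e->addr[sizeof(e->addr) - 1] = '\0';
        e->state = state;
        e->next = waitq;
        waitq = e;
}

int main(void)
{
        addr_wq_mgmt("192.0.2.1", ADDR_NEW);
        addr_wq_mgmt("192.0.2.1", ADDR_DEL);    /* cancels the NEW above */
        addr_wq_mgmt("192.0.2.2", ADDR_NEW);

        for (struct waitq_entry *e = waitq; e; e = e->next)
                printf("pending: %s %s\n", e->addr,
                       e->state == ADDR_NEW ? "NEW" : "DEL");
        return 0;       /* only 192.0.2.2 NEW remains */
}
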
@@ -776,11 +748,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; + struct net *net = dev_net(ifa->ifa_dev->dev); int found = 0; - if (!net_eq(dev_net(ifa->ifa_dev->dev), &init_net)) - return NOTIFY_DONE; - switch (ev) { case NETDEV_UP: addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); @@ -789,27 +759,27 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; - spin_lock_bh(&sctp_local_addr_lock); - list_add_tail_rcu(&addr->list, &sctp_local_addr_list); - sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); - spin_unlock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); + list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW); + spin_unlock_bh(&net->sctp.local_addr_lock); } break; case NETDEV_DOWN: - spin_lock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); list_for_each_entry_safe(addr, temp, - &sctp_local_addr_list, list) { + &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET && addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { - sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); break; } } - spin_unlock_bh(&sctp_local_addr_lock); + spin_unlock_bh(&net->sctp.local_addr_lock); if (found) kfree_rcu(addr, rcu); break; @@ -822,7 +792,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, * Initialize the control inode/socket with a control endpoint data * structure. This endpoint is reserved exclusively for the OOTB processing. */ -static int sctp_ctl_sock_init(void) +static int sctp_ctl_sock_init(struct net *net) { int err; sa_family_t family = PF_INET; @@ -830,14 +800,14 @@ static int sctp_ctl_sock_init(void) if (sctp_get_pf_specific(PF_INET6)) family = PF_INET6; - err = inet_ctl_sock_create(&sctp_ctl_sock, family, - SOCK_SEQPACKET, IPPROTO_SCTP, &init_net); + err = inet_ctl_sock_create(&net->sctp.ctl_sock, family, + SOCK_SEQPACKET, IPPROTO_SCTP, net); /* If IPv6 socket could not be created, try the IPv4 socket */ if (err < 0 && family == PF_INET6) - err = inet_ctl_sock_create(&sctp_ctl_sock, AF_INET, + err = inet_ctl_sock_create(&net->sctp.ctl_sock, AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP, - &init_net); + net); if (err < 0) { pr_err("Failed to create the SCTP control socket\n"); @@ -990,7 +960,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT; - SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); + SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); return ip_queue_xmit(skb, &transport->fl); } @@ -1063,6 +1033,7 @@ static const struct net_protocol sctp_protocol = { .handler = sctp_rcv, .err_handler = sctp_v4_err, .no_policy = 1, + .netns_ok = 1, }; /* IPv4 address related functions. 
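
sctp_ctl_sock_init() in this hunk prefers a PF_INET6 control socket and falls back to IPv4 only if the v6 socket cannot be created. The same try-v6-then-v4 shape in userspace, using plain socket(2); on hosts without SCTP support both attempts fail and the sketch simply reports it:

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>

/* Prefer an IPv6 socket (which can also carry v4-mapped traffic);
 * fall back to plain IPv4 if v6 is unavailable. */
static int ctl_sock_create(void)
{
        int fd = socket(AF_INET6, SOCK_SEQPACKET, IPPROTO_SCTP);

        if (fd < 0)
                fd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
        return fd;
}

int main(void)
{
        int fd = ctl_sock_create();

        if (fd < 0) {
                /* e.g. no SCTP support on this host */
                fprintf(stderr, "no SCTP control socket: %s\n",
                        strerror(errno));
                return 1;
        }
        printf("control socket fd %d\n", fd);
        close(fd);
        return 0;
}
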
*/ @@ -1130,16 +1101,16 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) return 1; } -static inline int init_sctp_mibs(void) +static inline int init_sctp_mibs(struct net *net) { - return snmp_mib_init((void __percpu **)sctp_statistics, + return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics, sizeof(struct sctp_mib), __alignof__(struct sctp_mib)); } -static inline void cleanup_sctp_mibs(void) +static inline void cleanup_sctp_mibs(struct net *net) { - snmp_mib_free((void __percpu **)sctp_statistics); + snmp_mib_free((void __percpu **)net->sctp.sctp_statistics); } static void sctp_v4_pf_init(void) @@ -1194,6 +1165,143 @@ static void sctp_v4_del_protocol(void) unregister_inetaddr_notifier(&sctp_inetaddr_notifier); } +static int sctp_net_init(struct net *net) +{ + int status; + + /* + * 14. Suggested SCTP Protocol Parameter Values + */ + /* The following protocol parameters are RECOMMENDED: */ + /* RTO.Initial - 3 seconds */ + net->sctp.rto_initial = SCTP_RTO_INITIAL; + /* RTO.Min - 1 second */ + net->sctp.rto_min = SCTP_RTO_MIN; + /* RTO.Max - 60 seconds */ + net->sctp.rto_max = SCTP_RTO_MAX; + /* RTO.Alpha - 1/8 */ + net->sctp.rto_alpha = SCTP_RTO_ALPHA; + /* RTO.Beta - 1/4 */ + net->sctp.rto_beta = SCTP_RTO_BETA; + + /* Valid.Cookie.Life - 60 seconds */ + net->sctp.valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; + + /* Whether Cookie Preservative is enabled(1) or not(0) */ + net->sctp.cookie_preserve_enable = 1; + + /* Max.Burst - 4 */ + net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; + + /* Association.Max.Retrans - 10 attempts + * Path.Max.Retrans - 5 attempts (per destination address) + * Max.Init.Retransmits - 8 attempts + */ + net->sctp.max_retrans_association = 10; + net->sctp.max_retrans_path = 5; + net->sctp.max_retrans_init = 8; + + /* Sendbuffer growth - do per-socket accounting */ + net->sctp.sndbuf_policy = 0; + + /* Rcvbuffer growth - do per-socket accounting */ + net->sctp.rcvbuf_policy = 0; + + /* HB.interval - 30 seconds */ + net->sctp.hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT; + + /* delayed SACK timeout */ + net->sctp.sack_timeout = SCTP_DEFAULT_TIMEOUT_SACK; + + /* Disable ADDIP by default. */ + net->sctp.addip_enable = 0; + net->sctp.addip_noauth = 0; + net->sctp.default_auto_asconf = 0; + + /* Enable PR-SCTP by default. */ + net->sctp.prsctp_enable = 1; + + /* Disable AUTH by default. */ + net->sctp.auth_enable = 0; + + /* Set SCOPE policy to enabled */ + net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE; + + /* Set the default rwnd update threshold */ + net->sctp.rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; + + /* Initialize maximum autoclose timeout. */ + net->sctp.max_autoclose = INT_MAX / HZ; + + status = sctp_sysctl_net_register(net); + if (status) + goto err_sysctl_register; + + /* Allocate and initialise sctp mibs. */ + status = init_sctp_mibs(net); + if (status) + goto err_init_mibs; + + /* Initialize proc fs directory. */ + status = sctp_proc_init(net); + if (status) + goto err_init_proc; + + sctp_dbg_objcnt_init(net); + + /* Initialize the control inode/socket for handling OOTB packets. */ + if ((status = sctp_ctl_sock_init(net))) { + pr_err("Failed to initialize the SCTP control sock\n"); + goto err_ctl_sock_init; + } + + /* Initialize the local address list. 
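
sctp_net_init(), introduced in the next hunk, seeds every new namespace with the suggested protocol parameter values quoted there, so tuning one namespace's sysctls can no longer leak into another. A sketch of that seeding with illustrative field names (times in milliseconds rather than jiffies):

#include <stdio.h>

/* Per-namespace tunables, seeded from the RFC's suggested values as
 * listed in the hunk below. Field names here are illustrative, not
 * the kernel's. */
struct sctp_net_params {
        unsigned rto_initial;           /* RTO.Initial - 3 seconds  */
        unsigned rto_min;               /* RTO.Min     - 1 second   */
        unsigned rto_max;               /* RTO.Max     - 60 seconds */
        unsigned max_burst;             /* Max.Burst   - 4          */
        unsigned assoc_max_retrans;     /* 10 attempts              */
        unsigned path_max_retrans;      /* 5 per destination        */
        unsigned max_init_retrans;      /* 8 attempts               */
        unsigned hb_interval;           /* HB.interval - 30 seconds */
};

static void net_params_init(struct sctp_net_params *p)
{
        p->rto_initial       = 3000;
        p->rto_min           = 1000;
        p->rto_max           = 60000;
        p->max_burst         = 4;
        p->assoc_max_retrans = 10;
        p->path_max_retrans  = 5;
        p->max_init_retrans  = 8;
        p->hb_interval       = 30000;
}

int main(void)
{
        struct sctp_net_params a, b;

        net_params_init(&a);
        net_params_init(&b);
        b.rto_min = 200;        /* tune one namespace... */

        /* ...without disturbing the other. */
        printf("a.rto_min=%u b.rto_min=%u\n", a.rto_min, b.rto_min);
        return 0;
}
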
*/ + INIT_LIST_HEAD(&net->sctp.local_addr_list); + spin_lock_init(&net->sctp.local_addr_lock); + sctp_get_local_addr_list(net); + + /* Initialize the address event list */ + INIT_LIST_HEAD(&net->sctp.addr_waitq); + INIT_LIST_HEAD(&net->sctp.auto_asconf_splist); + spin_lock_init(&net->sctp.addr_wq_lock); + net->sctp.addr_wq_timer.expires = 0; + setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, + (unsigned long)net); + + return 0; + +err_ctl_sock_init: + sctp_dbg_objcnt_exit(net); + sctp_proc_exit(net); +err_init_proc: + cleanup_sctp_mibs(net); +err_init_mibs: + sctp_sysctl_net_unregister(net); +err_sysctl_register: + return status; +} + +static void sctp_net_exit(struct net *net) +{ + /* Free the local address list */ + sctp_free_addr_wq(net); + sctp_free_local_addr_list(net); + + /* Free the control endpoint. */ + inet_ctl_sock_destroy(net->sctp.ctl_sock); + + sctp_dbg_objcnt_exit(net); + + sctp_proc_exit(net); + cleanup_sctp_mibs(net); + sctp_sysctl_net_unregister(net); +} + +static struct pernet_operations sctp_net_ops = { + .init = sctp_net_init, + .exit = sctp_net_exit, +}; + /* Initialize the universe into something sensible. */ SCTP_STATIC __init int sctp_init(void) { @@ -1224,62 +1332,9 @@ SCTP_STATIC __init int sctp_init(void) if (!sctp_chunk_cachep) goto err_chunk_cachep; - /* Allocate and initialise sctp mibs. */ - status = init_sctp_mibs(); + status = percpu_counter_init(&sctp_sockets_allocated, 0); if (status) - goto err_init_mibs; - - /* Initialize proc fs directory. */ - status = sctp_proc_init(); - if (status) - goto err_init_proc; - - /* Initialize object count debugging. */ - sctp_dbg_objcnt_init(); - - /* - * 14. Suggested SCTP Protocol Parameter Values - */ - /* The following protocol parameters are RECOMMENDED: */ - /* RTO.Initial - 3 seconds */ - sctp_rto_initial = SCTP_RTO_INITIAL; - /* RTO.Min - 1 second */ - sctp_rto_min = SCTP_RTO_MIN; - /* RTO.Max - 60 seconds */ - sctp_rto_max = SCTP_RTO_MAX; - /* RTO.Alpha - 1/8 */ - sctp_rto_alpha = SCTP_RTO_ALPHA; - /* RTO.Beta - 1/4 */ - sctp_rto_beta = SCTP_RTO_BETA; - - /* Valid.Cookie.Life - 60 seconds */ - sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; - - /* Whether Cookie Preservative is enabled(1) or not(0) */ - sctp_cookie_preserve_enable = 1; - - /* Max.Burst - 4 */ - sctp_max_burst = SCTP_DEFAULT_MAX_BURST; - - /* Association.Max.Retrans - 10 attempts - * Path.Max.Retrans - 5 attempts (per destination address) - * Max.Init.Retransmits - 8 attempts - */ - sctp_max_retrans_association = 10; - sctp_max_retrans_path = 5; - sctp_max_retrans_init = 8; - - /* Sendbuffer growth - do per-socket accounting */ - sctp_sndbuf_policy = 0; - - /* Rcvbuffer growth - do per-socket accounting */ - sctp_rcvbuf_policy = 0; - - /* HB.interval - 30 seconds */ - sctp_hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT; - - /* delayed SACK timeout */ - sctp_sack_timeout = SCTP_DEFAULT_TIMEOUT_SACK; + goto err_percpu_counter_init; /* Implementation specific variables. */ @@ -1287,9 +1342,6 @@ SCTP_STATIC __init int sctp_init(void) sctp_max_instreams = SCTP_DEFAULT_INSTREAMS; sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS; - /* Initialize maximum autoclose timeout. */ - sctp_max_autoclose = INT_MAX / HZ; - /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); @@ -1376,41 +1428,12 @@ SCTP_STATIC __init int sctp_init(void) pr_info("Hash tables configured (established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); - /* Disable ADDIP by default. 
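
register_pernet_subsys(&sctp_net_ops), called in the next hunk, is what makes the .init/.exit pair above run: .init fires for every namespace that already exists at registration time and for each one created later, .exit at namespace teardown. A toy registry modeling that contract, including the unwind when init fails part-way through the existing namespaces:

#include <stdio.h>

struct net { int id; };

struct pernet_ops {
        int  (*init)(struct net *);
        void (*exit)(struct net *);
};

#define MAX_NETS 8
static struct net *nets[MAX_NETS];
static int n_nets;
static struct pernet_ops *subsys;       /* one registered subsystem */

/* Modeled on register_pernet_subsys(): the init hook runs
 * immediately for namespaces that already exist, and will run again
 * for every namespace created later. */
static int register_pernet(struct pernet_ops *ops)
{
        for (int i = 0; i < n_nets; i++) {
                int err = ops->init(nets[i]);
                if (err) {
                        while (i--)     /* unwind the ones done */
                                ops->exit(nets[i]);
                        return err;
                }
        }
        subsys = ops;
        return 0;
}

static void add_net(struct net *net)
{
        nets[n_nets++] = net;
        if (subsys)
                subsys->init(net);
}

static int  my_init(struct net *net) { printf("init net %d\n", net->id); return 0; }
static void my_exit(struct net *net) { printf("exit net %d\n", net->id); }

static struct pernet_ops my_ops = { my_init, my_exit };

int main(void)
{
        struct net n1 = { 1 }, n2 = { 2 };

        add_net(&n1);                   /* exists before registration */
        register_pernet(&my_ops);       /* init runs for n1 */
        add_net(&n2);                   /* init runs for n2 on creation */

        subsys->exit(&n2);              /* namespace teardown */
        subsys->exit(&n1);
        return 0;
}
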
*/ - sctp_addip_enable = 0; - sctp_addip_noauth = 0; - sctp_default_auto_asconf = 0; - - /* Enable PR-SCTP by default. */ - sctp_prsctp_enable = 1; - - /* Disable AUTH by default. */ - sctp_auth_enable = 0; - - /* Set SCOPE policy to enabled */ - sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE; - - /* Set the default rwnd update threshold */ - sctp_rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; - sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); sctp_v4_pf_init(); sctp_v6_pf_init(); - /* Initialize the local address list. */ - INIT_LIST_HEAD(&sctp_local_addr_list); - spin_lock_init(&sctp_local_addr_lock); - sctp_get_local_addr_list(); - - /* Initialize the address event list */ - INIT_LIST_HEAD(&sctp_addr_waitq); - INIT_LIST_HEAD(&sctp_auto_asconf_splist); - spin_lock_init(&sctp_addr_wq_lock); - sctp_addr_wq_timer.expires = 0; - setup_timer(&sctp_addr_wq_timer, sctp_addr_wq_timeout_handler, 0); - status = sctp_v4_protosw_init(); if (status) @@ -1420,11 +1443,9 @@ SCTP_STATIC __init int sctp_init(void) if (status) goto err_v6_protosw_init; - /* Initialize the control inode/socket for handling OOTB packets. */ - if ((status = sctp_ctl_sock_init())) { - pr_err("Failed to initialize the SCTP control sock\n"); - goto err_ctl_sock_init; - } + status = register_pernet_subsys(&sctp_net_ops); + if (status) + goto err_register_pernet_subsys; status = sctp_v4_add_protocol(); if (status) @@ -1441,13 +1462,12 @@ out: err_v6_add_protocol: sctp_v4_del_protocol(); err_add_protocol: - inet_ctl_sock_destroy(sctp_ctl_sock); -err_ctl_sock_init: + unregister_pernet_subsys(&sctp_net_ops); +err_register_pernet_subsys: sctp_v6_protosw_exit(); err_v6_protosw_init: sctp_v4_protosw_exit(); err_protosw_init: - sctp_free_local_addr_list(); sctp_v4_pf_exit(); sctp_v6_pf_exit(); sctp_sysctl_unregister(); @@ -1461,11 +1481,8 @@ err_ehash_alloc: get_order(sctp_assoc_hashsize * sizeof(struct sctp_hashbucket))); err_ahash_alloc: - sctp_dbg_objcnt_exit(); - sctp_proc_exit(); -err_init_proc: - cleanup_sctp_mibs(); -err_init_mibs: + percpu_counter_destroy(&sctp_sockets_allocated); +err_percpu_counter_init: kmem_cache_destroy(sctp_chunk_cachep); err_chunk_cachep: kmem_cache_destroy(sctp_bucket_cachep); @@ -1482,18 +1499,13 @@ SCTP_STATIC __exit void sctp_exit(void) /* Unregister with inet6/inet layers. */ sctp_v6_del_protocol(); sctp_v4_del_protocol(); - sctp_free_addr_wq(); - /* Free the control endpoint. */ - inet_ctl_sock_destroy(sctp_ctl_sock); + unregister_pernet_subsys(&sctp_net_ops); /* Free protosw registrations */ sctp_v6_protosw_exit(); sctp_v4_protosw_exit(); - /* Free the local address list. */ - sctp_free_local_addr_list(); - /* Unregister with socket layer. 
*/ sctp_v6_pf_exit(); sctp_v4_pf_exit(); @@ -1508,9 +1520,7 @@ SCTP_STATIC __exit void sctp_exit(void) get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); - sctp_dbg_objcnt_exit(); - sctp_proc_exit(); - cleanup_sctp_mibs(); + percpu_counter_destroy(&sctp_sockets_allocated); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 479a70ef6ff8..fbe1636309a7 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -198,6 +198,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, const struct sctp_bind_addr *bp, gfp_t gfp, int vparam_len) { + struct net *net = sock_net(asoc->base.sk); sctp_inithdr_t init; union sctp_params addrs; size_t chunksize; @@ -237,7 +238,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); - if (sctp_prsctp_enable) + if (net->sctp.prsctp_enable) chunksize += sizeof(prsctp_param); /* ADDIP: Section 4.2.7: @@ -245,7 +246,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and * INIT-ACK parameters. */ - if (sctp_addip_enable) { + if (net->sctp.addip_enable) { extensions[num_ext] = SCTP_CID_ASCONF; extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; num_ext += 2; @@ -257,7 +258,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += vparam_len; /* Account for AUTH related parameters */ - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { /* Add random parameter length*/ chunksize += sizeof(asoc->c.auth_random); @@ -331,7 +332,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_addto_param(retval, num_ext, extensions); } - if (sctp_prsctp_enable) + if (net->sctp.prsctp_enable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); if (sp->adaptation_ind) { @@ -342,7 +343,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, } /* Add SCTP-AUTH chunks to the parameter list */ - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { sctp_addto_chunk(retval, sizeof(asoc->c.auth_random), asoc->c.auth_random); if (auth_hmacs) @@ -1940,7 +1941,7 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, return 0; } -static int sctp_verify_ext_param(union sctp_params param) +static int sctp_verify_ext_param(struct net *net, union sctp_params param) { __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); int have_auth = 0; @@ -1964,10 +1965,10 @@ static int sctp_verify_ext_param(union sctp_params param) * only if ADD-IP is turned on and we are not backward-compatible * mode. 
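
sctp_make_init() in the hunk below is a two-pass builder: the first pass sums the size of every optional parameter whose net->sctp.* feature flag is on, the second emits the parameters under exactly the same conditions; any mismatch between the passes would under- or over-fill the chunk. A compileable miniature of the pattern (a loose TLV format; WORD_ROUND padding of odd-length values is omitted for brevity):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Toy TLV parameter, loosely shaped like an SCTP INIT parameter:
 * 2-byte type, 2-byte length covering the header, then value. */
static size_t put_param(uint8_t *buf, uint16_t type,
                        const void *val, uint16_t vlen)
{
        uint16_t len = 4 + vlen;

        buf[0] = type >> 8; buf[1] = type & 0xff;
        buf[2] = len  >> 8; buf[3] = len  & 0xff;
        memcpy(buf + 4, val, vlen);
        return len;
}

int main(void)
{
        int prsctp_enable = 1;  /* per-net feature flags */
        int auth_enable   = 0;
        uint8_t chunk[128];
        size_t need = 0, off = 0;

        /* Pass 1: size the chunk. Every conditional here must be
         * mirrored exactly in pass 2. */
        need += 4 + 4;                  /* mandatory base parameter  */
        if (prsctp_enable) need += 4;   /* FWD-TSN support, no value */
        if (auth_enable)   need += 4 + 2;

        /* Pass 2: emit the parameters under the same conditions. */
        off += put_param(chunk + off, 1, "\x00\x00\x00\x00", 4);
        if (prsctp_enable)
                off += put_param(chunk + off, 0xc000, "", 0);
        if (auth_enable)
                off += put_param(chunk + off, 0x8002, "\x00\x01", 2);

        printf("sized %zu, emitted %zu -> %s\n", need, off,
               need == off ? "consistent" : "BUG");
        return 0;
}
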
*/ - if (sctp_addip_noauth) + if (net->sctp.addip_noauth) return 1; - if (sctp_addip_enable && !have_auth && have_asconf) + if (net->sctp.addip_enable && !have_auth && have_asconf) return 0; return 1; @@ -1976,13 +1977,14 @@ static int sctp_verify_ext_param(union sctp_params param) static void sctp_process_ext_param(struct sctp_association *asoc, union sctp_params param) { + struct net *net = sock_net(asoc->base.sk); __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); int i; for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { case SCTP_CID_FWD_TSN: - if (sctp_prsctp_enable && + if (net->sctp.prsctp_enable && !asoc->peer.prsctp_capable) asoc->peer.prsctp_capable = 1; break; @@ -1990,12 +1992,12 @@ static void sctp_process_ext_param(struct sctp_association *asoc, /* if the peer reports AUTH, assume that he * supports AUTH. */ - if (sctp_auth_enable) + if (net->sctp.auth_enable) asoc->peer.auth_capable = 1; break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: - if (sctp_addip_enable) + if (net->sctp.addip_enable) asoc->peer.asconf_capable = 1; break; default: @@ -2081,7 +2083,8 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, * SCTP_IERROR_ERROR - stop processing, trigger an ERROR * SCTP_IERROR_NO_ERROR - continue with the chunk */ -static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, +static sctp_ierror_t sctp_verify_param(struct net *net, + const struct sctp_association *asoc, union sctp_params param, sctp_cid_t cid, struct sctp_chunk *chunk, @@ -2110,12 +2113,12 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, break; case SCTP_PARAM_SUPPORTED_EXT: - if (!sctp_verify_ext_param(param)) + if (!sctp_verify_ext_param(net, param)) return SCTP_IERROR_ABORT; break; case SCTP_PARAM_SET_PRIMARY: - if (sctp_addip_enable) + if (net->sctp.addip_enable) break; goto fallthrough; @@ -2126,12 +2129,12 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, break; case SCTP_PARAM_FWD_TSN_SUPPORT: - if (sctp_prsctp_enable) + if (net->sctp.prsctp_enable) break; goto fallthrough; case SCTP_PARAM_RANDOM: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fallthrough; /* SCTP-AUTH: Secion 6.1 @@ -2148,7 +2151,7 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, break; case SCTP_PARAM_CHUNKS: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fallthrough; /* SCTP-AUTH: Section 3.2 @@ -2164,7 +2167,7 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc, break; case SCTP_PARAM_HMAC_ALGO: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fallthrough; hmacs = (struct sctp_hmac_algo_param *)param.p; @@ -2198,7 +2201,7 @@ fallthrough: } /* Verify the INIT packet before we process it. 
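
sctp_verify_param() above is called once per variable-length parameter, but before any parameter body can be trusted the walk itself has to be safe: each TLV's length field must be at least the header size and must not run past the end of the chunk. A defensive walk in that spirit (standalone, big-endian 2-byte type and length as in SCTP):

#include <stdio.h>
#include <stdint.h>

/* Walk 4-byte-aligned TLV parameters, refusing to read past the end
 * of the buffer or to loop forever on a too-small length field. */
static int verify_params(const uint8_t *buf, size_t len)
{
        size_t off = 0;

        while (off + 4 <= len) {
                uint16_t plen = (buf[off + 2] << 8) | buf[off + 3];

                if (plen < 4)                   /* would never advance */
                        return -1;
                if (off + plen > len)           /* truncated parameter */
                        return -1;
                off += (plen + 3) & ~(size_t)3; /* word-round, as SCTP does */
        }
        return off == len ? 0 : -1;             /* trailing garbage? */
}

int main(void)
{
        uint8_t good[] = { 0x80,0x02, 0x00,0x06, 0xaa,0xbb, 0x00,0x00 };
        uint8_t bad[]  = { 0x80,0x02, 0x00,0x40, 0xaa,0xbb, 0x00,0x00 };

        printf("good: %d\n", verify_params(good, sizeof(good)));
        printf("bad:  %d\n", verify_params(bad,  sizeof(bad)));
        return 0;
}
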
*/ -int sctp_verify_init(const struct sctp_association *asoc, +int sctp_verify_init(struct net *net, const struct sctp_association *asoc, sctp_cid_t cid, sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk, @@ -2245,7 +2248,7 @@ int sctp_verify_init(const struct sctp_association *asoc, /* Verify all the variable length parameters */ sctp_walk_params(param, peer_init, init_hdr.params) { - result = sctp_verify_param(asoc, param, cid, chunk, errp); + result = sctp_verify_param(net, asoc, param, cid, chunk, errp); switch (result) { case SCTP_IERROR_ABORT: case SCTP_IERROR_NOMEM: @@ -2270,6 +2273,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, const union sctp_addr *peer_addr, sctp_init_chunk_t *peer_init, gfp_t gfp) { + struct net *net = sock_net(asoc->base.sk); union sctp_params param; struct sctp_transport *transport; struct list_head *pos, *temp; @@ -2326,7 +2330,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * also give us an option to silently ignore the packet, which * is what we'll do here. */ - if (!sctp_addip_noauth && + if (!net->sctp.addip_noauth && (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) { asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP | SCTP_PARAM_DEL_IP | @@ -2466,6 +2470,7 @@ static int sctp_process_param(struct sctp_association *asoc, const union sctp_addr *peer_addr, gfp_t gfp) { + struct net *net = sock_net(asoc->base.sk); union sctp_addr addr; int i; __u16 sat; @@ -2494,13 +2499,13 @@ do_addr_param: af = sctp_get_af_specific(param_type2af(param.p->type)); af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0); scope = sctp_scope(peer_addr); - if (sctp_in_scope(&addr, scope)) + if (sctp_in_scope(net, &addr, scope)) if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED)) return 0; break; case SCTP_PARAM_COOKIE_PRESERVATIVE: - if (!sctp_cookie_preserve_enable) + if (!net->sctp.cookie_preserve_enable) break; stale = ntohl(param.life->lifespan_increment); @@ -2580,7 +2585,7 @@ do_addr_param: break; case SCTP_PARAM_SET_PRIMARY: - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) goto fall_through; addr_param = param.v + sizeof(sctp_addip_param_t); @@ -2607,7 +2612,7 @@ do_addr_param: break; case SCTP_PARAM_FWD_TSN_SUPPORT: - if (sctp_prsctp_enable) { + if (net->sctp.prsctp_enable) { asoc->peer.prsctp_capable = 1; break; } @@ -2615,7 +2620,7 @@ do_addr_param: goto fall_through; case SCTP_PARAM_RANDOM: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fall_through; /* Save peer's random parameter */ @@ -2628,7 +2633,7 @@ do_addr_param: break; case SCTP_PARAM_HMAC_ALGO: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fall_through; /* Save peer's HMAC list */ @@ -2644,7 +2649,7 @@ do_addr_param: break; case SCTP_PARAM_CHUNKS: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fall_through; asoc->peer.peer_chunks = kmemdup(param.p, diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index fe99628e1257..bcfebb91559d 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -251,6 +251,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer) int error; struct sctp_transport *transport = (struct sctp_transport *) peer; struct sctp_association *asoc = transport->asoc; + struct net *net = sock_net(asoc->base.sk); /* Check whether a task is in the sock. */ @@ -271,7 +272,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer) goto out_unlock; /* Run through the state machine. 
*/ - error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT, + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX), asoc->state, asoc->ep, asoc, @@ -291,6 +292,7 @@ out_unlock: static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_event_timeout_t timeout_type) { + struct net *net = sock_net(asoc->base.sk); int error = 0; sctp_bh_lock_sock(asoc->base.sk); @@ -312,7 +314,7 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, goto out_unlock; /* Run through the state machine. */ - error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT, + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(timeout_type), asoc->state, asoc->ep, asoc, (void *)timeout_type, GFP_ATOMIC); @@ -371,6 +373,7 @@ void sctp_generate_heartbeat_event(unsigned long data) int error = 0; struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; + struct net *net = sock_net(asoc->base.sk); sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { @@ -388,7 +391,7 @@ void sctp_generate_heartbeat_event(unsigned long data) if (transport->dead) goto out_unlock; - error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT, + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT), asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); @@ -408,6 +411,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) { struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; + struct net *net = sock_net(asoc->base.sk); sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { @@ -426,7 +430,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) if (asoc->base.dead) goto out_unlock; - sctp_do_sm(SCTP_EVENT_T_OTHER, + sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); @@ -753,8 +757,10 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, int err = 0; if (sctp_outq_sack(&asoc->outqueue, sackh)) { + struct net *net = sock_net(asoc->base.sk); + /* There are no more TSNs awaiting SACK. */ - err = sctp_do_sm(SCTP_EVENT_T_OTHER, + err = sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN), asoc->state, asoc->ep, asoc, NULL, GFP_ATOMIC); @@ -1042,6 +1048,8 @@ static int sctp_cmd_send_msg(struct sctp_association *asoc, */ static void sctp_cmd_send_asconf(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); + /* Send the next asconf chunk from the addip chunk * queue. */ @@ -1053,7 +1061,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc) /* Hold the chunk until an ASCONF_ACK is received. */ sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(asoc, asconf)) + if (sctp_primitive_ASCONF(net, asoc, asconf)) sctp_chunk_free(asconf); else asoc->addip_last_asconf = asconf; @@ -1089,7 +1097,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc) * If you want to understand all of lksctp, this is a * good place to start. */ -int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, +int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_state_t state, struct sctp_endpoint *ep, struct sctp_association *asoc, @@ -1110,12 +1118,12 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, /* Look up the state function, run it, and then process the * side effects. These three steps are the heart of lksctp. 
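
The comment above names the three steps sctp_do_sm() performs: look the state function up from (event type, state, subtype), run it to fill a command sequence, then process those commands as side effects. A compact model of that dispatch loop, with invented events, states and commands:

#include <stdio.h>

enum event { EV_TIMEOUT, EV_CHUNK, EV_MAX };
enum state { ST_CLOSED, ST_ESTABLISHED, ST_MAX };

/* Commands queued by state functions and executed afterwards as side
 * effects (the kernel's sctp_cmd_seq_t, radically simplified). */
struct cmd_seq { int cmds[8]; int n; };
enum { CMD_SET_TIMER = 1, CMD_SEND_ABORT };

static void add_cmd(struct cmd_seq *q, int cmd) { q->cmds[q->n++] = cmd; }

typedef int (*state_fn_t)(struct cmd_seq *);

static int sf_rearm(struct cmd_seq *q)   { add_cmd(q, CMD_SET_TIMER); return 0; }
static int sf_discard(struct cmd_seq *q) { (void)q; return 0; }

/* Step 1 of three: the (event, state) -> handler table. */
static state_fn_t table[EV_MAX][ST_MAX] = {
        [EV_TIMEOUT] = { [ST_CLOSED] = sf_discard, [ST_ESTABLISHED] = sf_rearm },
        [EV_CHUNK]   = { [ST_CLOSED] = sf_discard, [ST_ESTABLISHED] = sf_discard },
};

static int do_sm(enum event ev, enum state st)
{
        struct cmd_seq q = { {0}, 0 };
        state_fn_t fn = table[ev][st];  /* 1. look up      */
        int status = fn(&q);            /* 2. run          */

        for (int i = 0; i < q.n; i++)   /* 3. side effects */
                printf("  side effect: cmd %d\n", q.cmds[i]);
        return status;
}

int main(void)
{
        printf("timeout in ESTABLISHED:\n");
        do_sm(EV_TIMEOUT, ST_ESTABLISHED);
        return 0;
}
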
*/ - state_fn = sctp_sm_lookup_event(event_type, state, subtype); + state_fn = sctp_sm_lookup_event(net, event_type, state, subtype); sctp_init_cmd_seq(&commands); DEBUG_PRE; - status = (*state_fn->fn)(ep, asoc, subtype, event_arg, &commands); + status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands); DEBUG_POST; error = sctp_side_effects(event_type, subtype, state, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9fca10357350..094813b6c3c3 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -66,7 +66,8 @@ #include <net/sctp/sm.h> #include <net/sctp/structs.h> -static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, +static struct sctp_packet *sctp_abort_pkt_new(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, const void *payload, @@ -74,36 +75,43 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, static int sctp_eat_data(const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands); -static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc, +static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, + const struct sctp_association *asoc, const struct sctp_chunk *chunk); -static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, +static void sctp_send_stale_cookie_err(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, struct sctp_chunk *err_chunk); -static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); -static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); -static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk); -static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, +static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, + sctp_cmd_seq_t *commands, __be16 error, int sk_err, const struct sctp_association *asoc, struct sctp_transport *transport); static sctp_disposition_t sctp_sf_abort_violation( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, void *arg, @@ -112,6 +120,7 @@ static sctp_disposition_t sctp_sf_abort_violation( const size_t paylen); static sctp_disposition_t sctp_sf_violation_chunklen( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -119,6 +128,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_paramlen( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -126,6 +136,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( 
sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_ctsn( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -133,18 +144,21 @@ static sctp_disposition_t sctp_sf_violation_ctsn( sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_chunk( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands); -static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep, +static sctp_ierror_t sctp_sf_authenticate(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, struct sctp_chunk *chunk); -static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, +static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -204,7 +218,8 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_4_C(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -214,7 +229,7 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, struct sctp_ulpevent *ev; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* RFC 2960 6.10 Bundling * @@ -222,11 +237,11 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, * SHUTDOWN COMPLETE with any other chunks. */ if (!chunk->singleton) - return sctp_sf_violation_chunk(ep, asoc, type, arg, commands); + return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* RFC 2960 10.2 SCTP-to-ULP @@ -259,8 +274,8 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); @@ -289,7 +304,8 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -313,21 +329,21 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, * with an INIT chunk that is bundled with other chunks. */ if (!chunk->singleton) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. 
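
sctp_sf_do_4_C() below shows the guard ladder that nearly every state function opens with: a bad verification tag means a silent discard, while a bundled INIT-class chunk or an undersized chunk is a protocol violation; each failure maps to a distinct disposition. The ladder in isolation:

#include <stdio.h>
#include <stddef.h>

enum disposition { DISP_CONSUME, DISP_DISCARD, DISP_VIOLATION };

struct chunk {
        unsigned vtag;
        int singleton;          /* only chunk in the packet? */
        size_t len;
};

/* Guard ladder in the order the state functions apply it: bad vtag
 * -> silent discard; bundled INIT-class chunk or short chunk ->
 * protocol violation. */
static enum disposition validate(const struct chunk *c,
                                 unsigned asoc_vtag, size_t min_len)
{
        if (c->vtag != asoc_vtag)
                return DISP_DISCARD;
        if (!c->singleton)
                return DISP_VIOLATION;
        if (c->len < min_len)
                return DISP_VIOLATION;
        return DISP_CONSUME;
}

int main(void)
{
        struct chunk ok    = { 42, 1, 16 };
        struct chunk runt  = { 42, 1, 2  };
        struct chunk spoof = { 7,  1, 16 };

        printf("%d %d %d\n",
               validate(&ok, 42, 4), validate(&runt, 42, 4),
               validate(&spoof, 42, 4));
        return 0;       /* prints 0 2 1 */
}
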
*/ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* Make sure that the INIT chunk has a valid length. * Normally, this would cause an ABORT with a Protocol Violation @@ -335,7 +351,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, * just discard the packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the INIT is coming toward a closing socket, we'll send back * and ABORT. Essentially, this catches the race of INIT being @@ -344,18 +360,18 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, * can treat this OOTB */ if (sctp_sstate(ep->base.sk, CLOSING)) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes if there is any. */ if (err_chunk) { - packet = sctp_abort_pkt_new(ep, asoc, arg, + packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + sizeof(sctp_chunkhdr_t), ntohs(err_chunk->chunk_hdr->length) - @@ -366,13 +382,13 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); return SCTP_DISPOSITION_CONSUME; } else { return SCTP_DISPOSITION_NOMEM; } } else { - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } } @@ -484,7 +500,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -496,25 +513,25 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, struct sctp_packet *packet; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* 6.10 Bundling * An endpoint MUST NOT bundle INIT, INIT ACK or * SHUTDOWN COMPLETE with any other chunks. 
*/ if (!chunk->singleton) - return sctp_sf_violation_chunk(ep, asoc, type, arg, commands); + return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the INIT-ACK chunk has a valid length */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data; /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { @@ -526,7 +543,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, * the association. */ if (err_chunk) { - packet = sctp_abort_pkt_new(ep, asoc, arg, + packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + sizeof(sctp_chunkhdr_t), ntohs(err_chunk->chunk_hdr->length) - @@ -537,7 +554,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); error = SCTP_ERROR_INV_PARAM; } } @@ -554,10 +571,10 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, * was malformed. */ if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc, chunk->transport); } @@ -633,7 +650,8 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -650,9 +668,9 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } /* Make sure that the COOKIE_ECHO chunk has a valid length. @@ -661,7 +679,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, * in sctp_unpack_cookie(). 
*/ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* If the endpoint is not listening or if the number of associations * on the TCP-style socket exceed the max backlog, respond with an @@ -670,7 +688,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, sk = ep->base.sk; if (!sctp_sstate(sk, LISTENING) || (sctp_style(sk, TCP) && sk_acceptq_is_full(sk))) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* "Decode" the chunk. We have no optional parameters so we * are in good shape. @@ -703,13 +721,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, goto nomem; case -SCTP_IERROR_STALE_COOKIE: - sctp_send_stale_cookie_err(ep, asoc, chunk, commands, + sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands, err_chk_p); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); case -SCTP_IERROR_BAD_SIG: default: - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } } @@ -756,14 +774,14 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t)); auth.transport = chunk->transport; - ret = sctp_sf_authenticate(ep, new_asoc, type, &auth); + ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth); /* We can now safely free the auth_chunk clone */ kfree_skb(chunk->auth_chunk); if (ret != SCTP_IERROR_NO_ERROR) { sctp_association_free(new_asoc); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } } @@ -804,8 +822,8 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); if (new_asoc->autoclose) @@ -856,7 +874,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -865,13 +884,13 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, struct sctp_ulpevent *ev; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Verify that the chunk length for the COOKIE-ACK is OK. * If we don't do this, any bundled chunks may be junked. 
*/ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Reset init error count upon receipt of COOKIE-ACK, @@ -892,8 +911,8 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_ACTIVEESTABS); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); if (asoc->autoclose) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, @@ -958,7 +977,8 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep, } /* Generate a HEARTBEAT packet on the given transport. */ -sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -972,8 +992,8 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } @@ -1028,7 +1048,8 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_beat_8_3(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1039,11 +1060,11 @@ sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep, size_t paylen = 0; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* 8.3 The receiver of the HEARTBEAT should immediately @@ -1095,7 +1116,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1108,12 +1130,12 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, unsigned long max_interval; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT-ACK chunk has a valid length. 
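
sctp_sf_backbeat_8_3(), whose conversion continues below, checks the echoed heartbeat info before crediting the transport. The sketch assumes the staleness rule is "older than one heartbeat interval plus the maximum RTO", which approximates the max_interval test in that function (the exact kernel bound uses the transport's current RTO, not the maximum):

#include <stdio.h>

/* A HEARTBEAT ACK echoes back the info block we sent. An ACK whose
 * timestamp is older than the allowed window is treated as stale and
 * ignored (assumption: this mirrors the max_interval check in
 * sctp_sf_backbeat_8_3). */
static int hb_ack_fresh(unsigned long sent_at, unsigned long now,
                        unsigned long hb_interval, unsigned long rto_max)
{
        unsigned long max_interval = hb_interval + rto_max;

        return now - sent_at <= max_interval;
}

int main(void)
{
        unsigned long now = 100000;

        printf("fresh: %d\n", hb_ack_fresh(now - 5000,  now, 30000, 60000));
        printf("stale: %d\n", hb_ack_fresh(now - 95000, now, 30000, 60000));
        return 0;
}
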
*/ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) + sizeof(sctp_sender_hb_info_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data; @@ -1171,7 +1193,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, /* Helper function to send out an abort for the restart * condition. */ -static int sctp_sf_send_restart_abort(union sctp_addr *ssa, +static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa, struct sctp_chunk *init, sctp_cmd_seq_t *commands) { @@ -1197,18 +1219,18 @@ static int sctp_sf_send_restart_abort(union sctp_addr *ssa, errhdr->length = htons(len); /* Assign to the control socket. */ - ep = sctp_sk((sctp_get_ctl_sock()))->ep; + ep = sctp_sk(net->sctp.ctl_sock)->ep; /* Association is NULL since this may be a restart attack and we * want to send back the attacker's vtag. */ - pkt = sctp_abort_pkt_new(ep, NULL, init, errhdr, len); + pkt = sctp_abort_pkt_new(net, ep, NULL, init, errhdr, len); if (!pkt) goto out; sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(pkt)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); /* Discard the rest of the inbound packet. */ sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); @@ -1240,6 +1262,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, struct sctp_chunk *init, sctp_cmd_seq_t *commands) { + struct net *net = sock_net(new_asoc->base.sk); struct sctp_transport *new_addr; int ret = 1; @@ -1258,7 +1281,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, transports) { if (!list_has_sctp_addr(&asoc->peer.transport_addr_list, &new_addr->ipaddr)) { - sctp_sf_send_restart_abort(&new_addr->ipaddr, init, + sctp_sf_send_restart_abort(net, &new_addr->ipaddr, init, commands); ret = 0; break; @@ -1358,6 +1381,7 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc, * chunk handling. */ static sctp_disposition_t sctp_sf_do_unexpected_init( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -1382,20 +1406,20 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( * with an INIT chunk that is bundled with other chunks. */ if (!chunk->singleton) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); /* Make sure that the INIT chunk has a valid length. * In this case, we generate a protocol violation since we have * an association established. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data; @@ -1405,14 +1429,14 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( /* Verify the INIT chunk before processing it. 
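[editor's note] Where a helper has no net argument of its own, sctp_sf_check_restart_addrs above recovers it from a socket it already holds, net = sock_net(new_asoc->base.sk). The sketch below models only that derivation; the toy struct sock is an assumption, not the kernel layout:

#include <stdio.h>

struct net { int id; };
struct sock { struct net *sk_net; };            /* toy stand-in */

static inline struct net *sock_net(const struct sock *sk)
{
	return sk->sk_net;
}

int main(void)
{
	struct net ns = { 7 };
	struct sock base_sk = { &ns };

	/* a helper with no net parameter recovers it from its
	 * association's base socket, as the restart-abort path does */
	printf("netns id %d\n", sock_net(&base_sk)->id);
	return 0;
}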
*/ err_chunk = NULL; - if (!sctp_verify_init(asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes if there is any. */ if (err_chunk) { - packet = sctp_abort_pkt_new(ep, asoc, arg, + packet = sctp_abort_pkt_new(net, ep, asoc, arg, (__u8 *)(err_chunk->chunk_hdr) + sizeof(sctp_chunkhdr_t), ntohs(err_chunk->chunk_hdr->length) - @@ -1421,14 +1445,14 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); retval = SCTP_DISPOSITION_CONSUME; } else { retval = SCTP_DISPOSITION_NOMEM; } goto cleanup; } else { - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } } @@ -1570,7 +1594,8 @@ cleanup: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1579,7 +1604,7 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep, /* Call helper to do the real work for both simulataneous and * duplicate INIT chunk handling. */ - return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands); + return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands); } /* @@ -1623,7 +1648,8 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1632,7 +1658,7 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep, /* Call helper to do the real work for both simulataneous and * duplicate INIT chunk handling. */ - return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands); + return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands); } @@ -1645,7 +1671,8 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep, * An unexpected INIT ACK usually indicates the processing of an old or * duplicated INIT chunk. */ -sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -1653,10 +1680,10 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep, /* Per the above section, we'll discard the chunk if we have an * endpoint. If this is an OOTB INIT-ACK, treat it as such. 
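[editor's note] The front half of sctp_sf_do_unexpected_init is a short gauntlet of sanity gates, each gate mapping to a different disposition. A compilable summary of that ladder; the field names on the toy chunk are assumptions:

#include <stdio.h>

struct chunk {                     /* toy projection of struct sctp_chunk */
	int singleton;             /* INIT must be alone in the packet */
	unsigned int vtag;         /* verification tag from the common header */
	unsigned int length;
};

enum disp { DISP_PDISCARD, DISP_TABORT, DISP_VIOLATION, DISP_PROCESS };

static enum disp classify_unexpected_init(const struct chunk *c,
					  unsigned int min_len)
{
	if (!c->singleton)         /* bundled INIT: silently discard */
		return DISP_PDISCARD;
	if (c->vtag != 0)          /* RFC 4960 3.1: INIT carries vtag 0 */
		return DISP_TABORT;
	if (c->length < min_len)   /* short chunk: protocol violation */
		return DISP_VIOLATION;
	return DISP_PROCESS;
}

int main(void)
{
	struct chunk c = { 1, 0, 20 };
	printf("%d\n", classify_unexpected_init(&c, 20)); /* 3 = PROCESS */
	return 0;
}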
*/ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) - return sctp_sf_ootb(ep, asoc, type, arg, commands); + if (ep == sctp_sk(net->sctp.ctl_sock)->ep) + return sctp_sf_ootb(net, ep, asoc, type, arg, commands); else - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); } /* Unexpected COOKIE-ECHO handler for peer restart (Table 2, action 'A') @@ -1664,7 +1691,8 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep, * Section 5.2.4 * A) In this case, the peer may have restarted. */ -static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -1700,7 +1728,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, * its peer. */ if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) { - disposition = sctp_sf_do_9_2_reshutack(ep, asoc, + disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc, SCTP_ST_CHUNK(chunk->chunk_hdr->type), chunk, commands); if (SCTP_DISPOSITION_NOMEM == disposition) @@ -1763,7 +1791,8 @@ nomem: * after responding to the local endpoint's INIT */ /* This case represents an initialization collision. */ -static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -1784,7 +1813,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); repl = sctp_make_cookie_ack(new_asoc, chunk); @@ -1833,7 +1862,8 @@ nomem: * but a new tag of its own. */ /* This case represents an initialization collision. */ -static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -1854,7 +1884,8 @@ static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep, * enter the ESTABLISHED state, if it has not already done so. */ /* This case represents an initialization collision. */ -static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_dupcook_d(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -1876,7 +1907,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); @@ -1948,7 +1979,8 @@ nomem: * * The return value is the disposition of the chunk. 
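[editor's note] The hunk above is why net has to reach this code at all: the control socket is now found through net->sctp.ctl_sock instead of the global sctp_get_ctl_sock(). A two-namespace toy model of the lookup, with struct netns_sctp reduced to the single field this test needs:

#include <stdio.h>

struct endpoint { int id; };
struct sock { struct endpoint *ep; };
struct netns_sctp { struct sock *ctl_sock; };   /* reduced to one field */
struct net { struct netns_sctp sctp; };

/* OOTB test: is this endpoint the control endpoint of its own netns? */
static int is_ctl_ep(const struct net *net, const struct endpoint *ep)
{
	return ep == net->sctp.ctl_sock->ep;
}

int main(void)
{
	struct endpoint ep0 = { 0 }, ep1 = { 1 };
	struct sock s0 = { &ep0 }, s1 = { &ep1 };
	struct net ns0 = { { &s0 } }, ns1 = { { &s1 } };

	/* ep0 is the control endpoint only from ns0's point of view */
	printf("%d %d\n", is_ctl_ep(&ns0, &ep0), is_ctl_ep(&ns1, &ep0));
	return 0;
}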
*/ -sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -1967,7 +1999,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, * done later. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* "Decode" the chunk. We have no optional parameters so we @@ -2001,12 +2033,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, goto nomem; case -SCTP_IERROR_STALE_COOKIE: - sctp_send_stale_cookie_err(ep, asoc, chunk, commands, + sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands, err_chk_p); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); case -SCTP_IERROR_BAD_SIG: default: - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } } @@ -2017,27 +2049,27 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, switch (action) { case 'A': /* Association restart. */ - retval = sctp_sf_do_dupcook_a(ep, asoc, chunk, commands, + retval = sctp_sf_do_dupcook_a(net, ep, asoc, chunk, commands, new_asoc); break; case 'B': /* Collision case B. */ - retval = sctp_sf_do_dupcook_b(ep, asoc, chunk, commands, + retval = sctp_sf_do_dupcook_b(net, ep, asoc, chunk, commands, new_asoc); break; case 'C': /* Collision case C. */ - retval = sctp_sf_do_dupcook_c(ep, asoc, chunk, commands, + retval = sctp_sf_do_dupcook_c(net, ep, asoc, chunk, commands, new_asoc); break; case 'D': /* Collision case D. */ - retval = sctp_sf_do_dupcook_d(ep, asoc, chunk, commands, + retval = sctp_sf_do_dupcook_d(net, ep, asoc, chunk, commands, new_asoc); break; default: /* Discard packet for all others. */ - retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands); + retval = sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); break; } @@ -2063,6 +2095,7 @@ nomem: * See sctp_sf_do_9_1_abort(). */ sctp_disposition_t sctp_sf_shutdown_pending_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -2072,7 +2105,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it @@ -2085,7 +2118,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( * packet. 
*/ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks * F4) One special consideration is that ABORT Chunks arriving @@ -2094,9 +2127,9 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); - return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } /* @@ -2104,7 +2137,8 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( * * See sctp_sf_do_9_1_abort(). */ -sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2113,7 +2147,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it @@ -2126,7 +2160,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, * packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks * F4) One special consideration is that ABORT Chunks arriving @@ -2135,7 +2169,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Stop the T2-shutdown timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -2145,7 +2179,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } /* @@ -2154,6 +2188,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, * See sctp_sf_do_9_1_abort(). */ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -2163,7 +2198,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( /* The same T2 timer, so we should be able to use * common function with the SHUTDOWN-SENT state. */ - return sctp_sf_shutdown_sent_abort(ep, asoc, type, arg, commands); + return sctp_sf_shutdown_sent_abort(net, ep, asoc, type, arg, commands); } /* @@ -2180,7 +2215,8 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( * * The return value is the disposition of the chunk. 
*/ -sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2190,13 +2226,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, sctp_errhdr_t *err; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ERROR chunk has a valid length. * The parameter walking depends on this as well. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Process the error here */ @@ -2206,7 +2242,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, */ sctp_walk_errors(err, chunk->chunk_hdr) { if (SCTP_ERROR_STALE_COOKIE == err->cause) - return sctp_sf_do_5_2_6_stale(ep, asoc, type, + return sctp_sf_do_5_2_6_stale(net, ep, asoc, type, arg, commands); } @@ -2215,7 +2251,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, * we are discarding the packet, there should be no adverse * affects. */ - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* @@ -2243,7 +2279,8 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2365,7 +2402,8 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2374,7 +2412,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it @@ -2387,7 +2425,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, * packet. 
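[editor's note] The sctp_walk_errors() scan above looks for a stale-cookie cause among the ERROR chunk's TLVs. Below is an open-coded userspace equivalent of that walk, assuming the RFC 4960 cause layout: 16-bit cause code, 16-bit length that includes the 4-byte header, value padded to a 4-byte boundary:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct errhdr { uint16_t cause; uint16_t length; }; /* cause TLV header */

#define WORD_ROUND(s)           (((s) + 3) & ~3u)
#define SCTP_ERROR_STALE_COOKIE 3                   /* RFC 4960 cause code */

/* open-coded equivalent of the sctp_walk_errors() scan */
static int has_stale_cookie(const uint8_t *body, size_t len)
{
	size_t off = 0;

	while (off + sizeof(struct errhdr) <= len) {
		struct errhdr eh;

		memcpy(&eh, body + off, sizeof(eh));  /* alignment-safe read */
		if (ntohs(eh.length) < sizeof(eh))
			return 0;                     /* malformed TLV, stop */
		if (ntohs(eh.cause) == SCTP_ERROR_STALE_COOKIE)
			return 1;
		off += WORD_ROUND(ntohs(eh.length));
	}
	return 0;
}

int main(void)
{
	/* one stale-cookie cause: code 3, length 8 (header + 4 bytes) */
	uint8_t body[8] = { 0x00, 0x03, 0x00, 0x08, 0, 0, 0, 0 };

	printf("%d\n", has_stale_cookie(body, sizeof(body)));
	return 0;
}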
*/ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks * F4) One special consideration is that ABORT Chunks arriving @@ -2396,12 +2434,13 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); - return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } -static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, +static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2418,7 +2457,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, sctp_errhdr_t *err; sctp_walk_errors(err, chunk->chunk_hdr); if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); error = ((sctp_errhdr_t *)chunk->skb->data)->cause; } @@ -2426,8 +2465,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(error)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -2437,7 +2476,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, * * See sctp_sf_do_9_1_abort() above. */ -sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2448,7 +2488,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, __be16 error = SCTP_ERROR_NO_ERROR; if (!sctp_vtag_verify_either(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ABORT chunk has a valid length. * Since this is an ABORT chunk, we have to discard it @@ -2461,27 +2501,28 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, * packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) error = ((sctp_errhdr_t *)chunk->skb->data)->cause; - return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, asoc, + return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc, chunk->transport); } /* * Process an incoming ICMP as an ABORT. 
(COOKIE-WAIT state) */ -sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - return sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR, + return sctp_stop_t1_and_abort(net, commands, SCTP_ERROR_NO_ERROR, ENOPROTOOPT, asoc, (struct sctp_transport *)arg); } @@ -2489,7 +2530,8 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep /* * Process an ABORT. (COOKIE-ECHOED state) */ -sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2498,7 +2540,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep, /* There is a single T1 timer, so we should be able to use * common function with the COOKIE-WAIT state. */ - return sctp_sf_cookie_wait_abort(ep, asoc, type, arg, commands); + return sctp_sf_cookie_wait_abort(net, ep, asoc, type, arg, commands); } /* @@ -2506,7 +2548,8 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep, * * This is common code called by several sctp_sf_*_abort() functions above. */ -static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, +static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, + sctp_cmd_seq_t *commands, __be16 error, int sk_err, const struct sctp_association *asoc, struct sctp_transport *transport) @@ -2514,7 +2557,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, SCTP_DEBUG_PRINTK("ABORT received (INIT).\n"); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(sk_err)); @@ -2557,7 +2600,8 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2570,12 +2614,12 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Convert the elaborate header. */ @@ -2595,7 +2639,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, * sender with an ABORT. 
*/ if (!TSN_lt(ctsn, asoc->next_tsn)) - return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands); /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT * When a peer sends a SHUTDOWN, SCTP delivers this notification to @@ -2619,7 +2663,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { - disposition = sctp_sf_do_9_2_shutdown_ack(ep, asoc, type, + disposition = sctp_sf_do_9_2_shutdown_ack(net, ep, asoc, type, arg, commands); } @@ -2645,7 +2689,8 @@ out: * The Cumulative TSN Ack of the received SHUTDOWN chunk * MUST be processed. */ -sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2656,12 +2701,12 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); sdh = (sctp_shutdownhdr_t *)chunk->skb->data; @@ -2678,7 +2723,7 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, * sender with an ABORT. */ if (!TSN_lt(ctsn, asoc->next_tsn)) - return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands); /* verify, by checking the Cumulative TSN Ack field of the * chunk, that all its outstanding DATA chunks have been @@ -2697,7 +2742,8 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, * that belong to this association, it should discard the INIT chunk and * retransmit the SHUTDOWN ACK chunk. */ -sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2708,7 +2754,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep, /* Make sure that the chunk has a valid length */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Since we are not going to really process this INIT, there @@ -2760,7 +2806,8 @@ nomem: * * The return value is the disposition of the chunk. 
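[editor's note] Both SHUTDOWN handlers reject a Cumulative TSN Ack that is not strictly behind next_tsn. TSNs wrap modulo 2^32, so TSN_lt() is serial-number arithmetic; a sketch assuming the macro reduces to the usual signed-difference idiom:

#include <stdint.h>
#include <assert.h>

/* classic serial-number "less than": sign of the 32-bit difference
 * (the int32_t cast of a large unsigned value is the standard idiom,
 * though strictly implementation-defined in ISO C) */
#define TSN_lt(a, b) ((int32_t)((uint32_t)(a) - (uint32_t)(b)) < 0)

int main(void)
{
	assert(TSN_lt(1, 2));
	assert(TSN_lt(0xfffffffeu, 1));  /* 2^32 - 2 is "before" 1 across wrap */
	assert(!TSN_lt(5, 5));
	return 0;
}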
*/ -sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2771,10 +2818,10 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep, u32 lowest_tsn; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); cwr = (sctp_cwrhdr_t *) chunk->skb->data; @@ -2815,7 +2862,8 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_ecne(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2825,10 +2873,10 @@ sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); ecne = (sctp_ecnehdr_t *) chunk->skb->data; @@ -2871,7 +2919,8 @@ sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -2884,11 +2933,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); error = sctp_eat_data(asoc, chunk, commands ); @@ -2897,16 +2946,16 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, break; case SCTP_IERROR_HIGH_TSN: case SCTP_IERROR_BAD_STREAM: - SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); + SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_noforce; case SCTP_IERROR_DUP_TSN: case SCTP_IERROR_IGNORE_TSN: - SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); + SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_force; case SCTP_IERROR_NO_DATA: goto consume; case SCTP_IERROR_PROTO_VIOLATION: - return sctp_sf_abort_violation(ep, asoc, chunk, commands, + return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); default: BUG(); @@ -2992,7 +3041,8 @@ consume: * * The return value is the disposition of the chunk. 
*/ -sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3004,11 +3054,11 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); error = sctp_eat_data(asoc, chunk, commands ); @@ -3022,7 +3072,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep, case SCTP_IERROR_NO_DATA: goto consume; case SCTP_IERROR_PROTO_VIOLATION: - return sctp_sf_abort_violation(ep, asoc, chunk, commands, + return sctp_sf_abort_violation(net, ep, asoc, chunk, commands, (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t)); default: BUG(); @@ -3082,7 +3132,8 @@ consume: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3093,18 +3144,18 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Pull the SACK chunk from the data buffer */ sackh = sctp_sm_pull_sack(chunk); /* Was this a bogus SACK? */ if (!sackh) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); chunk->subh.sack_hdr = sackh; ctsn = ntohl(sackh->cum_tsn_ack); @@ -3125,7 +3176,7 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, * sender with an ABORT. */ if (!TSN_lt(ctsn, asoc->next_tsn)) - return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands); /* Return this SACK for further processing. */ sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_SACKH(sackh)); @@ -3154,7 +3205,8 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3164,7 +3216,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; struct sctp_chunk *abort; - packet = sctp_ootb_pkt_new(asoc, chunk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { /* Make an ABORT. 
The T bit will be set if the asoc @@ -3188,9 +3240,9 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); - sctp_sf_pdiscard(ep, asoc, type, arg, commands); + sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); return SCTP_DISPOSITION_CONSUME; } @@ -3205,7 +3257,8 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_operr_notify(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3215,15 +3268,15 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, sctp_errhdr_t *err; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ERROR chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); sctp_walk_errors(err, chunk->chunk_hdr); if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err, commands); sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR, @@ -3242,7 +3295,8 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, * * The return value is the disposition. */ -sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_9_2_final(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3253,11 +3307,11 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, struct sctp_ulpevent *ev; if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* 10.2 H) SHUTDOWN COMPLETE notification * @@ -3290,8 +3344,8 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); /* ...and remove all record of the association. */ @@ -3324,7 +3378,8 @@ nomem: * receiver of the OOTB packet shall discard the OOTB packet and take * no further action. 
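[editor's note] sctp_sf_ootb, which follows, walks the bundled chunk types to apply the RFC 4960 section 8.4 out-of-the-blue rules. The verdict compresses to a small ladder; a compilable summary with chunk-type constants taken from the RFC (the stale-cookie ERROR case is elided for brevity):

#include <stdio.h>

enum { CID_ABORT = 6, CID_SHUTDOWN_ACK = 8, CID_COOKIE_ACK = 11 };
enum ootb { OOTB_DISCARD, OOTB_SHUTDOWN_COMPLETE, OOTB_ABORT_REPLY };

/* 8.4: 2) ABORT -> discard; 5) SHUTDOWN ACK -> reply SHUTDOWN COMPLETE
 * with the T bit set; 7) COOKIE ACK -> discard; 8) else reply ABORT. */
static enum ootb classify_ootb(const int *types, int n)
{
	int shut_ack = 0, cookie_ack = 0;

	for (int i = 0; i < n; i++) {
		if (types[i] == CID_ABORT)
			return OOTB_DISCARD;
		if (types[i] == CID_SHUTDOWN_ACK)
			shut_ack = 1;
		if (types[i] == CID_COOKIE_ACK)
			cookie_ack = 1;
	}
	if (shut_ack)
		return OOTB_SHUTDOWN_COMPLETE;
	if (cookie_ack)
		return OOTB_DISCARD;
	return OOTB_ABORT_REPLY;
}

int main(void)
{
	int pkt[] = { CID_SHUTDOWN_ACK };
	printf("%d\n", classify_ootb(pkt, 1)); /* 1 = reply SHUTDOWN COMPLETE */
	return 0;
}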
*/ -sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_ootb(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3338,13 +3393,13 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, int ootb_shut_ack = 0; int ootb_cookie_ack = 0; - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); ch = (sctp_chunkhdr_t *) chunk->chunk_hdr; do { /* Report violation if the chunk is less then minimal */ if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Now that we know we at least have a chunk header, @@ -3359,7 +3414,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, * sending an ABORT of its own. */ if (SCTP_CID_ABORT == ch->type) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* RFC 8.4, 7) If the packet contains a "Stale cookie" ERROR * or a COOKIE ACK the SCTP Packet should be silently @@ -3381,18 +3436,18 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, /* Report violation if chunk len overflows */ ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); ch = (sctp_chunkhdr_t *) ch_end; } while (ch_end < skb_tail_pointer(skb)); if (ootb_shut_ack) - return sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands); + return sctp_sf_shut_8_4_5(net, ep, asoc, type, arg, commands); else if (ootb_cookie_ack) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); else - return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); } /* @@ -3416,7 +3471,8 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, +static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3426,7 +3482,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; struct sctp_chunk *shut; - packet = sctp_ootb_pkt_new(asoc, chunk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { /* Make an SHUTDOWN_COMPLETE. @@ -3450,19 +3506,19 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); /* If the chunk length is invalid, we don't want to process * the reset of the packet. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* We need to discard the rest of the packet to prevent * potential bomming attacks from additional bundled chunks. * This is documented in SCTP Threats ID. 
*/ - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } return SCTP_DISPOSITION_NOMEM; @@ -3479,7 +3535,8 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, * chunks. --piggy ] * */ -sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3489,7 +3546,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Although we do have an association in this case, it corresponds @@ -3497,13 +3554,14 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, * packet and the state function that handles OOTB SHUTDOWN_ACK is * called with a NULL association. */ - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); - return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands); + return sctp_sf_shut_8_4_5(net, ep, NULL, type, arg, commands); } /* ADDIP Section 4.2 Upon reception of an ASCONF Chunk. */ -sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_asconf(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -3519,7 +3577,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* ADD-IP: Section 4.1.1 @@ -3528,12 +3586,12 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, * is received unauthenticated it MUST be silently discarded as * described in [I-D.ietf-tsvwg-sctp-auth]. */ - if (!sctp_addip_noauth && !chunk->auth) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + if (!net->sctp.addip_noauth && !chunk->auth) + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the ASCONF ADDIP chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); hdr = (sctp_addiphdr_t *)chunk->skb->data; @@ -3542,7 +3600,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, addr_param = (union sctp_addr_param *)hdr->params; length = ntohs(addr_param->p.length); if (length < sizeof(sctp_paramhdr_t)) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)addr_param, commands); /* Verify the ASCONF chunk before processing it. 
*/ @@ -3550,7 +3608,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)((void *)addr_param + length), (void *)chunk->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err_param, commands); /* ADDIP 5.2 E1) Compare the value of the serial number to the value @@ -3630,7 +3688,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, * When building TLV parameters for the ASCONF Chunk that will add or * delete IP addresses the D0 to D13 rules should be applied: */ -sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) @@ -3645,7 +3704,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(asconf_ack, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* ADD-IP, Section 4.1.2: @@ -3654,12 +3713,12 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, * is received unauthenticated it MUST be silently discarded as * described in [I-D.ietf-tsvwg-sctp-auth]. */ - if (!sctp_addip_noauth && !asconf_ack->auth) - return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); + if (!net->sctp.addip_noauth && !asconf_ack->auth) + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the ADDIP chunk has a valid length. */ if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data; @@ -3670,7 +3729,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)addip_hdr->params, (void *)asconf_ack->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, + return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err_param, commands); if (last_asconf) { @@ -3705,8 +3764,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_ASCONF_ACK)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -3739,8 +3798,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_ASCONF_ACK)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -3761,7 +3820,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. 
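[editor's note] Both ASCONF handlers above now read the ADD-IP authentication policy from net->sctp.addip_noauth where the old code consulted the global sctp_addip_noauth. A toy model of a tunable moving into per-namespace state, with the field set reduced to the two members this section touches:

#include <stdio.h>

struct sock;                       /* opaque here */
struct netns_sctp {
	struct sock *ctl_sock;     /* per-namespace control socket */
	int addip_noauth;          /* per-namespace ADD-IP auth policy */
};
struct net { struct netns_sctp sctp; };

/* the discard test from sctp_sf_do_asconf, reduced to its inputs */
static int must_discard_asconf(const struct net *net, int chunk_was_authed)
{
	return !net->sctp.addip_noauth && !chunk_was_authed;
}

int main(void)
{
	struct net strict = { { 0, 0 } }, lax = { { 0, 1 } };

	/* same unauthenticated chunk, different verdict per namespace */
	printf("%d %d\n", must_discard_asconf(&strict, 0),
	       must_discard_asconf(&lax, 0));
	return 0;
}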
*/ -sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -3776,12 +3836,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep, if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* Make sure that the FORWARD_TSN chunk has valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data; @@ -3828,6 +3888,7 @@ discard_noforce: } sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -3843,12 +3904,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* Make sure that the FORWARD_TSN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data; @@ -3915,7 +3976,8 @@ gen_shutdown: * * The return value is the disposition of the chunk. */ -static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep, +static sctp_ierror_t sctp_sf_authenticate(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, struct sctp_chunk *chunk) @@ -3988,7 +4050,8 @@ nomem: return SCTP_IERROR_NOMEM; } -sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_eat_auth(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4001,21 +4064,21 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, /* Make sure that the peer has AUTH capable */ if (!asoc->peer.auth_capable) - return sctp_sf_unk_chunk(ep, asoc, type, arg, commands); + return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands); if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, SCTP_NULL()); - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } /* Make sure that the AUTH chunk has valid length. 
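[editor's note] sctp_sf_eat_auth, continuing below, folds the internal result of sctp_sf_authenticate() into a chunk disposition. The mapping is small enough to tabulate; this sketch mirrors the switch in the hunk, with enum values invented but the pairing taken from the code:

#include <stdio.h>

enum ierror { IERR_NONE, IERR_BAD_HMAC, IERR_BAD_KEYID, IERR_BAD_SIG,
	      IERR_PROTO_VIOLATION, IERR_NOMEM };
enum disp   { DISP_CONSUME, DISP_PDISCARD, DISP_VIOLATION, DISP_NOMEM };

static enum disp auth_result_to_disposition(enum ierror e)
{
	switch (e) {
	case IERR_BAD_HMAC:        /* reply with an ERROR chunk, then... */
	case IERR_BAD_KEYID:       /* ...like a bad key id or signature, */
	case IERR_BAD_SIG:         /* discard the rest of the packet */
		return DISP_PDISCARD;
	case IERR_PROTO_VIOLATION:
		return DISP_VIOLATION; /* bad chunk length */
	case IERR_NOMEM:
		return DISP_NOMEM;
	default:
		return DISP_CONSUME;
	}
}

int main(void)
{
	printf("%d\n", auth_result_to_disposition(IERR_BAD_HMAC));
	return 0;
}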
*/ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_auth_chunk))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); auth_hdr = (struct sctp_authhdr *)chunk->skb->data; - error = sctp_sf_authenticate(ep, asoc, type, chunk); + error = sctp_sf_authenticate(net, ep, asoc, type, chunk); switch (error) { case SCTP_IERROR_AUTH_BAD_HMAC: /* Generate the ERROR chunk and discard the rest @@ -4032,10 +4095,10 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, /* Fall Through */ case SCTP_IERROR_AUTH_BAD_KEYID: case SCTP_IERROR_BAD_SIG: - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); case SCTP_IERROR_PROTO_VIOLATION: - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); case SCTP_IERROR_NOMEM: @@ -4084,7 +4147,8 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_unk_chunk(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4097,20 +4161,20 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, SCTP_DEBUG_PRINTK("Processing the unknown chunk id %d.\n", type.chunk); if (!sctp_vtag_verify(unk_chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the chunk has a valid length. * Since we don't know the chunk type, we use a general * chunkhdr structure to make a comparison. */ if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); switch (type.chunk & SCTP_CID_ACTION_MASK) { case SCTP_CID_ACTION_DISCARD: /* Discard the packet. */ - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); break; case SCTP_CID_ACTION_DISCARD_ERR: /* Generate an ERROR chunk as response. */ @@ -4125,7 +4189,7 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, } /* Discard the packet. */ - sctp_sf_pdiscard(ep, asoc, type, arg, commands); + sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); return SCTP_DISPOSITION_CONSUME; break; case SCTP_CID_ACTION_SKIP: @@ -4167,7 +4231,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_discard_chunk(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4180,7 +4245,7 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep, * chunkhdr structure to make a comparison. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); SCTP_DEBUG_PRINTK("Chunk %d is discarded\n", type.chunk); @@ -4205,13 +4270,14 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. 
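[editor's note] sctp_sf_unk_chunk above branches on SCTP_CID_ACTION_MASK: per RFC 4960 section 3.2, the top two bits of an unrecognized chunk type tell the receiver what to do with it. A stand-alone decoder of those bits:

#include <stdio.h>

#define CID_ACTION_MASK        0xc0  /* top two bits of the chunk type */
#define CID_ACTION_DISCARD     0x00  /* stop, discard the packet */
#define CID_ACTION_DISCARD_ERR 0x40  /* discard, report in an ERROR chunk */
#define CID_ACTION_SKIP        0x80  /* skip this chunk, keep processing */
#define CID_ACTION_SKIP_ERR    0xc0  /* skip, but report in an ERROR chunk */

static const char *unknown_chunk_action(unsigned char type)
{
	switch (type & CID_ACTION_MASK) {
	case CID_ACTION_DISCARD:     return "discard packet";
	case CID_ACTION_DISCARD_ERR: return "discard packet + send ERROR";
	case CID_ACTION_SKIP:        return "skip chunk";
	default:                     return "skip chunk + send ERROR";
	}
}

int main(void)
{
	/* FWD-TSN (0xc0) was deliberately given a skip-and-report type,
	 * so peers without the extension stay interoperable */
	printf("%s\n", unknown_chunk_action(0xc0));
	return 0;
}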
*/ -sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_pdiscard(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS); + SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS); sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; @@ -4232,7 +4298,8 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, * We simply tag the chunk as a violation. The state machine will log * the violation and continue. */ -sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_violation(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4242,7 +4309,7 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep, /* Make sure that the chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); return SCTP_DISPOSITION_VIOLATION; @@ -4252,6 +4319,7 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep, * Common function to handle a protocol violation. */ static sctp_disposition_t sctp_sf_abort_violation( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, void *arg, @@ -4302,7 +4370,7 @@ static sctp_disposition_t sctp_sf_abort_violation( } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); if (asoc->state <= SCTP_STATE_COOKIE_ECHOED) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -4316,10 +4384,10 @@ static sctp_disposition_t sctp_sf_abort_violation( SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); } } else { - packet = sctp_ootb_pkt_new(asoc, chunk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (!packet) goto nomem_pkt; @@ -4334,13 +4402,13 @@ static sctp_disposition_t sctp_sf_abort_violation( sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); discard: - sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); + sctp_sf_pdiscard(net, ep, asoc, SCTP_ST_CHUNK(0), arg, commands); return SCTP_DISPOSITION_ABORT; nomem_pkt: @@ -4369,6 +4437,7 @@ nomem: * Generate an ABORT chunk and terminate the association. */ static sctp_disposition_t sctp_sf_violation_chunklen( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4377,7 +4446,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( { static const char err_str[]="The following chunk had invalid length:"; - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); } @@ -4388,6 +4457,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( * the length is considered as invalid. 
*/ static sctp_disposition_t sctp_sf_violation_paramlen( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4407,17 +4477,17 @@ static sctp_disposition_t sctp_sf_violation_paramlen( goto nomem; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); discard: - sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); + sctp_sf_pdiscard(net, ep, asoc, SCTP_ST_CHUNK(0), arg, commands); return SCTP_DISPOSITION_ABORT; nomem: return SCTP_DISPOSITION_NOMEM; @@ -4430,6 +4500,7 @@ nomem: * error code. */ static sctp_disposition_t sctp_sf_violation_ctsn( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4438,7 +4509,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn( { static const char err_str[]="The cumulative tsn ack beyond the max tsn currently sent:"; - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); } @@ -4449,6 +4520,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn( * on the path and we may not want to continue this communication. */ static sctp_disposition_t sctp_sf_violation_chunk( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4458,9 +4530,9 @@ static sctp_disposition_t sctp_sf_violation_chunk( static const char err_str[]="The following chunk violates protocol:"; if (!asoc) - return sctp_sf_violation(ep, asoc, type, arg, commands); + return sctp_sf_violation(net, ep, asoc, type, arg, commands); - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, + return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, sizeof(err_str)); } /*************************************************************************** @@ -4523,7 +4595,8 @@ static sctp_disposition_t sctp_sf_violation_chunk( * * The return value is a disposition. */ -sctp_disposition_t sctp_sf_do_prm_asoc(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4634,7 +4707,8 @@ nomem: * * The return value is the disposition. */ -sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_prm_send(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4673,6 +4747,7 @@ sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep, * The return value is the disposition. 
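[Editor's note] One detail worth noting in the violation helpers above: they pass sizeof(err_str), not strlen(err_str), so the terminating NUL is deliberately counted in the reported length. For a static array the two differ by one, as this standalone snippet shows:

#include <stdio.h>
#include <string.h>

int main(void)
{
	static const char err_str[] = "The following chunk had invalid length:";

	printf("sizeof(err_str) = %zu\n", sizeof(err_str)); /* 40, counts '\0' */
	printf("strlen(err_str) = %zu\n", strlen(err_str)); /* 39 */
	return 0;
}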
*/ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4694,7 +4769,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { - disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type, + disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type, arg, commands); } return disposition; @@ -4728,6 +4803,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( * The return value is the disposition. */ sctp_disposition_t sctp_sf_do_9_1_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4759,14 +4835,15 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_USER_ABORT)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return retval; } /* We tried an illegal operation on an association which is closed. */ -sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_error_closed(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4779,7 +4856,8 @@ sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep, /* We tried an illegal operation on an association which is shutting * down. */ -sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_error_shutdown(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -4805,6 +4883,7 @@ sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep, * (timers) */ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4817,7 +4896,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); + SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); @@ -4839,6 +4918,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( * (timers) */ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4847,7 +4927,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown( /* There is a single T1 timer, so we should be able to use * common function with the COOKIE-WAIT state. */ - return sctp_sf_cookie_wait_prm_shutdown(ep, asoc, type, arg, commands); + return sctp_sf_cookie_wait_prm_shutdown(net, ep, asoc, type, arg, commands); } /* @@ -4865,6 +4945,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown( * (timers) */ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4884,7 +4965,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); /* Even if we can't send the ABORT due to low memory delete the * TCB. 
This is a departure from our typical NOMEM handling. @@ -4914,6 +4995,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( * (timers) */ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4923,7 +5005,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort( /* There is a single T1 timer, so we should be able to use * common function with the COOKIE-WAIT state. */ - return sctp_sf_cookie_wait_prm_abort(ep, asoc, type, arg, commands); + return sctp_sf_cookie_wait_prm_abort(net, ep, asoc, type, arg, commands); } /* @@ -4939,6 +5021,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort( * (timers) */ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4949,7 +5032,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands); + return sctp_sf_do_9_1_prm_abort(net, ep, asoc, type, arg, commands); } /* @@ -4965,6 +5048,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort( * (timers) */ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -4979,7 +5063,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands); + return sctp_sf_do_9_1_prm_abort(net, ep, asoc, type, arg, commands); } /* @@ -4995,6 +5079,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort( * (timers) */ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5004,7 +5089,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort( /* The same T2 timer, so we should be able to use * common function with the SHUTDOWN-SENT state. */ - return sctp_sf_shutdown_sent_prm_abort(ep, asoc, type, arg, commands); + return sctp_sf_shutdown_sent_prm_abort(net, ep, asoc, type, arg, commands); } /* @@ -5030,6 +5115,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort( * association on which a heartbeat should be issued. */ sctp_disposition_t sctp_sf_do_prm_requestheartbeat( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5061,7 +5147,8 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat( * When an endpoint has an ASCONF signaled change to be sent to the * remote endpoint it should do A1 to A9 */ -sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5082,6 +5169,7 @@ sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep, * The return value is the disposition of the primitive. */ sctp_disposition_t sctp_sf_ignore_primitive( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5103,6 +5191,7 @@ sctp_disposition_t sctp_sf_ignore_primitive( * retransmit, the stack will immediately send up this notification. 
*/ sctp_disposition_t sctp_sf_do_no_pending_tsn( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5134,6 +5223,7 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn( * The return value is the disposition. */ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5203,6 +5293,7 @@ nomem: * The return value is the disposition. */ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5221,11 +5312,11 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( */ if (chunk) { if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t))) - return sctp_sf_violation_chunklen(ep, asoc, type, arg, + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); } @@ -5273,7 +5364,8 @@ nomem: * * The return value is the disposition of the event. */ -sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_ignore_other(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5298,7 +5390,8 @@ sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5306,7 +5399,7 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, { struct sctp_transport *transport = arg; - SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T3_RTX_EXPIREDS); if (asoc->overall_error_count >= asoc->max_retrans) { if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) { @@ -5327,8 +5420,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } } @@ -5384,13 +5477,14 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, * allow. However, an SCTP transmitter MUST NOT be more aggressive than * the following algorithms allow. 
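[Editor's note] The sctp_sf_do_6_3_3_rtx() hunk above is the T3-rtx give-up test: once the association's error count reaches max_retrans, the association is failed (ABORTEDS incremented, CURRESTAB decremented) instead of retransmitting again. A compressed userspace model, with the caveat that the real error counter is bumped elsewhere in the command interpreter while this sketch folds the increment in:

#include <stdio.h>
#include <stdbool.h>

struct assoc {
	int overall_error_count;
	int max_retrans;
};

/* Returns true when the association should be failed (TCB deleted). */
static bool t3_rtx_expired(struct assoc *asoc)
{
	return ++asoc->overall_error_count >= asoc->max_retrans;
}

int main(void)
{
	struct assoc a = { 0, 5 };

	for (int i = 1; i <= 6; i++)
		printf("T3 expiry %d: %s\n", i,
		       t3_rtx_expired(&a) ? "assoc failed" : "retransmit");
	return 0;
}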
*/ -sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { - SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS); sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); return SCTP_DISPOSITION_CONSUME; } @@ -5414,7 +5508,8 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, * (timers, events) * */ -sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5425,7 +5520,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); - SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS); if (attempts <= asoc->max_init_attempts) { bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; @@ -5475,7 +5570,8 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, * (timers, events) * */ -sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5485,7 +5581,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); - SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS); if (attempts <= asoc->max_init_attempts) { repl = sctp_make_cookie_echo(asoc, NULL); @@ -5523,7 +5619,8 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep * the T2-Shutdown timer, giving its peer ample opportunity to transmit * all of its queued DATA chunks that have not yet been sent. */ -sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5532,7 +5629,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); - SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS); ((struct sctp_association *)asoc)->shutdown_retries++; @@ -5542,8 +5639,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, /* Note: CMD_ASSOC_FAILED calls CMD_DELETE_TCB. 
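[Editor's note] The two T1 expiry handlers above share one shape: compute attempts = init_err_counter + 1, retransmit the INIT or COOKIE ECHO while attempts <= max_init_attempts, and fail the association otherwise. The sketch below models that loop; the timeout doubling and the millisecond values illustrate RFC 4960 rule E2 and are not lifted from this patch:

#include <stdio.h>

int main(void)
{
	int max_init_attempts = 8;     /* sinit_max_attempts           */
	int init_err_counter = 0;
	unsigned int timeo = 3000;     /* initial T1, ms, illustrative */
	unsigned int rto_max = 60000;  /* cap, ms, illustrative        */

	for (;;) {
		int attempts = init_err_counter + 1;

		if (attempts > max_init_attempts) {
			printf("giving up: association failed\n");
			break;
		}
		printf("attempt %d: (re)send INIT, T1 = %u ms\n",
		       attempts, timeo);
		init_err_counter++;
		timeo *= 2;                 /* back off on each expiry */
		if (timeo > rto_max)
			timeo = rto_max;
	}
	return 0;
}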
*/ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } @@ -5592,6 +5689,7 @@ nomem: * If the T4 RTO timer expires the endpoint should do B1 to B5 */ sctp_disposition_t sctp_sf_t4_timer_expire( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5601,7 +5699,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire( struct sctp_chunk *chunk = asoc->addip_last_asconf; struct sctp_transport *transport = chunk->transport; - SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T4_RTO_EXPIREDS); /* ADDIP 4.1 B1) Increment the error counters and perform path failure * detection on the appropriate destination address as defined in @@ -5626,8 +5724,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire( SCTP_ERROR(ETIMEDOUT)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -5662,7 +5760,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire( * At the expiration of this timer the sender SHOULD abort the association * by sending an ABORT chunk. */ -sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5671,7 +5770,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); - SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); reply = sctp_make_abort(asoc, NULL, 0); if (!reply) @@ -5683,8 +5782,8 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; nomem: @@ -5697,6 +5796,7 @@ nomem: * the user. So this routine looks same as sctp_sf_do_9_2_prm_shutdown(). */ sctp_disposition_t sctp_sf_autoclose_timer_expire( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, @@ -5705,7 +5805,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( { int disposition; - SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS); /* From 9.2 Shutdown of an Association * Upon receipt of the SHUTDOWN primitive from its upper @@ -5720,7 +5820,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { - disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type, + disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type, arg, commands); } return disposition; @@ -5738,7 +5838,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( * * The return value is the disposition of the chunk. 
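[Editor's note] sctp_sf_autoclose_timer_expire() above reuses the RFC 4960 section 9.2 rule also used by the SHUTDOWN primitive: block new data, but only call sctp_sf_do_9_2_start_shutdown() once the outqueue is empty. A minimal model of that disposition choice (enum and function names invented):

#include <stdio.h>
#include <stdbool.h>

enum disposition { DISP_CONSUME, DISP_SHUTDOWN_STARTED };

static enum disposition prm_shutdown(bool outq_empty)
{
	/* New data is already refused by this point (SHUTDOWN-PENDING). */
	if (outq_empty)
		return DISP_SHUTDOWN_STARTED; /* do_9_2_start_shutdown()  */
	return DISP_CONSUME;                  /* wait for queue to drain  */
}

int main(void)
{
	printf("outqueue busy : %s\n",
	       prm_shutdown(false) == DISP_CONSUME ? "consume, keep waiting"
						   : "shutdown");
	printf("outqueue empty: %s\n",
	       prm_shutdown(true) == DISP_SHUTDOWN_STARTED ? "send SHUTDOWN"
							   : "consume");
	return 0;
}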
*/ -sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_not_impl(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5755,7 +5856,8 @@ sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_bug(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5775,7 +5877,8 @@ sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_timer_ignore(const struct sctp_endpoint *ep, +sctp_disposition_t sctp_sf_timer_ignore(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, void *arg, @@ -5817,7 +5920,8 @@ static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk) /* Create an ABORT packet to be sent as a response, with the specified * error causes. */ -static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, +static struct sctp_packet *sctp_abort_pkt_new(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, const void *payload, @@ -5826,7 +5930,7 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, struct sctp_packet *packet; struct sctp_chunk *abort; - packet = sctp_ootb_pkt_new(asoc, chunk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { /* Make an ABORT. @@ -5858,7 +5962,8 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, } /* Allocate a packet for responding in the OOTB conditions. */ -static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc, +static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, + const struct sctp_association *asoc, const struct sctp_chunk *chunk) { struct sctp_packet *packet; @@ -5911,7 +6016,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc } /* Make a transport for the bucket, Eliza... */ - transport = sctp_transport_new(sctp_source(chunk), GFP_ATOMIC); + transport = sctp_transport_new(net, sctp_source(chunk), GFP_ATOMIC); if (!transport) goto nomem; @@ -5919,7 +6024,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc * the source address. 
*/ sctp_transport_route(transport, (union sctp_addr *)&chunk->dest, - sctp_sk(sctp_get_ctl_sock())); + sctp_sk(net->sctp.ctl_sock)); packet = sctp_packet_init(&transport->packet, transport, sport, dport); packet = sctp_packet_config(packet, vtag, 0); @@ -5937,7 +6042,8 @@ void sctp_ootb_pkt_free(struct sctp_packet *packet) } /* Send a stale cookie error when a invalid COOKIE ECHO chunk is found */ -static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, +static void sctp_send_stale_cookie_err(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *chunk, sctp_cmd_seq_t *commands, @@ -5946,7 +6052,7 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, struct sctp_packet *packet; if (err_chunk) { - packet = sctp_ootb_pkt_new(asoc, chunk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { struct sctp_signed_cookie *cookie; @@ -5959,7 +6065,7 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, sctp_packet_append_chunk(packet, err_chunk); sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } else sctp_chunk_free (err_chunk); } @@ -5979,6 +6085,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, __u32 tsn; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); u16 ssn; u16 sid; u8 ordered = 0; @@ -6109,8 +6216,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_DATA)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_IERROR_NO_DATA; } @@ -6120,9 +6227,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, * if we renege and the chunk arrives again. 
*/ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) - SCTP_INC_STATS(SCTP_MIB_INUNORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_INUNORDERCHUNKS); else { - SCTP_INC_STATS(SCTP_MIB_INORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS); ordered = 1; } diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 7c211a7f90f4..84d98d8a5a74 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -59,7 +59,8 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES]; static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES]; -static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, +static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, + sctp_cid_t cid, sctp_state_t state); @@ -82,13 +83,14 @@ static const sctp_sm_table_entry_t bug = { rtn; \ }) -const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, +const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, + sctp_event_t event_type, sctp_state_t state, sctp_subtype_t event_subtype) { switch (event_type) { case SCTP_EVENT_T_CHUNK: - return sctp_chunk_event_lookup(event_subtype.chunk, state); + return sctp_chunk_event_lookup(net, event_subtype.chunk, state); case SCTP_EVENT_T_TIMEOUT: return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, timeout_event_table); @@ -906,7 +908,8 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE, }; -static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, +static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, + sctp_cid_t cid, sctp_state_t state) { if (state > SCTP_STATE_MAX) @@ -915,12 +918,12 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, if (cid <= SCTP_CID_BASE_MAX) return &chunk_event_table[cid][state]; - if (sctp_prsctp_enable) { + if (net->sctp.prsctp_enable) { if (cid == SCTP_CID_FWD_TSN) return &prsctp_chunk_event_table[0][state]; } - if (sctp_addip_enable) { + if (net->sctp.addip_enable) { if (cid == SCTP_CID_ASCONF) return &addip_chunk_event_table[0][state]; @@ -928,7 +931,7 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid, return &addip_chunk_event_table[1][state]; } - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { if (cid == SCTP_CID_AUTH) return &auth_chunk_event_table[0][state]; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5e259817a7f3..d37d24ff197f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -427,6 +427,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) static int sctp_send_asconf(struct sctp_association *asoc, struct sctp_chunk *chunk) { + struct net *net = sock_net(asoc->base.sk); int retval = 0; /* If there is an outstanding ASCONF chunk, queue it for later @@ -439,7 +440,7 @@ static int sctp_send_asconf(struct sctp_association *asoc, /* Hold the chunk until an ASCONF_ACK is received. 
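[Editor's note] The sm_statetable.c hunks above make the extension-chunk dispatch per namespace: FWD_TSN, ASCONF and AUTH chunks only resolve to their handlers when the owning netns has the matching feature flag set, otherwise they fall through to the unknown-chunk row. A toy version, where the chunk IDs match the SCTP registry but the struct and handler names are invented:

#include <stdio.h>
#include <stdbool.h>

#define SCTP_CID_AUTH    0x0f
#define SCTP_CID_FWD_TSN 0xc0
#define SCTP_CID_ASCONF  0xc1

struct netns_sctp {
	bool prsctp_enable, addip_enable, auth_enable;
};

static const char *chunk_event_lookup(const struct netns_sctp *ns, int cid)
{
	if (ns->prsctp_enable && cid == SCTP_CID_FWD_TSN)
		return "fwd-tsn handler";
	if (ns->addip_enable && cid == SCTP_CID_ASCONF)
		return "asconf handler";
	if (ns->auth_enable && cid == SCTP_CID_AUTH)
		return "auth handler";
	return "unknown-chunk handler";  /* sctp_sf_unk_chunk() row */
}

int main(void)
{
	struct netns_sctp on  = { true,  true,  true  };
	struct netns_sctp off = { false, false, false };

	printf("AUTH, auth_enable=1: %s\n",
	       chunk_event_lookup(&on,  SCTP_CID_AUTH));
	printf("AUTH, auth_enable=0: %s\n",
	       chunk_event_lookup(&off, SCTP_CID_AUTH));
	return 0;
}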
*/ sctp_chunk_hold(chunk); - retval = sctp_primitive_ASCONF(asoc, chunk); + retval = sctp_primitive_ASCONF(net, asoc, chunk); if (retval) sctp_chunk_free(chunk); else @@ -515,6 +516,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -529,7 +531,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, int i; int retval = 0; - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) return retval; sp = sctp_sk(sk); @@ -717,6 +719,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -732,7 +735,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, int stored = 0; chunk = NULL; - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) return retval; sp = sctp_sk(sk); @@ -1050,6 +1053,7 @@ static int __sctp_connect(struct sock* sk, int addrs_size, sctp_assoc_t *assoc_id) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc = NULL; @@ -1200,7 +1204,7 @@ static int __sctp_connect(struct sock* sk, goto out_free; } - err = sctp_primitive_ASSOCIATE(asoc, NULL); + err = sctp_primitive_ASSOCIATE(net, asoc, NULL); if (err < 0) { goto out_free; } @@ -1458,6 +1462,7 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, */ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) { + struct net *net = sock_net(sk); struct sctp_endpoint *ep; struct sctp_association *asoc; struct list_head *pos, *temp; @@ -1499,9 +1504,9 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) chunk = sctp_make_abort_user(asoc, NULL, 0); if (chunk) - sctp_primitive_ABORT(asoc, chunk); + sctp_primitive_ABORT(net, asoc, chunk); } else - sctp_primitive_SHUTDOWN(asoc, NULL); + sctp_primitive_SHUTDOWN(net, asoc, NULL); } /* On a TCP-style socket, block for at most linger_time if set. */ @@ -1569,6 +1574,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t msg_len) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *new_asoc=NULL, *asoc=NULL; @@ -1714,7 +1720,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, if (sinfo_flags & SCTP_EOF) { SCTP_DEBUG_PRINTK("Shutting down association: %p\n", asoc); - sctp_primitive_SHUTDOWN(asoc, NULL); + sctp_primitive_SHUTDOWN(net, asoc, NULL); err = 0; goto out_unlock; } @@ -1727,7 +1733,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc); - sctp_primitive_ABORT(asoc, chunk); + sctp_primitive_ABORT(net, asoc, chunk); err = 0; goto out_unlock; } @@ -1900,7 +1906,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, /* Auto-connect, if we aren't connected already. */ if (sctp_state(asoc, CLOSED)) { - err = sctp_primitive_ASSOCIATE(asoc, NULL); + err = sctp_primitive_ASSOCIATE(net, asoc, NULL); if (err < 0) goto out_free; SCTP_DEBUG_PRINTK("We associated primitively.\n"); @@ -1928,7 +1934,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, * works that way today. Keep it that way or this * breaks. 
*/ - err = sctp_primitive_SEND(asoc, datamsg); + err = sctp_primitive_SEND(net, asoc, datamsg); /* Did the lower layer accept the chunk? */ if (err) sctp_datamsg_free(datamsg); @@ -2320,7 +2326,9 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, int error; if (params->spp_flags & SPP_HB_DEMAND && trans) { - error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans); + struct net *net = sock_net(trans->asoc->base.sk); + + error = sctp_primitive_REQUESTHEARTBEAT(net, trans->asoc, trans); if (error) return error; } @@ -3033,6 +3041,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_association *asoc = NULL; struct sctp_setpeerprim prim; @@ -3042,7 +3051,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva sp = sctp_sk(sk); - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) return -EPERM; if (optlen != sizeof(struct sctp_setpeerprim)) @@ -3279,9 +3288,10 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authchunk val; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authchunk)) @@ -3311,11 +3321,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_hmacalgo *hmacs; u32 idents; int err; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen < sizeof(struct sctp_hmacalgo)) @@ -3348,11 +3359,12 @@ static int sctp_setsockopt_auth_key(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authkey *authkey; struct sctp_association *asoc; int ret; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen <= sizeof(struct sctp_authkey)) @@ -3389,10 +3401,11 @@ static int sctp_setsockopt_active_key(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authkeyid val; struct sctp_association *asoc; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3417,10 +3430,11 @@ static int sctp_setsockopt_del_key(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authkeyid val; struct sctp_association *asoc; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3471,7 +3485,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, sp->do_auto_asconf = 0; } else if (val && !sp->do_auto_asconf) { list_add_tail(&sp->auto_asconf_list, - &sctp_auto_asconf_splist); + &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } return 0; @@ -3843,6 +3857,7 @@ out: */ SCTP_STATIC int sctp_init_sock(struct sock *sk) { + struct net *net = sock_net(sk); struct sctp_endpoint *ep; struct sctp_sock *sp; @@ -3872,7 +3887,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->default_timetolive = 0; sp->default_rcv_context = 0; - sp->max_burst = sctp_max_burst; + sp->max_burst = net->sctp.max_burst; /* Initialize default setup parameters. 
These parameters * can be modified with the SCTP_INITMSG socket option or @@ -3880,24 +3895,24 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) */ sp->initmsg.sinit_num_ostreams = sctp_max_outstreams; sp->initmsg.sinit_max_instreams = sctp_max_instreams; - sp->initmsg.sinit_max_attempts = sctp_max_retrans_init; - sp->initmsg.sinit_max_init_timeo = sctp_rto_max; + sp->initmsg.sinit_max_attempts = net->sctp.max_retrans_init; + sp->initmsg.sinit_max_init_timeo = net->sctp.rto_max; /* Initialize default RTO related parameters. These parameters can * be modified for with the SCTP_RTOINFO socket option. */ - sp->rtoinfo.srto_initial = sctp_rto_initial; - sp->rtoinfo.srto_max = sctp_rto_max; - sp->rtoinfo.srto_min = sctp_rto_min; + sp->rtoinfo.srto_initial = net->sctp.rto_initial; + sp->rtoinfo.srto_max = net->sctp.rto_max; + sp->rtoinfo.srto_min = net->sctp.rto_min; /* Initialize default association related parameters. These parameters * can be modified with the SCTP_ASSOCINFO socket option. */ - sp->assocparams.sasoc_asocmaxrxt = sctp_max_retrans_association; + sp->assocparams.sasoc_asocmaxrxt = net->sctp.max_retrans_association; sp->assocparams.sasoc_number_peer_destinations = 0; sp->assocparams.sasoc_peer_rwnd = 0; sp->assocparams.sasoc_local_rwnd = 0; - sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life; + sp->assocparams.sasoc_cookie_life = net->sctp.valid_cookie_life; /* Initialize default event subscriptions. By default, all the * options are off. @@ -3907,10 +3922,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Default Peer Address Parameters. These defaults can * be modified via SCTP_PEER_ADDR_PARAMS */ - sp->hbinterval = sctp_hb_interval; - sp->pathmaxrxt = sctp_max_retrans_path; + sp->hbinterval = net->sctp.hb_interval; + sp->pathmaxrxt = net->sctp.max_retrans_path; sp->pathmtu = 0; // allow default discovery - sp->sackdelay = sctp_sack_timeout; + sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | @@ -3961,10 +3976,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - if (sctp_default_auto_asconf) { + sock_prot_inuse_add(net, sk->sk_prot, 1); + if (net->sctp.default_auto_asconf) { list_add_tail(&sp->auto_asconf_list, - &sctp_auto_asconf_splist); + &net->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } else sp->do_auto_asconf = 0; @@ -4011,6 +4026,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) */ SCTP_STATIC void sctp_shutdown(struct sock *sk, int how) { + struct net *net = sock_net(sk); struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -4022,7 +4038,7 @@ SCTP_STATIC void sctp_shutdown(struct sock *sk, int how) if (!list_empty(&ep->asocs)) { asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); - sctp_primitive_SHUTDOWN(asoc, NULL); + sctp_primitive_SHUTDOWN(net, asoc, NULL); } } } @@ -4653,9 +4669,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, union sctp_addr temp; int cnt = 0; int addrlen; + struct net *net = sock_net(sk); rcu_read_lock(); - list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { if (!addr->valid) continue; @@ -5299,12 +5316,13 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct 
sctp_hmacalgo __user *p = (void __user *)optval; struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; hmacs = sctp_sk(sk)->ep->auth_hmacs_list; @@ -5328,10 +5346,11 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, static int sctp_getsockopt_active_key(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_authkeyid val; struct sctp_association *asoc; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (len < sizeof(struct sctp_authkeyid)) @@ -5360,6 +5379,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5367,7 +5387,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) @@ -5403,6 +5423,7 @@ num: static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5410,7 +5431,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) @@ -5769,7 +5790,7 @@ static void sctp_unhash(struct sock *sk) * a fastreuse flag (FIXME: NPI ipg). */ static struct sctp_bind_bucket *sctp_bucket_create( - struct sctp_bind_hashbucket *head, unsigned short snum); + struct sctp_bind_hashbucket *head, struct net *, unsigned short snum); static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { @@ -5799,11 +5820,12 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) rover = low; if (inet_is_reserved_local_port(rover)) continue; - index = sctp_phashfn(rover); + index = sctp_phashfn(sock_net(sk), rover); head = &sctp_port_hashtable[index]; sctp_spin_lock(&head->lock); sctp_for_each_hentry(pp, node, &head->chain) - if (pp->port == rover) + if ((pp->port == rover) && + net_eq(sock_net(sk), pp->net)) goto next; break; next: @@ -5827,10 +5849,10 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) * to the port number (snum) - we detect that with the * port iterator, pp being NULL. */ - head = &sctp_port_hashtable[sctp_phashfn(snum)]; + head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)]; sctp_spin_lock(&head->lock); sctp_for_each_hentry(pp, node, &head->chain) { - if (pp->port == snum) + if ((pp->port == snum) && net_eq(pp->net, sock_net(sk))) goto pp_found; } } @@ -5881,7 +5903,7 @@ pp_found: pp_not_found: /* If there was a hash table miss, create a new port. 
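[Editor's note] The port-lookup hunks above namespace the bind hash: sctp_phashfn() now mixes in the net pointer and a bucket only matches when both the port and net_eq() agree, so two namespaces can bind the same SCTP port independently. A simplified userspace sketch (the hash function, types and table size are invented; the real code also handles locking and the fastreuse flag):

#include <stdio.h>
#include <stdint.h>

#define HSIZE 16

struct fake_net { int id; };

struct bind_bucket {
	unsigned short port;
	struct fake_net *net;
	struct bind_bucket *next;
};

static unsigned int phashfn(struct fake_net *net, unsigned short port)
{
	return ((uintptr_t)net ^ port) & (HSIZE - 1);
}

static struct bind_bucket *lookup(struct bind_bucket **table,
				  struct fake_net *net, unsigned short port)
{
	for (struct bind_bucket *pp = table[phashfn(net, port)];
	     pp; pp = pp->next)
		if (pp->port == port && pp->net == net) /* net_eq() analogue */
			return pp;
	return NULL;
}

int main(void)
{
	struct fake_net a = { 1 }, b = { 2 };
	struct bind_bucket *table[HSIZE] = { 0 };
	struct bind_bucket pa = { 9899, &a, NULL };

	table[phashfn(&a, 9899)] = &pa;
	printf("net a, port 9899: %s\n",
	       lookup(table, &a, 9899) ? "hit" : "miss");
	printf("net b, port 9899: %s\n",
	       lookup(table, &b, 9899) ? "hit" : "miss");
	return 0;
}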
*/ ret = 1; - if (!pp && !(pp = sctp_bucket_create(head, snum))) + if (!pp && !(pp = sctp_bucket_create(head, sock_net(sk), snum))) goto fail_unlock; /* In either case (hit or miss), make sure fastreuse is 1 only @@ -6113,7 +6135,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) ********************************************************************/ static struct sctp_bind_bucket *sctp_bucket_create( - struct sctp_bind_hashbucket *head, unsigned short snum) + struct sctp_bind_hashbucket *head, struct net *net, unsigned short snum) { struct sctp_bind_bucket *pp; @@ -6123,6 +6145,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( pp->port = snum; pp->fastreuse = 0; INIT_HLIST_HEAD(&pp->owner); + pp->net = net; hlist_add_head(&pp->node, &head->chain); } return pp; @@ -6142,7 +6165,8 @@ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp) static inline void __sctp_put_port(struct sock *sk) { struct sctp_bind_hashbucket *head = - &sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->inet_num)]; + &sctp_port_hashtable[sctp_phashfn(sock_net(sk), + inet_sk(sk)->inet_num)]; struct sctp_bind_bucket *pp; sctp_spin_lock(&head->lock); @@ -6809,7 +6833,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsp->hmac = NULL; /* Hook this new socket in to the bind_hash list. */ - head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->inet_num)]; + head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk), + inet_sk(oldsk)->inet_num)]; sctp_local_bh_disable(); sctp_spin_lock(&head->lock); pp = sctp_sk(oldsk)->bind_hash; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 2b2bfe933ff1..70e3ba5cb50b 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -64,8 +64,34 @@ extern int sysctl_sctp_wmem[3]; static ctl_table sctp_table[] = { { + .procname = "sctp_mem", + .data = &sysctl_sctp_mem, + .maxlen = sizeof(sysctl_sctp_mem), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax + }, + { + .procname = "sctp_rmem", + .data = &sysctl_sctp_rmem, + .maxlen = sizeof(sysctl_sctp_rmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sctp_wmem", + .data = &sysctl_sctp_wmem, + .maxlen = sizeof(sysctl_sctp_wmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + + { /* sentinel */ } +}; + +static ctl_table sctp_net_table[] = { + { .procname = "rto_initial", - .data = &sctp_rto_initial, + .data = &init_net.sctp.rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -74,7 +100,7 @@ static ctl_table sctp_table[] = { }, { .procname = "rto_min", - .data = &sctp_rto_min, + .data = &init_net.sctp.rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -83,7 +109,7 @@ static ctl_table sctp_table[] = { }, { .procname = "rto_max", - .data = &sctp_rto_max, + .data = &init_net.sctp.rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -91,17 +117,22 @@ static ctl_table sctp_table[] = { .extra2 = &timer_max }, { - .procname = "valid_cookie_life", - .data = &sctp_valid_cookie_life, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &timer_max + .procname = "rto_alpha_exp_divisor", + .data = &init_net.sctp.rto_alpha, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "rto_beta_exp_divisor", + .data = &init_net.sctp.rto_beta, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = 
proc_dointvec, }, { .procname = "max_burst", - .data = &sctp_max_burst, + .data = &init_net.sctp.max_burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -109,31 +140,42 @@ static ctl_table sctp_table[] = { .extra2 = &int_max }, { - .procname = "association_max_retrans", - .data = &sctp_max_retrans_association, + .procname = "cookie_preserve_enable", + .data = &init_net.sctp.cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "valid_cookie_life", + .data = &init_net.sctp.valid_cookie_life, + .maxlen = sizeof(unsigned int), + .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &int_max + .extra1 = &one, + .extra2 = &timer_max }, { - .procname = "sndbuf_policy", - .data = &sctp_sndbuf_policy, + .procname = "sack_timeout", + .data = &init_net.sctp.sack_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &sack_timer_min, + .extra2 = &sack_timer_max, }, { - .procname = "rcvbuf_policy", - .data = &sctp_rcvbuf_policy, - .maxlen = sizeof(int), + .procname = "hb_interval", + .data = &init_net.sctp.hb_interval, + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &timer_max }, { - .procname = "path_max_retrans", - .data = &sctp_max_retrans_path, + .procname = "association_max_retrans", + .data = &init_net.sctp.max_retrans_association, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -141,17 +183,17 @@ static ctl_table sctp_table[] = { .extra2 = &int_max }, { - .procname = "pf_retrans", - .data = &sctp_pf_retrans, + .procname = "path_max_retrans", + .data = &init_net.sctp.max_retrans_path, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = &one, .extra2 = &int_max }, { .procname = "max_init_retransmits", - .data = &sctp_max_retrans_init, + .data = &init_net.sctp.max_retrans_init, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -159,103 +201,66 @@ static ctl_table sctp_table[] = { .extra2 = &int_max }, { - .procname = "hb_interval", - .data = &sctp_hb_interval, - .maxlen = sizeof(unsigned int), + .procname = "pf_retrans", + .data = &init_net.sctp.pf_retrans, + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &timer_max + .extra1 = &zero, + .extra2 = &int_max }, { - .procname = "cookie_preserve_enable", - .data = &sctp_cookie_preserve_enable, + .procname = "sndbuf_policy", + .data = &init_net.sctp.sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "rto_alpha_exp_divisor", - .data = &sctp_rto_alpha, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "rto_beta_exp_divisor", - .data = &sctp_rto_beta, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "addip_enable", - .data = &sctp_addip_enable, + .procname = "rcvbuf_policy", + .data = &init_net.sctp.rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "default_auto_asconf", - .data = &sctp_default_auto_asconf, + .data = &init_net.sctp.default_auto_asconf, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "prsctp_enable", - .data = &sctp_prsctp_enable, + .procname = 
"addip_enable", + .data = &init_net.sctp.addip_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "sack_timeout", - .data = &sctp_sack_timeout, + .procname = "addip_noauth_enable", + .data = &init_net.sctp.addip_noauth, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &sack_timer_min, - .extra2 = &sack_timer_max, - }, - { - .procname = "sctp_mem", - .data = &sysctl_sctp_mem, - .maxlen = sizeof(sysctl_sctp_mem), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax - }, - { - .procname = "sctp_rmem", - .data = &sysctl_sctp_rmem, - .maxlen = sizeof(sysctl_sctp_rmem), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "sctp_wmem", - .data = &sysctl_sctp_wmem, - .maxlen = sizeof(sysctl_sctp_wmem), - .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "auth_enable", - .data = &sctp_auth_enable, + .procname = "prsctp_enable", + .data = &init_net.sctp.prsctp_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "addip_noauth_enable", - .data = &sctp_addip_noauth, + .procname = "auth_enable", + .data = &init_net.sctp.auth_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "addr_scope_policy", - .data = &sctp_scope_policy, + .data = &init_net.sctp.scope_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -264,7 +269,7 @@ static ctl_table sctp_table[] = { }, { .procname = "rwnd_update_shift", - .data = &sctp_rwnd_upd_shift, + .data = &init_net.sctp.rwnd_upd_shift, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -273,7 +278,7 @@ static ctl_table sctp_table[] = { }, { .procname = "max_autoclose", - .data = &sctp_max_autoclose, + .data = &init_net.sctp.max_autoclose, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &proc_doulongvec_minmax, @@ -284,6 +289,27 @@ static ctl_table sctp_table[] = { { /* sentinel */ } }; +int sctp_sysctl_net_register(struct net *net) +{ + struct ctl_table *table; + int i; + + table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + for (i = 0; table[i].data; i++) + table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; + + net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table); + return 0; +} + +void sctp_sysctl_net_unregister(struct net *net) +{ + unregister_net_sysctl_table(net->sctp.sysctl_header); +} + static struct ctl_table_header * sctp_sysctl_header; /* Sysctl registration. */ diff --git a/net/sctp/transport.c b/net/sctp/transport.c index c97472b248a2..953c21e4af97 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -59,7 +59,8 @@ /* 1st Level Abstractions. */ /* Initialize a new transport from provided memory. */ -static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, +static struct sctp_transport *sctp_transport_init(struct net *net, + struct sctp_transport *peer, const union sctp_addr *addr, gfp_t gfp) { @@ -76,7 +77,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * given destination transport address, set RTO to the protocol * parameter 'RTO.Initial'. 
*/ - peer->rto = msecs_to_jiffies(sctp_rto_initial); + peer->rto = msecs_to_jiffies(net->sctp.rto_initial); peer->last_time_heard = jiffies; peer->last_time_ecne_reduced = jiffies; @@ -86,8 +87,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, SPP_SACKDELAY_ENABLE; /* Initialize the default path max_retrans. */ - peer->pathmaxrxt = sctp_max_retrans_path; - peer->pf_retrans = sctp_pf_retrans; + peer->pathmaxrxt = net->sctp.max_retrans_path; + peer->pf_retrans = net->sctp.pf_retrans; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); @@ -109,7 +110,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, } /* Allocate and initialize a new transport. */ -struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, +struct sctp_transport *sctp_transport_new(struct net *net, + const union sctp_addr *addr, gfp_t gfp) { struct sctp_transport *transport; @@ -118,7 +120,7 @@ struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, if (!transport) goto fail; - if (!sctp_transport_init(transport, addr, gfp)) + if (!sctp_transport_init(net, transport, addr, gfp)) goto fail_init; transport->malloced = 1; @@ -316,6 +318,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return); if (tp->rttvar || tp->srtt) { + struct net *net = sock_net(tp->asoc->base.sk); /* 6.3.1 C3) When a new RTT measurement R' is made, set * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' @@ -327,10 +330,10 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) * For example, assuming the default value of RTO.Alpha of * 1/8, rto_alpha would be expressed as 3. */ - tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta) - + ((abs(tp->srtt - rtt)) >> sctp_rto_beta); - tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha) - + (rtt >> sctp_rto_alpha); + tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) + + ((abs(tp->srtt - rtt)) >> net->sctp.rto_beta); + tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) + + (rtt >> net->sctp.rto_alpha); } else { /* 6.3.1 C2) When the first RTT measurement R is made, set * SRTT <- R, RTTVAR <- R/2. diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 33d894776192..10c018a5b9fe 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -702,7 +702,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, if (rx_count >= asoc->base.sk->sk_rcvbuf) { if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || - (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize))) + (!sk_rmem_schedule(asoc->base.sk, chunk->skb, + chunk->skb->truesize))) goto fail; } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index f5a6a4f4faf7..360d8697b95c 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -326,7 +326,9 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, * payload was fragmented on the way and ip had to reassemble them. * We add the rest of skb's to the first skb's fraglist. 
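[Editor's note] The sctp_transport_update_rto() hunk above keeps the RFC 4960 6.3.1 smoothing but reads rto_alpha and rto_beta from the namespace; both are stored as shift exponents, so the default RTO.Alpha = 1/8 is alpha = 3 and RTO.Beta = 1/4 is beta = 2. A standalone version of the same arithmetic, with invented sample RTTs:

#include <stdio.h>
#include <stdlib.h>

static unsigned int srtt, rttvar;

static void update_rto(unsigned int rtt, int rto_alpha, int rto_beta)
{
	if (!srtt && !rttvar) {
		/* 6.3.1 C2: first measurement R. */
		srtt = rtt;
		rttvar = rtt / 2;
		return;
	}
	/* 6.3.1 C3, with alpha/beta applied as right shifts:
	 * RTTVAR <- (1 - beta) * RTTVAR + beta * |SRTT - R'|
	 * SRTT   <- (1 - alpha) * SRTT  + alpha * R'          */
	rttvar = rttvar - (rttvar >> rto_beta)
		 + (abs((int)srtt - (int)rtt) >> rto_beta);
	srtt = srtt - (srtt >> rto_alpha) + (rtt >> rto_alpha);
}

int main(void)
{
	const unsigned int samples[] = { 100, 120, 80, 300, 110 };

	for (unsigned int i = 0; i < 5; i++) {
		update_rto(samples[i], 3, 2);  /* alpha = 1/8, beta = 1/4 */
		printf("rtt=%3u -> srtt=%3u rttvar=%3u\n",
		       samples[i], srtt, rttvar);
	}
	return 0;
}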
*/ -static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag) +static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, + struct sk_buff_head *queue, struct sk_buff *f_frag, + struct sk_buff *l_frag) { struct sk_buff *pos; struct sk_buff *new = NULL; @@ -394,7 +396,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu } event = sctp_skb2event(f_frag); - SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS); + SCTP_INC_STATS(net, SCTP_MIB_REASMUSRMSGS); return event; } @@ -493,7 +495,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul cevent = sctp_skb2event(pd_first); pd_point = sctp_sk(asoc->base.sk)->pd_point; if (pd_point && pd_point <= pd_len) { - retval = sctp_make_reassembled_event(&ulpq->reasm, + retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), + &ulpq->reasm, pd_first, pd_last); if (retval) @@ -503,7 +506,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul done: return retval; found: - retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos); + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; goto done; @@ -563,7 +567,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) * further. */ done: - retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, last_frag); if (retval && is_last) retval->msg_flags |= MSG_EOR; @@ -655,7 +660,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) * further. */ done: - retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, last_frag); return retval; } diff --git a/net/socket.c b/net/socket.c index dfe5b66c97e0..a5471f804d99 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2657,6 +2657,7 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) return -EFAULT; + memset(&ifc, 0, sizeof(ifc)); if (ifc32.ifcbuf == 0) { ifc32.ifc_len = 0; ifc.ifc_len = 0; diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 9fe8857d8d59..03d03e37a7d5 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -21,6 +21,11 @@ config SUNRPC_XPRT_RDMA If unsure, say N. +config SUNRPC_SWAP + bool + depends on SUNRPC + select NETVM + config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism" depends on SUNRPC && CRYPTO diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 727e506cacda..b5c067bccc45 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -13,6 +13,7 @@ #include <linux/errno.h> #include <linux/hash.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/gss_api.h> #include <linux/spinlock.h> #ifdef RPC_DEBUG @@ -122,6 +123,59 @@ rpcauth_unregister(const struct rpc_authops *ops) } EXPORT_SYMBOL_GPL(rpcauth_unregister); +/** + * rpcauth_list_flavors - discover registered flavors and pseudoflavors + * @array: array to fill in + * @size: size of "array" + * + * Returns the number of array items filled in, or a negative errno. + * + * The returned array is not sorted by any policy. Callers should not + * rely on the order of the items in the returned array. 
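[Editor's note] Earlier in this group, dev_ifconf() gains a memset(&ifc, 0, sizeof(ifc)) before partial initialization, the usual defense against leaking stale stack bytes (including struct padding) through a structure later copied to user space. A userspace illustration of the pattern, with an invented struct standing in for struct ifconf:

#include <stdio.h>
#include <string.h>

/* Invented stand-in for struct ifconf; padding models the unset bytes. */
struct ifconf_like {
	int ifc_len;
	void *ifc_buf;
};

static void fill(struct ifconf_like *ifc, int have_buf)
{
	memset(ifc, 0, sizeof(*ifc)); /* the fix: zero before partial init */
	ifc->ifc_len = 0;
	if (have_buf)
		ifc->ifc_buf = ifc;   /* any non-NULL value, illustrative  */
}

int main(void)
{
	struct ifconf_like ifc;

	fill(&ifc, 0);
	/* Without the memset, ifc_buf and the padding between the fields
	 * would hold stale stack bytes here, which a copy back to user
	 * space would then expose. */
	printf("len=%d buf=%p\n", ifc.ifc_len, ifc.ifc_buf);
	return 0;
}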
+ */ +int +rpcauth_list_flavors(rpc_authflavor_t *array, int size) +{ + rpc_authflavor_t flavor; + int result = 0; + + spin_lock(&rpc_authflavor_lock); + for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) { + const struct rpc_authops *ops = auth_flavors[flavor]; + rpc_authflavor_t pseudos[4]; + int i, len; + + if (result >= size) { + result = -ENOMEM; + break; + } + + if (ops == NULL) + continue; + if (ops->list_pseudoflavors == NULL) { + array[result++] = ops->au_flavor; + continue; + } + len = ops->list_pseudoflavors(pseudos, ARRAY_SIZE(pseudos)); + if (len < 0) { + result = len; + break; + } + for (i = 0; i < len; i++) { + if (result >= size) { + result = -ENOMEM; + break; + } + array[result++] = pseudos[i]; + } + } + spin_unlock(&rpc_authflavor_lock); + + dprintk("RPC: %s returns %d\n", __func__, result); + return result; +} +EXPORT_SYMBOL_GPL(rpcauth_list_flavors); + struct rpc_auth * rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d3ad81f8da5b..34c522021004 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1619,6 +1619,7 @@ static const struct rpc_authops authgss_ops = { .crcreate = gss_create_cred, .pipes_create = gss_pipes_dentries_create, .pipes_destroy = gss_pipes_dentries_destroy, + .list_pseudoflavors = gss_mech_list_pseudoflavors, }; static const struct rpc_credops gss_credops = { diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 782bfe1b6465..b174fcd9ff4c 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -239,14 +239,28 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); -int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr) +/** + * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors + * @array: array to fill in + * @size: size of "array" + * + * Returns the number of array items filled in, or a negative errno. + * + * The returned array is not sorted by any policy. Callers should not + * rely on the order of the items in the returned array. 
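With AUTH_GSS now wiring up ->list_pseudoflavors (the auth_gss.c hunk above), plain flavors are copied into the result as-is while GSS is expanded into the pseudoflavors its mechanisms report:

/*
 * Illustrative rpcauth_list_flavors() result, assuming AUTH_NULL,
 * AUTH_UNIX and the krb5 GSS mechanism are registered:
 *
 *   array = { 0, 1, 390003, 390004, 390005 }
 *
 * 0 and 1 are AUTH_NULL and AUTH_UNIX, copied as-is from ops->au_flavor;
 * the last three are krb5, krb5i and krb5p, reported by the mechanism
 * through ->list_pseudoflavors() in place of a bare AUTH_GSS entry.
 */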
+ */ +int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size) { struct gss_api_mech *pos = NULL; int j, i = 0; spin_lock(&registered_mechs_lock); list_for_each_entry(pos, &registered_mechs, gm_list) { - for (j=0; j < pos->gm_pf_num; j++) { + for (j = 0; j < pos->gm_pf_num; j++) { + if (i >= size) { + spin_unlock(&registered_mechs_lock); + return -ENOMEM; + } array_ptr[i++] = pos->gm_pfs[j].pseudoflavor; } } @@ -254,8 +268,6 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr) return i; } -EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors); - u32 gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) { diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 47ad2666fdf6..2afd2a84dc35 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1349,8 +1349,11 @@ static int c_show(struct seq_file *m, void *p) if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ seq_printf(m, "# "); - else + else { + if (cache_is_expired(cd, cp)) + seq_printf(m, "# "); cache_put(cp, cd); + } return cd->cache_show(m, cd, cp); } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 00eb859b7de5..fa48c60aef23 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -717,6 +717,15 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; + if (sk_memalloc_socks()) { + struct rpc_xprt *xprt; + + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); + if (xprt->swapper) + task->tk_flags |= RPC_TASK_SWAPPER; + rcu_read_unlock(); + } /* Add to the client's list of all tasks */ spin_lock(&clnt->cl_lock); list_add_tail(&task->tk_task, &clnt->cl_tasks); @@ -1844,12 +1853,13 @@ call_timeout(struct rpc_task *task) return; } if (RPC_IS_SOFT(task)) { - if (clnt->cl_chatty) + if (clnt->cl_chatty) { rcu_read_lock(); printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, rcu_dereference(clnt->cl_xprt)->servername); rcu_read_unlock(); + } if (task->tk_flags & RPC_TASK_TIMEOUT) rpc_exit(task, -ETIMEDOUT); else diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 92509ffe15fc..a70acae496e4 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -251,7 +251,7 @@ static int rpcb_create_local_unix(struct net *net) if (IS_ERR(clnt)) { dprintk("RPC: failed to create AF_LOCAL rpcbind " "client (errno %ld).\n", PTR_ERR(clnt)); - result = -PTR_ERR(clnt); + result = PTR_ERR(clnt); goto out; } @@ -298,7 +298,7 @@ static int rpcb_create_local_net(struct net *net) if (IS_ERR(clnt)) { dprintk("RPC: failed to create local rpcbind " "client (errno %ld).\n", PTR_ERR(clnt)); - result = -PTR_ERR(clnt); + result = PTR_ERR(clnt); goto out; } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 994cfea2bad6..128494ec9a64 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -300,8 +300,9 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); /* * Make an RPC task runnable. * - * Note: If the task is ASYNC, this must be called with - * the spinlock held to protect the wait queue operation. + * Note: If the task is ASYNC, and is being made runnable after sitting on an + * rpc_wait_queue, this must be called with the queue spinlock held to protect + * the wait queue operation.
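The rpcb_clnt.c hunks above fix a sign bug: PTR_ERR() on a failed rpc_create() already yields a negative errno, so negating it turned, say, -ENOMEM into a positive 12 that callers would never treat as a failure. The usual idiom, sketched with a hypothetical constructor:

	struct rpc_clnt *clnt = make_rpcb_client();	/* hypothetical */

	if (IS_ERR(clnt))
		return PTR_ERR(clnt);	/* already negative; never negate */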
*/ static void rpc_make_runnable(struct rpc_task *task) { @@ -790,7 +791,9 @@ void rpc_execute(struct rpc_task *task) static void rpc_async_schedule(struct work_struct *work) { + current->flags |= PF_FSTRANS; __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); + current->flags &= ~PF_FSTRANS; } /** @@ -812,7 +815,10 @@ static void rpc_async_schedule(struct work_struct *work) void *rpc_malloc(struct rpc_task *task, size_t size) { struct rpc_buffer *buf; - gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT; + gfp_t gfp = GFP_NOWAIT; + + if (RPC_IS_SWAPPER(task)) + gfp |= __GFP_MEMALLOC; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) @@ -886,7 +892,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta static struct rpc_task * rpc_alloc_task(void) { - return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); + return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO); } /* diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 0cf165580d8d..0afba1b4b656 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -129,34 +129,6 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len) EXPORT_SYMBOL_GPL(xdr_terminate_string); void -xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, - unsigned int len) -{ - struct kvec *tail = xdr->tail; - u32 *p; - - xdr->pages = pages; - xdr->page_base = base; - xdr->page_len = len; - - p = (u32 *)xdr->head[0].iov_base + XDR_QUADLEN(xdr->head[0].iov_len); - tail->iov_base = p; - tail->iov_len = 0; - - if (len & 3) { - unsigned int pad = 4 - (len & 3); - - *p = 0; - tail->iov_base = (char *)p + (len & 3); - tail->iov_len = pad; - len += pad; - } - xdr->buflen += len; - xdr->len += len; -} -EXPORT_SYMBOL_GPL(xdr_encode_pages); - -void xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, struct page **pages, unsigned int base, unsigned int len) { @@ -457,6 +429,16 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len) EXPORT_SYMBOL_GPL(xdr_shift_buf); /** + * xdr_stream_pos - Return the current offset from the start of the xdr_stream + * @xdr: pointer to struct xdr_stream + */ +unsigned int xdr_stream_pos(const struct xdr_stream *xdr) +{ + return (unsigned int)(XDR_QUADLEN(xdr->buf->len) - xdr->nwords) << 2; +} +EXPORT_SYMBOL_GPL(xdr_stream_pos); + +/** * xdr_init_encode - Initialize a struct xdr_stream for sending data. 
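The sched.c hunks above belong to the swap-over-NFS work: a task flagged RPC_TASK_SWAPPER is writing out swap pages, so its buffer allocations may dip into the emergency reserves (__GFP_MEMALLOC) instead of relying on the old GFP_ATOMIC, and the async worker runs under PF_FSTRANS so the VM will not recurse into filesystem writeback. A minimal sketch of the flag selection, assuming only what the diff shows:

#include <linux/slab.h>

static void *task_buf_alloc(size_t size, bool is_swapper)
{
	gfp_t gfp = GFP_NOWAIT;		/* the RPC scheduler must not sleep */

	if (is_swapper)
		gfp |= __GFP_MEMALLOC;	/* may consume emergency reserves */
	return kmalloc(size, gfp);
}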
* @xdr: pointer to xdr_stream struct * @buf: pointer to XDR buffer in which to encode data @@ -556,13 +538,11 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b EXPORT_SYMBOL_GPL(xdr_write_pages); static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov, - __be32 *p, unsigned int len) + unsigned int len) { if (len > iov->iov_len) len = iov->iov_len; - if (p == NULL) - p = (__be32*)iov->iov_base; - xdr->p = p; + xdr->p = (__be32*)iov->iov_base; xdr->end = (__be32*)(iov->iov_base + len); xdr->iov = iov; xdr->page_ptr = NULL; @@ -609,7 +589,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr) newbase -= xdr->buf->page_base; if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len); + xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len); } static bool xdr_set_next_buffer(struct xdr_stream *xdr) @@ -618,7 +598,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr) xdr_set_next_page(xdr); else if (xdr->iov == xdr->buf->head) { if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len); + xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len); } return xdr->p != xdr->end; } @@ -634,10 +614,15 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) xdr->buf = buf; xdr->scratch.iov_base = NULL; xdr->scratch.iov_len = 0; + xdr->nwords = XDR_QUADLEN(buf->len); if (buf->head[0].iov_len != 0) - xdr_set_iov(xdr, buf->head, p, buf->len); + xdr_set_iov(xdr, buf->head, buf->len); else if (buf->page_len != 0) xdr_set_page_base(xdr, 0, buf->len); + if (p != NULL && p > xdr->p && xdr->end >= p) { + xdr->nwords -= p - xdr->p; + xdr->p = p; + } } EXPORT_SYMBOL_GPL(xdr_init_decode); @@ -662,12 +647,14 @@ EXPORT_SYMBOL_GPL(xdr_init_decode_pages); static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { + unsigned int nwords = XDR_QUADLEN(nbytes); __be32 *p = xdr->p; - __be32 *q = p + XDR_QUADLEN(nbytes); + __be32 *q = p + nwords; - if (unlikely(q > xdr->end || q < p)) + if (unlikely(nwords > xdr->nwords || q > xdr->end || q < p)) return NULL; xdr->p = q; + xdr->nwords -= nwords; return p; } @@ -734,6 +721,31 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) } EXPORT_SYMBOL_GPL(xdr_inline_decode); +static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) +{ + struct xdr_buf *buf = xdr->buf; + struct kvec *iov; + unsigned int nwords = XDR_QUADLEN(len); + unsigned int cur = xdr_stream_pos(xdr); + + if (xdr->nwords == 0) + return 0; + if (nwords > xdr->nwords) { + nwords = xdr->nwords; + len = nwords << 2; + } + /* Realign pages to current pointer position */ + iov = buf->head; + if (iov->iov_len > cur) + xdr_shrink_bufhead(buf, iov->iov_len - cur); + + /* Truncate page data and move it into the tail */ + if (buf->page_len > len) + xdr_shrink_pagelen(buf, buf->page_len - len); + xdr->nwords = XDR_QUADLEN(buf->len - cur); + return len; +} + /** * xdr_read_pages - Ensure page-based XDR data to decode is aligned at current pointer position * @xdr: pointer to xdr_stream struct @@ -742,39 +754,37 @@ EXPORT_SYMBOL_GPL(xdr_inline_decode); * Moves data beyond the current pointer position from the XDR head[] buffer * into the page list. Any data that lies beyond current position + "len" * bytes is moved into the XDR tail[]. 
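All of the new nwords accounting counts 4-byte XDR words. Worked through for a 10-byte opaque: XDR_QUADLEN(10) = (10 + 3) >> 2 = 3 words on the wire, and (3 << 2) - 10 = 2 trailing pad bytes, which is exactly the padding computed in xdr_read_pages(). As a standalone check:

#define XDR_QUADLEN(l)	(((l) + 3) >> 2)	/* bytes to 4-byte words */

	unsigned int len = 10;				/* example length */
	unsigned int nwords = XDR_QUADLEN(len);		/* == 3 */
	unsigned int padding = (nwords << 2) - len;	/* == 2 */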
+ * + * Returns the number of XDR encoded bytes now contained in the pages */ -void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) +unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len) { struct xdr_buf *buf = xdr->buf; struct kvec *iov; - ssize_t shift; + unsigned int nwords; unsigned int end; - int padding; + unsigned int padding; - /* Realign pages to current pointer position */ - iov = buf->head; - shift = iov->iov_len + (char *)iov->iov_base - (char *)xdr->p; - if (shift > 0) - xdr_shrink_bufhead(buf, shift); - - /* Truncate page data and move it into the tail */ - if (buf->page_len > len) - xdr_shrink_pagelen(buf, buf->page_len - len); - padding = (XDR_QUADLEN(len) << 2) - len; + len = xdr_align_pages(xdr, len); + if (len == 0) + return 0; + nwords = XDR_QUADLEN(len); + padding = (nwords << 2) - len; xdr->iov = iov = buf->tail; /* Compute remaining message length. */ - end = iov->iov_len; - shift = buf->buflen - buf->len; - if (shift < end) - end -= shift; - else if (shift > 0) - end = 0; + end = ((xdr->nwords - nwords) << 2) + padding; + if (end > iov->iov_len) + end = iov->iov_len; + /* * Position current pointer at beginning of tail, and * set remaining message length. */ xdr->p = (__be32 *)((char *)iov->iov_base + padding); xdr->end = (__be32 *)((char *)iov->iov_base + end); + xdr->page_ptr = NULL; + xdr->nwords = XDR_QUADLEN(end - padding); + return len; } EXPORT_SYMBOL_GPL(xdr_read_pages); @@ -790,12 +800,13 @@ EXPORT_SYMBOL_GPL(xdr_read_pages); */ void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) { - xdr_read_pages(xdr, len); + len = xdr_align_pages(xdr, len); /* * Position current pointer at beginning of tail, and * set remaining message length. */ - xdr_set_page_base(xdr, 0, len); + if (len != 0) + xdr_set_page_base(xdr, 0, len); } EXPORT_SYMBOL_GPL(xdr_enter_page); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index b446e100286f..06cdbff79e4a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -200,6 +200,7 @@ xprt_rdma_connect_worker(struct work_struct *work) int rc = 0; if (!xprt->shutdown) { + current->flags |= PF_FSTRANS; xprt_clear_connected(xprt); dprintk("RPC: %s: %sconnect\n", __func__, @@ -212,10 +213,10 @@ xprt_rdma_connect_worker(struct work_struct *work) out: xprt_wake_pending_tasks(xprt, rc); - out_clear: dprintk("RPC: %s: exit\n", __func__); xprt_clear_connecting(xprt); + current->flags &= ~PF_FSTRANS; } /* diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 62d0dac8f780..400567243f84 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1892,6 +1892,8 @@ static void xs_local_setup_socket(struct work_struct *work) if (xprt->shutdown) goto out; + current->flags |= PF_FSTRANS; + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); status = __sock_create(xprt->xprt_net, AF_LOCAL, SOCK_STREAM, 0, &sock, 1); @@ -1925,7 +1927,47 @@ static void xs_local_setup_socket(struct work_struct *work) out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); + current->flags &= ~PF_FSTRANS; +} + +#ifdef CONFIG_SUNRPC_SWAP +static void xs_set_memalloc(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + + if (xprt->swapper) + sk_set_memalloc(transport->inet); +} + +/** + * xs_swapper - Tag this transport as being used for swap. 
+ * @xprt: transport to tag + * @enable: enable/disable + * + */ +int xs_swapper(struct rpc_xprt *xprt, int enable) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + int err = 0; + + if (enable) { + xprt->swapper++; + xs_set_memalloc(xprt); + } else if (xprt->swapper) { + xprt->swapper--; + sk_clear_memalloc(transport->inet); + } + + return err; +} +EXPORT_SYMBOL_GPL(xs_swapper); +#else +static void xs_set_memalloc(struct rpc_xprt *xprt) +{ } +#endif static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { @@ -1951,6 +1993,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) transport->sock = sock; transport->inet = sk; + xs_set_memalloc(xprt); + write_unlock_bh(&sk->sk_callback_lock); } xs_udp_do_set_buffer_size(xprt); @@ -1967,6 +2011,8 @@ static void xs_udp_setup_socket(struct work_struct *work) if (xprt->shutdown) goto out; + current->flags |= PF_FSTRANS; + /* Start by resetting any existing state */ xs_reset_transport(transport); sock = xs_create_sock(xprt, transport, @@ -1985,6 +2031,7 @@ static void xs_udp_setup_socket(struct work_struct *work) out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); + current->flags &= ~PF_FSTRANS; } /* @@ -2075,6 +2122,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) if (!xprt_bound(xprt)) goto out; + xs_set_memalloc(xprt); + /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; @@ -2110,6 +2159,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) if (xprt->shutdown) goto out; + current->flags |= PF_FSTRANS; + if (!sock) { clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); sock = xs_create_sock(xprt, transport, @@ -2159,6 +2210,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EINPROGRESS: case -EALREADY: xprt_clear_connecting(xprt); + current->flags &= ~PF_FSTRANS; return; case -EINVAL: /* Happens, for instance, if the user specified a link @@ -2171,6 +2223,7 @@ out_eagain: out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); + current->flags &= ~PF_FSTRANS; } /** diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 09e71241265d..4ec5c80e8a7c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -49,21 +49,6 @@ struct tipc_bearer tipc_bearers[MAX_BEARERS]; static void bearer_disable(struct tipc_bearer *b_ptr); /** - * media_name_valid - validate media name - * - * Returns 1 if media name is valid, otherwise 0. 
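A sketch of how the new hook might be driven; the wrappers are hypothetical (the CONFIG_SUNRPC_SWAP machinery added in this series is the intended caller), and xprt->swapper acts as a reference count so several swap files can share one transport:

static int swap_enable(struct rpc_xprt *xprt)
{
	return xs_swapper(xprt, 1);	/* sk_set_memalloc() on the socket */
}

static void swap_disable(struct rpc_xprt *xprt)
{
	xs_swapper(xprt, 0);		/* sk_clear_memalloc() */
}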
- */ -static int media_name_valid(const char *name) -{ - u32 len; - - len = strlen(name); - if ((len + 1) > TIPC_MAX_MEDIA_NAME) - return 0; - return strspn(name, tipc_alphabet) == len; -} - -/** * tipc_media_find - locates specified media object by name */ struct tipc_media *tipc_media_find(const char *name) @@ -102,7 +87,7 @@ int tipc_register_media(struct tipc_media *m_ptr) write_lock_bh(&tipc_net_lock); - if (!media_name_valid(m_ptr->name)) + if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME) goto exit; if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) || !m_ptr->bcast_addr.broadcast) @@ -206,9 +191,7 @@ static int bearer_name_validate(const char *name, /* validate component parts of bearer name */ if ((media_len <= 1) || (media_len > TIPC_MAX_MEDIA_NAME) || - (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME) || - (strspn(media_name, tipc_alphabet) != (media_len - 1)) || - (strspn(if_name, tipc_alphabet) != (if_len - 1))) + (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME)) return 0; /* return bearer name components, if necessary */ diff --git a/net/tipc/config.c b/net/tipc/config.c index a056a3852f71..f67866c765dd 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, 2010-2011, Wind River Systems + * Copyright (c) 2004-2007, 2010-2012, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -208,36 +208,6 @@ static struct sk_buff *cfg_set_remote_mng(void) return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_set_max_publications(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value < 1 || value > 65535) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max publications must be 1-65535)"); - tipc_max_publications = value; - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_set_max_subscriptions(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value < 1 || value > 65535) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max subscriptions must be 1-65535"); - tipc_max_subscriptions = value; - return tipc_cfg_reply_none(); -} - static struct sk_buff *cfg_set_max_ports(void) { u32 value; @@ -357,12 +327,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_SET_MAX_PORTS: rep_tlv_buf = cfg_set_max_ports(); break; - case TIPC_CMD_SET_MAX_PUBL: - rep_tlv_buf = cfg_set_max_publications(); - break; - case TIPC_CMD_SET_MAX_SUBSCR: - rep_tlv_buf = cfg_set_max_subscriptions(); - break; case TIPC_CMD_SET_NETID: rep_tlv_buf = cfg_set_netid(); break; @@ -372,12 +336,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_PORTS: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports); break; - case TIPC_CMD_GET_MAX_PUBL: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_publications); - break; - case TIPC_CMD_GET_MAX_SUBSCR: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions); - break; case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); break; @@ -393,6 +351,10 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, 
const void *request_area case TIPC_CMD_GET_MAX_CLUSTERS: case TIPC_CMD_SET_MAX_NODES: case TIPC_CMD_GET_MAX_NODES: + case TIPC_CMD_SET_MAX_SUBSCR: + case TIPC_CMD_GET_MAX_SUBSCR: + case TIPC_CMD_SET_MAX_PUBL: + case TIPC_CMD_GET_MAX_PUBL: case TIPC_CMD_SET_LOG_SIZE: case TIPC_CMD_DUMP_LOG: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED diff --git a/net/tipc/core.c b/net/tipc/core.c index 6586eac6a50e..bfe8af88469a 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -48,18 +48,13 @@ /* global variables used by multiple sub-systems within TIPC */ -int tipc_random; - -const char tipc_alphabet[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_."; +int tipc_random __read_mostly; /* configurable TIPC parameters */ -u32 tipc_own_addr; -int tipc_max_ports; -int tipc_max_subscriptions; -int tipc_max_publications; -int tipc_net_id; -int tipc_remote_management; +u32 tipc_own_addr __read_mostly; +int tipc_max_ports __read_mostly; +int tipc_net_id __read_mostly; +int tipc_remote_management __read_mostly; /** @@ -101,9 +96,8 @@ int tipc_core_start_net(unsigned long addr) { int res; - res = tipc_net_start(addr); - if (!res) - res = tipc_eth_media_start(); + tipc_net_start(addr); + res = tipc_eth_media_start(); if (res) tipc_core_stop_net(); return res; @@ -160,8 +154,6 @@ static int __init tipc_init(void) tipc_own_addr = 0; tipc_remote_management = 1; - tipc_max_publications = 10000; - tipc_max_subscriptions = 2000; tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; diff --git a/net/tipc/core.h b/net/tipc/core.h index fd42e106c185..0207db04179a 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -60,7 +60,9 @@ #define TIPC_MOD_VER "2.0.0" -#define ULTRA_STRING_MAX_LEN 32768 +#define ULTRA_STRING_MAX_LEN 32768 +#define TIPC_MAX_SUBSCRIPTIONS 65535 +#define TIPC_MAX_PUBLICATIONS 65535 struct tipc_msg; /* msg.h */ @@ -74,19 +76,15 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...); /* * Global configuration variables */ -extern u32 tipc_own_addr; -extern int tipc_max_ports; -extern int tipc_max_subscriptions; -extern int tipc_max_publications; -extern int tipc_net_id; -extern int tipc_remote_management; +extern u32 tipc_own_addr __read_mostly; +extern int tipc_max_ports __read_mostly; +extern int tipc_net_id __read_mostly; +extern int tipc_remote_management __read_mostly; /* * Other global variables */ -extern int tipc_random; -extern const char tipc_alphabet[]; - +extern int tipc_random __read_mostly; /* * Routines available to privileged subsystems diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 90ac9bfa7abb..2132c1ef2951 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -46,19 +46,30 @@ * @bearer: ptr to associated "generic" bearer structure * @dev: ptr to associated Ethernet network device * @tipc_packet_type: used in binding TIPC to Ethernet driver + * @setup: work item used when enabling bearer * @cleanup: work item used when disabling bearer */ struct eth_bearer { struct tipc_bearer *bearer; struct net_device *dev; struct packet_type tipc_packet_type; + struct work_struct setup; struct work_struct cleanup; }; static struct tipc_media eth_media_info; static struct eth_bearer eth_bearers[MAX_ETH_BEARERS]; static int eth_started; -static struct notifier_block notifier; + +static int recv_notification(struct notifier_block *nb, unsigned long evt, + void *dv); +/* + * Network device notifier info + */ +static struct notifier_block notifier = { + .notifier_call = recv_notification, + .priority = 0 +}; /** * 
eth_media_addr_set - initialize Ethernet media address structure @@ -134,6 +145,17 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, } /** + * setup_bearer - setup association between Ethernet bearer and interface + */ +static void setup_bearer(struct work_struct *work) +{ + struct eth_bearer *eb_ptr = + container_of(work, struct eth_bearer, setup); + + dev_add_pack(&eb_ptr->tipc_packet_type); +} + +/** * enable_bearer - attach TIPC bearer to an Ethernet interface */ static int enable_bearer(struct tipc_bearer *tb_ptr) @@ -173,7 +195,8 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) eb_ptr->tipc_packet_type.func = recv_msg; eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr; INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list)); - dev_add_pack(&eb_ptr->tipc_packet_type); + INIT_WORK(&eb_ptr->setup, setup_bearer); + schedule_work(&eb_ptr->setup); /* Associate TIPC bearer with Ethernet bearer */ eb_ptr->bearer = tb_ptr; @@ -357,8 +380,6 @@ int tipc_eth_media_start(void) if (res) return res; - notifier.notifier_call = &recv_notification; - notifier.priority = 0; res = register_netdevice_notifier(&notifier); if (!res) eth_started = 1; diff --git a/net/tipc/handler.c b/net/tipc/handler.c index 7a52d3922f3c..111ff8300ae5 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -45,7 +45,7 @@ struct queue_item { static struct kmem_cache *tipc_queue_item_cache; static struct list_head signal_queue_head; static DEFINE_SPINLOCK(qitem_lock); -static int handler_enabled; +static int handler_enabled __read_mostly; static void process_signal_queue(unsigned long dummy); diff --git a/net/tipc/link.c b/net/tipc/link.c index 1c1e6151875e..a79c755cb417 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -210,9 +210,7 @@ static int link_name_validate(const char *name, (z_local > 255) || (c_local > 4095) || (n_local > 4095) || (z_peer > 255) || (c_peer > 4095) || (n_peer > 4095) || (if_local_len <= 1) || (if_local_len > TIPC_MAX_IF_NAME) || - (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME) || - (strspn(if_local, tipc_alphabet) != (if_local_len - 1)) || - (strspn(if_peer, tipc_alphabet) != (if_peer_len - 1))) + (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME)) return 0; /* return link name components, if necessary */ diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 360c478b0b53..98975e80bb51 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -41,7 +41,7 @@ #include "subscr.h" #include "port.h" -static int tipc_nametbl_size = 1024; /* must be a power of 2 */ +#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ /** * struct name_info - name sequence publication info @@ -114,7 +114,7 @@ DEFINE_RWLOCK(tipc_nametbl_lock); static int hash(int x) { - return x & (tipc_nametbl_size - 1); + return x & (TIPC_NAMETBL_SIZE - 1); } /** @@ -667,9 +667,9 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, { struct publication *publ; - if (table.local_publ_count >= tipc_max_publications) { + if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) { pr_warn("Publication failed, local publication limit reached (%u)\n", - tipc_max_publications); + TIPC_MAX_PUBLICATIONS); return NULL; } @@ -871,7 +871,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, ret += nametbl_header(buf, len, depth); lowbound = 0; upbound = ~0; - for (i = 0; i < tipc_nametbl_size; i++) { + for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { seq_head = &table.types[i]; hlist_for_each_entry(seq, seq_node, seq_head, ns_list) { ret += nameseq_list(seq, buf + ret, len
- ret, @@ -935,7 +935,7 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) int tipc_nametbl_init(void) { - table.types = kcalloc(tipc_nametbl_size, sizeof(struct hlist_head), + table.types = kcalloc(TIPC_NAMETBL_SIZE, sizeof(struct hlist_head), GFP_ATOMIC); if (!table.types) return -ENOMEM; @@ -953,7 +953,7 @@ void tipc_nametbl_stop(void) /* Verify name table is empty, then release it */ write_lock_bh(&tipc_nametbl_lock); - for (i = 0; i < tipc_nametbl_size; i++) { + for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { if (hlist_empty(&table.types[i])) continue; pr_err("nametbl_stop(): orphaned hash chain detected\n"); diff --git a/net/tipc/net.c b/net/tipc/net.c index 5b5cea259caf..7d305ecc09c2 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -171,7 +171,7 @@ void tipc_net_route_msg(struct sk_buff *buf) tipc_link_send(buf, dnode, msg_link_selector(msg)); } -int tipc_net_start(u32 addr) +void tipc_net_start(u32 addr) { char addr_string[16]; @@ -187,7 +187,6 @@ int tipc_net_start(u32 addr) pr_info("Started in network mode\n"); pr_info("Own node address %s, network identity %u\n", tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); - return 0; } void tipc_net_stop(void) diff --git a/net/tipc/net.h b/net/tipc/net.h index 9eb4b9e220eb..079daadb3f72 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -41,7 +41,7 @@ extern rwlock_t tipc_net_lock; void tipc_net_route_msg(struct sk_buff *buf); -int tipc_net_start(u32 addr); +void tipc_net_start(u32 addr); void tipc_net_stop(void); #endif diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 5ed5965eb0be..0f7d0d007e22 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -304,9 +304,9 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, } /* Refuse subscription if global limit exceeded */ - if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) { + if (atomic_read(&topsrv.subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", - tipc_max_subscriptions); + TIPC_MAX_SUBSCRIPTIONS); subscr_terminate(subscriber); return NULL; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 79981d97bc9c..c5ee4ff61364 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -823,6 +823,34 @@ fail: return NULL; } +static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) +{ + struct dentry *dentry; + struct path path; + int err = 0; + /* + * Get the parent directory, calculate the hash for last + * component. + */ + dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + return err; + + /* + * All right, let's create it. 
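Stepping back to the name-table hunks above: they depend on TIPC_NAMETBL_SIZE being a power of two, since masking with size - 1 is then the same as a modulo but costs a single AND. Worked example with the constant from the diff:

#define TIPC_NAMETBL_SIZE 1024	/* must be a power of 2 */

static int hash(int x)
{
	/* 1024 - 1 == 0x3ff: keep the low 10 bits, i.e. x % 1024 */
	return x & (TIPC_NAMETBL_SIZE - 1);
}

/* e.g. hash(4711) == 4711 & 0x3ff == 615 */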
+ */ + err = security_path_mknod(&path, dentry, mode, 0); + if (!err) { + err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); + if (!err) { + res->mnt = mntget(path.mnt); + res->dentry = dget(dentry); + } + } + done_path_create(&path, dentry); + return err; +} static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { @@ -831,8 +859,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; char *sun_path = sunaddr->sun_path; - struct dentry *dentry = NULL; - struct path path; int err; unsigned int hash; struct unix_address *addr; @@ -869,43 +895,23 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) atomic_set(&addr->refcnt, 1); if (sun_path[0]) { - umode_t mode; - err = 0; - /* - * Get the parent directory, calculate the hash for last - * component. - */ - dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out_mknod_parent; - - /* - * All right, let's create it. - */ - mode = S_IFSOCK | + struct path path; + umode_t mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = mnt_want_write(path.mnt); - if (err) - goto out_mknod_dput; - err = security_path_mknod(&path, dentry, mode, 0); - if (err) - goto out_mknod_drop_write; - err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); -out_mknod_drop_write: - mnt_drop_write(path.mnt); - if (err) - goto out_mknod_dput; - mutex_unlock(&path.dentry->d_inode->i_mutex); - dput(path.dentry); - path.dentry = dentry; - + err = unix_mknod(sun_path, mode, &path); + if (err) { + if (err == -EEXIST) + err = -EADDRINUSE; + unix_release_addr(addr); + goto out_up; + } addr->hash = UNIX_HASH_SIZE; - } - - spin_lock(&unix_table_lock); - - if (!sun_path[0]) { + hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1); + spin_lock(&unix_table_lock); + u->path = path; + list = &unix_socket_table[hash]; + } else { + spin_lock(&unix_table_lock); err = -EADDRINUSE; if (__unix_find_socket_byname(net, sunaddr, addr_len, sk->sk_type, hash)) { @@ -914,9 +920,6 @@ out_mknod_drop_write: } list = &unix_socket_table[addr->hash]; - } else { - list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)]; - u->path = path; } err = 0; @@ -930,16 +933,6 @@ out_up: mutex_unlock(&u->readlock); out: return err; - -out_mknod_dput: - dput(dentry); - mutex_unlock(&path.dentry->d_inode->i_mutex); - path_put(&path); -out_mknod_parent: - if (err == -EEXIST) - err = -EADDRINUSE; - unix_release_addr(addr); - goto out_up; } static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) @@ -1457,7 +1450,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, false); if (err < 0) return err; @@ -1626,7 +1619,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, if (NULL == siocb->scm) siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm); + err = scm_send(sock, msg, siocb->scm, false); if (err < 0) return err; diff --git a/net/wireless/core.c b/net/wireless/core.c index 31b40cc4a9c3..dcd64d5b07aa 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -952,6 +952,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, */ synchronize_rcu(); INIT_LIST_HEAD(&wdev->list); + /* + * Ensure 
that all events have been processed and + * freed. + */ + cfg80211_process_wdev_events(wdev); break; case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) diff --git a/net/wireless/core.h b/net/wireless/core.h index 5206c6844fd7..bc7430b54771 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -426,6 +426,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, u32 *flags, struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); +void cfg80211_process_wdev_events(struct wireless_dev *wdev); int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2303ee73b50a..2ded3c7fad06 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -680,6 +680,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_IBSS; if (rd_flags & NL80211_RRF_DFS) channel_flags |= IEEE80211_CHAN_RADAR; + if (rd_flags & NL80211_RRF_NO_OFDM) + channel_flags |= IEEE80211_CHAN_NO_OFDM; return channel_flags; } @@ -901,7 +903,21 @@ static void handle_channel(struct wiphy *wiphy, chan->max_antenna_gain = min(chan->orig_mag, (int) MBI_TO_DBI(power_rule->max_antenna_gain)); chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp); - chan->max_power = min(chan->max_power, chan->max_reg_power); + if (chan->orig_mpwr) { + /* + * Devices that have their own custom regulatory domain + * but also use WIPHY_FLAG_STRICT_REGULATORY will follow the + * passed country IE power settings. + */ + if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY && + wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) + chan->max_power = chan->max_reg_power; + else + chan->max_power = min(chan->orig_mpwr, + chan->max_reg_power); + } else + chan->max_power = chan->max_reg_power; } static void handle_band(struct wiphy *wiphy, @@ -1885,6 +1901,7 @@ static void restore_custom_reg_settings(struct wiphy *wiphy) chan->flags = chan->orig_flags; chan->max_antenna_gain = chan->orig_mag; chan->max_power = chan->orig_mpwr; + chan->beacon_found = false; } } } diff --git a/net/wireless/util.c b/net/wireless/util.c index 26f8cd30f712..994e2f0cc7a8 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -735,7 +735,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) wdev->connect_keys = NULL; } -static void cfg80211_process_wdev_events(struct wireless_dev *wdev) +void cfg80211_process_wdev_events(struct wireless_dev *wdev) { struct cfg80211_event *ev; unsigned long flags; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c5a5165a5927..2ed698c190b5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -42,13 +42,14 @@ static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); static struct dst_entry *xfrm_policy_sk_bundles; static DEFINE_RWLOCK(xfrm_policy_lock); -static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); -static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; +static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); +static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] + __read_mostly; static struct kmem_cache *xfrm_dst_cache __read_mostly; static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); +static inline void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void 
xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); @@ -1357,6 +1358,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); xdst->flo.ops = &xfrm_bundle_fc_ops; + if (afinfo->init_dst) + afinfo->init_dst(net, xdst); } else xdst = ERR_PTR(-ENOBUFS); @@ -2418,7 +2421,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_policy_afinfo_lock); + spin_lock_bh(&xfrm_policy_afinfo_lock); if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else { @@ -2439,9 +2442,9 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(afinfo->garbage_collect == NULL)) afinfo->garbage_collect = xfrm_garbage_collect_deferred; - xfrm_policy_afinfo[afinfo->family] = afinfo; + rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); } - write_unlock_bh(&xfrm_policy_afinfo_lock); + spin_unlock_bh(&xfrm_policy_afinfo_lock); rtnl_lock(); for_each_net(net) { @@ -2474,13 +2477,14 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) return -EINVAL; if (unlikely(afinfo->family >= NPROTO)) return -EAFNOSUPPORT; - write_lock_bh(&xfrm_policy_afinfo_lock); + spin_lock_bh(&xfrm_policy_afinfo_lock); if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) err = -EINVAL; else { struct dst_ops *dst_ops = afinfo->dst_ops; - xfrm_policy_afinfo[afinfo->family] = NULL; + rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], + NULL); dst_ops->kmem_cachep = NULL; dst_ops->check = NULL; dst_ops->negative_advice = NULL; @@ -2488,7 +2492,8 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) afinfo->garbage_collect = NULL; } } - write_unlock_bh(&xfrm_policy_afinfo_lock); + spin_unlock_bh(&xfrm_policy_afinfo_lock); + synchronize_rcu(); return err; } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); @@ -2497,16 +2502,16 @@ static void __net_init xfrm_dst_ops_init(struct net *net) { struct xfrm_policy_afinfo *afinfo; - read_lock_bh(&xfrm_policy_afinfo_lock); - afinfo = xfrm_policy_afinfo[AF_INET]; + rcu_read_lock_bh(); + afinfo = rcu_dereference_bh(xfrm_policy_afinfo[AF_INET]); if (afinfo) net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; #if IS_ENABLED(CONFIG_IPV6) - afinfo = xfrm_policy_afinfo[AF_INET6]; + afinfo = rcu_dereference_bh(xfrm_policy_afinfo[AF_INET6]); if (afinfo) net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; #endif - read_unlock_bh(&xfrm_policy_afinfo_lock); + rcu_read_unlock_bh(); } static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) @@ -2514,16 +2519,16 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) struct xfrm_policy_afinfo *afinfo; if (unlikely(family >= NPROTO)) return NULL; - read_lock(&xfrm_policy_afinfo_lock); - afinfo = xfrm_policy_afinfo[family]; + rcu_read_lock(); + afinfo = rcu_dereference(xfrm_policy_afinfo[family]); if (unlikely(!afinfo)) - read_unlock(&xfrm_policy_afinfo_lock); + rcu_read_unlock(); return afinfo; } -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) +static inline void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) { - read_unlock(&xfrm_policy_afinfo_lock); + rcu_read_unlock(); } static int xfrm_dev_event(struct notifier_block *this, unsigned long 
event, void *ptr) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5b228f97d4b3..7856c33898fa 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -415,8 +415,17 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) if (x->lft.hard_add_expires_seconds) { long tmo = x->lft.hard_add_expires_seconds + x->curlft.add_time - now; - if (tmo <= 0) - goto expired; + if (tmo <= 0) { + if (x->xflags & XFRM_SOFT_EXPIRE) { + /* enter hard expire without soft expire first?! + * setting a new date could trigger this. + * workaround: fix x->curlft.add_time by below: + */ + x->curlft.add_time = now - x->saved_tmo - 1; + tmo = x->lft.hard_add_expires_seconds - x->saved_tmo; + } else + goto expired; + } if (tmo < next) next = tmo; } @@ -433,10 +442,14 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) if (x->lft.soft_add_expires_seconds) { long tmo = x->lft.soft_add_expires_seconds + x->curlft.add_time - now; - if (tmo <= 0) + if (tmo <= 0) { warn = 1; - else if (tmo < next) + x->xflags &= ~XFRM_SOFT_EXPIRE; + } else if (tmo < next) { next = tmo; + x->xflags |= XFRM_SOFT_EXPIRE; + x->saved_tmo = tmo; + } } if (x->lft.soft_use_expires_seconds) { long tmo = x->lft.soft_use_expires_seconds + @@ -1687,7 +1700,7 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { - acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT); + acqret = km->acquire(x, t, pol); if (!acqret) err = acqret; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e75d8e47f35c..ab58034c42d6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2567,8 +2567,7 @@ static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, } static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, - struct xfrm_tmpl *xt, struct xfrm_policy *xp, - int dir) + struct xfrm_tmpl *xt, struct xfrm_policy *xp) { __u32 seq = xfrm_get_acqseq(); struct xfrm_user_acquire *ua; @@ -2583,7 +2582,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, memcpy(&ua->id, &x->id, sizeof(ua->id)); memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr)); memcpy(&ua->sel, &x->sel, sizeof(ua->sel)); - copy_to_user_policy(xp, &ua->policy, dir); + copy_to_user_policy(xp, &ua->policy, XFRM_POLICY_OUT); ua->aalgos = xt->aalgos; ua->ealgos = xt->ealgos; ua->calgos = xt->calgos; @@ -2605,7 +2604,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, - struct xfrm_policy *xp, int dir) + struct xfrm_policy *xp) { struct net *net = xs_net(x); struct sk_buff *skb; @@ -2614,7 +2613,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, if (skb == NULL) return -ENOMEM; - if (build_acquire(skb, x, xt, xp, dir) < 0) + if (build_acquire(skb, x, xt, xp) < 0) BUG(); return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
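The xfrm_policy.c hunks convert the afinfo table from an rwlock to the standard RCU shape: lock-free readers, a spinlock that only serializes writers, and a grace period before the unregistered object may be freed. A generic sketch of that pattern under hypothetical names (struct foo standing in for xfrm_policy_afinfo); it mirrors the quirk above where the get helper leaves rcu_read_lock() held on success and the put helper drops it:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct foo { int id; };

#define FOO_TBL_SIZE 16
static DEFINE_SPINLOCK(foo_lock);		/* serializes writers only */
static struct foo __rcu *foo_tbl[FOO_TBL_SIZE];

static struct foo *foo_get(unsigned short family)
{
	struct foo *f;

	rcu_read_lock();
	f = rcu_dereference(foo_tbl[family]);
	if (!f)
		rcu_read_unlock();	/* on success, foo_put() drops it */
	return f;
}

static void foo_put(struct foo *f)
{
	rcu_read_unlock();
}

static void foo_unregister(unsigned short family)
{
	spin_lock_bh(&foo_lock);
	rcu_assign_pointer(foo_tbl[family], NULL);
	spin_unlock_bh(&foo_lock);
	synchronize_rcu();		/* wait out all current readers */
}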