From ab73b751303bc60d7d9fba875c958dedfe14754c Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 10 Apr 2012 12:51:52 +0200 Subject: NFC: Export LLCP general bytes getter Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index b7ca4a2a1d72..3116f923f607 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -188,6 +188,7 @@ struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp); int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gt, u8 gt_len); +u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len); int nfc_targets_found(struct nfc_dev *dev, struct nfc_target *targets, int ntargets); -- cgit v1.2.3 From fe7c580073280c15bb4eb4f82bf20dddc1a68383 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 15 May 2012 15:57:06 +0200 Subject: NFC: Add target mode protocols to the polling loop startup routine Signed-off-by: Samuel Ortiz --- drivers/nfc/pn533.c | 39 +++++++++++++++++++++++++++++---------- drivers/nfc/pn544_hci.c | 10 ++++++---- include/linux/nfc.h | 4 ++++ include/net/nfc/hci.h | 3 ++- include/net/nfc/nfc.h | 3 ++- include/net/nfc/shdlc.h | 3 ++- net/nfc/core.c | 10 +++++----- net/nfc/hci/core.c | 5 +++-- net/nfc/hci/shdlc.c | 6 ++++-- net/nfc/nci/core.c | 7 ++++--- net/nfc/netlink.c | 19 +++++++++++++++---- net/nfc/nfc.h | 2 +- 12 files changed, 77 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c index 19110f0eb15f..38a523c62132 100644 --- a/drivers/nfc/pn533.c +++ b/drivers/nfc/pn533.c @@ -1078,27 +1078,23 @@ stop_poll: return 0; } -static int pn533_start_poll(struct nfc_dev *nfc_dev, u32 protocols) +static int pn533_init_target(struct nfc_dev *nfc_dev, u32 protocols) +{ + return 0; +} + +static int pn533_start_im_poll(struct nfc_dev *nfc_dev, u32 protocols) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct pn533_poll_modulations *start_mod; int rc; - nfc_dev_dbg(&dev->interface->dev, "%s - protocols=0x%x", __func__, - protocols); - if (dev->poll_mod_count) { nfc_dev_err(&dev->interface->dev, "Polling operation already" " active"); return -EBUSY; } - if (dev->tgt_active_prot) { - nfc_dev_err(&dev->interface->dev, "Cannot poll with a target" - " already activated"); - return -EBUSY; - } - pn533_poll_create_mod_list(dev, protocols); if (!dev->poll_mod_count) { @@ -1135,6 +1131,29 @@ error: return rc; } +static int pn533_start_poll(struct nfc_dev *nfc_dev, + u32 im_protocols, u32 tm_protocols) +{ + struct pn533 *dev = nfc_get_drvdata(nfc_dev); + + nfc_dev_dbg(&dev->interface->dev, + "%s: im protocols 0x%x tm protocols 0x%x", + __func__, im_protocols, tm_protocols); + + if (dev->tgt_active_prot) { + nfc_dev_err(&dev->interface->dev, + "Cannot poll with a target already activated"); + return -EBUSY; + } + + if (!tm_protocols) + return pn533_start_im_poll(nfc_dev, im_protocols); + else if (!im_protocols) + return pn533_init_target(nfc_dev, tm_protocols); + else + return -EINVAL; +} + static void pn533_stop_poll(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); diff --git a/drivers/nfc/pn544_hci.c b/drivers/nfc/pn544_hci.c index 281f18c2fb82..457eac35dc74 100644 --- a/drivers/nfc/pn544_hci.c +++ b/drivers/nfc/pn544_hci.c @@ -576,7 +576,8 @@ static int pn544_hci_xmit(struct nfc_shdlc *shdlc, struct sk_buff *skb) return pn544_hci_i2c_write(client, skb->data, skb->len); } -static int pn544_hci_start_poll(struct nfc_shdlc *shdlc, u32 protocols) +static int pn544_hci_start_poll(struct nfc_shdlc *shdlc, + u32 im_protocols, u32 tm_protocols) { struct nfc_hci_dev *hdev = nfc_shdlc_get_hci_dev(shdlc); u8 phases = 0; @@ -584,7 +585,8 @@ static int pn544_hci_start_poll(struct nfc_shdlc *shdlc, u32 protocols) u8 duration[2]; u8 activated; - pr_info(DRIVER_DESC ": %s protocols = %d\n", __func__, protocols); + pr_info(DRIVER_DESC ": %s protocols 0x%x 0x%x\n", + __func__, im_protocols, tm_protocols); r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, NFC_HCI_EVT_END_OPERATION, NULL, 0); @@ -604,10 +606,10 @@ static int pn544_hci_start_poll(struct nfc_shdlc *shdlc, u32 protocols) if (r < 0) return r; - if (protocols & (NFC_PROTO_ISO14443_MASK | NFC_PROTO_MIFARE_MASK | + if (im_protocols & (NFC_PROTO_ISO14443_MASK | NFC_PROTO_MIFARE_MASK | NFC_PROTO_JEWEL_MASK)) phases |= 1; /* Type A */ - if (protocols & NFC_PROTO_FELICA_MASK) { + if (im_protocols & NFC_PROTO_FELICA_MASK) { phases |= (1 << 2); /* Type F 212 */ phases |= (1 << 3); /* Type F 424 */ } diff --git a/include/linux/nfc.h b/include/linux/nfc.h index 0ae9b5857c83..548715881fb0 100644 --- a/include/linux/nfc.h +++ b/include/linux/nfc.h @@ -94,6 +94,8 @@ enum nfc_commands { * @NFC_ATTR_TARGET_SENSF_RES: NFC-F targets extra information, max 18 bytes * @NFC_ATTR_COMM_MODE: Passive or active mode * @NFC_ATTR_RF_MODE: Initiator or target + * @NFC_ATTR_IM_PROTOCOLS: Initiator mode protocols to poll for + * @NFC_ATTR_TM_PROTOCOLS: Target mode protocols to listen for */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -109,6 +111,8 @@ enum nfc_attrs { NFC_ATTR_COMM_MODE, NFC_ATTR_RF_MODE, NFC_ATTR_DEVICE_POWERED, + NFC_ATTR_IM_PROTOCOLS, + NFC_ATTR_TM_PROTOCOLS, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 4467c9460857..e30e6a869714 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -31,7 +31,8 @@ struct nfc_hci_ops { void (*close) (struct nfc_hci_dev *hdev); int (*hci_ready) (struct nfc_hci_dev *hdev); int (*xmit) (struct nfc_hci_dev *hdev, struct sk_buff *skb); - int (*start_poll) (struct nfc_hci_dev *hdev, u32 protocols); + int (*start_poll) (struct nfc_hci_dev *hdev, + u32 im_protocols, u32 tm_protocols); int (*target_from_gate) (struct nfc_hci_dev *hdev, u8 gate, struct nfc_target *target); int (*complete_target_discovered) (struct nfc_hci_dev *hdev, u8 gate, diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 3116f923f607..97aa0e81aa83 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -53,7 +53,8 @@ struct nfc_target; struct nfc_ops { int (*dev_up)(struct nfc_dev *dev); int (*dev_down)(struct nfc_dev *dev); - int (*start_poll)(struct nfc_dev *dev, u32 protocols); + int (*start_poll)(struct nfc_dev *dev, + u32 im_protocols, u32 tm_protocols); void (*stop_poll)(struct nfc_dev *dev); int (*dep_link_up)(struct nfc_dev *dev, struct nfc_target *target, u8 comm_mode, u8 *gb, size_t gb_len); diff --git a/include/net/nfc/shdlc.h b/include/net/nfc/shdlc.h index ab06afd462da..35e930d2f638 100644 --- a/include/net/nfc/shdlc.h +++ b/include/net/nfc/shdlc.h @@ -27,7 +27,8 @@ struct nfc_shdlc_ops { void (*close) (struct nfc_shdlc *shdlc); int (*hci_ready) (struct nfc_shdlc *shdlc); int (*xmit) (struct nfc_shdlc *shdlc, struct sk_buff *skb); - int (*start_poll) (struct nfc_shdlc *shdlc, u32 protocols); + int (*start_poll) (struct nfc_shdlc *shdlc, + u32 im_protocols, u32 tm_protocols); int (*target_from_gate) (struct nfc_shdlc *shdlc, u8 gate, struct nfc_target *target); int (*complete_target_discovered) (struct nfc_shdlc *shdlc, u8 gate, diff --git a/net/nfc/core.c b/net/nfc/core.c index f5a43f701a9e..c83717bfcb8a 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -121,14 +121,14 @@ error: * The device remains polling for targets until a target is found or * the nfc_stop_poll function is called. */ -int nfc_start_poll(struct nfc_dev *dev, u32 protocols) +int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols) { int rc; - pr_debug("dev_name=%s protocols=0x%x\n", - dev_name(&dev->dev), protocols); + pr_debug("dev_name %s initiator protocols 0x%x target protocols 0x%x\n", + dev_name(&dev->dev), im_protocols, tm_protocols); - if (!protocols) + if (!im_protocols && !tm_protocols) return -EINVAL; device_lock(&dev->dev); @@ -143,7 +143,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 protocols) goto error; } - rc = dev->ops->start_poll(dev, protocols); + rc = dev->ops->start_poll(dev, im_protocols, tm_protocols); if (!rc) dev->polling = true; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index e1a640d2b588..281f3a3bec00 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -481,12 +481,13 @@ static int hci_dev_down(struct nfc_dev *nfc_dev) return 0; } -static int hci_start_poll(struct nfc_dev *nfc_dev, u32 protocols) +static int hci_start_poll(struct nfc_dev *nfc_dev, + u32 im_protocols, u32 tm_protocols) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); if (hdev->ops->start_poll) - return hdev->ops->start_poll(hdev, protocols); + return hdev->ops->start_poll(hdev, im_protocols, tm_protocols); else return nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, NFC_HCI_EVT_READER_REQUESTED, NULL, 0); diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c index 5665dc6d893a..6b836e6242b7 100644 --- a/net/nfc/hci/shdlc.c +++ b/net/nfc/hci/shdlc.c @@ -765,14 +765,16 @@ static int nfc_shdlc_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb) return 0; } -static int nfc_shdlc_start_poll(struct nfc_hci_dev *hdev, u32 protocols) +static int nfc_shdlc_start_poll(struct nfc_hci_dev *hdev, + u32 im_protocols, u32 tm_protocols) { struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); pr_debug("\n"); if (shdlc->ops->start_poll) - return shdlc->ops->start_poll(shdlc, protocols); + return shdlc->ops->start_poll(shdlc, + im_protocols, tm_protocols); return 0; } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index d560e6f13072..0f718982f808 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -387,7 +387,8 @@ static int nci_dev_down(struct nfc_dev *nfc_dev) return nci_close_device(ndev); } -static int nci_start_poll(struct nfc_dev *nfc_dev, __u32 protocols) +static int nci_start_poll(struct nfc_dev *nfc_dev, + __u32 im_protocols, __u32 tm_protocols) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; @@ -413,11 +414,11 @@ static int nci_start_poll(struct nfc_dev *nfc_dev, __u32 protocols) return -EBUSY; } - rc = nci_request(ndev, nci_rf_discover_req, protocols, + rc = nci_request(ndev, nci_rf_discover_req, im_protocols, msecs_to_jiffies(NCI_RF_DISC_TIMEOUT)); if (!rc) - ndev->poll_prots = protocols; + ndev->poll_prots = im_protocols; return rc; } diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 581d419083aa..a18fd56798fc 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -49,6 +49,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_COMM_MODE] = { .type = NLA_U8 }, [NFC_ATTR_RF_MODE] = { .type = NLA_U8 }, [NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 }, + [NFC_ATTR_IM_PROTOCOLS] = { .type = NLA_U32 }, + [NFC_ATTR_TM_PROTOCOLS] = { .type = NLA_U32 }, }; static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, @@ -519,16 +521,25 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) struct nfc_dev *dev; int rc; u32 idx; - u32 protocols; + u32 im_protocols = 0, tm_protocols = 0; pr_debug("Poll start\n"); if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || - !info->attrs[NFC_ATTR_PROTOCOLS]) + ((!info->attrs[NFC_ATTR_IM_PROTOCOLS] && + !info->attrs[NFC_ATTR_PROTOCOLS]) && + !info->attrs[NFC_ATTR_TM_PROTOCOLS])) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); - protocols = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); + + if (info->attrs[NFC_ATTR_TM_PROTOCOLS]) + tm_protocols = nla_get_u32(info->attrs[NFC_ATTR_TM_PROTOCOLS]); + else if (info->attrs[NFC_ATTR_PROTOCOLS]) + tm_protocols = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); + + if (info->attrs[NFC_ATTR_IM_PROTOCOLS]) + im_protocols = nla_get_u32(info->attrs[NFC_ATTR_IM_PROTOCOLS]); dev = nfc_get_device(idx); if (!dev) @@ -536,7 +547,7 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) mutex_lock(&dev->genl_data.genl_data_mutex); - rc = nfc_start_poll(dev, protocols); + rc = nfc_start_poll(dev, im_protocols, tm_protocols); if (!rc) dev->genl_data.poll_req_pid = info->snd_pid; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 3dd4232ae664..7d9708f2a66c 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -158,7 +158,7 @@ int nfc_dev_up(struct nfc_dev *dev); int nfc_dev_down(struct nfc_dev *dev); -int nfc_start_poll(struct nfc_dev *dev, u32 protocols); +int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols); int nfc_stop_poll(struct nfc_dev *dev); -- cgit v1.2.3 From fc40a8c1a06ab7db45da790693dd9802612a055c Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 1 Jun 2012 13:21:13 +0200 Subject: NFC: Add target mode activation netlink event Userspace gets a netlink event upon target mode activation. The LLCP layer is also signaled when we get an ATR_REQ in order to get the remote general bytes. Signed-off-by: Samuel Ortiz --- drivers/nfc/pn533.c | 27 ++++++++++++++++++++-- include/linux/nfc.h | 7 ++++++ include/net/nfc/nfc.h | 4 ++++ net/nfc/core.c | 35 +++++++++++++++++++++++++++++ net/nfc/netlink.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 3 +++ 6 files changed, 136 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c index 605a08a62e45..c6b9bc5ac6e6 100644 --- a/drivers/nfc/pn533.c +++ b/drivers/nfc/pn533.c @@ -260,6 +260,10 @@ struct pn533_cmd_jump_dep_response { #define PN533_INIT_TARGET_PASSIVE 0x1 #define PN533_INIT_TARGET_DEP 0x2 +#define PN533_INIT_TARGET_RESP_FRAME_MASK 0x3 +#define PN533_INIT_TARGET_RESP_ACTIVE 0x1 +#define PN533_INIT_TARGET_RESP_DEP 0x4 + struct pn533_cmd_init_target { u8 mode; u8 mifare[6]; @@ -1128,10 +1132,13 @@ static int pn533_init_target_frame(struct pn533_frame *frame, return 0; } +#define ATR_REQ_GB_OFFSET 17 static int pn533_init_target_complete(struct pn533 *dev, void *arg, u8 *params, int params_len) { struct pn533_cmd_init_target_response *resp; + u8 frame, comm_mode = NFC_COMM_PASSIVE, *gb; + size_t gb_len; nfc_dev_dbg(&dev->interface->dev, "%s", __func__); @@ -1143,11 +1150,27 @@ static int pn533_init_target_complete(struct pn533 *dev, void *arg, return params_len; } + if (params_len < ATR_REQ_GB_OFFSET + 1) + return -EINVAL; + resp = (struct pn533_cmd_init_target_response *) params; - nfc_dev_dbg(&dev->interface->dev, "Target mode 0x%x\n", resp->mode); + nfc_dev_dbg(&dev->interface->dev, "Target mode 0x%x param len %d\n", + resp->mode, params_len); - return 0; + frame = resp->mode & PN533_INIT_TARGET_RESP_FRAME_MASK; + if (frame == PN533_INIT_TARGET_RESP_ACTIVE) + comm_mode = NFC_COMM_ACTIVE; + + /* Again, only DEP */ + if ((resp->mode & PN533_INIT_TARGET_RESP_DEP) == 0) + return -EOPNOTSUPP; + + gb = resp->cmd + ATR_REQ_GB_OFFSET; + gb_len = params_len - (ATR_REQ_GB_OFFSET + 1); + + return nfc_tm_activated(dev->nfc_dev, NFC_PROTO_NFC_DEP_MASK, + comm_mode, gb, gb_len); } static int pn533_init_target(struct nfc_dev *nfc_dev, u32 protocols) diff --git a/include/linux/nfc.h b/include/linux/nfc.h index 548715881fb0..d124e9273fcb 100644 --- a/include/linux/nfc.h +++ b/include/linux/nfc.h @@ -56,6 +56,10 @@ * %NFC_ATTR_PROTOCOLS) * @NFC_EVENT_DEVICE_REMOVED: event emitted when a device is removed * (it sends %NFC_ATTR_DEVICE_INDEX) + * @NFC_EVENT_TM_ACTIVATED: event emitted when the adapter is activated in + * target mode. + * @NFC_EVENT_DEVICE_DEACTIVATED: event emitted when the adapter is deactivated + * from target mode. */ enum nfc_commands { NFC_CMD_UNSPEC, @@ -71,6 +75,8 @@ enum nfc_commands { NFC_EVENT_DEVICE_ADDED, NFC_EVENT_DEVICE_REMOVED, NFC_EVENT_TARGET_LOST, + NFC_EVENT_TM_ACTIVATED, + NFC_EVENT_TM_DEACTIVATED, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -122,6 +128,7 @@ enum nfc_attrs { #define NFC_NFCID1_MAXSIZE 10 #define NFC_SENSB_RES_MAXSIZE 12 #define NFC_SENSF_RES_MAXSIZE 18 +#define NFC_GB_MAXSIZE 48 /* NFC protocols */ #define NFC_PROTO_JEWEL 1 diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 97aa0e81aa83..41573b4bd78a 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -198,4 +198,8 @@ int nfc_target_lost(struct nfc_dev *dev, u32 target_idx); int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); +int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, + u8 *gb, size_t gb_len); +int nfc_tm_deactivated(struct nfc_dev *dev); + #endif /* __NET_NFC_H */ diff --git a/net/nfc/core.c b/net/nfc/core.c index c83717bfcb8a..17f147430b7c 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -455,6 +455,41 @@ u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len) } EXPORT_SYMBOL(nfc_get_local_general_bytes); +int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, + u8 *gb, size_t gb_len) +{ + int rc; + + device_lock(&dev->dev); + + dev->polling = false; + + if (gb != NULL) { + rc = nfc_set_remote_general_bytes(dev, gb, gb_len); + if (rc < 0) + goto out; + } + + if (protocol == NFC_PROTO_NFC_DEP_MASK) + nfc_dep_link_is_up(dev, 0, comm_mode, NFC_RF_TARGET); + + rc = nfc_genl_tm_activated(dev, protocol); + +out: + device_unlock(&dev->dev); + + return rc; +} +EXPORT_SYMBOL(nfc_tm_activated); + +int nfc_tm_deactivated(struct nfc_dev *dev) +{ + dev->dep_link_up = false; + + return nfc_genl_tm_deactivated(dev); +} +EXPORT_SYMBOL(nfc_tm_deactivated); + /** * nfc_alloc_send_skb - allocate a skb for data exchange responses * diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index a18fd56798fc..21eaa9b5c6bf 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -221,6 +221,68 @@ free_msg: return -EMSGSIZE; } +int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_TM_ACTIVATED); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) + goto nla_put_failure; + if (nla_put_u32(msg, NFC_ATTR_TM_PROTOCOLS, protocol)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + +int nfc_genl_tm_deactivated(struct nfc_dev *dev) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_TM_DEACTIVATED); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + int nfc_genl_device_added(struct nfc_dev *dev) { struct sk_buff *msg; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 7d9708f2a66c..cd9fcbe57464 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -128,6 +128,9 @@ int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_genl_dep_link_down_event(struct nfc_dev *dev); +int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol); +int nfc_genl_tm_deactivated(struct nfc_dev *dev); + struct nfc_dev *nfc_get_device(unsigned int idx); static inline void nfc_put_device(struct nfc_dev *dev) -- cgit v1.2.3 From f212ad5e993e7efb996fc8ce94a5de8f0bd06d41 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 31 May 2012 00:02:26 +0200 Subject: NFC: Set the NFC device RF mode appropriately Signed-off-by: Samuel Ortiz --- include/linux/nfc.h | 1 + include/net/nfc/nfc.h | 2 +- net/nfc/core.c | 14 ++++++++++---- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/linux/nfc.h b/include/linux/nfc.h index d124e9273fcb..f4e6dd915b1c 100644 --- a/include/linux/nfc.h +++ b/include/linux/nfc.h @@ -146,6 +146,7 @@ enum nfc_attrs { /* NFC RF modes */ #define NFC_RF_INITIATOR 0 #define NFC_RF_TARGET 1 +#define NFC_RF_NONE 2 /* NFC protocols masks used in bitsets */ #define NFC_PROTO_JEWEL_MASK (1 << NFC_PROTO_JEWEL) diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 41573b4bd78a..a6a7b49a3e2d 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -100,10 +100,10 @@ struct nfc_dev { int targets_generation; struct device dev; bool dev_up; + u8 rf_mode; bool polling; struct nfc_target *active_target; bool dep_link_up; - u32 dep_rf_mode; struct nfc_genl_data genl_data; u32 supported_protocols; diff --git a/net/nfc/core.c b/net/nfc/core.c index 17f147430b7c..722a0c76c669 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -144,8 +144,10 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols) } rc = dev->ops->start_poll(dev, im_protocols, tm_protocols); - if (!rc) + if (!rc) { dev->polling = true; + dev->rf_mode = NFC_RF_NONE; + } error: device_unlock(&dev->dev); @@ -235,8 +237,10 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode) } rc = dev->ops->dep_link_up(dev, target, comm_mode, gb, gb_len); - if (!rc) + if (!rc) { dev->active_target = target; + dev->rf_mode = NFC_RF_INITIATOR; + } error: device_unlock(&dev->dev); @@ -264,7 +268,7 @@ int nfc_dep_link_down(struct nfc_dev *dev) goto error; } - if (dev->dep_rf_mode == NFC_RF_TARGET) { + if (dev->rf_mode == NFC_RF_TARGET) { rc = -EOPNOTSUPP; goto error; } @@ -286,7 +290,6 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode) { dev->dep_link_up = true; - dev->dep_rf_mode = rf_mode; nfc_llcp_mac_is_up(dev, target_idx, comm_mode, rf_mode); @@ -330,6 +333,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) rc = dev->ops->activate_target(dev, target, protocol); if (!rc) { dev->active_target = target; + dev->rf_mode = NFC_RF_INITIATOR; if (dev->ops->check_presence) mod_timer(&dev->check_pres_timer, jiffies + @@ -470,6 +474,8 @@ int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, goto out; } + dev->rf_mode = NFC_RF_TARGET; + if (protocol == NFC_PROTO_NFC_DEP_MASK) nfc_dep_link_is_up(dev, 0, comm_mode, NFC_RF_TARGET); -- cgit v1.2.3 From be9ae4ce4ee66e211815122ab4f41913efed4fec Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 16 May 2012 15:55:48 +0200 Subject: NFC: Introduce target mode tx ops And rename the initiator mode data exchange ops for consistency sake. Signed-off-by: Samuel Ortiz --- drivers/nfc/pn533.c | 8 ++++---- include/net/nfc/nfc.h | 3 ++- net/nfc/core.c | 37 ++++++++++++++++++++----------------- net/nfc/hci/core.c | 8 ++++---- net/nfc/nci/core.c | 8 ++++---- 5 files changed, 34 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c index c6b9bc5ac6e6..fd94c6f5d6a8 100644 --- a/drivers/nfc/pn533.c +++ b/drivers/nfc/pn533.c @@ -1691,9 +1691,9 @@ error: return 0; } -static int pn533_data_exchange(struct nfc_dev *nfc_dev, - struct nfc_target *target, struct sk_buff *skb, - data_exchange_cb_t cb, void *cb_context) +static int pn533_transceive(struct nfc_dev *nfc_dev, + struct nfc_target *target, struct sk_buff *skb, + data_exchange_cb_t cb, void *cb_context) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct pn533_frame *out_frame, *in_frame; @@ -1853,7 +1853,7 @@ struct nfc_ops pn533_nfc_ops = { .stop_poll = pn533_stop_poll, .activate_target = pn533_activate_target, .deactivate_target = pn533_deactivate_target, - .data_exchange = pn533_data_exchange, + .im_transceive = pn533_transceive, }; static int pn533_probe(struct usb_interface *interface, diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index a6a7b49a3e2d..45c4c970575c 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -63,9 +63,10 @@ struct nfc_ops { u32 protocol); void (*deactivate_target)(struct nfc_dev *dev, struct nfc_target *target); - int (*data_exchange)(struct nfc_dev *dev, struct nfc_target *target, + int (*im_transceive)(struct nfc_dev *dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context); + int (*tm_send)(struct nfc_dev *dev, struct sk_buff *skb); int (*check_presence)(struct nfc_dev *dev, struct nfc_target *target); }; diff --git a/net/nfc/core.c b/net/nfc/core.c index 722a0c76c669..76c1e207d297 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -413,27 +413,30 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, goto error; } - if (dev->active_target == NULL) { - rc = -ENOTCONN; - kfree_skb(skb); - goto error; - } + if (dev->rf_mode == NFC_RF_INITIATOR && dev->active_target != NULL) { + if (dev->active_target->idx != target_idx) { + rc = -EADDRNOTAVAIL; + kfree_skb(skb); + goto error; + } - if (dev->active_target->idx != target_idx) { - rc = -EADDRNOTAVAIL; + if (dev->ops->check_presence) + del_timer_sync(&dev->check_pres_timer); + + rc = dev->ops->im_transceive(dev, dev->active_target, skb, cb, + cb_context); + + if (!rc && dev->ops->check_presence) + mod_timer(&dev->check_pres_timer, jiffies + + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); + } else if (dev->rf_mode == NFC_RF_TARGET && dev->ops->tm_send != NULL) { + rc = dev->ops->tm_send(dev, skb); + } else { + rc = -ENOTCONN; kfree_skb(skb); goto error; } - if (dev->ops->check_presence) - del_timer_sync(&dev->check_pres_timer); - - rc = dev->ops->data_exchange(dev, dev->active_target, skb, cb, - cb_context); - - if (!rc && dev->ops->check_presence) - mod_timer(&dev->check_pres_timer, jiffies + - msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); error: device_unlock(&dev->dev); @@ -727,7 +730,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, struct nfc_dev *dev; if (!ops->start_poll || !ops->stop_poll || !ops->activate_target || - !ops->deactivate_target || !ops->data_exchange) + !ops->deactivate_target || !ops->im_transceive) return NULL; if (!supported_protocols) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 281f3a3bec00..a8b0b71e8f86 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -512,9 +512,9 @@ static void hci_deactivate_target(struct nfc_dev *nfc_dev, { } -static int hci_data_exchange(struct nfc_dev *nfc_dev, struct nfc_target *target, - struct sk_buff *skb, data_exchange_cb_t cb, - void *cb_context) +static int hci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, + struct sk_buff *skb, data_exchange_cb_t cb, + void *cb_context) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); int r; @@ -580,7 +580,7 @@ static struct nfc_ops hci_nfc_ops = { .stop_poll = hci_stop_poll, .activate_target = hci_activate_target, .deactivate_target = hci_deactivate_target, - .data_exchange = hci_data_exchange, + .im_transceive = hci_transceive, .check_presence = hci_check_presence, }; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 0f718982f808..766a02b1dfa1 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -522,9 +522,9 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, } } -static int nci_data_exchange(struct nfc_dev *nfc_dev, struct nfc_target *target, - struct sk_buff *skb, - data_exchange_cb_t cb, void *cb_context) +static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, + struct sk_buff *skb, + data_exchange_cb_t cb, void *cb_context) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; @@ -557,7 +557,7 @@ static struct nfc_ops nci_nfc_ops = { .stop_poll = nci_stop_poll, .activate_target = nci_activate_target, .deactivate_target = nci_deactivate_target, - .data_exchange = nci_data_exchange, + .im_transceive = nci_transceive, }; /* ---- Interface to NCI drivers ---- */ -- cgit v1.2.3 From 73167ced31d15c04e57b9e0885ac05675e9195a4 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 31 May 2012 00:05:50 +0200 Subject: NFC: Introduce target mode rx data callback This routine will be called by drivers whenever they receive data in target mode. This should be unexpected events and as such should be handled by a standalone API (i.e. not as a callback pointer from an existing API). Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 1 + net/nfc/core.c | 12 ++++++++++++ net/nfc/llcp/llcp.c | 15 +++++++++++++++ net/nfc/nfc.h | 7 +++++++ 4 files changed, 35 insertions(+) (limited to 'include/net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 45c4c970575c..180964b954ab 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -202,5 +202,6 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, u8 *gb, size_t gb_len); int nfc_tm_deactivated(struct nfc_dev *dev); +int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb); #endif /* __NET_NFC_H */ diff --git a/net/nfc/core.c b/net/nfc/core.c index 76c1e207d297..6a3799eebc30 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -462,6 +462,18 @@ u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len) } EXPORT_SYMBOL(nfc_get_local_general_bytes); +int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb) +{ + /* Only LLCP target mode for now */ + if (dev->dep_link_up == false) { + kfree_skb(skb); + return -ENOLINK; + } + + return nfc_llcp_data_received(dev, skb); +} +EXPORT_SYMBOL(nfc_tm_data_received); + int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, u8 *gb, size_t gb_len) { diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 5f7aa3f632fb..5705e6dffb32 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -937,6 +937,21 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) return; } +int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) +{ + struct nfc_llcp_local *local; + + local = nfc_llcp_find_local(dev); + if (local == NULL) + return -ENODEV; + + local->rx_pending = skb_get(skb); + del_timer(&local->link_timer); + queue_work(local->rx_wq, &local->rx_work); + + return 0; +} + void nfc_llcp_mac_is_down(struct nfc_dev *dev) { struct nfc_llcp_local *local; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index cd9fcbe57464..c5e42b79a418 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -55,6 +55,7 @@ int nfc_llcp_register_device(struct nfc_dev *dev); void nfc_llcp_unregister_device(struct nfc_dev *dev); int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len); u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len); +int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); int __init nfc_llcp_init(void); void nfc_llcp_exit(void); @@ -90,6 +91,12 @@ static inline u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *gb_len) return NULL; } +static inline int nfc_llcp_data_received(struct nfc_dev *dev, + struct sk_buff *skb) +{ + return 0; +} + static inline int nfc_llcp_init(void) { return 0; -- cgit v1.2.3 From 2827011f666e157f3307d55070a75e1d1110b194 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 17 May 2012 21:14:09 -0700 Subject: Bluetooth: Fix early return from l2cap_chan_del This fixes a regression from commit 2ead70b8390d199ca04cd35311b51f5f3676079e that is present in all kernels starting at v3.0. When L2CAP information was moved to struct l2cap_chan, a check was added to l2cap_chan_del to avoid certain cleanup operations when ERTM or streaming mode had not yet been initialized. The logic in the check did not take in to account that chan->conf_state is set to 0 in l2cap_chan_ready, so l2cap_chan_del failed to cancel timers and leaked memory any time the ERTM queues or lists were not empty. This change makes sure that l2cap_chan_del only returns early if ERTM initialization was not performed. Signed-off-by: Mat Martineau Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 1c7d1cd5e679..452fcc4c0fff 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -597,6 +597,7 @@ enum { CONF_EWS_RECV, CONF_LOC_CONF_PEND, CONF_REM_CONF_PEND, + CONF_NOT_COMPLETE, }; #define L2CAP_CONF_MAX_CONF_REQ 2 diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 078bf805cd97..d9f215f3f8e9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -392,6 +392,9 @@ struct l2cap_chan *l2cap_chan_create(void) atomic_set(&chan->refcnt, 1); + /* This flag is cleared in l2cap_chan_ready() */ + set_bit(CONF_NOT_COMPLETE, &chan->conf_state); + BT_DBG("chan %p", chan); return chan; @@ -509,8 +512,7 @@ static void l2cap_chan_del(struct l2cap_chan *chan, int err) release_sock(sk); - if (!(test_bit(CONF_OUTPUT_DONE, &chan->conf_state) && - test_bit(CONF_INPUT_DONE, &chan->conf_state))) + if (test_bit(CONF_NOT_COMPLETE, &chan->conf_state)) return; skb_queue_purge(&chan->tx_q); @@ -923,6 +925,7 @@ static void l2cap_chan_ready(struct l2cap_chan *chan) BT_DBG("sk %p, parent %p", sk, parent); + /* This clears all conf flags, including CONF_NOT_COMPLETE */ chan->conf_state = 0; __clear_chan_timer(chan); -- cgit v1.2.3 From f5dbb0772df3feb2bb5eda8a9f0e0acdeb25653f Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 17 May 2012 20:53:38 -0700 Subject: Bluetooth: Remove receive code that has been superceded This deletes the receive code that had handlers for each frame type at the top level, and then had logic to determine the receive state within each handler. Signed-off-by: Mat Martineau Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 8 - net/bluetooth/l2cap_core.c | 492 ------------------------------------------ 2 files changed, 500 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 452fcc4c0fff..7d1da5a7d11e 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -419,11 +419,6 @@ struct l2cap_seq_list { #define L2CAP_SEQ_LIST_CLEAR 0xFFFF #define L2CAP_SEQ_LIST_TAIL 0x8000 -struct srej_list { - __u16 tx_seq; - struct list_head list; -}; - struct l2cap_chan { struct sock *sk; @@ -475,14 +470,12 @@ struct l2cap_chan { __u16 expected_ack_seq; __u16 expected_tx_seq; __u16 buffer_seq; - __u16 buffer_seq_srej; __u16 srej_save_reqseq; __u16 last_acked_seq; __u16 frames_sent; __u16 unacked_frames; __u8 retry_count; __u16 srej_queue_next; - __u8 num_acked; __u16 sdu_len; struct sk_buff *sdu; struct sk_buff *sdu_last_frag; @@ -515,7 +508,6 @@ struct l2cap_chan { struct sk_buff_head srej_q; struct l2cap_seq_list srej_list; struct l2cap_seq_list retrans_list; - struct list_head srej_l; struct list_head list; struct list_head global_l; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0a195ab4a385..d795d15cadf9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -534,8 +534,6 @@ static void l2cap_chan_del(struct l2cap_chan *chan, int err) skb_queue_purge(&chan->tx_q); if (chan->mode == L2CAP_MODE_ERTM) { - struct srej_list *l, *tmp; - __clear_retrans_timer(chan); __clear_monitor_timer(chan); __clear_ack_timer(chan); @@ -544,10 +542,6 @@ static void l2cap_chan_del(struct l2cap_chan *chan, int err) l2cap_seq_list_free(&chan->srej_list); l2cap_seq_list_free(&chan->retrans_list); - list_for_each_entry_safe(l, tmp, &chan->srej_l, list) { - list_del(&l->list); - kfree(l); - } } } @@ -1658,25 +1652,6 @@ static void l2cap_retrans_timeout(struct work_struct *work) l2cap_chan_put(chan); } -static void l2cap_drop_acked_frames(struct l2cap_chan *chan) -{ - struct sk_buff *skb; - - while ((skb = skb_peek(&chan->tx_q)) && - chan->unacked_frames) { - if (bt_cb(skb)->control.txseq == chan->expected_ack_seq) - break; - - skb = skb_dequeue(&chan->tx_q); - kfree_skb(skb); - - chan->unacked_frames--; - } - - if (!chan->unacked_frames) - __clear_retrans_timer(chan); -} - static int l2cap_streaming_send(struct l2cap_chan *chan, struct sk_buff_head *skbs) { @@ -1718,53 +1693,6 @@ static int l2cap_streaming_send(struct l2cap_chan *chan, return 0; } -static void l2cap_retransmit_one_frame(struct l2cap_chan *chan, u16 tx_seq) -{ - struct sk_buff *skb, *tx_skb; - u16 fcs; - u32 control; - - skb = skb_peek(&chan->tx_q); - if (!skb) - return; - - while (bt_cb(skb)->control.txseq != tx_seq) { - if (skb_queue_is_last(&chan->tx_q, skb)) - return; - - skb = skb_queue_next(&chan->tx_q, skb); - } - - if (bt_cb(skb)->control.retries == chan->remote_max_tx && - chan->remote_max_tx) { - l2cap_send_disconn_req(chan->conn, chan, ECONNABORTED); - return; - } - - tx_skb = skb_clone(skb, GFP_ATOMIC); - bt_cb(skb)->control.retries++; - - control = __get_control(chan, tx_skb->data + L2CAP_HDR_SIZE); - control &= __get_sar_mask(chan); - - if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state)) - control |= __set_ctrl_final(chan); - - control |= __set_reqseq(chan, chan->buffer_seq); - control |= __set_txseq(chan, tx_seq); - - __put_control(chan, control, tx_skb->data + L2CAP_HDR_SIZE); - - if (chan->fcs == L2CAP_FCS_CRC16) { - fcs = crc16(0, (u8 *)tx_skb->data, - tx_skb->len - L2CAP_FCS_SIZE); - put_unaligned_le16(fcs, - tx_skb->data + tx_skb->len - L2CAP_FCS_SIZE); - } - - l2cap_do_send(chan, tx_skb); -} - static int l2cap_ertm_send(struct l2cap_chan *chan) { struct sk_buff *skb, *tx_skb; @@ -1868,18 +1796,6 @@ static void l2cap_send_ack(struct l2cap_chan *chan) __l2cap_send_ack(chan); } -static void l2cap_send_srejtail(struct l2cap_chan *chan) -{ - struct srej_list *tail; - u32 control; - - control = __set_ctrl_super(chan, L2CAP_SUPER_SREJ); - control |= __set_ctrl_final(chan); - - tail = list_entry((&chan->srej_l)->prev, struct srej_list, list); - control |= __set_reqseq(chan, tail->tx_seq); -} - static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan, struct msghdr *msg, int len, int count, struct sk_buff *skb) @@ -2639,7 +2555,6 @@ static inline int l2cap_ertm_init(struct l2cap_chan *chan) chan->expected_ack_seq = 0; chan->unacked_frames = 0; chan->buffer_seq = 0; - chan->num_acked = 0; chan->frames_sent = 0; chan->last_acked_seq = 0; chan->sdu = NULL; @@ -2660,7 +2575,6 @@ static inline int l2cap_ertm_init(struct l2cap_chan *chan) skb_queue_head_init(&chan->srej_q); - INIT_LIST_HEAD(&chan->srej_l); err = l2cap_seq_list_init(&chan->srej_list, chan->tx_win); if (err < 0) return err; @@ -4277,41 +4191,6 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct l2cap_chan *chan) } } -static int l2cap_add_to_srej_queue(struct l2cap_chan *chan, struct sk_buff *skb, u16 tx_seq, u8 sar) -{ - struct sk_buff *next_skb; - int tx_seq_offset, next_tx_seq_offset; - - bt_cb(skb)->control.txseq = tx_seq; - bt_cb(skb)->control.sar = sar; - - next_skb = skb_peek(&chan->srej_q); - - tx_seq_offset = __seq_offset(chan, tx_seq, chan->buffer_seq); - - while (next_skb) { - if (bt_cb(next_skb)->control.txseq == tx_seq) - return -EINVAL; - - next_tx_seq_offset = __seq_offset(chan, - bt_cb(next_skb)->control.txseq, chan->buffer_seq); - - if (next_tx_seq_offset > tx_seq_offset) { - __skb_queue_before(&chan->srej_q, next_skb, skb); - return 0; - } - - if (skb_queue_is_last(&chan->srej_q, next_skb)) - next_skb = NULL; - else - next_skb = skb_queue_next(&chan->srej_q, next_skb); - } - - __skb_queue_tail(&chan->srej_q, skb); - - return 0; -} - static void append_skb_frag(struct sk_buff *skb, struct sk_buff *new_frag, struct sk_buff **last_frag) { @@ -4457,377 +4336,6 @@ void l2cap_chan_busy(struct l2cap_chan *chan, int busy) } } -static void l2cap_check_srej_gap(struct l2cap_chan *chan, u16 tx_seq) -{ - struct sk_buff *skb; - u32 control; - - while ((skb = skb_peek(&chan->srej_q)) && - !test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { - int err; - - if (bt_cb(skb)->control.txseq != tx_seq) - break; - - skb = skb_dequeue(&chan->srej_q); - control = __set_ctrl_sar(chan, bt_cb(skb)->control.sar); - - if (err < 0) { - l2cap_send_disconn_req(chan->conn, chan, ECONNRESET); - break; - } - - chan->buffer_seq_srej = __next_seq(chan, chan->buffer_seq_srej); - tx_seq = __next_seq(chan, tx_seq); - } -} - -static void l2cap_resend_srejframe(struct l2cap_chan *chan, u16 tx_seq) -{ - struct srej_list *l, *tmp; - u32 control; - - list_for_each_entry_safe(l, tmp, &chan->srej_l, list) { - if (l->tx_seq == tx_seq) { - list_del(&l->list); - kfree(l); - return; - } - control = __set_ctrl_super(chan, L2CAP_SUPER_SREJ); - control |= __set_reqseq(chan, l->tx_seq); - list_del(&l->list); - list_add_tail(&l->list, &chan->srej_l); - } -} - -static int l2cap_send_srejframe(struct l2cap_chan *chan, u16 tx_seq) -{ - struct srej_list *new; - u32 control; - - while (tx_seq != chan->expected_tx_seq) { - control = __set_ctrl_super(chan, L2CAP_SUPER_SREJ); - control |= __set_reqseq(chan, chan->expected_tx_seq); - l2cap_seq_list_append(&chan->srej_list, chan->expected_tx_seq); - - new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC); - if (!new) - return -ENOMEM; - - new->tx_seq = chan->expected_tx_seq; - - chan->expected_tx_seq = __next_seq(chan, chan->expected_tx_seq); - - list_add_tail(&new->list, &chan->srej_l); - } - - chan->expected_tx_seq = __next_seq(chan, chan->expected_tx_seq); - - return 0; -} - -static inline int l2cap_data_channel_iframe(struct l2cap_chan *chan, u32 rx_control, struct sk_buff *skb) -{ - u16 tx_seq = __get_txseq(chan, rx_control); - u16 req_seq = __get_reqseq(chan, rx_control); - u8 sar = __get_ctrl_sar(chan, rx_control); - int tx_seq_offset, expected_tx_seq_offset; - int num_to_ack = (chan->tx_win/6) + 1; - int err = 0; - - BT_DBG("chan %p len %d tx_seq %d rx_control 0x%8.8x", chan, skb->len, - tx_seq, rx_control); - - if (__is_ctrl_final(chan, rx_control) && - test_bit(CONN_WAIT_F, &chan->conn_state)) { - __clear_monitor_timer(chan); - if (chan->unacked_frames > 0) - __set_retrans_timer(chan); - clear_bit(CONN_WAIT_F, &chan->conn_state); - } - - chan->expected_ack_seq = req_seq; - l2cap_drop_acked_frames(chan); - - tx_seq_offset = __seq_offset(chan, tx_seq, chan->buffer_seq); - - /* invalid tx_seq */ - if (tx_seq_offset >= chan->tx_win) { - l2cap_send_disconn_req(chan->conn, chan, ECONNRESET); - goto drop; - } - - if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { - if (!test_bit(CONN_RNR_SENT, &chan->conn_state)) - l2cap_send_ack(chan); - goto drop; - } - - if (tx_seq == chan->expected_tx_seq) - goto expected; - - if (test_bit(CONN_SREJ_SENT, &chan->conn_state)) { - struct srej_list *first; - - first = list_first_entry(&chan->srej_l, - struct srej_list, list); - if (tx_seq == first->tx_seq) { - l2cap_add_to_srej_queue(chan, skb, tx_seq, sar); - l2cap_check_srej_gap(chan, tx_seq); - - list_del(&first->list); - kfree(first); - - if (list_empty(&chan->srej_l)) { - chan->buffer_seq = chan->buffer_seq_srej; - clear_bit(CONN_SREJ_SENT, &chan->conn_state); - l2cap_send_ack(chan); - BT_DBG("chan %p, Exit SREJ_SENT", chan); - } - } else { - struct srej_list *l; - - /* duplicated tx_seq */ - if (l2cap_add_to_srej_queue(chan, skb, tx_seq, sar) < 0) - goto drop; - - list_for_each_entry(l, &chan->srej_l, list) { - if (l->tx_seq == tx_seq) { - l2cap_resend_srejframe(chan, tx_seq); - return 0; - } - } - - err = l2cap_send_srejframe(chan, tx_seq); - if (err < 0) { - l2cap_send_disconn_req(chan->conn, chan, -err); - return err; - } - } - } else { - expected_tx_seq_offset = __seq_offset(chan, - chan->expected_tx_seq, chan->buffer_seq); - - /* duplicated tx_seq */ - if (tx_seq_offset < expected_tx_seq_offset) - goto drop; - - set_bit(CONN_SREJ_SENT, &chan->conn_state); - - BT_DBG("chan %p, Enter SREJ", chan); - - INIT_LIST_HEAD(&chan->srej_l); - chan->buffer_seq_srej = chan->buffer_seq; - - __skb_queue_head_init(&chan->srej_q); - l2cap_add_to_srej_queue(chan, skb, tx_seq, sar); - - /* Set P-bit only if there are some I-frames to ack. */ - if (__clear_ack_timer(chan)) - set_bit(CONN_SEND_PBIT, &chan->conn_state); - - err = l2cap_send_srejframe(chan, tx_seq); - if (err < 0) { - l2cap_send_disconn_req(chan->conn, chan, -err); - return err; - } - } - return 0; - -expected: - chan->expected_tx_seq = __next_seq(chan, chan->expected_tx_seq); - - if (test_bit(CONN_SREJ_SENT, &chan->conn_state)) { - bt_cb(skb)->control.txseq = tx_seq; - bt_cb(skb)->control.sar = sar; - __skb_queue_tail(&chan->srej_q, skb); - return 0; - } - - chan->buffer_seq = __next_seq(chan, chan->buffer_seq); - - if (err < 0) { - l2cap_send_disconn_req(chan->conn, chan, ECONNRESET); - return err; - } - - if (__is_ctrl_final(chan, rx_control)) { - if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) - l2cap_retransmit_frames(chan); - } - - - chan->num_acked = (chan->num_acked + 1) % num_to_ack; - if (chan->num_acked == num_to_ack - 1) - l2cap_send_ack(chan); - else - __set_ack_timer(chan); - - return 0; - -drop: - kfree_skb(skb); - return 0; -} - -static inline void l2cap_data_channel_rrframe(struct l2cap_chan *chan, u32 rx_control) -{ - BT_DBG("chan %p, req_seq %d ctrl 0x%8.8x", chan, - __get_reqseq(chan, rx_control), rx_control); - - chan->expected_ack_seq = __get_reqseq(chan, rx_control); - l2cap_drop_acked_frames(chan); - - if (__is_ctrl_poll(chan, rx_control)) { - set_bit(CONN_SEND_FBIT, &chan->conn_state); - if (test_bit(CONN_SREJ_SENT, &chan->conn_state)) { - if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state) && - (chan->unacked_frames > 0)) - __set_retrans_timer(chan); - - clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); - l2cap_send_srejtail(chan); - } else { - l2cap_send_i_or_rr_or_rnr(chan); - } - - } else if (__is_ctrl_final(chan, rx_control)) { - clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); - - if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) - l2cap_retransmit_frames(chan); - - } else { - if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state) && - (chan->unacked_frames > 0)) - __set_retrans_timer(chan); - - clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); - if (test_bit(CONN_SREJ_SENT, &chan->conn_state)) - l2cap_send_ack(chan); - else - l2cap_ertm_send(chan); - } -} - -static inline void l2cap_data_channel_rejframe(struct l2cap_chan *chan, u32 rx_control) -{ - u16 tx_seq = __get_reqseq(chan, rx_control); - - BT_DBG("chan %p, req_seq %d ctrl 0x%8.8x", chan, tx_seq, rx_control); - - clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); - - chan->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(chan); - - if (__is_ctrl_final(chan, rx_control)) { - if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) - l2cap_retransmit_frames(chan); - } else { - l2cap_retransmit_frames(chan); - - if (test_bit(CONN_WAIT_F, &chan->conn_state)) - set_bit(CONN_REJ_ACT, &chan->conn_state); - } -} -static inline void l2cap_data_channel_srejframe(struct l2cap_chan *chan, u32 rx_control) -{ - u16 tx_seq = __get_reqseq(chan, rx_control); - - BT_DBG("chan %p, req_seq %d ctrl 0x%8.8x", chan, tx_seq, rx_control); - - clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); - - if (__is_ctrl_poll(chan, rx_control)) { - chan->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(chan); - - set_bit(CONN_SEND_FBIT, &chan->conn_state); - l2cap_retransmit_one_frame(chan, tx_seq); - - l2cap_ertm_send(chan); - - if (test_bit(CONN_WAIT_F, &chan->conn_state)) { - chan->srej_save_reqseq = tx_seq; - set_bit(CONN_SREJ_ACT, &chan->conn_state); - } - } else if (__is_ctrl_final(chan, rx_control)) { - if (test_bit(CONN_SREJ_ACT, &chan->conn_state) && - chan->srej_save_reqseq == tx_seq) - clear_bit(CONN_SREJ_ACT, &chan->conn_state); - else - l2cap_retransmit_one_frame(chan, tx_seq); - } else { - l2cap_retransmit_one_frame(chan, tx_seq); - if (test_bit(CONN_WAIT_F, &chan->conn_state)) { - chan->srej_save_reqseq = tx_seq; - set_bit(CONN_SREJ_ACT, &chan->conn_state); - } - } -} - -static inline void l2cap_data_channel_rnrframe(struct l2cap_chan *chan, u32 rx_control) -{ - u16 tx_seq = __get_reqseq(chan, rx_control); - - BT_DBG("chan %p, req_seq %d ctrl 0x%8.8x", chan, tx_seq, rx_control); - - set_bit(CONN_REMOTE_BUSY, &chan->conn_state); - chan->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(chan); - - if (__is_ctrl_poll(chan, rx_control)) - set_bit(CONN_SEND_FBIT, &chan->conn_state); - - if (!test_bit(CONN_SREJ_SENT, &chan->conn_state)) { - __clear_retrans_timer(chan); - if (__is_ctrl_poll(chan, rx_control)) - l2cap_send_rr_or_rnr(chan, L2CAP_CTRL_FINAL); - return; - } - - if (__is_ctrl_poll(chan, rx_control)) { - l2cap_send_srejtail(chan); - } else { - rx_control = __set_ctrl_super(chan, L2CAP_SUPER_RR); - } -} - -static inline int l2cap_data_channel_sframe(struct l2cap_chan *chan, u32 rx_control, struct sk_buff *skb) -{ - BT_DBG("chan %p rx_control 0x%8.8x len %d", chan, rx_control, skb->len); - - if (__is_ctrl_final(chan, rx_control) && - test_bit(CONN_WAIT_F, &chan->conn_state)) { - __clear_monitor_timer(chan); - if (chan->unacked_frames > 0) - __set_retrans_timer(chan); - clear_bit(CONN_WAIT_F, &chan->conn_state); - } - - switch (__get_ctrl_super(chan, rx_control)) { - case L2CAP_SUPER_RR: - l2cap_data_channel_rrframe(chan, rx_control); - break; - - case L2CAP_SUPER_REJ: - l2cap_data_channel_rejframe(chan, rx_control); - break; - - case L2CAP_SUPER_SREJ: - l2cap_data_channel_srejframe(chan, rx_control); - break; - - case L2CAP_SUPER_RNR: - l2cap_data_channel_rnrframe(chan, rx_control); - break; - } - - kfree_skb(skb); - return 0; -} - static u8 l2cap_classify_txseq(struct l2cap_chan *chan, u16 txseq) { BT_DBG("chan %p, txseq %d", chan, txseq); -- cgit v1.2.3 From 4239d16f360ce4c8a1798508dd171ebce93985ba Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 17 May 2012 20:53:49 -0700 Subject: Bluetooth: Check rules when setting retransmit or monitor timers The ERTM specification requires the retransmit timer to be cancelled when the monitor timer is set. The retransmit timer cannot be set again while the monitor timer is pending. Signed-off-by: Mat Martineau Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 4 ---- net/bluetooth/l2cap_core.c | 22 ++++++++++++++++++++-- 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 7d1da5a7d11e..117db8e4a5f4 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -706,11 +706,7 @@ static inline bool l2cap_clear_timer(struct l2cap_chan *chan, #define __set_chan_timer(c, t) l2cap_set_timer(c, &c->chan_timer, (t)) #define __clear_chan_timer(c) l2cap_clear_timer(c, &c->chan_timer) -#define __set_retrans_timer(c) l2cap_set_timer(c, &c->retrans_timer, \ - msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO)); #define __clear_retrans_timer(c) l2cap_clear_timer(c, &c->retrans_timer) -#define __set_monitor_timer(c) l2cap_set_timer(c, &c->monitor_timer, \ - msecs_to_jiffies(L2CAP_DEFAULT_MONITOR_TO)); #define __clear_monitor_timer(c) l2cap_clear_timer(c, &c->monitor_timer) #define __set_ack_timer(c) l2cap_set_timer(c, &chan->ack_timer, \ msecs_to_jiffies(L2CAP_DEFAULT_ACK_TO)); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 8ea9ec648bfd..38e9a0ea4f48 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -227,6 +227,24 @@ static inline void l2cap_chan_set_err(struct l2cap_chan *chan, int err) release_sock(sk); } +static void __set_retrans_timer(struct l2cap_chan *chan) +{ + if (!delayed_work_pending(&chan->monitor_timer) && + chan->retrans_timeout) { + l2cap_set_timer(chan, &chan->retrans_timer, + msecs_to_jiffies(chan->retrans_timeout)); + } +} + +static void __set_monitor_timer(struct l2cap_chan *chan) +{ + __clear_retrans_timer(chan); + if (chan->monitor_timeout) { + l2cap_set_timer(chan, &chan->monitor_timer, + msecs_to_jiffies(chan->monitor_timeout)); + } +} + static struct sk_buff *l2cap_ertm_seq_in_queue(struct sk_buff_head *head, u16 seq) { @@ -1619,7 +1637,7 @@ int __l2cap_wait_ack(struct sock *sk) static void l2cap_monitor_timeout(struct work_struct *work) { struct l2cap_chan *chan = container_of(work, struct l2cap_chan, - monitor_timer.work); + monitor_timer.work); BT_DBG("chan %p", chan); @@ -1643,7 +1661,7 @@ static void l2cap_monitor_timeout(struct work_struct *work) static void l2cap_retrans_timeout(struct work_struct *work) { struct l2cap_chan *chan = container_of(work, struct l2cap_chan, - retrans_timer.work); + retrans_timer.work); BT_DBG("chan %p", chan); -- cgit v1.2.3 From 522cc2ee6e55ba49f4df338e0dfcfb989b46eb8c Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 17 May 2012 20:53:54 -0700 Subject: Bluetooth: Remove unused ERTM control field macros Now that l2cap_ctrl is used to set up control fields, these macros are not needed. Signed-off-by: Mat Martineau Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 168 ------------------------------------------ 1 file changed, 168 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 117db8e4a5f4..7bc40198f147 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -725,174 +725,6 @@ static inline __u16 __next_seq(struct l2cap_chan *chan, __u16 seq) return (seq + 1) % (chan->tx_win_max + 1); } -static inline int l2cap_tx_window_full(struct l2cap_chan *ch) -{ - int sub; - - sub = (ch->next_tx_seq - ch->expected_ack_seq) % 64; - - if (sub < 0) - sub += 64; - - return sub == ch->remote_tx_win; -} - -static inline __u16 __get_reqseq(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (ctrl & L2CAP_EXT_CTRL_REQSEQ) >> - L2CAP_EXT_CTRL_REQSEQ_SHIFT; - else - return (ctrl & L2CAP_CTRL_REQSEQ) >> L2CAP_CTRL_REQSEQ_SHIFT; -} - -static inline __u32 __set_reqseq(struct l2cap_chan *chan, __u32 reqseq) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (reqseq << L2CAP_EXT_CTRL_REQSEQ_SHIFT) & - L2CAP_EXT_CTRL_REQSEQ; - else - return (reqseq << L2CAP_CTRL_REQSEQ_SHIFT) & L2CAP_CTRL_REQSEQ; -} - -static inline __u16 __get_txseq(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (ctrl & L2CAP_EXT_CTRL_TXSEQ) >> - L2CAP_EXT_CTRL_TXSEQ_SHIFT; - else - return (ctrl & L2CAP_CTRL_TXSEQ) >> L2CAP_CTRL_TXSEQ_SHIFT; -} - -static inline __u32 __set_txseq(struct l2cap_chan *chan, __u32 txseq) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (txseq << L2CAP_EXT_CTRL_TXSEQ_SHIFT) & - L2CAP_EXT_CTRL_TXSEQ; - else - return (txseq << L2CAP_CTRL_TXSEQ_SHIFT) & L2CAP_CTRL_TXSEQ; -} - -static inline bool __is_sframe(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return ctrl & L2CAP_EXT_CTRL_FRAME_TYPE; - else - return ctrl & L2CAP_CTRL_FRAME_TYPE; -} - -static inline __u32 __set_sframe(struct l2cap_chan *chan) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return L2CAP_EXT_CTRL_FRAME_TYPE; - else - return L2CAP_CTRL_FRAME_TYPE; -} - -static inline __u8 __get_ctrl_sar(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (ctrl & L2CAP_EXT_CTRL_SAR) >> L2CAP_EXT_CTRL_SAR_SHIFT; - else - return (ctrl & L2CAP_CTRL_SAR) >> L2CAP_CTRL_SAR_SHIFT; -} - -static inline __u32 __set_ctrl_sar(struct l2cap_chan *chan, __u32 sar) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (sar << L2CAP_EXT_CTRL_SAR_SHIFT) & L2CAP_EXT_CTRL_SAR; - else - return (sar << L2CAP_CTRL_SAR_SHIFT) & L2CAP_CTRL_SAR; -} - -static inline bool __is_sar_start(struct l2cap_chan *chan, __u32 ctrl) -{ - return __get_ctrl_sar(chan, ctrl) == L2CAP_SAR_START; -} - -static inline __u32 __get_sar_mask(struct l2cap_chan *chan) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return L2CAP_EXT_CTRL_SAR; - else - return L2CAP_CTRL_SAR; -} - -static inline __u8 __get_ctrl_super(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (ctrl & L2CAP_EXT_CTRL_SUPERVISE) >> - L2CAP_EXT_CTRL_SUPER_SHIFT; - else - return (ctrl & L2CAP_CTRL_SUPERVISE) >> L2CAP_CTRL_SUPER_SHIFT; -} - -static inline __u32 __set_ctrl_super(struct l2cap_chan *chan, __u32 super) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (super << L2CAP_EXT_CTRL_SUPER_SHIFT) & - L2CAP_EXT_CTRL_SUPERVISE; - else - return (super << L2CAP_CTRL_SUPER_SHIFT) & - L2CAP_CTRL_SUPERVISE; -} - -static inline __u32 __set_ctrl_final(struct l2cap_chan *chan) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return L2CAP_EXT_CTRL_FINAL; - else - return L2CAP_CTRL_FINAL; -} - -static inline bool __is_ctrl_final(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return ctrl & L2CAP_EXT_CTRL_FINAL; - else - return ctrl & L2CAP_CTRL_FINAL; -} - -static inline __u32 __set_ctrl_poll(struct l2cap_chan *chan) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return L2CAP_EXT_CTRL_POLL; - else - return L2CAP_CTRL_POLL; -} - -static inline bool __is_ctrl_poll(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return ctrl & L2CAP_EXT_CTRL_POLL; - else - return ctrl & L2CAP_CTRL_POLL; -} - -static inline __u32 __get_control(struct l2cap_chan *chan, void *p) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return get_unaligned_le32(p); - else - return get_unaligned_le16(p); -} - -static inline void __put_control(struct l2cap_chan *chan, __u32 control, - void *p) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return put_unaligned_le32(control, p); - else - return put_unaligned_le16(control, p); -} - -static inline __u8 __ctrl_size(struct l2cap_chan *chan) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return L2CAP_EXT_HDR_SIZE - L2CAP_HDR_SIZE; - else - return L2CAP_ENH_HDR_SIZE - L2CAP_HDR_SIZE; -} extern bool disable_ertm; -- cgit v1.2.3 From 38351c66e407e610283e5332b819822055db473c Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Tue, 22 May 2012 19:00:20 -0300 Subject: Bluetooth: Fix trailing whitespaces in license text As reported by checkpatch.pl Signed-off-by: Gustavo Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 961669b648fd..b98181bd2b33 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -1,4 +1,4 @@ -/* +/* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated @@ -12,13 +12,13 @@ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ -- cgit v1.2.3 From a6c511c636848f871f5b7aef38e25e5b894b3b48 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Wed, 23 May 2012 12:35:46 +0200 Subject: Bluetooth: Rename HCI_QUIRK_NO_RESET to HCI_QUIRK_RESET_ON_CLOSE HCI_QUIRK_NO_RESET name is misleading - purpose of this quirk is to reset device on close instead of init, not to not reset at all. Rename it to HCI_QUIRK_RESET_ON_CLOSE to avoid confusion. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- drivers/bluetooth/bpa10x.c | 2 +- drivers/bluetooth/btusb.c | 6 +++--- drivers/bluetooth/hci_ldisc.c | 2 +- include/net/bluetooth/hci.h | 2 +- net/bluetooth/hci_core.c | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index 609861a53c28..29caaed2d715 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -470,7 +470,7 @@ static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id * hdev->flush = bpa10x_flush; hdev->send = bpa10x_send_frame; - set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); err = hci_register_dev(hdev); if (err < 0) { diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c9463af8e564..3a6cdc9b75a3 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1026,7 +1026,7 @@ static int btusb_probe(struct usb_interface *intf, data->isoc = usb_ifnum_to_if(data->udev, 1); if (!reset) - set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); if (force_scofix || id->driver_info & BTUSB_WRONG_SCO_MTU) { if (!disable_scofix) @@ -1038,7 +1038,7 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; - set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); } if (id->driver_info & BTUSB_CSR) { @@ -1046,7 +1046,7 @@ static int btusb_probe(struct usb_interface *intf, /* Old firmware would otherwise execute USB reset */ if (le16_to_cpu(udev->descriptor.bcdDevice) < 0x117) - set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); } if (id->driver_info & BTUSB_SNIFFER) { diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index e564579a6115..2f9b796e106e 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -394,7 +394,7 @@ static int hci_uart_register_dev(struct hci_uart *hu) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); if (!test_bit(HCI_UART_RESET_ON_INIT, &hu->hdev_flags)) - set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); if (test_bit(HCI_UART_CREATE_AMP, &hu->hdev_flags)) hdev->dev_type = HCI_AMP; diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 66a7b579e31c..97c57aa938f3 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -58,7 +58,7 @@ /* HCI device quirks */ enum { - HCI_QUIRK_NO_RESET, + HCI_QUIRK_RESET_ON_CLOSE, HCI_QUIRK_RAW_DEVICE, HCI_QUIRK_FIXUP_BUFFER_SIZE }; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d0a960dabd53..0ed4edf0f77b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -203,7 +203,7 @@ static void bredr_init(struct hci_dev *hdev) /* Mandatory initialization */ /* Reset */ - if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) { + if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { set_bit(HCI_RESET, &hdev->flags); hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); } @@ -792,7 +792,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); if (!test_bit(HCI_RAW, &hdev->flags) && - test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) { + test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { set_bit(HCI_INIT, &hdev->flags); __hci_request(hdev, hci_reset_req, 0, msecs_to_jiffies(250)); -- cgit v1.2.3 From 9b3b44604ac8e06d299718c5d0fa0b91b675ae0b Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 23 May 2012 11:31:20 +0300 Subject: Bluetooth: Use defined link key size Remove magic number with defined link key size. Signed-off-by: Andrei Emeltchenko Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 6 ++++-- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_core.c | 2 +- net/bluetooth/hci_event.c | 2 +- net/bluetooth/mgmt.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 97c57aa938f3..0bc5555510f3 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -30,6 +30,8 @@ #define HCI_MAX_EVENT_SIZE 260 #define HCI_MAX_FRAME_SIZE (HCI_MAX_ACL_SIZE + 4) +#define HCI_LINK_KEY_SIZE 16 + /* HCI dev events */ #define HCI_DEV_REG 1 #define HCI_DEV_UNREG 2 @@ -371,7 +373,7 @@ struct hci_cp_reject_conn_req { #define HCI_OP_LINK_KEY_REPLY 0x040b struct hci_cp_link_key_reply { bdaddr_t bdaddr; - __u8 link_key[16]; + __u8 link_key[HCI_LINK_KEY_SIZE]; } __packed; #define HCI_OP_LINK_KEY_NEG_REPLY 0x040c @@ -1048,7 +1050,7 @@ struct hci_ev_link_key_req { #define HCI_EV_LINK_KEY_NOTIFY 0x18 struct hci_ev_link_key_notify { bdaddr_t bdaddr; - __u8 link_key[16]; + __u8 link_key[HCI_LINK_KEY_SIZE]; __u8 key_type; } __packed; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9fc7728f94e4..6c658fc7ac93 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -105,7 +105,7 @@ struct link_key { struct list_head list; bdaddr_t bdaddr; u8 type; - u8 val[16]; + u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; }; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0ed4edf0f77b..027257d4b52a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1291,7 +1291,7 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key, } bacpy(&key->bdaddr, bdaddr); - memcpy(key->val, val, 16); + memcpy(key->val, val, HCI_LINK_KEY_SIZE); key->pin_len = pin_len; if (type == HCI_LK_CHANGED_COMBINATION) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6c2d7ccf26e5..1795c0c9b411 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2739,7 +2739,7 @@ static inline void hci_link_key_request_evt(struct hci_dev *hdev, } bacpy(&cp.bdaddr, &ev->bdaddr); - memcpy(cp.link_key, key->val, 16); + memcpy(cp.link_key, key->val, HCI_LINK_KEY_SIZE); hci_send_cmd(hdev, HCI_OP_LINK_KEY_REPLY, sizeof(cp), &cp); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6a7e926c418f..1fd49e652694 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2955,7 +2955,7 @@ int mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bacpy(&ev.key.addr.bdaddr, &key->bdaddr); ev.key.addr.type = BDADDR_BREDR; ev.key.type = key->type; - memcpy(ev.key.val, key->val, 16); + memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE); ev.key.pin_len = key->pin_len; return mgmt_event(MGMT_EV_NEW_LINK_KEY, hdev, &ev, sizeof(ev), NULL); -- cgit v1.2.3 From c3c7ea65941a0b7a4f1b9655e7aaaab6ce1874d2 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Wed, 23 May 2012 04:04:20 -0300 Subject: Bluetooth: Fix coding style in include/net/bluetooth Fix all warning and errors reported by checkpatch but license trailing whitespace and bdaddr_t definition. Signed-off-by: Gustavo Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 26 ++++++++++++++------------ include/net/bluetooth/hci.h | 4 ++-- include/net/bluetooth/hci_core.h | 12 +++++++----- 3 files changed, 23 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index b98181bd2b33..7a9f9612db5a 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -25,7 +25,7 @@ #ifndef __BLUETOOTH_H #define __BLUETOOTH_H -#include +#include #include #include #include @@ -168,8 +168,8 @@ typedef struct { #define BDADDR_LE_PUBLIC 0x01 #define BDADDR_LE_RANDOM 0x02 -#define BDADDR_ANY (&(bdaddr_t) {{0, 0, 0, 0, 0, 0}}) -#define BDADDR_LOCAL (&(bdaddr_t) {{0, 0, 0, 0xff, 0xff, 0xff}}) +#define BDADDR_ANY (&(bdaddr_t) {{0, 0, 0, 0, 0, 0} }) +#define BDADDR_LOCAL (&(bdaddr_t) {{0, 0, 0, 0xff, 0xff, 0xff} }) /* Copy, swap, convert BD Address */ static inline int bacmp(bdaddr_t *ba1, bdaddr_t *ba2) @@ -215,7 +215,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags); int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags); -uint bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait); +uint bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); @@ -225,12 +225,12 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); /* Skb helpers */ struct l2cap_ctrl { - unsigned int sframe : 1, - poll : 1, - final : 1, - fcs : 1, - sar : 2, - super : 2; + unsigned int sframe:1, + poll:1, + final:1, + fcs:1, + sar:2, + super:2; __u16 reqseq; __u16 txseq; __u8 retries; @@ -249,7 +249,8 @@ static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) { struct sk_buff *skb; - if ((skb = alloc_skb(len + BT_SKB_RESERVE, how))) { + skb = alloc_skb(len + BT_SKB_RESERVE, how); + if (skb) { skb_reserve(skb, BT_SKB_RESERVE); bt_cb(skb)->incoming = 0; } @@ -261,7 +262,8 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, { struct sk_buff *skb; - if ((skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err))) { + skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err); + if (skb) { skb_reserve(skb, BT_SKB_RESERVE); bt_cb(skb)->incoming = 0; } diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 0bc5555510f3..5de351e49d49 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1309,12 +1309,12 @@ static inline struct hci_sco_hdr *hci_sco_hdr(const struct sk_buff *skb) } /* Command opcode pack/unpack */ -#define hci_opcode_pack(ogf, ocf) (__u16) ((ocf & 0x03ff)|(ogf << 10)) +#define hci_opcode_pack(ogf, ocf) ((__u16) ((ocf & 0x03ff)|(ogf << 10))) #define hci_opcode_ogf(op) (op >> 10) #define hci_opcode_ocf(op) (op & 0x03ff) /* ACL handle and flags pack/unpack */ -#define hci_handle_pack(h, f) (__u16) ((h & 0x0fff)|(f << 12)) +#define hci_handle_pack(h, f) ((__u16) ((h & 0x0fff)|(f << 12))) #define hci_handle(h) (h & 0x0fff) #define hci_flags(h) (h >> 12) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6c658fc7ac93..a8ba50d7a81e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -65,7 +65,7 @@ struct discovery_state { DISCOVERY_RESOLVING, DISCOVERY_STOPPING, } state; - struct list_head all; /* All devices found during inquiry */ + struct list_head all; /* All devices found during inquiry */ struct list_head unknown; /* Name state not known */ struct list_head resolve; /* Name needs to be resolved */ __u32 timestamp; @@ -360,7 +360,8 @@ extern int l2cap_connect_cfm(struct hci_conn *hcon, u8 status); extern int l2cap_disconn_ind(struct hci_conn *hcon); extern int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason); extern int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt); -extern int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); +extern int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, + u16 flags); extern int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); extern int sco_connect_cfm(struct hci_conn *hcon, __u8 status); @@ -429,8 +430,8 @@ enum { static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - return (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && - test_bit(HCI_CONN_SSP_ENABLED, &conn->flags)); + return test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && + test_bit(HCI_CONN_SSP_ENABLED, &conn->flags); } static inline void hci_conn_hash_init(struct hci_dev *hdev) @@ -661,7 +662,8 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg); int hci_get_auth_info(struct hci_dev *hdev, void __user *arg); int hci_inquiry(void __user *arg); -struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr); +struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, + bdaddr_t *bdaddr); int hci_blacklist_clear(struct hci_dev *hdev); int hci_blacklist_add(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type); int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type); -- cgit v1.2.3 From 8c520a59927a5600973782505dbb750d985057c4 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Wed, 23 May 2012 04:04:22 -0300 Subject: Bluetooth: Remove unnecessary headers include Most of the include were unnecessary or already included by some other header. Replace module.h by export.h where possible. Signed-off-by: Gustavo Padovan Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 8 -------- include/net/bluetooth/bluetooth.h | 3 --- include/net/bluetooth/hci.h | 1 - include/net/bluetooth/hci_core.h | 1 - net/bluetooth/af_bluetooth.c | 11 ----------- net/bluetooth/bnep/core.c | 17 ----------------- net/bluetooth/bnep/netdev.c | 10 +--------- net/bluetooth/bnep/sock.c | 18 +----------------- net/bluetooth/hci_conn.c | 16 +--------------- net/bluetooth/hci_core.c | 23 ++--------------------- net/bluetooth/hci_event.c | 15 +-------------- net/bluetooth/hci_sock.c | 20 +------------------- net/bluetooth/hci_sysfs.c | 4 ---- net/bluetooth/hidp/core.c | 19 ------------------- net/bluetooth/hidp/sock.c | 16 +--------------- net/bluetooth/l2cap_core.c | 19 ------------------- net/bluetooth/l2cap_sock.c | 1 - net/bluetooth/lib.c | 7 +------ net/bluetooth/mgmt.c | 2 -- net/bluetooth/rfcomm/core.c | 14 -------------- net/bluetooth/rfcomm/sock.c | 21 +-------------------- net/bluetooth/rfcomm/tty.c | 5 ----- net/bluetooth/sco.c | 18 ------------------ net/bluetooth/smp.c | 7 ++++--- 24 files changed, 14 insertions(+), 262 deletions(-) (limited to 'include/net') diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 3a6cdc9b75a3..a45e717f5f84 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -21,15 +21,7 @@ * */ -#include #include -#include -#include -#include -#include -#include -#include - #include #include diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 7a9f9612db5a..565d4bee1e49 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -25,9 +25,6 @@ #ifndef __BLUETOOTH_H #define __BLUETOOTH_H -#include -#include -#include #include #include diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 5de351e49d49..edb663908121 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1292,7 +1292,6 @@ struct hci_sco_hdr { __u8 dlen; } __packed; -#include static inline struct hci_event_hdr *hci_event_hdr(const struct sk_buff *skb) { return (struct hci_event_hdr *) skb->data; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a8ba50d7a81e..d584a47d1c86 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -25,7 +25,6 @@ #ifndef __HCI_CORE_H #define __HCI_CORE_H -#include #include /* HCI priority */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index e31a20f5b6be..251747269d37 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -25,18 +25,7 @@ /* Bluetooth address family and sockets. */ #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include #include diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index a918f6e4f003..4a6620bc1570 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -26,26 +26,9 @@ */ #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include - -#include #include - -#include #include -#include - #include #include diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 46c9ece7b04a..98f86f91d47c 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -25,16 +25,8 @@ SOFTWARE IS DISCLAIMED. */ -#include -#include - -#include -#include +#include #include -#include -#include - -#include #include #include diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 180bfc45810d..5e5f5b410e0b 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -24,24 +24,8 @@ SOFTWARE IS DISCLAIMED. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include -#include - #include "bnep.h" diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 231fc4400f37..3bb2d552a888 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -24,21 +24,7 @@ /* Bluetooth HCI connection handling. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include +#include #include #include diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bee425ad25b5..3431ec908c02 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -25,28 +25,9 @@ /* Bluetooth HCI core. */ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include -#include -#include +#include #include #include diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 87e6f74af6fe..5e24a57a4613 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -24,20 +24,7 @@ /* Bluetooth HCI event handling. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include +#include #include #include diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 9d8e1c39955e..a7f04de03d79 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -24,25 +24,7 @@ /* Bluetooth HCI sockets. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include +#include #include #include diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index ee8d9ea6bf3c..a20e61c3653d 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -1,10 +1,6 @@ /* Bluetooth HCI driver model support. */ -#include -#include -#include #include -#include #include #include diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index c8625b8ccb6a..8a4afc7515a2 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -21,27 +21,8 @@ */ #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include - -#include -#include #include #include diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 73a32d705c1f..18b3f6892a36 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -20,22 +20,8 @@ SOFTWARE IS DISCLAIMED. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include #include "hidp.h" diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index db76a7750ee6..f6b785593ec3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -30,27 +30,8 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include #include -#include - -#include #include #include diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 3bb1611b9d48..4d3660540c05 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -27,7 +27,6 @@ /* Bluetooth L2CAP sockets. */ -#include #include #include diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index 994bc3c7ddc7..e1c97527e16c 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -26,12 +26,7 @@ #define pr_fmt(fmt) "Bluetooth: " fmt -#include - -#include -#include -#include -#include +#include #include diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 984afe4ef407..205574edff20 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -24,8 +24,6 @@ /* Bluetooth HCI Management interface */ -#include -#include #include #include diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 585d3916d3d4..c75107ef8920 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -26,22 +26,8 @@ */ #include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include - -#include -#include #include #include diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index e8707debb864..7e1e59645c05 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -25,27 +25,8 @@ * RFCOMM sockets. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include - -#include #include #include diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index f2f4d064df94..cb960773c002 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -31,11 +31,6 @@ #include #include -#include -#include -#include -#include - #include #include #include diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 6401ccae2045..40bbe25dcff7 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -25,26 +25,8 @@ /* Bluetooth SCO sockets. */ #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include -#include -#include - -#include #include #include diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 6fc7c4708f3e..ff4835b61de9 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -20,14 +20,15 @@ SOFTWARE IS DISCLAIMED. */ +#include +#include +#include + #include #include #include #include #include -#include -#include -#include #define SMP_TIMEOUT msecs_to_jiffies(30000) -- cgit v1.2.3 From 59e54bd15d63f102c71c3ce695bca5ed90926e46 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 23 May 2012 15:44:06 +0300 Subject: Bluetooth: Define L2CAP conf continuation flag Define Continuation flag which the only flag used from Flags field in L2CAP Configuration Request and Response. Signed-off-by: Andrei Emeltchenko Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 3 +++ net/bluetooth/l2cap_core.c | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 7bc40198f147..01422578cc78 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -271,6 +271,9 @@ struct l2cap_conf_rsp { #define L2CAP_CONF_PENDING 0x0004 #define L2CAP_CONF_EFS_REJECT 0x0005 +/* configuration req/rsp continuation flag */ +#define L2CAP_CONF_FLAG_CONTINUATION 0x0001 + struct l2cap_conf_opt { __u8 type; __u8 len; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index f6b785593ec3..e31b005f9827 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2944,7 +2944,7 @@ done: } req->dcid = cpu_to_le16(chan->dcid); - req->flags = cpu_to_le16(0); + req->flags = __constant_cpu_to_le16(0); return ptr - data; } @@ -3164,7 +3164,7 @@ done: } rsp->scid = cpu_to_le16(chan->dcid); rsp->result = cpu_to_le16(result); - rsp->flags = cpu_to_le16(0x0000); + rsp->flags = __constant_cpu_to_le16(0); return ptr - data; } @@ -3263,7 +3263,7 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, voi } req->dcid = cpu_to_le16(chan->dcid); - req->flags = cpu_to_le16(0x0000); + req->flags = __constant_cpu_to_le16(0); return ptr - data; } @@ -3618,7 +3618,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr memcpy(chan->conf_req + chan->conf_len, req->data, len); chan->conf_len += len; - if (flags & 0x0001) { + if (flags & L2CAP_CONF_FLAG_CONTINUATION) { /* Incomplete config. Send empty response. */ l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, l2cap_build_conf_rsp(chan, rsp, @@ -3769,7 +3769,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr goto done; } - if (flags & 0x01) + if (flags & L2CAP_CONF_FLAG_CONTINUATION) goto done; set_bit(CONF_INPUT_DONE, &chan->conf_state); -- cgit v1.2.3 From 2983fd682444180e45567ce8147a612b97ba69da Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 24 May 2012 15:42:50 +0300 Subject: Bluetooth: Define and use PSM identifiers Define assigned Protocol and Service Multiplexor (PSM) identifiers and use them instead of magic numbers. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 4 ++++ net/bluetooth/l2cap_core.c | 4 ++-- net/bluetooth/l2cap_sock.c | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 01422578cc78..f44344b92d2d 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -229,6 +229,10 @@ struct l2cap_conn_rsp { __le16 status; } __packed; +/* protocol/service multiplexer (PSM) */ +#define L2CAP_PSM_SDP 0x0001 +#define L2CAP_PSM_RFCOMM 0x0003 + /* channel indentifier */ #define L2CAP_CID_SIGNALING 0x0001 #define L2CAP_CID_CONN_LESS 0x0002 diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c9e6ae4a3363..65c3f4e13965 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -648,7 +648,7 @@ static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan) default: return HCI_AT_NO_BONDING; } - } else if (chan->psm == cpu_to_le16(0x0001)) { + } else if (chan->psm == __constant_cpu_to_le16(L2CAP_PSM_SDP)) { if (chan->sec_level == BT_SECURITY_LOW) chan->sec_level = BT_SECURITY_SDP; @@ -3393,7 +3393,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd lock_sock(parent); /* Check if the ACL is secure enough (if not SDP) */ - if (psm != cpu_to_le16(0x0001) && + if (psm != __constant_cpu_to_le16(L2CAP_PSM_SDP) && !hci_conn_check_link_mode(conn->hcon)) { conn->disc_reason = HCI_ERROR_AUTH_FAILURE; result = L2CAP_CR_SEC_BLOCK; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 4d3660540c05..d244361a455c 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -88,8 +88,8 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) if (err < 0) goto done; - if (__le16_to_cpu(la.l2_psm) == 0x0001 || - __le16_to_cpu(la.l2_psm) == 0x0003) + if (__le16_to_cpu(la.l2_psm) == L2CAP_PSM_SDP || + __le16_to_cpu(la.l2_psm) == L2CAP_PSM_RFCOMM) chan->sec_level = BT_SECURITY_SDP; bacpy(&bt_sk(sk)->src, &la.l2_bdaddr); -- cgit v1.2.3 From 523e93cdb39086b25af2ed19d2a69248510727a2 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Fri, 25 May 2012 15:09:26 +0300 Subject: Bluetooth: Define HCI AMP cmd struct Add HCI commands to deal with Bluetooth AMP controllers. Those commands will be used by bluetooth and softamp code. Signed-off-by: Andrei Emeltchenko Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 81 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index edb663908121..de09a26e4223 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -31,6 +31,7 @@ #define HCI_MAX_FRAME_SIZE (HCI_MAX_ACL_SIZE + 4) #define HCI_LINK_KEY_SIZE 16 +#define HCI_AMP_LINK_KEY_SIZE (2 * HCI_LINK_KEY_SIZE) /* HCI dev events */ #define HCI_DEV_REG 1 @@ -525,6 +526,28 @@ struct hci_cp_io_capability_neg_reply { __u8 reason; } __packed; +#define HCI_OP_CREATE_PHY_LINK 0x0435 +struct hci_cp_create_phy_link { + __u8 phy_handle; + __u8 key_len; + __u8 key_type; + __u8 key[HCI_AMP_LINK_KEY_SIZE]; +} __packed; + +#define HCI_OP_ACCEPT_PHY_LINK 0x0436 +struct hci_cp_accept_phy_link { + __u8 phy_handle; + __u8 key_len; + __u8 key_type; + __u8 key[HCI_AMP_LINK_KEY_SIZE]; +} __packed; + +#define HCI_OP_DISCONN_PHY_LINK 0x0437 +struct hci_cp_disconn_phy_link { + __u8 phy_handle; + __u8 reason; +} __packed; + #define HCI_OP_SNIFF_MODE 0x0803 struct hci_cp_sniff_mode { __le16 handle; @@ -820,6 +843,31 @@ struct hci_rp_read_local_amp_info { __le32 be_flush_to; } __packed; +#define HCI_OP_READ_LOCAL_AMP_ASSOC 0x140a +struct hci_cp_read_local_amp_assoc { + __u8 phy_handle; + __le16 len_so_far; + __le16 max_len; +} __packed; +struct hci_rp_read_local_amp_assoc { + __u8 status; + __u8 phy_handle; + __le16 rem_len; + __u8 frag[0]; +} __packed; + +#define HCI_OP_WRITE_REMOTE_AMP_ASSOC 0x140b +struct hci_cp_write_remote_amp_assoc { + __u8 phy_handle; + __le16 len_so_far; + __le16 rem_len; + __u8 frag[0]; +} __packed; +struct hci_rp_write_remote_amp_assoc { + __u8 status; + __u8 phy_handle; +} __packed; + #define HCI_OP_LE_SET_EVENT_MASK 0x2001 struct hci_cp_le_set_event_mask { __u8 mask[8]; @@ -1192,6 +1240,39 @@ struct hci_ev_le_meta { __u8 subevent; } __packed; +#define HCI_EV_PHY_LINK_COMPLETE 0x40 +struct hci_ev_phy_link_complete { + __u8 status; + __u8 phy_handle; +} __packed; + +#define HCI_EV_CHANNEL_SELECTED 0x41 +struct hci_ev_channel_selected { + __u8 phy_handle; +} __packed; + +#define HCI_EV_DISCONN_PHY_LINK_COMPLETE 0x42 +struct hci_ev_disconn_phy_link_complete { + __u8 status; + __u8 phy_handle; + __u8 reason; +} __packed; + +#define HCI_EV_LOGICAL_LINK_COMPLETE 0x45 +struct hci_ev_logical_link_complete { + __u8 status; + __le16 handle; + __u8 phy_handle; + __u8 flow_spec_id; +} __packed; + +#define HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE 0x46 +struct hci_ev_disconn_logical_link_complete { + __u8 status; + __le16 handle; + __u8 reason; +} __packed; + #define HCI_EV_NUM_COMP_BLOCKS 0x48 struct hci_comp_blocks_info { __le16 handle; -- cgit v1.2.3 From 80b980279508edd1a92d8d77ec99b0ddad00c5fe Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Sun, 27 May 2012 22:27:51 -0300 Subject: Bluetooth: Use chan as parameters for l2cap chan ops Use chan instead of void * makes more sense here. Signed-off-by: Gustavo Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 10 ++++++---- net/bluetooth/l2cap_core.c | 30 +++++++++++++++--------------- net/bluetooth/l2cap_sock.c | 16 ++++++++-------- 3 files changed, 29 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index f44344b92d2d..aa2dbc680d5c 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -527,10 +527,12 @@ struct l2cap_chan { struct l2cap_ops { char *name; - struct l2cap_chan *(*new_connection) (void *data); - int (*recv) (void *data, struct sk_buff *skb); - void (*close) (void *data); - void (*state_change) (void *data, int state); + struct l2cap_chan *(*new_connection) (struct l2cap_chan *chan); + int (*recv) (struct l2cap_chan * chan, + struct sk_buff *skb); + void (*close) (struct l2cap_chan *chan); + void (*state_change) (struct l2cap_chan *chan, + int state); struct sk_buff *(*alloc_skb) (struct l2cap_chan *chan, unsigned long len, int nb); }; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index de0dc9ec9862..7edc8146db26 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -180,7 +180,7 @@ static void __l2cap_state_change(struct l2cap_chan *chan, int state) state_to_string(state)); chan->state = state; - chan->ops->state_change(chan->data, state); + chan->ops->state_change(chan, state); } static void l2cap_state_change(struct l2cap_chan *chan, int state) @@ -381,7 +381,7 @@ static void l2cap_chan_timeout(struct work_struct *work) l2cap_chan_unlock(chan); - chan->ops->close(chan->data); + chan->ops->close(chan); mutex_unlock(&conn->chan_lock); l2cap_chan_put(chan); @@ -569,7 +569,7 @@ static void l2cap_chan_cleanup_listen(struct sock *parent) l2cap_chan_close(chan, ECONNRESET); l2cap_chan_unlock(chan); - chan->ops->close(chan->data); + chan->ops->close(chan); } } @@ -1213,7 +1213,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) goto clean; } - chan = pchan->ops->new_connection(pchan->data); + chan = pchan->ops->new_connection(pchan); if (!chan) goto clean; @@ -1324,7 +1324,7 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_unlock(chan); - chan->ops->close(chan->data); + chan->ops->close(chan); l2cap_chan_put(chan); } @@ -2568,7 +2568,7 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) if (!nskb) continue; - if (chan->ops->recv(chan->data, nskb)) + if (chan->ops->recv(chan, nskb)) kfree_skb(nskb); } @@ -3411,7 +3411,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd goto response; } - chan = pchan->ops->new_connection(pchan->data); + chan = pchan->ops->new_connection(pchan); if (!chan) goto response; @@ -3420,7 +3420,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { sock_set_flag(sk, SOCK_ZAPPED); - chan->ops->close(chan->data); + chan->ops->close(chan); goto response; } @@ -3831,7 +3831,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd l2cap_chan_unlock(chan); - chan->ops->close(chan->data); + chan->ops->close(chan); l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); @@ -3865,7 +3865,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd l2cap_chan_unlock(chan); - chan->ops->close(chan->data); + chan->ops->close(chan); l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); @@ -4435,7 +4435,7 @@ static int l2cap_reassemble_sdu(struct l2cap_chan *chan, struct sk_buff *skb, if (chan->sdu) break; - err = chan->ops->recv(chan->data, skb); + err = chan->ops->recv(chan, skb); break; case L2CAP_SAR_START: @@ -4485,7 +4485,7 @@ static int l2cap_reassemble_sdu(struct l2cap_chan *chan, struct sk_buff *skb, if (chan->sdu->len != chan->sdu_len) break; - err = chan->ops->recv(chan->data, chan->sdu); + err = chan->ops->recv(chan, chan->sdu); if (!err) { /* Reassembly complete */ @@ -5207,7 +5207,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk if (chan->imtu < skb->len) goto drop; - if (!chan->ops->recv(chan->data, skb)) + if (!chan->ops->recv(chan, skb)) goto done; break; @@ -5246,7 +5246,7 @@ static inline int l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, str if (chan->imtu < skb->len) goto drop; - if (!chan->ops->recv(chan->data, skb)) + if (!chan->ops->recv(chan, skb)) return 0; drop: @@ -5272,7 +5272,7 @@ static inline int l2cap_att_channel(struct l2cap_conn *conn, u16 cid, if (chan->imtu < skb->len) goto drop; - if (!chan->ops->recv(chan->data, skb)) + if (!chan->ops->recv(chan, skb)) return 0; drop: diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index d244361a455c..db787f67c52a 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -872,9 +872,9 @@ static int l2cap_sock_release(struct socket *sock) return err; } -static struct l2cap_chan *l2cap_sock_new_connection_cb(void *data) +static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) { - struct sock *sk, *parent = data; + struct sock *sk, *parent = chan->data; sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC); @@ -888,10 +888,10 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(void *data) return l2cap_pi(sk)->chan; } -static int l2cap_sock_recv_cb(void *data, struct sk_buff *skb) +static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { int err; - struct sock *sk = data; + struct sock *sk = chan->data; struct l2cap_pinfo *pi = l2cap_pi(sk); lock_sock(sk); @@ -924,16 +924,16 @@ done: return err; } -static void l2cap_sock_close_cb(void *data) +static void l2cap_sock_close_cb(struct l2cap_chan *chan) { - struct sock *sk = data; + struct sock *sk = chan->data; l2cap_sock_kill(sk); } -static void l2cap_sock_state_change_cb(void *data, int state) +static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state) { - struct sock *sk = data; + struct sock *sk = chan->data; sk->sk_state = state; } -- cgit v1.2.3 From c0df7f6e06e1aeccee39c801af7f78cadeb9f345 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Sun, 27 May 2012 22:27:52 -0300 Subject: Bluetooth: Move clean up code and set of SOCK_ZAPPED to l2cap_sock.c This remove a bit more of socket code from l2cap core, this calls set the SOCK_ZAPPED and do some clean up depending on the socket state. Reported-by: Mat Martineau Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 55 +++++++------------------------------- net/bluetooth/l2cap_sock.c | 61 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 46 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index aa2dbc680d5c..76b0e7e5dec2 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -530,6 +530,7 @@ struct l2cap_ops { struct l2cap_chan *(*new_connection) (struct l2cap_chan *chan); int (*recv) (struct l2cap_chan * chan, struct sk_buff *skb); + void (*teardown) (struct l2cap_chan *chan, int err); void (*close) (struct l2cap_chan *chan); void (*state_change) (struct l2cap_chan *chan, int state); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 7edc8146db26..1f4c72074154 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -493,9 +493,7 @@ static void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) static void l2cap_chan_del(struct l2cap_chan *chan, int err) { - struct sock *sk = chan->sk; struct l2cap_conn *conn = chan->conn; - struct sock *parent = bt_sk(sk)->parent; __clear_chan_timer(chan); @@ -511,21 +509,8 @@ static void l2cap_chan_del(struct l2cap_chan *chan, int err) hci_conn_put(conn->hcon); } - lock_sock(sk); - - __l2cap_state_change(chan, BT_CLOSED); - sock_set_flag(sk, SOCK_ZAPPED); - - if (err) - __l2cap_chan_set_err(chan, err); - - if (parent) { - bt_accept_unlink(sk); - parent->sk_data_ready(parent, 0); - } else - sk->sk_state_change(sk); - - release_sock(sk); + if (chan->ops->teardown) + chan->ops->teardown(chan, err); if (test_bit(CONF_NOT_COMPLETE, &chan->conf_state)) return; @@ -554,25 +539,6 @@ static void l2cap_chan_del(struct l2cap_chan *chan, int err) return; } -static void l2cap_chan_cleanup_listen(struct sock *parent) -{ - struct sock *sk; - - BT_DBG("parent %p", parent); - - /* Close not yet accepted channels */ - while ((sk = bt_accept_dequeue(parent, NULL))) { - struct l2cap_chan *chan = l2cap_pi(sk)->chan; - - l2cap_chan_lock(chan); - __clear_chan_timer(chan); - l2cap_chan_close(chan, ECONNRESET); - l2cap_chan_unlock(chan); - - chan->ops->close(chan); - } -} - void l2cap_chan_close(struct l2cap_chan *chan, int reason) { struct l2cap_conn *conn = chan->conn; @@ -583,12 +549,8 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) switch (chan->state) { case BT_LISTEN: - lock_sock(sk); - l2cap_chan_cleanup_listen(sk); - - __l2cap_state_change(chan, BT_CLOSED); - sock_set_flag(sk, SOCK_ZAPPED); - release_sock(sk); + if (chan->ops->teardown) + chan->ops->teardown(chan, 0); break; case BT_CONNECTED: @@ -630,9 +592,8 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) break; default: - lock_sock(sk); - sock_set_flag(sk, SOCK_ZAPPED); - release_sock(sk); + if (chan->ops->teardown) + chan->ops->teardown(chan, 0); break; } } @@ -3419,7 +3380,9 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { - sock_set_flag(sk, SOCK_ZAPPED); + if (chan->ops->teardown) + chan->ops->teardown(chan, 0); + chan->ops->close(chan); goto response; } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index db787f67c52a..3f5946351fb9 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -872,6 +872,25 @@ static int l2cap_sock_release(struct socket *sock) return err; } +static void l2cap_sock_cleanup_listen(struct sock *parent) +{ + struct sock *sk; + + BT_DBG("parent %p", parent); + + /* Close not yet accepted channels */ + while ((sk = bt_accept_dequeue(parent, NULL))) { + struct l2cap_chan *chan = l2cap_pi(sk)->chan; + + l2cap_chan_lock(chan); + __clear_chan_timer(chan); + l2cap_chan_close(chan, ECONNRESET); + l2cap_chan_unlock(chan); + + l2cap_sock_kill(sk); + } +} + static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) { struct sock *sk, *parent = chan->data; @@ -931,6 +950,47 @@ static void l2cap_sock_close_cb(struct l2cap_chan *chan) l2cap_sock_kill(sk); } +static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err) +{ + struct sock *sk = chan->data; + struct sock *parent; + + lock_sock(sk); + + parent = bt_sk(sk)->parent; + + sock_set_flag(sk, SOCK_ZAPPED); + + switch (chan->state) { + case BT_OPEN: + case BT_BOUND: + case BT_CLOSED: + break; + case BT_LISTEN: + l2cap_sock_cleanup_listen(sk); + sk->sk_state = BT_CLOSED; + chan->state = BT_CLOSED; + + break; + default: + sk->sk_state = BT_CLOSED; + chan->state = BT_CLOSED; + + sk->sk_err = err; + + if (parent) { + bt_accept_unlink(sk); + parent->sk_data_ready(parent, 0); + } else { + sk->sk_state_change(sk); + } + + break; + } + + release_sock(sk); +} + static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state) { struct sock *sk = chan->data; @@ -959,6 +1019,7 @@ static struct l2cap_ops l2cap_chan_ops = { .new_connection = l2cap_sock_new_connection_cb, .recv = l2cap_sock_recv_cb, .close = l2cap_sock_close_cb, + .teardown = l2cap_sock_teardown_cb, .state_change = l2cap_sock_state_change_cb, .alloc_skb = l2cap_sock_alloc_skb_cb, }; -- cgit v1.2.3 From 54a59aa2b562872781d6a8fc89f300d360941691 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Sun, 27 May 2012 22:27:53 -0300 Subject: Bluetooth: Add l2cap_chan->ops->ready() This move socket specific code to l2cap_sock.c. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 18 +++--------------- net/bluetooth/l2cap_sock.c | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 76b0e7e5dec2..c5726c24ee03 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -534,6 +534,7 @@ struct l2cap_ops { void (*close) (struct l2cap_chan *chan); void (*state_change) (struct l2cap_chan *chan, int state); + void (*ready) (struct l2cap_chan *chan); struct sk_buff *(*alloc_skb) (struct l2cap_chan *chan, unsigned long len, int nb); }; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 1f4c72074154..5947eb1c1bee 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -931,26 +931,14 @@ static void l2cap_send_conn_req(struct l2cap_chan *chan) static void l2cap_chan_ready(struct l2cap_chan *chan) { - struct sock *sk = chan->sk; - struct sock *parent; - - lock_sock(sk); - - parent = bt_sk(sk)->parent; - - BT_DBG("sk %p, parent %p", sk, parent); - /* This clears all conf flags, including CONF_NOT_COMPLETE */ chan->conf_state = 0; __clear_chan_timer(chan); - __l2cap_state_change(chan, BT_CONNECTED); - sk->sk_state_change(sk); + chan->state = BT_CONNECTED; - if (parent) - parent->sk_data_ready(parent, 0); - - release_sock(sk); + if (chan->ops->ready) + chan->ops->ready(chan); } static void l2cap_do_start(struct l2cap_chan *chan) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 3f5946351fb9..5563023001c6 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1014,6 +1014,26 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan, return skb; } +static void l2cap_sock_ready_cb(struct l2cap_chan *chan) +{ + struct sock *sk = chan->data; + struct sock *parent; + + lock_sock(sk); + + parent = bt_sk(sk)->parent; + + BT_DBG("sk %p, parent %p", sk, parent); + + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + + if (parent) + parent->sk_data_ready(parent, 0); + + release_sock(sk); +} + static struct l2cap_ops l2cap_chan_ops = { .name = "L2CAP Socket Interface", .new_connection = l2cap_sock_new_connection_cb, @@ -1021,6 +1041,7 @@ static struct l2cap_ops l2cap_chan_ops = { .close = l2cap_sock_close_cb, .teardown = l2cap_sock_teardown_cb, .state_change = l2cap_sock_state_change_cb, + .ready = l2cap_sock_ready_cb, .alloc_skb = l2cap_sock_alloc_skb_cb, }; -- cgit v1.2.3 From 466f8004f364e9cb46d9124109972489eccfb404 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:01 +0300 Subject: Bluetooth: A2MP: Create A2MP channel Create and initialize fixed A2MP channel Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 6 ++++ net/bluetooth/Makefile | 3 +- net/bluetooth/a2mp.c | 69 +++++++++++++++++++++++++++++++++++++++++++ net/bluetooth/l2cap_core.c | 6 ++-- 4 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 net/bluetooth/a2mp.c (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index c5726c24ee03..aaba222306b6 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -52,6 +52,8 @@ #define L2CAP_CONN_TIMEOUT msecs_to_jiffies(40000) #define L2CAP_INFO_TIMEOUT msecs_to_jiffies(4000) +#define L2CAP_A2MP_DEFAULT_MTU 670 + /* L2CAP socket address */ struct sockaddr_l2 { sa_family_t l2_family; @@ -236,6 +238,7 @@ struct l2cap_conn_rsp { /* channel indentifier */ #define L2CAP_CID_SIGNALING 0x0001 #define L2CAP_CID_CONN_LESS 0x0002 +#define L2CAP_CID_A2MP 0x0003 #define L2CAP_CID_LE_DATA 0x0004 #define L2CAP_CID_LE_SIGNALING 0x0005 #define L2CAP_CID_SMP 0x0006 @@ -758,5 +761,8 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, void l2cap_chan_busy(struct l2cap_chan *chan, int busy); int l2cap_chan_check_security(struct l2cap_chan *chan); void l2cap_chan_set_defaults(struct l2cap_chan *chan); +int l2cap_ertm_init(struct l2cap_chan *chan); +void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan); +void l2cap_chan_del(struct l2cap_chan *chan, int err); #endif /* __L2CAP_H */ diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 2dc5a5700f53..fa6d94a4602a 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -9,4 +9,5 @@ obj-$(CONFIG_BT_CMTP) += cmtp/ obj-$(CONFIG_BT_HIDP) += hidp/ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ - hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o + hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ + a2mp.o diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c new file mode 100644 index 000000000000..de455a264451 --- /dev/null +++ b/net/bluetooth/a2mp.c @@ -0,0 +1,69 @@ +/* + Copyright (c) 2010,2011 Code Aurora Forum. All rights reserved. + Copyright (c) 2011,2012 Intel Corp. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 and + only version 2 as published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*/ + +#include +#include +#include + +static struct l2cap_ops a2mp_chan_ops = { + .name = "L2CAP A2MP channel", +}; + +static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn) +{ + struct l2cap_chan *chan; + int err; + + chan = l2cap_chan_create(); + if (!chan) + return NULL; + + BT_DBG("chan %p", chan); + + hci_conn_hold(conn->hcon); + + chan->omtu = L2CAP_A2MP_DEFAULT_MTU; + chan->imtu = L2CAP_A2MP_DEFAULT_MTU; + chan->flush_to = L2CAP_DEFAULT_FLUSH_TO; + + chan->ops = &a2mp_chan_ops; + + l2cap_chan_set_defaults(chan); + chan->remote_max_tx = chan->max_tx; + chan->remote_tx_win = chan->tx_win; + + chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO; + chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO; + + skb_queue_head_init(&chan->tx_q); + + chan->mode = L2CAP_MODE_ERTM; + + err = l2cap_ertm_init(chan); + if (err < 0) { + l2cap_chan_del(chan, 0); + return NULL; + } + + chan->conf_state = 0; + + l2cap_chan_add(conn, chan); + + chan->remote_mps = chan->omtu; + chan->mps = chan->omtu; + + chan->state = BT_CONNECTED; + + return chan; +} diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 778c0c8cdc59..2c616cf24c71 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -484,14 +484,14 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) list_add(&chan->list, &conn->chan_l); } -static void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) +void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { mutex_lock(&conn->chan_lock); __l2cap_chan_add(conn, chan); mutex_unlock(&conn->chan_lock); } -static void l2cap_chan_del(struct l2cap_chan *chan, int err) +void l2cap_chan_del(struct l2cap_chan *chan, int err) { struct l2cap_conn *conn = chan->conn; @@ -2691,7 +2691,7 @@ static void l2cap_ack_timeout(struct work_struct *work) l2cap_chan_put(chan); } -static inline int l2cap_ertm_init(struct l2cap_chan *chan) +int l2cap_ertm_init(struct l2cap_chan *chan) { int err; -- cgit v1.2.3 From 9740e49d17e55f3832661fd99a8e0a17e921a82e Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:02 +0300 Subject: Bluetooth: A2MP: AMP Manager basic functions Define AMP Manager and some basic functions. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/a2mp.h | 30 ++++++++++++++++++++++ include/net/bluetooth/hci_core.h | 1 + net/bluetooth/a2mp.c | 54 ++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_conn.c | 4 +++ 4 files changed, 89 insertions(+) create mode 100644 include/net/bluetooth/a2mp.h (limited to 'include/net') diff --git a/include/net/bluetooth/a2mp.h b/include/net/bluetooth/a2mp.h new file mode 100644 index 000000000000..ff4754000cf8 --- /dev/null +++ b/include/net/bluetooth/a2mp.h @@ -0,0 +1,30 @@ +/* + Copyright (c) 2010,2011 Code Aurora Forum. All rights reserved. + Copyright (c) 2011,2012 Intel Corp. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 and + only version 2 as published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*/ + +#ifndef __A2MP_H +#define __A2MP_H + +struct amp_mgr { + struct l2cap_conn *l2cap_conn; + struct l2cap_chan *a2mp_chan; + struct kref kref; + __u8 ident; + __u8 handle; + unsigned long flags; +}; + +void amp_mgr_get(struct amp_mgr *mgr); +int amp_mgr_put(struct amp_mgr *mgr); + +#endif /* __A2MP_H */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d584a47d1c86..6e64b76e30aa 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -332,6 +332,7 @@ struct hci_conn { void *l2cap_data; void *sco_data; void *smp_conn; + struct amp_mgr *amp_mgr; struct hci_conn *link; diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index de455a264451..3c241c2b3e1a 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -15,6 +15,7 @@ #include #include #include +#include static struct l2cap_ops a2mp_chan_ops = { .name = "L2CAP A2MP channel", @@ -67,3 +68,56 @@ static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn) return chan; } + +/* AMP Manager functions */ +void amp_mgr_get(struct amp_mgr *mgr) +{ + BT_DBG("mgr %p", mgr); + + kref_get(&mgr->kref); +} + +static void amp_mgr_destroy(struct kref *kref) +{ + struct amp_mgr *mgr = container_of(kref, struct amp_mgr, kref); + + BT_DBG("mgr %p", mgr); + + kfree(mgr); +} + +int amp_mgr_put(struct amp_mgr *mgr) +{ + BT_DBG("mgr %p", mgr); + + return kref_put(&mgr->kref, &_mgr_destroy); +} + +static struct amp_mgr *amp_mgr_create(struct l2cap_conn *conn) +{ + struct amp_mgr *mgr; + struct l2cap_chan *chan; + + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return NULL; + + BT_DBG("conn %p mgr %p", conn, mgr); + + mgr->l2cap_conn = conn; + + chan = a2mp_chan_open(conn); + if (!chan) { + kfree(mgr); + return NULL; + } + + mgr->a2mp_chan = chan; + chan->data = mgr; + + conn->hcon->amp_mgr = mgr; + + kref_init(&mgr->kref); + + return mgr; +} diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 126876d915f5..1458667b2845 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -28,6 +28,7 @@ #include #include +#include static void hci_le_connect(struct hci_conn *conn) { @@ -411,6 +412,9 @@ int hci_conn_del(struct hci_conn *conn) hci_chan_list_flush(conn); + if (conn->amp_mgr) + amp_mgr_put(conn->amp_mgr); + hci_conn_hash_del(hdev, conn); if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); -- cgit v1.2.3 From f6d3c6e783b0e9f75b18232f8ff8cd5dbc3f7301 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:03 +0300 Subject: Bluetooth: A2MP: Build and Send msg helpers Helper function to build and send A2MP messages. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/a2mp.h | 7 +++++++ net/bluetooth/a2mp.c | 46 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/a2mp.h b/include/net/bluetooth/a2mp.h index ff4754000cf8..654df60cfd6d 100644 --- a/include/net/bluetooth/a2mp.h +++ b/include/net/bluetooth/a2mp.h @@ -24,6 +24,13 @@ struct amp_mgr { unsigned long flags; }; +struct a2mp_cmd { + __u8 code; + __u8 ident; + __le16 len; + __u8 data[0]; +} __packed; + void amp_mgr_get(struct amp_mgr *mgr); int amp_mgr_put(struct amp_mgr *mgr); diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 3c241c2b3e1a..53f49a0b7f9a 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -17,6 +17,52 @@ #include #include +/* A2MP build & send command helper functions */ +static struct a2mp_cmd *__a2mp_build(u8 code, u8 ident, u16 len, void *data) +{ + struct a2mp_cmd *cmd; + int plen; + + plen = sizeof(*cmd) + len; + cmd = kzalloc(plen, GFP_KERNEL); + if (!cmd) + return NULL; + + cmd->code = code; + cmd->ident = ident; + cmd->len = cpu_to_le16(len); + + memcpy(cmd->data, data, len); + + return cmd; +} + +static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, + void *data) +{ + struct l2cap_chan *chan = mgr->a2mp_chan; + struct a2mp_cmd *cmd; + u16 total_len = len + sizeof(*cmd); + struct kvec iv; + struct msghdr msg; + + cmd = __a2mp_build(code, ident, len, data); + if (!cmd) + return; + + iv.iov_base = cmd; + iv.iov_len = total_len; + + memset(&msg, 0, sizeof(msg)); + + msg.msg_iov = (struct iovec *) &iv; + msg.msg_iovlen = 1; + + l2cap_chan_send(chan, &msg, total_len, 0); + + kfree(cmd); +} + static struct l2cap_ops a2mp_chan_ops = { .name = "L2CAP A2MP channel", }; -- cgit v1.2.3 From b9058fb67c42851b4f852d90b11f43279586aae9 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:05 +0300 Subject: Bluetooth: A2MP: Definitions for A2MP commands Define A2MP command IDs and packet structures. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/a2mp.h | 73 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/a2mp.h b/include/net/bluetooth/a2mp.h index 654df60cfd6d..7cbeb911fbd1 100644 --- a/include/net/bluetooth/a2mp.h +++ b/include/net/bluetooth/a2mp.h @@ -31,6 +31,79 @@ struct a2mp_cmd { __u8 data[0]; } __packed; +/* A2MP command codes */ +#define A2MP_COMMAND_REJ 0x01 +struct a2mp_cmd_rej { + __le16 reason; + __u8 data[0]; +} __packed; + +#define A2MP_DISCOVER_REQ 0x02 +struct a2mp_discov_req { + __le16 mtu; + __le16 ext_feat; +} __packed; + +struct a2mp_cl { + __u8 id; + __u8 type; + __u8 status; +} __packed; + +#define A2MP_DISCOVER_RSP 0x03 +struct a2mp_discov_rsp { + __le16 mtu; + __le16 ext_feat; + struct a2mp_cl cl[0]; +} __packed; + +#define A2MP_CHANGE_NOTIFY 0x04 +#define A2MP_CHANGE_RSP 0x05 + +#define A2MP_GETINFO_REQ 0x06 +struct a2mp_info_req { + __u8 id; +} __packed; + +#define A2MP_GETINFO_RSP 0x07 +struct a2mp_info_rsp { + __u8 id; + __u8 status; + __le32 total_bw; + __le32 max_bw; + __le32 min_latency; + __le16 pal_cap; + __le16 assoc_size; +} __packed; + +#define A2MP_GETAMPASSOC_REQ 0x08 +struct a2mp_amp_assoc_req { + __u8 id; +} __packed; + +#define A2MP_GETAMPASSOC_RSP 0x09 +struct a2mp_amp_assoc_rsp { + __u8 id; + __u8 status; + __u8 amp_assoc[0]; +} __packed; + +#define A2MP_CREATEPHYSLINK_REQ 0x0A +#define A2MP_DISCONNPHYSLINK_REQ 0x0C +struct a2mp_physlink_req { + __u8 local_id; + __u8 remote_id; + __u8 amp_assoc[0]; +} __packed; + +#define A2MP_CREATEPHYSLINK_RSP 0x0B +#define A2MP_DISCONNPHYSLINK_RSP 0x0D +struct a2mp_physlink_rsp { + __u8 local_id; + __u8 remote_id; + __u8 status; +} __packed; + void amp_mgr_get(struct amp_mgr *mgr); int amp_mgr_put(struct amp_mgr *mgr); -- cgit v1.2.3 From e7af522e04bcf68caae6802722efc5c6e8fa63a7 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:06 +0300 Subject: Bluetooth: A2MP: Define A2MP status codes A2MP status codes copied from Bluez patch sent by Peter Krystad . Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/a2mp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/a2mp.h b/include/net/bluetooth/a2mp.h index 7cbeb911fbd1..391acd7a67d4 100644 --- a/include/net/bluetooth/a2mp.h +++ b/include/net/bluetooth/a2mp.h @@ -104,6 +104,16 @@ struct a2mp_physlink_rsp { __u8 status; } __packed; +/* A2MP response status */ +#define A2MP_STATUS_SUCCESS 0x00 +#define A2MP_STATUS_INVALID_CTRL_ID 0x01 +#define A2MP_STATUS_UNABLE_START_LINK_CREATION 0x02 +#define A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS 0x02 +#define A2MP_STATUS_COLLISION_OCCURED 0x03 +#define A2MP_STATUS_DISCONN_REQ_RECVD 0x04 +#define A2MP_STATUS_PHYS_LINK_EXISTS 0x05 +#define A2MP_STATUS_SECURITY_VIOLATION 0x06 + void amp_mgr_get(struct amp_mgr *mgr); int amp_mgr_put(struct amp_mgr *mgr); -- cgit v1.2.3 From 8598d064cbf22b2d84c7cd8a9fcb97138baffe3f Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:09 +0300 Subject: Bluetooth: A2MP: Process A2MP Discover Request Adds helper functions to count HCI devs and process A2MP Discover Request, code makes sure that first controller in the list is BREDR one. Trace is shown below: ... > ACL data: handle 11 flags 0x02 dlen 16 A2MP: Discover req: mtu/mps 670 mask: 0x0000 < ACL data: handle 11 flags 0x00 dlen 22 A2MP: Discover rsp: mtu/mps 670 mask: 0x0000 Controller list: id 0 type 0 (BR-EDR) status 0x01 (Bluetooth only) id 1 type 1 (802.11 AMP) status 0x01 (Bluetooth only) ... Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/a2mp.h | 2 + include/net/bluetooth/hci.h | 3 ++ include/net/bluetooth/hci_core.h | 13 ++++++ net/bluetooth/a2mp.c | 85 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/a2mp.h b/include/net/bluetooth/a2mp.h index 391acd7a67d4..96f9cc2cf59b 100644 --- a/include/net/bluetooth/a2mp.h +++ b/include/net/bluetooth/a2mp.h @@ -15,6 +15,8 @@ #ifndef __A2MP_H #define __A2MP_H +#define A2MP_FEAT_EXT 0x8000 + struct amp_mgr { struct l2cap_conn *l2cap_conn; struct l2cap_chan *a2mp_chan; diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index de09a26e4223..66af2c6193d5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -59,6 +59,9 @@ #define HCI_BREDR 0x00 #define HCI_AMP 0x01 +/* First BR/EDR Controller shall have ID = 0 */ +#define HCI_BREDR_ID 0 + /* HCI device quirks */ enum { HCI_QUIRK_RESET_ON_CLOSE, diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6e64b76e30aa..20fd57367ddc 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -641,6 +641,19 @@ static inline void hci_set_drvdata(struct hci_dev *hdev, void *data) dev_set_drvdata(&hdev->dev, data); } +/* hci_dev_list shall be locked */ +static inline uint8_t __hci_num_ctrl(void) +{ + uint8_t count = 0; + struct list_head *p; + + list_for_each(p, &hci_dev_list) { + count++; + } + + return count; +} + struct hci_dev *hci_dev_get(int index); struct hci_dev *hci_get_route(bdaddr_t *src, bdaddr_t *dst); diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 188b42120074..1cc920a62b0f 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -63,6 +63,36 @@ static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, kfree(cmd); } +static inline void __a2mp_cl_bredr(struct a2mp_cl *cl) +{ + cl->id = 0; + cl->type = 0; + cl->status = 1; +} + +/* hci_dev_list shall be locked */ +static void __a2mp_add_cl(struct amp_mgr *mgr, struct a2mp_cl *cl, u8 num_ctrl) +{ + int i = 0; + struct hci_dev *hdev; + + __a2mp_cl_bredr(cl); + + list_for_each_entry(hdev, &hci_dev_list, list) { + /* Iterate through AMP controllers */ + if (hdev->id == HCI_BREDR_ID) + continue; + + /* Starting from second entry */ + if (++i >= num_ctrl) + return; + + cl[i].id = hdev->id; + cl[i].type = hdev->amp_type; + cl[i].status = hdev->amp_status; + } +} + /* Processing A2MP messages */ static int a2mp_command_rej(struct amp_mgr *mgr, struct sk_buff *skb, struct a2mp_cmd *hdr) @@ -79,6 +109,58 @@ static int a2mp_command_rej(struct amp_mgr *mgr, struct sk_buff *skb, return 0; } +static int a2mp_discover_req(struct amp_mgr *mgr, struct sk_buff *skb, + struct a2mp_cmd *hdr) +{ + struct a2mp_discov_req *req = (void *) skb->data; + u16 len = le16_to_cpu(hdr->len); + struct a2mp_discov_rsp *rsp; + u16 ext_feat; + u8 num_ctrl; + + if (len < sizeof(*req)) + return -EINVAL; + + skb_pull(skb, sizeof(*req)); + + ext_feat = le16_to_cpu(req->ext_feat); + + BT_DBG("mtu %d efm 0x%4.4x", le16_to_cpu(req->mtu), ext_feat); + + /* check that packet is not broken for now */ + while (ext_feat & A2MP_FEAT_EXT) { + if (len < sizeof(ext_feat)) + return -EINVAL; + + ext_feat = get_unaligned_le16(skb->data); + BT_DBG("efm 0x%4.4x", ext_feat); + len -= sizeof(ext_feat); + skb_pull(skb, sizeof(ext_feat)); + } + + read_lock(&hci_dev_list_lock); + + num_ctrl = __hci_num_ctrl(); + len = num_ctrl * sizeof(struct a2mp_cl) + sizeof(*rsp); + rsp = kmalloc(len, GFP_ATOMIC); + if (!rsp) { + read_unlock(&hci_dev_list_lock); + return -ENOMEM; + } + + rsp->mtu = __constant_cpu_to_le16(L2CAP_A2MP_DEFAULT_MTU); + rsp->ext_feat = 0; + + __a2mp_add_cl(mgr, rsp->cl, num_ctrl); + + read_unlock(&hci_dev_list_lock); + + a2mp_send(mgr, A2MP_DISCOVER_RSP, hdr->ident, len, rsp); + + kfree(rsp); + return 0; +} + /* Handle A2MP signalling */ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { @@ -109,6 +191,9 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) break; case A2MP_DISCOVER_REQ: + err = a2mp_discover_req(mgr, skb, hdr); + break; + case A2MP_CHANGE_NOTIFY: case A2MP_GETINFO_REQ: case A2MP_GETAMPASSOC_REQ: -- cgit v1.2.3 From 416fa7527d6bf658e5517ea36d2de9270be2c11e Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:16 +0300 Subject: Bluetooth: A2MP: Handling fixed channels A2MP fixed channel do not have sk Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/a2mp.c | 3 +-- net/bluetooth/l2cap_core.c | 17 +++++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index aaba222306b6..a00b43ecbc77 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -581,6 +581,7 @@ struct l2cap_conn { #define L2CAP_CHAN_RAW 1 #define L2CAP_CHAN_CONN_LESS 2 #define L2CAP_CHAN_CONN_ORIENTED 3 +#define L2CAP_CHAN_CONN_FIX_A2MP 4 /* ----- L2CAP socket info ----- */ #define l2cap_pi(sk) ((struct l2cap_pinfo *) sk) diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 6a933dab1b7f..f1ec1b1d308f 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -483,8 +483,7 @@ static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn) hci_conn_hold(conn->hcon); - chan->omtu = L2CAP_A2MP_DEFAULT_MTU; - chan->imtu = L2CAP_A2MP_DEFAULT_MTU; + chan->chan_type = L2CAP_CHAN_CONN_FIX_A2MP; chan->flush_to = L2CAP_DEFAULT_FLUSH_TO; chan->ops = &a2mp_chan_ops; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 2c616cf24c71..fc572795497a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -465,6 +465,13 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) chan->omtu = L2CAP_DEFAULT_MTU; break; + case L2CAP_CHAN_CONN_FIX_A2MP: + chan->scid = L2CAP_CID_A2MP; + chan->dcid = L2CAP_CID_A2MP; + chan->omtu = L2CAP_A2MP_DEFAULT_MTU; + chan->imtu = L2CAP_A2MP_DEFAULT_MTU; + break; + default: /* Raw socket can send/recv signalling messages only */ chan->scid = L2CAP_CID_SIGNALING; @@ -1001,6 +1008,11 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct l2cap_chan *c __clear_ack_timer(chan); } + if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP) { + __l2cap_state_change(chan, BT_DISCONN); + return; + } + req.dcid = cpu_to_le16(chan->dcid); req.scid = cpu_to_le16(chan->scid); l2cap_send_cmd(conn, l2cap_get_ident(conn), @@ -1195,6 +1207,11 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) l2cap_chan_lock(chan); + if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP) { + l2cap_chan_unlock(chan); + continue; + } + if (conn->hcon->type == LE_LINK) { if (smp_conn_security(conn, chan->sec_level)) l2cap_chan_ready(chan); -- cgit v1.2.3 From 97e8e89d2d8185b7644c9941636d3682eedc517b Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:17 +0300 Subject: Bluetooth: A2MP: Manage incoming connections Handle incoming A2MP connection by creating AMP manager and processing A2MP messages. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/a2mp.h | 4 ++++ net/bluetooth/a2mp.c | 16 ++++++++++++++++ net/bluetooth/l2cap_core.c | 19 +++++++++++++++---- 3 files changed, 35 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/a2mp.h b/include/net/bluetooth/a2mp.h index 96f9cc2cf59b..6a76e0a0705e 100644 --- a/include/net/bluetooth/a2mp.h +++ b/include/net/bluetooth/a2mp.h @@ -15,6 +15,8 @@ #ifndef __A2MP_H #define __A2MP_H +#include + #define A2MP_FEAT_EXT 0x8000 struct amp_mgr { @@ -118,5 +120,7 @@ struct a2mp_physlink_rsp { void amp_mgr_get(struct amp_mgr *mgr); int amp_mgr_put(struct amp_mgr *mgr); +struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn, + struct sk_buff *skb); #endif /* __A2MP_H */ diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index f1ec1b1d308f..e08ca2ac31aa 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -569,3 +569,19 @@ static struct amp_mgr *amp_mgr_create(struct l2cap_conn *conn) return mgr; } + +struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn, + struct sk_buff *skb) +{ + struct amp_mgr *mgr; + + mgr = amp_mgr_create(conn); + if (!mgr) { + BT_ERR("Could not create AMP manager"); + return NULL; + } + + BT_DBG("mgr: %p chan %p", mgr, mgr->a2mp_chan); + + return mgr->a2mp_chan; +} diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index fc572795497a..3daac2c6b7b4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -37,6 +37,7 @@ #include #include #include +#include bool disable_ertm; @@ -5132,10 +5133,20 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk chan = l2cap_get_chan_by_scid(conn, cid); if (!chan) { - BT_DBG("unknown cid 0x%4.4x", cid); - /* Drop packet and return */ - kfree_skb(skb); - return 0; + if (cid == L2CAP_CID_A2MP) { + chan = a2mp_channel_create(conn, skb); + if (!chan) { + kfree_skb(skb); + return 0; + } + + l2cap_chan_lock(chan); + } else { + BT_DBG("unknown cid 0x%4.4x", cid); + /* Drop packet and return */ + kfree_skb(skb); + return 0; + } } BT_DBG("chan %p, len %d", chan, skb->len); -- cgit v1.2.3 From 1afd5be87e30997a5623260d9177ed62cf88adbe Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 28 May 2012 11:47:20 +0200 Subject: Bluetooth: Remove unused HCI timeouts definitions Those are not used anywhere in code (and never were since introduction in 2006) so just remove them. Signed-off-by: Szymon Janc Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 66af2c6193d5..3f5d682e866f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -139,10 +139,8 @@ enum { #define HCIINQUIRY _IOR('H', 240, int) /* HCI timeouts */ -#define HCI_CONNECT_TIMEOUT (40000) /* 40 seconds */ #define HCI_DISCONN_TIMEOUT (2000) /* 2 seconds */ #define HCI_PAIRING_TIMEOUT (60000) /* 60 seconds */ -#define HCI_IDLE_TIMEOUT (6000) /* 6 seconds */ #define HCI_INIT_TIMEOUT (10000) /* 10 seconds */ #define HCI_CMD_TIMEOUT (1000) /* 1 seconds */ #define HCI_ACL_TX_TIMEOUT (45000) /* 45 seconds */ -- cgit v1.2.3 From 8c3a4f004e706fd7e681c68c6de4946c8c76b976 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Thu, 31 May 2012 17:01:35 -0300 Subject: Bluetooth: Rename L2CAP_LE_DEFAULT_MTU This patch renames L2CAP_LE_DEFAULT_MTU macro to L2CAP_LE_MIN_MTU since it represents the minimum MTU value, not the default MTU value for LE. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 2 +- net/bluetooth/l2cap_sock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index a00b43ecbc77..ce99c5683d9e 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -40,11 +40,11 @@ #define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */ #define L2CAP_DEFAULT_MAX_PDU_SIZE 1009 /* Sized for 3-DH5 packet */ #define L2CAP_DEFAULT_ACK_TO 200 -#define L2CAP_LE_DEFAULT_MTU 23 #define L2CAP_DEFAULT_MAX_SDU_SIZE 0xFFFF #define L2CAP_DEFAULT_SDU_ITIME 0xFFFFFFFF #define L2CAP_DEFAULT_ACC_LAT 0xFFFFFFFF #define L2CAP_BREDR_MAX_PAYLOAD 1019 /* 3-DH5 packet */ +#define L2CAP_LE_MIN_MTU 23 #define L2CAP_DISC_TIMEOUT msecs_to_jiffies(100) #define L2CAP_DISC_REJ_TIMEOUT msecs_to_jiffies(5000) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index ab5868d94307..a4bb27e8427e 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -449,7 +449,7 @@ static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu) { switch (chan->scid) { case L2CAP_CID_LE_DATA: - if (mtu < L2CAP_LE_DEFAULT_MTU) + if (mtu < L2CAP_LE_MIN_MTU) return false; break; -- cgit v1.2.3 From 7e1af8a3a51dbf5dc7392fb294a0830f7e853aa8 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Tue, 29 May 2012 13:19:26 -0300 Subject: Bluetooth: Create empty l2cap ops function A2MP doesn't use part of the L2CAP chan ops API so we just create general empty function instead of the A2MP specific one. Signed-off-by: Gustavo Padovan Signed-off-by: Johan Hedberg --- include/net/bluetooth/l2cap.h | 12 ++++++++++++ net/bluetooth/a2mp.c | 23 +++-------------------- 2 files changed, 15 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index ce99c5683d9e..d80e3f0691b4 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -740,6 +740,18 @@ static inline __u16 __next_seq(struct l2cap_chan *chan, __u16 seq) return (seq + 1) % (chan->tx_win_max + 1); } +static inline struct l2cap_chan *l2cap_chan_no_new_connection(struct l2cap_chan *chan) +{ + return NULL; +} + +static inline void l2cap_chan_no_teardown(struct l2cap_chan *chan, int err) +{ +} + +static inline void l2cap_chan_no_ready(struct l2cap_chan *chan) +{ +} extern bool disable_ertm; diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 0772c680abe6..fb93250b3938 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -440,23 +440,6 @@ static struct sk_buff *a2mp_chan_alloc_skb_cb(struct l2cap_chan *chan, return bt_skb_alloc(len, GFP_KERNEL); } -static struct l2cap_chan *a2mp_chan_no_new_conn_cb(struct l2cap_chan *chan) -{ - BT_ERR("new_connection for chan %p not implemented", chan); - - return NULL; -} - -static void a2mp_chan_no_teardown_cb(struct l2cap_chan *chan, int err) -{ - BT_ERR("teardown for chan %p not implemented", chan); -} - -static void a2mp_chan_no_ready(struct l2cap_chan *chan) -{ - BT_ERR("ready for chan %p not implemented", chan); -} - static struct l2cap_ops a2mp_chan_ops = { .name = "L2CAP A2MP channel", .recv = a2mp_chan_recv_cb, @@ -465,9 +448,9 @@ static struct l2cap_ops a2mp_chan_ops = { .alloc_skb = a2mp_chan_alloc_skb_cb, /* Not implemented for A2MP */ - .new_connection = a2mp_chan_no_new_conn_cb, - .teardown = a2mp_chan_no_teardown_cb, - .ready = a2mp_chan_no_ready, + .new_connection = l2cap_chan_no_new_connection, + .teardown = l2cap_chan_no_teardown, + .ready = l2cap_chan_no_ready, }; static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn) -- cgit v1.2.3 From 72d7872852e1734e94686012a2e9deade3457329 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 10 May 2012 16:18:26 +0300 Subject: mac80211: allow low-level drivers to set netdev feature bits Low level drivers can now set certain netdev feature bits in netdev_features member of the ieee80211_hw struct. These will be propagated to every netdev created from this HW. The white-listed features currently include only ones related to HW checksumming. Signed-off-by: Arik Nemtsov Signed-off-by: John W. Linville --- include/net/mac80211.h | 5 +++++ net/mac80211/iface.c | 2 ++ net/mac80211/main.c | 7 +++++++ 3 files changed, 14 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 1937c7d98304..0286c0476e44 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1297,6 +1297,10 @@ enum ieee80211_hw_flags { * reports, by default it is set to _MCS, _GI and _BW but doesn't * include _FMT. Use %IEEE80211_RADIOTAP_MCS_HAVE_* values, only * adding _BW is supported today. + * + * @netdev_features: netdev features to be set in each netdev created + * from this HW. Note only HW checksum features are currently + * compatible with mac80211. Other feature bits will be rejected. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -1319,6 +1323,7 @@ struct ieee80211_hw { u8 max_tx_aggregation_subframes; u8 offchannel_tx_hw_queue; u8 radiotap_mcs_details; + netdev_features_t netdev_features; }; /** diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d4c19a7773db..f970e0b3c4b9 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1352,6 +1352,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sdata->u.mgd.use_4addr = params->use_4addr; } + ndev->features |= local->hw.netdev_features; + ret = register_netdevice(ndev); if (ret) goto fail; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index f5548e953259..779ac613ee57 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -682,6 +682,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) enum ieee80211_band band; int channels, max_bitrates; bool supp_ht; + netdev_features_t feature_whitelist; static const u32 cipher_suites[] = { /* keep WEP first, it may be removed below */ WLAN_CIPHER_SUITE_WEP40, @@ -708,6 +709,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if ((hw->flags & IEEE80211_HW_SCAN_WHILE_IDLE) && !local->ops->hw_scan) return -EINVAL; + /* Only HW csum features are currently compatible with mac80211 */ + feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_HW_CSUM; + if (WARN_ON(hw->netdev_features & ~feature_whitelist)) + return -EINVAL; + if (hw->max_report_rates == 0) hw->max_report_rates = hw->max_rates; -- cgit v1.2.3 From d63e9ae3b12fd0c6a3795c9b08de6b476f80b8c3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 15 May 2012 14:20:31 -0700 Subject: net: mac80211: Add and use ht_vdbg debugging macro Simplify the use of #ifdef CONFIG_MAC80211_HT_DEBUG/#endif by adding a logging macro to encapsulate the test. Convert the appropriate uses too. Signed-off-by: Joe Perches Signed-off-by: John W. Linville --- include/net/mac80211.h | 13 +++++++ net/mac80211/agg-rx.c | 24 ++++-------- net/mac80211/agg-tx.c | 103 ++++++++++++++----------------------------------- 3 files changed, 50 insertions(+), 90 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0286c0476e44..808462e2a71d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3844,4 +3844,17 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_vif *vif, */ int ieee80211_ave_rssi(struct ieee80211_vif *vif); +/* Extra debugging macros */ + +#ifdef CONFIG_MAC80211_HT_DEBUG +#define ht_vdbg(fmt, ...) \ + pr_debug(fmt, ##__VA_ARGS__) +#else +#define ht_vdbg(fmt, ...) \ +do { \ + if (0) \ + pr_debug(fmt, ##__VA_ARGS__); \ +} while (0) +#endif + #endif /* MAC80211_H */ diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index ec55f42705b7..a096b0dae37d 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -74,12 +74,10 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Rx BA session stop requested for %pM tid %u %s reason: %d\n", - sta->sta.addr, tid, - initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", - (int)reason); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("Rx BA session stop requested for %pM tid %u %s reason: %d\n", + sta->sta.addr, tid, + initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", + (int)reason); if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, &sta->sta, tid, NULL, 0)) @@ -154,9 +152,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) return; } -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("rx session timer expired on tid %d\n", (u16)*ptid); -#endif + ht_vdbg("rx session timer expired on tid %d\n", (u16)*ptid); + set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); } @@ -243,9 +240,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_REQUEST_DECLINED; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Suspend in progress - Denying ADDBA request\n"); -#endif + ht_vdbg("Suspend in progress - Denying ADDBA request\n"); goto end_no_lock; } @@ -317,10 +312,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, &sta->sta, tid, &start_seq_num, 0); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Rx A-MPDU request on tid %d result %d\n", tid, ret); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - + ht_vdbg("Rx A-MPDU request on tid %d result %d\n", tid, ret); if (ret) { kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 90b2c0ffd5b0..da07f01cfe4d 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -184,10 +184,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, spin_unlock_bh(&sta->lock); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Tx BA session stop requested for %pM tid %u\n", - sta->sta.addr, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("Tx BA session stop requested for %pM tid %u\n", + sta->sta.addr, tid); del_timer_sync(&tid_tx->addba_resp_timer); del_timer_sync(&tid_tx->session_timer); @@ -253,16 +251,12 @@ static void sta_addba_resp_timer_expired(unsigned long data) if (!tid_tx || test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { rcu_read_unlock(); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("timer expired on tid %d but we are not (or no longer) expecting addBA response there\n", - tid); -#endif + ht_vdbg("timer expired on tid %d but we are not (or no longer) expecting addBA response there\n", + tid); return; } -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("addBA response timer expired on tid %d\n", tid); -#endif + ht_vdbg("addBA response timer expired on tid %d\n", tid); ieee80211_stop_tx_ba_session(&sta->sta, tid); rcu_read_unlock(); @@ -371,10 +365,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, &sta->sta, tid, &start_seq_num, 0); if (ret) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("BA request denied - HW unavailable for tid %d\n", - tid); -#endif + ht_vdbg("BA request denied - HW unavailable for tid %d\n", tid); spin_lock_bh(&sta->lock); ieee80211_agg_splice_packets(sdata, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); @@ -387,9 +378,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("activated addBA response timer on tid %d\n", tid); -#endif + ht_vdbg("activated addBA response timer on tid %d\n", tid); spin_lock_bh(&sta->lock); sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; @@ -436,9 +425,7 @@ static void sta_tx_agg_session_timer_expired(unsigned long data) rcu_read_unlock(); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("tx session timer expired on tid %d\n", (u16)*ptid); -#endif + ht_vdbg("tx session timer expired on tid %d\n", (u16)*ptid); ieee80211_stop_tx_ba_session(&sta->sta, *ptid); } @@ -462,10 +449,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) return -EINVAL; -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Open BA session requested for %pM tid %u\n", - pubsta->addr, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("Open BA session requested for %pM tid %u\n", + pubsta->addr, tid); if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_MESH_POINT && @@ -475,9 +460,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("BA sessions blocked - Denying BA session request\n"); -#endif + ht_vdbg("BA sessions blocked - Denying BA session request\n"); return -EINVAL; } @@ -495,10 +478,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, */ if (sta->sdata->vif.type == NL80211_IFTYPE_ADHOC && !sta->sta.ht_cap.ht_supported) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("BA request denied - IBSS STA %pM does not advertise HT support\n", - pubsta->addr); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("BA request denied - IBSS STA %pM does not advertise HT support\n", + pubsta->addr); return -EINVAL; } @@ -518,10 +499,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_BURST_RETRIES && time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] + HT_AGG_RETRIES_PERIOD)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("BA request denied - waiting a grace period after %d failed requests on tid %u\n", - sta->ampdu_mlme.addba_req_num[tid], tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("BA request denied - waiting a grace period after %d failed requests on tid %u\n", + sta->ampdu_mlme.addba_req_num[tid], tid); ret = -EBUSY; goto err_unlock_sta; } @@ -529,10 +508,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, tid_tx = rcu_dereference_protected_tid_tx(sta, tid); /* check if the TID is not in aggregation flow already */ if (tid_tx || sta->ampdu_mlme.tid_start_tx[tid]) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("BA request denied - session is not idle on tid %u\n", - tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("BA request denied - session is not idle on tid %u\n", + tid); ret = -EAGAIN; goto err_unlock_sta; } @@ -587,9 +564,7 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, tid_tx = rcu_dereference_protected_tid_tx(sta, tid); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Aggregation is on for tid %d\n", tid); -#endif + ht_vdbg("Aggregation is on for tid %d\n", tid); drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, @@ -623,9 +598,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) trace_api_start_tx_ba_cb(sdata, ra, tid); if (tid >= STA_TID_NUM) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); -#endif + ht_vdbg("Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); return; } @@ -633,9 +606,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) sta = sta_info_get_bss(sdata, ra); if (!sta) { mutex_unlock(&local->sta_mtx); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Could not find station: %pM\n", ra); -#endif + ht_vdbg("Could not find station: %pM\n", ra); return; } @@ -643,9 +614,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (WARN_ON(!tid_tx)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("addBA was not requested!\n"); -#endif + ht_vdbg("addBA was not requested!\n"); goto unlock; } @@ -745,23 +714,17 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) trace_api_stop_tx_ba_cb(sdata, ra, tid); if (tid >= STA_TID_NUM) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); -#endif + ht_vdbg("Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); return; } -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Stopping Tx BA session for %pM tid %d\n", ra, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("Stopping Tx BA session for %pM tid %d\n", ra, tid); mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, ra); if (!sta) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Could not find station: %pM\n", ra); -#endif + ht_vdbg("Could not find station: %pM\n", ra); goto unlock; } @@ -770,9 +733,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("unexpected callback to A-MPDU stop\n"); -#endif + ht_vdbg("unexpected callback to A-MPDU stop\n"); goto unlock_sta; } @@ -848,17 +809,13 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, goto out; if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("wrong addBA response token, tid %d\n", tid); -#endif + ht_vdbg("wrong addBA response token, tid %d\n", tid); goto out; } del_timer_sync(&tid_tx->addba_resp_timer); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("switched off addBA timer for tid %d\n", tid); -#endif + ht_vdbg("switched off addBA timer for tid %d\n", tid); /* * addba_resp_timer may have fired before we got here, and @@ -867,10 +824,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, */ if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("got addBA resp for tid %d but we already gave up\n", - tid); -#endif + ht_vdbg("got addBA resp for tid %d but we already gave up\n", + tid); goto out; } -- cgit v1.2.3 From 499f42bb03a9bd8a23f73e7c3886f70f52e7edc5 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 15 May 2012 14:20:32 -0700 Subject: net: mac80211: Add and use ibss_vdbg debugging macro Simplify the use of #ifdef CONFIG_MAC80211_IBSS_DEBUG/#endif by adding a logging macro to encapsulate the test. Convert the appropriate uses too. Signed-off-by: Joe Perches Signed-off-by: John W. Linville --- include/net/mac80211.h | 11 +++++++ net/mac80211/ibss.c | 79 ++++++++++++++++++------------------------------- net/mac80211/sta_info.c | 6 ++-- 3 files changed, 41 insertions(+), 55 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 808462e2a71d..98da61e52a45 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3857,4 +3857,15 @@ do { \ } while (0) #endif +#ifdef CONFIG_MAC80211_IBSS_DEBUG +#define ibss_vdbg(fmt, ...) \ + pr_debug(fmt, ##__VA_ARGS__) +#else +#define ibss_vdbg(fmt, ...) \ +do { \ + if (0) \ + pr_debug(fmt, ##__VA_ARGS__); \ +} while (0) +#endif + #endif /* MAC80211_H */ diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 148c27553024..0bc47a825692 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -281,10 +281,8 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, if (sta_info_insert_rcu(sta)) return sta_info_get(sdata, addr); if (auth) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", - sdata->vif.addr, sdata->u.ibss.bssid, addr); -#endif + ibss_vdbg("TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", + sdata->vif.addr, sdata->u.ibss.bssid, addr); ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, NULL, 0, addr, sdata->u.ibss.bssid, NULL, 0, 0); } @@ -354,11 +352,9 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", - sdata->name, mgmt->sa, mgmt->da, mgmt->bssid, - auth_transaction); -#endif + ibss_vdbg("%s: RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", + sdata->name, mgmt->sa, mgmt->da, mgmt->bssid, + auth_transaction); sta_info_destroy_addr(sdata, mgmt->sa); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false); rcu_read_unlock(); @@ -421,12 +417,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, ieee80211_mandatory_rates(local, band); if (sta->sta.supp_rates[band] != prev_rates) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", - sdata->name, sta->sta.addr, - prev_rates, - sta->sta.supp_rates[band]); -#endif + ibss_vdbg("%s: updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", + sdata->name, sta->sta.addr, + prev_rates, + sta->sta.supp_rates[band]); rates_updated = true; } } else { @@ -541,20 +535,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, rx_timestamp = drv_get_tsf(local, sdata); } -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", - mgmt->sa, mgmt->bssid, - (unsigned long long)rx_timestamp, - (unsigned long long)beacon_timestamp, - (unsigned long long)(rx_timestamp - beacon_timestamp), - jiffies); -#endif + ibss_vdbg("RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", + mgmt->sa, mgmt->bssid, + (unsigned long long)rx_timestamp, + (unsigned long long)beacon_timestamp, + (unsigned long long)(rx_timestamp - beacon_timestamp), + jiffies); if (beacon_timestamp > rx_timestamp) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n", - sdata->name, mgmt->bssid); -#endif + ibss_vdbg("%s: beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n", + sdata->name, mgmt->bssid); ieee80211_sta_join_ibss(sdata, bss); supp_rates = ieee80211_sta_get_rates(local, elems, band, NULL); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, @@ -717,10 +707,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&ifibss->mtx); active_ibss = ieee80211_sta_active_ibss(sdata); -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: sta_find_ibss (active_ibss=%d)\n", - sdata->name, active_ibss); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + ibss_vdbg("%s: sta_find_ibss (active_ibss=%d)\n", + sdata->name, active_ibss); if (active_ibss) return; @@ -743,11 +731,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) struct ieee80211_bss *bss; bss = (void *)cbss->priv; -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug(" sta_find_ibss: selected %pM current %pM\n", - cbss->bssid, ifibss->bssid); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ - + ibss_vdbg(" sta_find_ibss: selected %pM current %pM\n", + cbss->bssid, ifibss->bssid); pr_debug("%s: Selected IBSS BSSID %pM based on configured SSID\n", sdata->name, cbss->bssid); @@ -756,9 +741,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) return; } -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug(" did not try to join ibss\n"); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + ibss_vdbg(" did not try to join ibss\n"); /* Selected IBSS not found in current scan results - try to scan */ if (time_after(jiffies, ifibss->last_scan_completed + @@ -815,11 +798,9 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, tx_last_beacon = drv_tx_last_beacon(local); -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n", - sdata->name, mgmt->sa, mgmt->da, - mgmt->bssid, tx_last_beacon); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + ibss_vdbg("%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n", + sdata->name, mgmt->sa, mgmt->da, + mgmt->bssid, tx_last_beacon); if (!tx_last_beacon && is_multicast_ether_addr(mgmt->da)) return; @@ -832,10 +813,8 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, pos = mgmt->u.probe_req.variable; if (pos[0] != WLAN_EID_SSID || pos + 2 + pos[1] > end) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: Invalid SSID IE in ProbeReq from %pM\n", - sdata->name, mgmt->sa); -#endif + ibss_vdbg("%s: Invalid SSID IE in ProbeReq from %pM\n", + sdata->name, mgmt->sa); return; } if (pos[1] != 0 && @@ -852,9 +831,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, resp = (struct ieee80211_mgmt *) skb->data; memcpy(resp->da, mgmt->sa, ETH_ALEN); -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: Sending ProbeResp to %pM\n", sdata->name, resp->da); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + ibss_vdbg("%s: Sending ProbeResp to %pM\n", sdata->name, resp->da); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4be509807607..0a70f797dcf1 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -887,10 +887,8 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, continue; if (time_after(jiffies, sta->last_rx + exp_time)) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: expiring inactive STA %pM\n", - sdata->name, sta->sta.addr); -#endif + ibss_vdbg("%s: expiring inactive STA %pM\n", + sdata->name, sta->sta.addr); WARN_ON(__sta_info_destroy(sta)); } } -- cgit v1.2.3 From 51ca9d8db280b960345e7306e6a036dd3880ecff Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 16 May 2012 19:09:48 +0300 Subject: mac80211: remove ieee80211_get_operstate() ieee80211_get_operstate() was used by drivers in order to know whether the sta link is up, but it's no longer needed (nor used) as mac80211 notifies the drivers about authorization changes (via the sta_state callback) Signed-off-by: Eliad Peller Signed-off-by: John W. Linville --- include/net/mac80211.h | 10 ---------- net/mac80211/mlme.c | 7 ------- 2 files changed, 17 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 98da61e52a45..d2ed86da8e4c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3555,16 +3555,6 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); -/** - * ieee80211_get_operstate - get the operstate of the vif - * - * @vif: &struct ieee80211_vif pointer from the add_interface callback. - * - * The driver might need to know the operstate of the net_device - * (specifically, whether the link is IF_OPER_UP after resume) - */ -unsigned char ieee80211_get_operstate(struct ieee80211_vif *vif); - /** * ieee80211_chswitch_done - Complete channel switch process * @vif: &struct ieee80211_vif pointer from the add_interface callback. diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 419301c69582..0c4e26ff7d91 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3530,10 +3530,3 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp); } EXPORT_SYMBOL(ieee80211_cqm_rssi_notify); - -unsigned char ieee80211_get_operstate(struct ieee80211_vif *vif) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - return sdata->dev->operstate; -} -EXPORT_SYMBOL(ieee80211_get_operstate); -- cgit v1.2.3 From d58e7e37aac0465b08527adadc8016421bd4060e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 May 2012 23:50:17 +0200 Subject: cfg80211: simplify cfg80211_can_beacon_sec_chan API Change cfg80211_can_beacon_sec_chan() to return true if there is no secondary channel to simplify all the current users of it. They all check the channel type before calling the function because it returns false if there's no secondary channel. Also actually document the return value. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 5 ++++- net/mac80211/ibss.c | 3 +-- net/wireless/chan.c | 22 ++++++---------------- 3 files changed, 11 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0289d4ce7070..a8496f4ff5f1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3359,11 +3359,14 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, const u8 *frame, size_t len, int freq, int sig_dbm, gfp_t gfp); -/* +/** * cfg80211_can_beacon_sec_chan - test if ht40 on extension channel can be used * @wiphy: the wiphy * @chan: main channel * @channel_type: HT mode + * + * This function returns true if there is no secondary channel or the secondary + * channel can be used for beaconing (i.e. is not a radar channel etc.) */ bool cfg80211_can_beacon_sec_chan(struct wiphy *wiphy, struct ieee80211_channel *chan, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 0bc47a825692..725cb4be229d 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -82,8 +82,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, local->oper_channel = chan; channel_type = ifibss->channel_type; - if (channel_type > NL80211_CHAN_HT20 && - !cfg80211_can_beacon_sec_chan(local->hw.wiphy, chan, channel_type)) + if (!cfg80211_can_beacon_sec_chan(local->hw.wiphy, chan, channel_type)) channel_type = NL80211_CHAN_HT20; if (!ieee80211_set_channel_type(local, sdata, channel_type)) { /* can only fail due to HT40+/- mismatch */ diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 884801ac4dd0..20b87d895722 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -60,7 +60,7 @@ bool cfg80211_can_beacon_sec_chan(struct wiphy *wiphy, diff = -20; break; default: - return false; + return true; } sec_chan = ieee80211_get_channel(wiphy, chan->center_freq + diff); @@ -107,21 +107,11 @@ int cfg80211_set_freq(struct cfg80211_registered_device *rdev, wdev->iftype == NL80211_IFTYPE_AP || wdev->iftype == NL80211_IFTYPE_AP_VLAN || wdev->iftype == NL80211_IFTYPE_MESH_POINT || - wdev->iftype == NL80211_IFTYPE_P2P_GO)) { - switch (channel_type) { - case NL80211_CHAN_HT40PLUS: - case NL80211_CHAN_HT40MINUS: - if (!cfg80211_can_beacon_sec_chan(&rdev->wiphy, chan, - channel_type)) { - printk(KERN_DEBUG - "cfg80211: Secondary channel not " - "allowed to initiate communication\n"); - return -EINVAL; - } - break; - default: - break; - } + wdev->iftype == NL80211_IFTYPE_P2P_GO) && + !cfg80211_can_beacon_sec_chan(&rdev->wiphy, chan, channel_type)) { + printk(KERN_DEBUG + "cfg80211: Secondary channel not allowed to beacon\n"); + return -EINVAL; } result = rdev->ops->set_channel(&rdev->wiphy, -- cgit v1.2.3 From aa430da41019c1694f6a8e3b8bef1d12ed52b0ad Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 May 2012 23:50:18 +0200 Subject: cfg80211: provide channel to start_ap function Instead of setting the channel first and then starting the AP, let cfg80211 store the channel and provide it as one of the AP settings. This means that now you have to set the channel before you can start an AP interface, but since hostapd/wpa_supplicant always do that we're OK with this change. Alternatively, it's now possible to give the channel as an attribute to the start-ap nl80211 command, overriding any preset channel. Cc: Kalle Valo Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 36 ++------------------ drivers/net/wireless/ath/ath6kl/core.h | 3 -- drivers/net/wireless/ath/ath6kl/main.c | 1 - include/linux/nl80211.h | 2 ++ include/net/cfg80211.h | 12 ++++++- net/mac80211/cfg.c | 5 +++ net/wireless/nl80211.c | 53 ++++++++++++++++++++++++++++-- 7 files changed, 72 insertions(+), 40 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index b869a358ce43..f27e9732951d 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -2585,35 +2585,6 @@ static int ath6kl_set_ies(struct ath6kl_vif *vif, return 0; } -static int ath6kl_set_channel(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type) -{ - struct ath6kl_vif *vif; - - /* - * 'dev' could be NULL if a channel change is required for the hardware - * device itself, instead of a particular VIF. - * - * FIXME: To be handled properly when monitor mode is supported. - */ - if (!dev) - return -EBUSY; - - vif = netdev_priv(dev); - - if (!ath6kl_cfg80211_ready(vif)) - return -EIO; - - ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: center_freq=%u hw_value=%u\n", - __func__, chan->center_freq, chan->hw_value); - vif->next_chan = chan->center_freq; - vif->next_ch_type = channel_type; - vif->next_ch_band = chan->band; - - return 0; -} - static int ath6kl_get_rsn_capab(struct cfg80211_beacon_data *beacon, u8 *rsn_capab) { @@ -2791,7 +2762,7 @@ static int ath6kl_start_ap(struct wiphy *wiphy, struct net_device *dev, p.ssid_len = vif->ssid_len; memcpy(p.ssid, vif->ssid, vif->ssid_len); p.dot11_auth_mode = vif->dot11_auth_mode; - p.ch = cpu_to_le16(vif->next_chan); + p.ch = cpu_to_le16(info->channel->center_freq); /* Enable uAPSD support by default */ res = ath6kl_wmi_ap_set_apsd(ar->wmi, vif->fw_vif_idx, true); @@ -2815,8 +2786,8 @@ static int ath6kl_start_ap(struct wiphy *wiphy, struct net_device *dev, return res; } - if (ath6kl_set_htcap(vif, vif->next_ch_band, - vif->next_ch_type != NL80211_CHAN_NO_HT)) + if (ath6kl_set_htcap(vif, info->channel->band, + info->channel_type != NL80211_CHAN_NO_HT)) return -EIO; /* @@ -3271,7 +3242,6 @@ static struct cfg80211_ops ath6kl_cfg80211_ops = { .suspend = __ath6kl_cfg80211_suspend, .resume = __ath6kl_cfg80211_resume, #endif - .set_channel = ath6kl_set_channel, .start_ap = ath6kl_start_ap, .change_beacon = ath6kl_change_beacon, .stop_ap = ath6kl_stop_ap, diff --git a/drivers/net/wireless/ath/ath6kl/core.h b/drivers/net/wireless/ath/ath6kl/core.h index 4d9c6f142698..8443b2a4133e 100644 --- a/drivers/net/wireless/ath/ath6kl/core.h +++ b/drivers/net/wireless/ath/ath6kl/core.h @@ -553,9 +553,6 @@ struct ath6kl_vif { u32 last_cancel_roc_id; u32 send_action_id; bool probe_req_report; - u16 next_chan; - enum nl80211_channel_type next_ch_type; - enum ieee80211_band next_ch_band; u16 assoc_bss_beacon_int; u16 listen_intvl_t; u16 bmiss_time_t; diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c index e5524470529c..b836f2795114 100644 --- a/drivers/net/wireless/ath/ath6kl/main.c +++ b/drivers/net/wireless/ath/ath6kl/main.c @@ -598,7 +598,6 @@ static int ath6kl_commit_ch_switch(struct ath6kl_vif *vif, u16 channel) struct ath6kl *ar = vif->ar; - vif->next_chan = channel; vif->profile.ch = cpu_to_le16(channel); switch (vif->nw_type) { diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 6930dddad18a..85e5037a218d 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -170,6 +170,8 @@ * %NL80211_ATTR_CIPHER_GROUP, %NL80211_ATTR_WPA_VERSIONS, * %NL80211_ATTR_AKM_SUITES, %NL80211_ATTR_PRIVACY, * %NL80211_ATTR_AUTH_TYPE and %NL80211_ATTR_INACTIVITY_TIMEOUT. + * The channel to use can be set on the interface or be given using the + * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_WIPHY_CHANNEL_TYPE attrs. * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP * @NL80211_CMD_STOP_AP: Stop AP operation on the given interface * @NL80211_CMD_DEL_BEACON: old alias for %NL80211_CMD_STOP_AP diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a8496f4ff5f1..a54fb895f613 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -404,6 +404,8 @@ struct cfg80211_beacon_data { * * Used to configure an AP interface. * + * @channel: the channel to start the AP on + * @channel_type: the channel type to use * @beacon: beacon data * @beacon_interval: beacon interval * @dtim_period: DTIM period @@ -417,6 +419,9 @@ struct cfg80211_beacon_data { * @inactivity_timeout: time in seconds to determine station's inactivity. */ struct cfg80211_ap_settings { + struct ieee80211_channel *channel; + enum nl80211_channel_type channel_type; + struct cfg80211_beacon_data beacon; int beacon_interval, dtim_period; @@ -2263,7 +2268,10 @@ struct cfg80211_cached_keys; * @netdev: (private) Used to reference back to the netdev * @current_bss: (private) Used by the internal configuration code * @channel: (private) Used by the internal configuration code to track - * user-set AP, monitor and WDS channels for wireless extensions + * the user-set AP, monitor and WDS channel + * @preset_chan: (private) Used by the internal configuration code to + * track the channel to be used for AP later + * @preset_chantype: (private) the corresponding channel type * @bssid: (private) Used by the internal configuration code * @ssid: (private) Used by the internal configuration code * @ssid_len: (private) Used by the internal configuration code @@ -2314,6 +2322,8 @@ struct wireless_dev { struct cfg80211_internal_bss *current_bss; /* associated / joined */ struct ieee80211_channel *channel; + struct ieee80211_channel *preset_chan; + enum nl80211_channel_type preset_chantype; bool ps; int ps_timeout; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9aab849fd6cf..8e9d525c4653 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -823,6 +823,11 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (old) return -EALREADY; + err = ieee80211_set_channel(wiphy, dev, params->channel, + params->channel_type); + if (err) + return err; + /* * Apply control port protocol, this allows us to * not encrypt dynamic WEP control frames. diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 206465dc0cab..74f4a8f93935 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -921,7 +921,11 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; } - CMD(set_channel, SET_CHANNEL); + if (dev->ops->set_channel || dev->ops->start_ap) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) + goto nla_put_failure; + } CMD(set_wds_peer, SET_WDS_PEER); if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { CMD(tdls_mgmt, TDLS_MGMT); @@ -1170,6 +1174,9 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) * Monitors are special as they are normally slaved to * whatever else is going on, so they behave as though * you tried setting the wiphy channel itself. + * + * For AP/GO modes, it's only for compatibility, you can + * also give the channel to the start-AP command. */ return !wdev || wdev->iftype == NL80211_IFTYPE_AP || @@ -1204,6 +1211,7 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct genl_info *info) { + struct ieee80211_channel *channel; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; u32 freq; int result; @@ -1221,7 +1229,25 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); mutex_lock(&rdev->devlist_mtx); - if (wdev) { + if (wdev) switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + if (wdev->beacon_interval) { + result = -EBUSY; + break; + } + channel = rdev_freq_to_chan(rdev, freq, channel_type); + if (!channel || !cfg80211_can_beacon_sec_chan(&rdev->wiphy, + channel, + channel_type)) { + result = -EINVAL; + break; + } + wdev->preset_chan = channel; + wdev->preset_chantype = channel_type; + result = 0; + break; + default: wdev_lock(wdev); result = cfg80211_set_freq(rdev, wdev, freq, channel_type); wdev_unlock(wdev); @@ -2299,6 +2325,29 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]); } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE] && + !nl80211_valid_channel_type(info, &channel_type)) + return -EINVAL; + + params.channel = rdev_freq_to_chan(rdev, + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]), + channel_type); + if (!params.channel) + return -EINVAL; + params.channel_type = channel_type; + } else if (wdev->preset_chan) { + params.channel = wdev->preset_chan; + params.channel_type = wdev->preset_chantype; + } else + return -EINVAL; + + if (!cfg80211_can_beacon_sec_chan(&rdev->wiphy, params.channel, + params.channel_type)) + return -EINVAL; + err = rdev->ops->start_ap(&rdev->wiphy, dev, ¶ms); if (!err) wdev->beacon_interval = params.beacon_interval; -- cgit v1.2.3 From cc1d2806bf06ab92268343d26eb3d8d8f00f8bc9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 May 2012 23:50:20 +0200 Subject: cfg80211: provide channel to join_mesh function Just like the AP mode patch, instead of setting the channel and then joining the mesh network, provide the channel to join the network on to the join_mesh() function. Like in AP mode, you can also give the channel to the join-mesh nl80211 command now. Unlike AP mode, it picks a default channel if none was given. As libertas uses mesh mode interfaces but has no join_mesh callback and we can't simply break it, keep some compatibility code for that case and configure the channel directly for it. In the non-libertas case, where we store the channel until join, allow setting it while the interface is down. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 4 ++ net/mac80211/cfg.c | 6 +++ net/wireless/core.h | 7 +++- net/wireless/mesh.c | 91 +++++++++++++++++++++++++++++++++++++++++++++- net/wireless/nl80211.c | 43 +++++++++++++++++----- net/wireless/wext-compat.c | 12 +++++- 6 files changed, 148 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a54fb895f613..4c90c44b8b75 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -831,6 +831,8 @@ struct mesh_config { /** * struct mesh_setup - 802.11s mesh setup configuration + * @channel: the channel to start the mesh network on + * @channel_type: the channel type to use * @mesh_id: the mesh ID * @mesh_id_len: length of the mesh ID, at least 1 and at most 32 bytes * @sync_method: which synchronization method to use @@ -845,6 +847,8 @@ struct mesh_config { * These parameters are fixed when the mesh is created. */ struct mesh_setup { + struct ieee80211_channel *channel; + enum nl80211_channel_type channel_type; const u8 *mesh_id; u8 mesh_id_len; u8 sync_method; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8e9d525c4653..f47af8b3185e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1598,6 +1598,12 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, err = copy_mesh_setup(ifmsh, setup); if (err) return err; + + err = ieee80211_set_channel(wiphy, dev, setup->channel, + setup->channel_type); + if (err) + return err; + ieee80211_start_mesh(sdata); return 0; diff --git a/net/wireless/core.h b/net/wireless/core.h index 8523f3878677..1d3d24126946 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -303,14 +303,17 @@ extern const struct mesh_config default_mesh_config; extern const struct mesh_setup default_mesh_setup; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, - const struct mesh_setup *setup, + struct mesh_setup *setup, const struct mesh_config *conf); int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, - const struct mesh_setup *setup, + struct mesh_setup *setup, const struct mesh_config *conf); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); +int cfg80211_set_mesh_freq(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, int freq, + enum nl80211_channel_type channel_type); /* MLME */ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 2749cb86b462..2e3b700eba32 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -65,6 +65,9 @@ const struct mesh_config default_mesh_config = { }; const struct mesh_setup default_mesh_setup = { + /* cfg80211_join_mesh() will pick a channel if needed */ + .channel = NULL, + .channel_type = NL80211_CHAN_NO_HT, .sync_method = IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET, .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, .path_metric = IEEE80211_PATH_METRIC_AIRTIME, @@ -75,7 +78,7 @@ const struct mesh_setup default_mesh_setup = { int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, - const struct mesh_setup *setup, + struct mesh_setup *setup, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -101,6 +104,51 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!rdev->ops->join_mesh) return -EOPNOTSUPP; + if (!setup->channel) { + /* if no channel explicitly given, use preset channel */ + setup->channel = wdev->preset_chan; + setup->channel_type = wdev->preset_chantype; + } + + if (!setup->channel) { + /* if we don't have that either, use the first usable channel */ + enum ieee80211_band band; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + struct ieee80211_supported_band *sband; + struct ieee80211_channel *chan; + int i; + + sband = rdev->wiphy.bands[band]; + if (!sband) + continue; + + for (i = 0; i < sband->n_channels; i++) { + chan = &sband->channels[i]; + if (chan->flags & (IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_PASSIVE_SCAN | + IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_RADAR)) + continue; + setup->channel = chan; + break; + } + + if (setup->channel) + break; + } + + /* no usable channel ... */ + if (!setup->channel) + return -EINVAL; + + setup->channel_type = NL80211_CHAN_NO_HT; + } + + if (!cfg80211_can_beacon_sec_chan(&rdev->wiphy, setup->channel, + setup->channel_type)) + return -EINVAL; + err = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, setup); if (!err) { memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); @@ -112,7 +160,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, - const struct mesh_setup *setup, + struct mesh_setup *setup, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -125,6 +173,45 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, return err; } +int cfg80211_set_mesh_freq(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, int freq, + enum nl80211_channel_type channel_type) +{ + struct ieee80211_channel *channel; + + /* + * Workaround for libertas (only!), it puts the interface + * into mesh mode but doesn't implement join_mesh. Instead, + * it is configured via sysfs and then joins the mesh when + * you set the channel. Note that the libertas mesh isn't + * compatible with 802.11 mesh. + */ + if (!rdev->ops->join_mesh) { + int err; + + if (!netif_running(wdev->netdev)) + return -ENETDOWN; + wdev_lock(wdev); + err = cfg80211_set_freq(rdev, wdev, freq, channel_type); + wdev_unlock(wdev); + + return err; + } + + if (wdev->mesh_id_len) + return -EBUSY; + + channel = rdev_freq_to_chan(rdev, freq, channel_type); + if (!channel || !cfg80211_can_beacon_sec_chan(&rdev->wiphy, + channel, + channel_type)) { + return -EINVAL; + } + wdev->preset_chan = channel; + wdev->preset_chantype = channel_type; + return 0; +} + void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *macaddr, const u8* ie, u8 ie_len, gfp_t gfp) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 089a5204dad5..b22f1f876881 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -921,7 +921,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; } - if (dev->ops->set_channel || dev->ops->start_ap) { + if (dev->ops->set_channel || dev->ops->start_ap || + dev->ops->join_mesh) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) goto nla_put_failure; @@ -1166,17 +1167,19 @@ static int parse_txq_params(struct nlattr *tb[], static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) { /* - * You can only set the channel explicitly for AP and - * mesh type interfaces; all others have their channel - * managed via their respective "establish a connection" - * command (connect, join, ...) + * You can only set the channel explicitly for WDS interfaces, + * all others have their channel managed via their respective + * "establish a connection" command (connect, join, ...) + * + * For AP/GO and mesh mode, the channel can be set with the + * channel userspace API, but is only stored and passed to the + * low-level driver when the AP starts or the mesh is joined. + * This is for backward compatibility, userspace can also give + * the channel in the start-ap or join-mesh commands instead. * * Monitors are special as they are normally slaved to * whatever else is going on, so they behave as though * you tried setting the wiphy channel itself. - * - * For AP/GO modes, it's only for compatibility, you can - * also give the channel to the start-AP command. */ return !wdev || wdev->iftype == NL80211_IFTYPE_AP || @@ -1246,6 +1249,9 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, wdev->preset_chantype = channel_type; result = 0; break; + case NL80211_IFTYPE_MESH_POINT: + result = cfg80211_set_mesh_freq(rdev, wdev, freq, channel_type); + break; default: wdev_lock(wdev); result = cfg80211_set_freq(rdev, wdev, freq, channel_type); @@ -1335,8 +1341,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = 0; mutex_lock(&rdev->mtx); - } else if (netif_running(netdev) && - nl80211_can_set_dev_channel(netdev->ieee80211_ptr)) + } else if (nl80211_can_set_dev_channel(netdev->ieee80211_ptr)) wdev = netdev->ieee80211_ptr; else wdev = NULL; @@ -6080,6 +6085,24 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) return err; } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE] && + !nl80211_valid_channel_type(info, &channel_type)) + return -EINVAL; + + setup.channel = rdev_freq_to_chan(rdev, + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]), + channel_type); + if (!setup.channel) + return -EINVAL; + setup.channel_type = channel_type; + } else { + /* cfg80211_join_mesh() will sort it out */ + setup.channel = NULL; + } + return cfg80211_join_mesh(rdev, dev, &setup, &cfg); } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index b082fcc26f06..faeb03548aa4 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -796,7 +796,6 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); case NL80211_IFTYPE_MONITOR: - case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); if (freq < 0) return freq; @@ -808,6 +807,17 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, wdev_unlock(wdev); mutex_unlock(&rdev->devlist_mtx); return err; + case NL80211_IFTYPE_MESH_POINT: + freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); + if (freq < 0) + return freq; + if (freq == 0) + return -EINVAL; + mutex_lock(&rdev->devlist_mtx); + err = cfg80211_set_mesh_freq(rdev, wdev, freq, + NL80211_CHAN_NO_HT); + mutex_unlock(&rdev->devlist_mtx); + return err; default: return -EOPNOTSUPP; } -- cgit v1.2.3 From e8c9bd5b8d807cfe6c923265969a523b1ba1e6c2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Jun 2012 08:18:22 +0200 Subject: cfg80211: clarify set_channel APIs Now that we've removed all uses of the set_channel API except for the monitor channel and in libertas, clarify this. Split the libertas mesh use into a new libertas_set_mesh_channel() operation, just to keep backward compatibility, and rename the normal set_channel() to set_monitor_channel(). Also describe the desired set_monitor_channel() semantics more clearly. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/libertas/cfg.c | 39 +++++++++++++++++++++++--------- drivers/net/wireless/libertas/dev.h | 1 + drivers/net/wireless/libertas/mesh.c | 7 ++---- drivers/net/wireless/orinoco/cfg.c | 9 ++++---- include/net/cfg80211.h | 24 ++++++++++++-------- net/mac80211/cfg.c | 9 +++++++- net/wireless/chan.c | 43 +++++------------------------------- net/wireless/core.h | 5 ++--- net/wireless/mesh.c | 26 +++++++++++----------- net/wireless/mlme.c | 2 -- net/wireless/nl80211.c | 21 ++++++++++-------- net/wireless/wext-compat.c | 10 ++------- net/wireless/wext-sme.c | 10 +++++++-- 13 files changed, 100 insertions(+), 106 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c index 2fa879b015b6..f4a203049fb4 100644 --- a/drivers/net/wireless/libertas/cfg.c +++ b/drivers/net/wireless/libertas/cfg.c @@ -435,24 +435,40 @@ static int lbs_add_wpa_tlv(u8 *tlv, const u8 *ie, u8 ie_len) * Set Channel */ -static int lbs_cfg_set_channel(struct wiphy *wiphy, - struct net_device *netdev, - struct ieee80211_channel *channel, - enum nl80211_channel_type channel_type) +static int lbs_cfg_set_monitor_channel(struct wiphy *wiphy, + struct ieee80211_channel *channel, + enum nl80211_channel_type channel_type) { struct lbs_private *priv = wiphy_priv(wiphy); int ret = -ENOTSUPP; - lbs_deb_enter_args(LBS_DEB_CFG80211, "iface %s freq %d, type %d", - netdev_name(netdev), channel->center_freq, channel_type); + lbs_deb_enter_args(LBS_DEB_CFG80211, "freq %d, type %d", + channel->center_freq, channel_type); if (channel_type != NL80211_CHAN_NO_HT) goto out; - if (netdev == priv->mesh_dev) - ret = lbs_mesh_set_channel(priv, channel->hw_value); - else - ret = lbs_set_channel(priv, channel->hw_value); + ret = lbs_set_channel(priv, channel->hw_value); + + out: + lbs_deb_leave_args(LBS_DEB_CFG80211, "ret %d", ret); + return ret; +} + +static int lbs_cfg_set_mesh_channel(struct wiphy *wiphy, + struct net_device *netdev, + struct ieee80211_channel *channel) +{ + struct lbs_private *priv = wiphy_priv(wiphy); + int ret = -ENOTSUPP; + + lbs_deb_enter_args(LBS_DEB_CFG80211, "iface %s freq %d", + netdev_name(netdev), channel->center_freq); + + if (netdev != priv->mesh_dev) + goto out; + + ret = lbs_mesh_set_channel(priv, channel->hw_value); out: lbs_deb_leave_args(LBS_DEB_CFG80211, "ret %d", ret); @@ -2029,7 +2045,8 @@ static int lbs_leave_ibss(struct wiphy *wiphy, struct net_device *dev) */ static struct cfg80211_ops lbs_cfg80211_ops = { - .set_channel = lbs_cfg_set_channel, + .set_monitor_channel = lbs_cfg_set_monitor_channel, + .libertas_set_mesh_channel = lbs_cfg_set_mesh_channel, .scan = lbs_cfg_scan, .connect = lbs_cfg_connect, .disconnect = lbs_cfg_disconnect, diff --git a/drivers/net/wireless/libertas/dev.h b/drivers/net/wireless/libertas/dev.h index 672005430aca..60996ce89f77 100644 --- a/drivers/net/wireless/libertas/dev.h +++ b/drivers/net/wireless/libertas/dev.h @@ -58,6 +58,7 @@ struct lbs_private { uint16_t mesh_tlv; u8 mesh_ssid[IEEE80211_MAX_SSID_LEN + 1]; u8 mesh_ssid_len; + u8 mesh_channel; #endif /* Debugfs */ diff --git a/drivers/net/wireless/libertas/mesh.c b/drivers/net/wireless/libertas/mesh.c index e87c031b298f..97807751ebcf 100644 --- a/drivers/net/wireless/libertas/mesh.c +++ b/drivers/net/wireless/libertas/mesh.c @@ -131,16 +131,13 @@ static int lbs_mesh_config(struct lbs_private *priv, uint16_t action, int lbs_mesh_set_channel(struct lbs_private *priv, u8 channel) { + priv->mesh_channel = channel; return lbs_mesh_config(priv, CMD_ACT_MESH_CONFIG_START, channel); } static uint16_t lbs_mesh_get_channel(struct lbs_private *priv) { - struct wireless_dev *mesh_wdev = priv->mesh_dev->ieee80211_ptr; - if (mesh_wdev->channel) - return mesh_wdev->channel->hw_value; - else - return 1; + return priv->mesh_channel ?: 1; } /*************************************************************************** diff --git a/drivers/net/wireless/orinoco/cfg.c b/drivers/net/wireless/orinoco/cfg.c index f7b15b8934fa..e15675585fb1 100644 --- a/drivers/net/wireless/orinoco/cfg.c +++ b/drivers/net/wireless/orinoco/cfg.c @@ -160,10 +160,9 @@ static int orinoco_scan(struct wiphy *wiphy, struct net_device *dev, return err; } -static int orinoco_set_channel(struct wiphy *wiphy, - struct net_device *netdev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type) +static int orinoco_set_monitor_channel(struct wiphy *wiphy, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type) { struct orinoco_private *priv = wiphy_priv(wiphy); int err = 0; @@ -286,7 +285,7 @@ static int orinoco_set_wiphy_params(struct wiphy *wiphy, u32 changed) const struct cfg80211_ops orinoco_cfg_ops = { .change_virtual_intf = orinoco_change_vif, - .set_channel = orinoco_set_channel, + .set_monitor_channel = orinoco_set_monitor_channel, .scan = orinoco_scan, .set_wiphy_params = orinoco_set_wiphy_params, }; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4c90c44b8b75..7319f25250b6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1420,11 +1420,14 @@ struct cfg80211_gtk_rekey_data { * * @set_txq_params: Set TX queue parameters * - * @set_channel: Set channel for a given wireless interface. Some devices - * may support multi-channel operation (by channel hopping) so cfg80211 - * doesn't verify much. Note, however, that the passed netdev may be - * %NULL as well if the user requested changing the channel for the - * device itself, or for a monitor interface. + * @libertas_set_mesh_channel: Only for backward compatibility for libertas, + * as it doesn't implement join_mesh and needs to set the channel to + * join the mesh instead. + * + * @set_monitor_channel: Set the monitor mode channel for the device. If other + * interfaces are active this callback should reject the configuration. + * If no interfaces are active or the device is down, the channel should + * be stored for when a monitor interface becomes active. * @get_channel: Get the current operating channel, should return %NULL if * there's no single defined operating channel if for example the * device implements channel hopping for multi-channel virtual interfaces. @@ -1614,9 +1617,13 @@ struct cfg80211_ops { int (*set_txq_params)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_txq_params *params); - int (*set_channel)(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type); + int (*libertas_set_mesh_channel)(struct wiphy *wiphy, + struct net_device *dev, + struct ieee80211_channel *chan); + + int (*set_monitor_channel)(struct wiphy *wiphy, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type); int (*scan)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_scan_request *request); @@ -2325,7 +2332,6 @@ struct wireless_dev { spinlock_t event_lock; struct cfg80211_internal_bss *current_bss; /* associated / joined */ - struct ieee80211_channel *channel; struct ieee80211_channel *preset_chan; enum nl80211_channel_type preset_chantype; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 50aea1ac7e03..d99359a6f76d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -709,6 +709,13 @@ static int ieee80211_set_channel(struct wiphy *wiphy, return 0; } +static int ieee80211_set_monitor_channel(struct wiphy *wiphy, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type) +{ + return ieee80211_set_channel(wiphy, NULL, chan, channel_type); +} + static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, const u8 *resp, size_t resp_len) { @@ -2932,7 +2939,7 @@ struct cfg80211_ops mac80211_config_ops = { #endif .change_bss = ieee80211_change_bss, .set_txq_params = ieee80211_set_txq_params, - .set_channel = ieee80211_set_channel, + .set_monitor_channel = ieee80211_set_monitor_channel, .suspend = ieee80211_suspend, .resume = ieee80211_resume, .scan = ieee80211_scan, diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 20b87d895722..c1999e45a07c 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -78,50 +78,17 @@ bool cfg80211_can_beacon_sec_chan(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_can_beacon_sec_chan); -int cfg80211_set_freq(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, int freq, - enum nl80211_channel_type channel_type) +int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, + int freq, enum nl80211_channel_type chantype) { struct ieee80211_channel *chan; - int result; - if (wdev && wdev->iftype == NL80211_IFTYPE_MONITOR) - wdev = NULL; - - if (wdev) { - ASSERT_WDEV_LOCK(wdev); - - if (!netif_running(wdev->netdev)) - return -ENETDOWN; - } - - if (!rdev->ops->set_channel) + if (!rdev->ops->set_monitor_channel) return -EOPNOTSUPP; - chan = rdev_freq_to_chan(rdev, freq, channel_type); + chan = rdev_freq_to_chan(rdev, freq, chantype); if (!chan) return -EINVAL; - /* Both channels should be able to initiate communication */ - if (wdev && (wdev->iftype == NL80211_IFTYPE_ADHOC || - wdev->iftype == NL80211_IFTYPE_AP || - wdev->iftype == NL80211_IFTYPE_AP_VLAN || - wdev->iftype == NL80211_IFTYPE_MESH_POINT || - wdev->iftype == NL80211_IFTYPE_P2P_GO) && - !cfg80211_can_beacon_sec_chan(&rdev->wiphy, chan, channel_type)) { - printk(KERN_DEBUG - "cfg80211: Secondary channel not allowed to beacon\n"); - return -EINVAL; - } - - result = rdev->ops->set_channel(&rdev->wiphy, - wdev ? wdev->netdev : NULL, - chan, channel_type); - if (result) - return result; - - if (wdev) - wdev->channel = chan; - - return 0; + return rdev->ops->set_monitor_channel(&rdev->wiphy, chan, chantype); } diff --git a/net/wireless/core.h b/net/wireless/core.h index 1d3d24126946..9348a47562a4 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -444,9 +444,8 @@ cfg80211_can_add_interface(struct cfg80211_registered_device *rdev, struct ieee80211_channel * rdev_freq_to_chan(struct cfg80211_registered_device *rdev, int freq, enum nl80211_channel_type channel_type); -int cfg80211_set_freq(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, int freq, - enum nl80211_channel_type channel_type); +int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, + int freq, enum nl80211_channel_type chantype); int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, const u8 *rates, unsigned int n_rates, diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 2e3b700eba32..b44c736bf9cf 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -179,6 +179,13 @@ int cfg80211_set_mesh_freq(struct cfg80211_registered_device *rdev, { struct ieee80211_channel *channel; + channel = rdev_freq_to_chan(rdev, freq, channel_type); + if (!channel || !cfg80211_can_beacon_sec_chan(&rdev->wiphy, + channel, + channel_type)) { + return -EINVAL; + } + /* * Workaround for libertas (only!), it puts the interface * into mesh mode but doesn't implement join_mesh. Instead, @@ -186,27 +193,20 @@ int cfg80211_set_mesh_freq(struct cfg80211_registered_device *rdev, * you set the channel. Note that the libertas mesh isn't * compatible with 802.11 mesh. */ - if (!rdev->ops->join_mesh) { - int err; + if (rdev->ops->libertas_set_mesh_channel) { + if (channel_type != NL80211_CHAN_NO_HT) + return -EINVAL; if (!netif_running(wdev->netdev)) return -ENETDOWN; - wdev_lock(wdev); - err = cfg80211_set_freq(rdev, wdev, freq, channel_type); - wdev_unlock(wdev); - - return err; + return rdev->ops->libertas_set_mesh_channel(&rdev->wiphy, + wdev->netdev, + channel); } if (wdev->mesh_id_len) return -EBUSY; - channel = rdev_freq_to_chan(rdev, freq, channel_type); - if (!channel || !cfg80211_can_beacon_sec_chan(&rdev->wiphy, - channel, - channel_type)) { - return -EINVAL; - } wdev->preset_chan = channel; wdev->preset_chantype = channel_type; return 0; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index eb90988bbd36..da4406f11929 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -947,8 +947,6 @@ void cfg80211_ch_switch_notify(struct net_device *dev, int freq, if (WARN_ON(!chan)) goto out; - wdev->channel = chan; - nl80211_ch_switch_notify(rdev, dev, freq, type, GFP_KERNEL); out: wdev_unlock(wdev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b22f1f876881..5e29bd38e7df 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -921,7 +921,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; } - if (dev->ops->set_channel || dev->ops->start_ap || + if (dev->ops->set_monitor_channel || dev->ops->start_ap || dev->ops->join_mesh) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) @@ -1178,8 +1178,8 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) * the channel in the start-ap or join-mesh commands instead. * * Monitors are special as they are normally slaved to - * whatever else is going on, so they behave as though - * you tried setting the wiphy channel itself. + * whatever else is going on, so they have their own special + * operation to set the monitor channel if possible. */ return !wdev || wdev->iftype == NL80211_IFTYPE_AP || @@ -1217,6 +1217,10 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; u32 freq; int result; + enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR; + + if (wdev) + iftype = wdev->iftype; if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) return -EINVAL; @@ -1231,7 +1235,7 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); mutex_lock(&rdev->devlist_mtx); - if (wdev) switch (wdev->iftype) { + switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: if (wdev->beacon_interval) { @@ -1252,12 +1256,11 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_MESH_POINT: result = cfg80211_set_mesh_freq(rdev, wdev, freq, channel_type); break; + case NL80211_IFTYPE_MONITOR: + result = cfg80211_set_monitor_channel(rdev, freq, channel_type); + break; default: - wdev_lock(wdev); - result = cfg80211_set_freq(rdev, wdev, freq, channel_type); - wdev_unlock(wdev); - } else { - result = cfg80211_set_freq(rdev, NULL, freq, channel_type); + result = -EINVAL; } mutex_unlock(&rdev->devlist_mtx); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index faeb03548aa4..bc879833b21f 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -802,9 +802,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, if (freq == 0) return -EINVAL; mutex_lock(&rdev->devlist_mtx); - wdev_lock(wdev); - err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT); - wdev_unlock(wdev); + err = cfg80211_set_monitor_channel(rdev, freq, NL80211_CHAN_NO_HT); mutex_unlock(&rdev->devlist_mtx); return err; case NL80211_IFTYPE_MESH_POINT: @@ -848,11 +846,7 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, freq->e = 6; return 0; default: - if (!wdev->channel) - return -EINVAL; - freq->m = wdev->channel->center_freq; - freq->e = 6; - return 0; + return -EINVAL; } } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 7decbd357d51..1f773f668d1a 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -111,9 +111,15 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, wdev->wext.connect.channel = chan; - /* SSID is not set, we just want to switch channel */ + /* + * SSID is not set, we just want to switch monitor channel, + * this is really just backward compatibility, if the SSID + * is set then we use the channel to select the BSS to use + * to connect to instead. If we were connected on another + * channel we disconnected above and reconnect below. + */ if (chan && !wdev->wext.connect.ssid_len) { - err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT); + err = cfg80211_set_monitor_channel(rdev, freq, NL80211_CHAN_NO_HT); goto out; } -- cgit v1.2.3 From 196ac1c13d4db6c276dbb1c9190c8d7d45a83f1f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Jun 2012 14:28:40 +0200 Subject: mac80211: do remain-on-channel while idle The IDLE handling in HW off-channel is broken right now since we turn off IDLE only when the off-channel period already started. Therefore, all drivers that use it today (only iwlwifi!) must support off-channel while idle, so playing with idle isn't needed at all. Off-channel in general, since it's no longer used for authentication/association, shouldn't affect PS, so also remove that logic. Also document a small caveat for reporting TX status from off-channel frames in HW remain-on-channel. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 8 +++++++- net/mac80211/cfg.c | 7 +++---- net/mac80211/iface.c | 17 +++++++---------- net/mac80211/mlme.c | 6 ------ net/mac80211/offchannel.c | 4 ---- 5 files changed, 17 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d2ed86da8e4c..6e700bf8d4a5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2183,7 +2183,13 @@ enum ieee80211_rate_control_changed { * offload. Frames to transmit on the off-channel channel are transmitted * normally except for the %IEEE80211_TX_CTL_TX_OFFCHAN flag. When the * duration (which will always be non-zero) expires, the driver must call - * ieee80211_remain_on_channel_expired(). This callback may sleep. + * ieee80211_remain_on_channel_expired(). + * The driver must not call ieee80211_remain_on_channel_expired() before + * the TX status for a frame that was sent off-channel, otherwise the TX + * status is reported to userspace in an invalid way. + * Note that this callback may be called while the device is in IDLE and + * must be accepted in this case. + * This callback may sleep. * @cancel_remain_on_channel: Requests that an ongoing off-channel period is * aborted before it expires. This callback may sleep. * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d99359a6f76d..a16907919709 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2187,8 +2187,6 @@ static int ieee80211_cancel_remain_on_channel_hw(struct ieee80211_local *local, local->hw_roc_cookie = 0; local->hw_roc_channel = NULL; - ieee80211_recalc_idle(local); - return 0; } @@ -2248,7 +2246,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_work *wk; const struct ieee80211_mgmt *mgmt = (void *)buf; u32 flags; - bool is_offchan = false; + bool is_offchan = false, in_hw_roc = false; if (dont_wait_for_ack) flags = IEEE80211_TX_CTL_NO_ACK; @@ -2268,6 +2266,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, if (chan == local->hw_roc_channel) { /* TODO: check channel type? */ is_offchan = false; + in_hw_roc = true; flags |= IEEE80211_TX_CTL_TX_OFFCHAN; } @@ -2370,7 +2369,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, * wait is involved, we might otherwise not be on * the right channel for long enough! */ - if (!is_offchan && !wait && !sdata->vif.bss_conf.idle) { + if (!is_offchan && !wait && (in_hw_roc || !sdata->vif.bss_conf.idle)) { ieee80211_tx_skb(sdata, skb); return 0; } diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ede5f4959904..968d71c50713 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1456,7 +1456,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; int count = 0; - bool working = false, scanning = false, hw_roc = false; + bool working = false, scanning = false; struct ieee80211_work *wk; unsigned int led_trig_start = 0, led_trig_stop = 0; @@ -1493,9 +1493,11 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) count++; } - list_for_each_entry(wk, &local->work_list, list) { - working = true; - wk->sdata->vif.bss_conf.idle = false; + if (!local->ops->remain_on_channel) { + list_for_each_entry(wk, &local->work_list, list) { + working = true; + wk->sdata->vif.bss_conf.idle = false; + } } if (local->scan_sdata && @@ -1504,9 +1506,6 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) local->scan_sdata->vif.bss_conf.idle = false; } - if (local->hw_roc_channel) - hw_roc = true; - list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -1518,7 +1517,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); } - if (working || scanning || hw_roc) + if (working || scanning) led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_WORK; else led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_WORK; @@ -1530,8 +1529,6 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop); - if (hw_roc) - return ieee80211_idle_off(local, "hw remain-on-channel"); if (working) return ieee80211_idle_off(local, "working"); if (scanning) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e7c4ec4ce166..0f45d02e0ba7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -930,11 +930,6 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) return; } - if (!list_empty(&local->work_list)) { - local->ps_sdata = NULL; - goto change; - } - list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; @@ -1007,7 +1002,6 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) local->ps_sdata = NULL; } - change: ieee80211_change_ps(local); } diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 935aa4b6deee..8f482b15bc51 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -207,8 +207,6 @@ static void ieee80211_hw_roc_start(struct work_struct *work) GFP_KERNEL); } - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); } @@ -260,8 +258,6 @@ static void ieee80211_hw_roc_done(struct work_struct *work) local->hw_roc_channel = NULL; local->hw_roc_cookie = 0; - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); } -- cgit v1.2.3 From 2eb278e083549f4eb29838037004054b3b55df62 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Jun 2012 14:28:42 +0200 Subject: mac80211: unify SW/offload remain-on-channel Redesign all the off-channel code, getting rid of the generic off-channel work concept, replacing it with a simple remain-on-channel list. This fixes a number of small issues with the ROC implementation: * offloaded remain-on-channel couldn't be queued, now we can queue it as well, if needed * in iwlwifi (the only user) offloaded ROC is mutually exclusive with scanning, use the new queue to handle that case -- I expect that it will later depend on a HW flag The bigger issue though is that there's a bad bug in the current implementation: if we get a mgmt TX request while HW roc is active, and this new request has a wait time, we actually schedule a software ROC instead since we can't guarantee the existing offloaded ROC will still be that long. To fix this, the queuing mechanism was needed. The queuing mechanism for offloaded ROC isn't yet optimal, ideally we should add API to have the HW extend the ROC if needed. We could add that later but for now use a software implementation. Overall, this unifies the behaviour between the offloaded and software-implemented case as much as possible. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 - net/mac80211/Makefile | 1 - net/mac80211/cfg.c | 478 ++++++++++++++++++++++++--------------------- net/mac80211/ieee80211_i.h | 89 +++------ net/mac80211/iface.c | 23 +-- net/mac80211/main.c | 10 +- net/mac80211/offchannel.c | 280 ++++++++++++++++++++++---- net/mac80211/scan.c | 4 +- net/mac80211/status.c | 28 +-- net/mac80211/work.c | 370 ----------------------------------- 10 files changed, 532 insertions(+), 754 deletions(-) delete mode 100644 net/mac80211/work.c (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6e700bf8d4a5..d152f54064fd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2184,9 +2184,6 @@ enum ieee80211_rate_control_changed { * normally except for the %IEEE80211_TX_CTL_TX_OFFCHAN flag. When the * duration (which will always be non-zero) expires, the driver must call * ieee80211_remain_on_channel_expired(). - * The driver must not call ieee80211_remain_on_channel_expired() before - * the TX status for a frame that was sent off-channel, otherwise the TX - * status is reported to userspace in an invalid way. * Note that this callback may be called while the device is in IDLE and * must be accepted in this case. * This callback may sleep. diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 3e9d931bba35..2b1470bac178 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -9,7 +9,6 @@ mac80211-y := \ scan.o offchannel.o \ ht.o agg-tx.o agg-rx.o \ ibss.o \ - work.o \ iface.o \ rate.o \ michael.o \ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a16907919709..498c94e34427 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2112,35 +2112,171 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return 0; } -static int ieee80211_remain_on_channel_hw(struct ieee80211_local *local, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type chantype, - unsigned int duration, u64 *cookie) -{ +static int ieee80211_start_roc_work(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *channel, + enum nl80211_channel_type channel_type, + unsigned int duration, u64 *cookie, + struct sk_buff *txskb) +{ + struct ieee80211_roc_work *roc, *tmp; + bool queued = false; int ret; - u32 random_cookie; lockdep_assert_held(&local->mtx); - if (local->hw_roc_cookie) - return -EBUSY; - /* must be nonzero */ - random_cookie = random32() | 1; - - *cookie = random_cookie; - local->hw_roc_dev = dev; - local->hw_roc_cookie = random_cookie; - local->hw_roc_channel = chan; - local->hw_roc_channel_type = chantype; - local->hw_roc_duration = duration; - ret = drv_remain_on_channel(local, chan, chantype, duration); + roc = kzalloc(sizeof(*roc), GFP_KERNEL); + if (!roc) + return -ENOMEM; + + roc->chan = channel; + roc->chan_type = channel_type; + roc->duration = duration; + roc->req_duration = duration; + roc->frame = txskb; + roc->mgmt_tx_cookie = (unsigned long)txskb; + roc->sdata = sdata; + INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); + INIT_LIST_HEAD(&roc->dependents); + + /* if there's one pending or we're scanning, queue this one */ + if (!list_empty(&local->roc_list) || local->scanning) + goto out_check_combine; + + /* if not HW assist, just queue & schedule work */ + if (!local->ops->remain_on_channel) { + ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); + goto out_queue; + } + + /* otherwise actually kick it off here (for error handling) */ + + /* + * If the duration is zero, then the driver + * wouldn't actually do anything. Set it to + * 10 for now. + * + * TODO: cancel the off-channel operation + * when we get the SKB's TX status and + * the wait time was zero before. + */ + if (!duration) + duration = 10; + + ret = drv_remain_on_channel(local, channel, channel_type, duration); if (ret) { - local->hw_roc_channel = NULL; - local->hw_roc_cookie = 0; + kfree(roc); + return ret; } - return ret; + roc->started = true; + goto out_queue; + + out_check_combine: + list_for_each_entry(tmp, &local->roc_list, list) { + if (tmp->chan != channel || tmp->chan_type != channel_type) + continue; + + /* + * Extend this ROC if possible: + * + * If it hasn't started yet, just increase the duration + * and add the new one to the list of dependents. + */ + if (!tmp->started) { + list_add_tail(&roc->list, &tmp->dependents); + tmp->duration = max(tmp->duration, roc->duration); + queued = true; + break; + } + + /* If it has already started, it's more difficult ... */ + if (local->ops->remain_on_channel) { + unsigned long j = jiffies; + + /* + * In the offloaded ROC case, if it hasn't begun, add + * this new one to the dependent list to be handled + * when the the master one begins. If it has begun, + * check that there's still a minimum time left and + * if so, start this one, transmitting the frame, but + * add it to the list directly after this one with a + * a reduced time so we'll ask the driver to execute + * it right after finishing the previous one, in the + * hope that it'll also be executed right afterwards, + * effectively extending the old one. + * If there's no minimum time left, just add it to the + * normal list. + */ + if (!tmp->hw_begun) { + list_add_tail(&roc->list, &tmp->dependents); + queued = true; + break; + } + + if (time_before(j + IEEE80211_ROC_MIN_LEFT, + tmp->hw_start_time + + msecs_to_jiffies(tmp->duration))) { + int new_dur; + + ieee80211_handle_roc_started(roc); + + new_dur = roc->duration - + jiffies_to_msecs(tmp->hw_start_time + + msecs_to_jiffies( + tmp->duration) - + j); + + if (new_dur > 0) { + /* add right after tmp */ + list_add(&roc->list, &tmp->list); + } else { + list_add_tail(&roc->list, + &tmp->dependents); + } + queued = true; + } + } else if (del_timer_sync(&tmp->work.timer)) { + unsigned long new_end; + + /* + * In the software ROC case, cancel the timer, if + * that fails then the finish work is already + * queued/pending and thus we queue the new ROC + * normally, if that succeeds then we can extend + * the timer duration and TX the frame (if any.) + */ + + list_add_tail(&roc->list, &tmp->dependents); + queued = true; + + new_end = jiffies + msecs_to_jiffies(roc->duration); + + /* ok, it was started & we canceled timer */ + if (time_after(new_end, tmp->work.timer.expires)) + mod_timer(&tmp->work.timer, new_end); + else + add_timer(&tmp->work.timer); + + ieee80211_handle_roc_started(roc); + } + break; + } + + out_queue: + if (!queued) + list_add_tail(&roc->list, &local->roc_list); + + /* + * cookie is either the roc (for normal roc) + * or the SKB (for mgmt TX) + */ + if (txskb) + *cookie = (unsigned long)txskb; + else + *cookie = (unsigned long)roc; + + return 0; } static int ieee80211_remain_on_channel(struct wiphy *wiphy, @@ -2152,84 +2288,76 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; + int ret; - if (local->ops->remain_on_channel) { - int ret; - - mutex_lock(&local->mtx); - ret = ieee80211_remain_on_channel_hw(local, dev, - chan, channel_type, - duration, cookie); - local->hw_roc_for_tx = false; - mutex_unlock(&local->mtx); - - return ret; - } + mutex_lock(&local->mtx); + ret = ieee80211_start_roc_work(local, sdata, chan, channel_type, + duration, cookie, NULL); + mutex_unlock(&local->mtx); - return ieee80211_wk_remain_on_channel(sdata, chan, channel_type, - duration, cookie); + return ret; } -static int ieee80211_cancel_remain_on_channel_hw(struct ieee80211_local *local, - u64 cookie) +static int ieee80211_cancel_roc(struct ieee80211_local *local, + u64 cookie, bool mgmt_tx) { + struct ieee80211_roc_work *roc, *tmp, *found = NULL; int ret; - lockdep_assert_held(&local->mtx); + mutex_lock(&local->mtx); + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { + if (!mgmt_tx && (unsigned long)roc != cookie) + continue; + else if (mgmt_tx && roc->mgmt_tx_cookie != cookie) + continue; - if (local->hw_roc_cookie != cookie) - return -ENOENT; + found = roc; + break; + } - ret = drv_cancel_remain_on_channel(local); - if (ret) - return ret; + if (!found) { + mutex_unlock(&local->mtx); + return -ENOENT; + } - local->hw_roc_cookie = 0; - local->hw_roc_channel = NULL; + if (local->ops->remain_on_channel) { + if (found->started) { + ret = drv_cancel_remain_on_channel(local); + if (WARN_ON_ONCE(ret)) { + mutex_unlock(&local->mtx); + return ret; + } + } - return 0; -} + list_del(&found->list); -static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, - struct net_device *dev, - u64 cookie) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; + ieee80211_run_deferred_scan(local); + ieee80211_start_next_roc(local); + mutex_unlock(&local->mtx); - if (local->ops->cancel_remain_on_channel) { - int ret; + ieee80211_roc_notify_destroy(found); + } else { + /* work may be pending so use it all the time */ + found->abort = true; + ieee80211_queue_delayed_work(&local->hw, &found->work, 0); - mutex_lock(&local->mtx); - ret = ieee80211_cancel_remain_on_channel_hw(local, cookie); mutex_unlock(&local->mtx); - return ret; + /* work will clean up etc */ + flush_delayed_work(&found->work); } - return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); + return 0; } -static enum work_done_result -ieee80211_offchan_tx_done(struct ieee80211_work *wk, struct sk_buff *skb) +static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, + struct net_device *dev, + u64 cookie) { - /* - * Use the data embedded in the work struct for reporting - * here so if the driver mangled the SKB before dropping - * it (which is the only way we really should get here) - * then we don't report mangled data. - * - * If there was no wait time, then by the time we get here - * the driver will likely not have reported the status yet, - * so in that case userspace will have to deal with it. - */ - - if (wk->offchan_tx.wait && !wk->offchan_tx.status) - cfg80211_mgmt_tx_status(wk->sdata->dev, - (unsigned long) wk->offchan_tx.frame, - wk->data, wk->data_len, false, GFP_KERNEL); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; - return WORK_DONE_DESTROY; + return ieee80211_cancel_roc(local, cookie, false); } static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, @@ -2243,10 +2371,10 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct sta_info *sta; - struct ieee80211_work *wk; const struct ieee80211_mgmt *mgmt = (void *)buf; + bool need_offchan = false; u32 flags; - bool is_offchan = false, in_hw_roc = false; + int ret; if (dont_wait_for_ack) flags = IEEE80211_TX_CTL_NO_ACK; @@ -2254,34 +2382,28 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | IEEE80211_TX_CTL_REQ_TX_STATUS; - /* Check that we are on the requested channel for transmission */ - if (chan != local->tmp_channel && - chan != local->oper_channel) - is_offchan = true; - if (channel_type_valid && - (channel_type != local->tmp_channel_type && - channel_type != local->_oper_channel_type)) - is_offchan = true; - - if (chan == local->hw_roc_channel) { - /* TODO: check channel type? */ - is_offchan = false; - in_hw_roc = true; - flags |= IEEE80211_TX_CTL_TX_OFFCHAN; - } - if (no_cck) flags |= IEEE80211_TX_CTL_NO_CCK_RATE; - if (is_offchan && !offchan) - return -EBUSY; - switch (sdata->vif.type) { case NL80211_IFTYPE_ADHOC: + if (!sdata->vif.bss_conf.ibss_joined) + need_offchan = true; + /* fall through */ +#ifdef CONFIG_MAC80211_MESH + case NL80211_IFTYPE_MESH_POINT: + if (ieee80211_vif_is_mesh(&sdata->vif) && + !sdata->u.mesh.mesh_id_len) + need_offchan = true; + /* fall through */ +#endif case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_MESH_POINT: + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && + !ieee80211_vif_is_mesh(&sdata->vif) && + !rcu_access_pointer(sdata->bss->beacon)) + need_offchan = true; if (!ieee80211_is_action(mgmt->frame_control) || mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) break; @@ -2293,105 +2415,60 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: + if (!sdata->u.mgd.associated) + need_offchan = true; break; default: return -EOPNOTSUPP; } + mutex_lock(&local->mtx); + + /* Check if the operating channel is the requested channel */ + if (!need_offchan) { + need_offchan = chan != local->oper_channel; + if (channel_type_valid && + channel_type != local->_oper_channel_type) + need_offchan = true; + } + + if (need_offchan && !offchan) { + ret = -EBUSY; + goto out_unlock; + } + skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); - if (!skb) - return -ENOMEM; + if (!skb) { + ret = -ENOMEM; + goto out_unlock; + } skb_reserve(skb, local->hw.extra_tx_headroom); memcpy(skb_put(skb, len), buf, len); IEEE80211_SKB_CB(skb)->flags = flags; - if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL && - flags & IEEE80211_TX_CTL_TX_OFFCHAN) - IEEE80211_SKB_CB(skb)->hw_queue = - local->hw.offchannel_tx_hw_queue; - skb->dev = sdata->dev; - *cookie = (unsigned long) skb; - - if (is_offchan && local->ops->remain_on_channel) { - unsigned int duration; - int ret; - - mutex_lock(&local->mtx); - /* - * If the duration is zero, then the driver - * wouldn't actually do anything. Set it to - * 100 for now. - * - * TODO: cancel the off-channel operation - * when we get the SKB's TX status and - * the wait time was zero before. - */ - duration = 100; - if (wait) - duration = wait; - ret = ieee80211_remain_on_channel_hw(local, dev, chan, - channel_type, - duration, cookie); - if (ret) { - kfree_skb(skb); - mutex_unlock(&local->mtx); - return ret; - } - - local->hw_roc_for_tx = true; - local->hw_roc_duration = wait; - - /* - * queue up frame for transmission after - * ieee80211_ready_on_channel call - */ - - /* modify cookie to prevent API mismatches */ - *cookie ^= 2; - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN; - if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) - IEEE80211_SKB_CB(skb)->hw_queue = - local->hw.offchannel_tx_hw_queue; - local->hw_roc_skb = skb; - local->hw_roc_skb_for_status = skb; - mutex_unlock(&local->mtx); - - return 0; - } - - /* - * Can transmit right away if the channel was the - * right one and there's no wait involved... If a - * wait is involved, we might otherwise not be on - * the right channel for long enough! - */ - if (!is_offchan && !wait && (in_hw_roc || !sdata->vif.bss_conf.idle)) { + if (!need_offchan) { ieee80211_tx_skb(sdata, skb); - return 0; - } - - wk = kzalloc(sizeof(*wk) + len, GFP_KERNEL); - if (!wk) { - kfree_skb(skb); - return -ENOMEM; + ret = 0; + goto out_unlock; } - wk->type = IEEE80211_WORK_OFFCHANNEL_TX; - wk->chan = chan; - wk->chan_type = channel_type; - wk->sdata = sdata; - wk->done = ieee80211_offchan_tx_done; - wk->offchan_tx.frame = skb; - wk->offchan_tx.wait = wait; - wk->data_len = len; - memcpy(wk->data, buf, len); + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN; + if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + IEEE80211_SKB_CB(skb)->hw_queue = + local->hw.offchannel_tx_hw_queue; - ieee80211_add_work(wk); - return 0; + /* This will handle all kinds of coalescing and immediate TX */ + ret = ieee80211_start_roc_work(local, sdata, chan, channel_type, + wait, cookie, skb); + if (ret) + kfree_skb(skb); + out_unlock: + mutex_unlock(&local->mtx); + return ret; } static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, @@ -2400,45 +2477,8 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - struct ieee80211_work *wk; - int ret = -ENOENT; - mutex_lock(&local->mtx); - - if (local->ops->cancel_remain_on_channel) { - cookie ^= 2; - ret = ieee80211_cancel_remain_on_channel_hw(local, cookie); - - if (ret == 0) { - kfree_skb(local->hw_roc_skb); - local->hw_roc_skb = NULL; - local->hw_roc_skb_for_status = NULL; - } - - mutex_unlock(&local->mtx); - - return ret; - } - - list_for_each_entry(wk, &local->work_list, list) { - if (wk->sdata != sdata) - continue; - - if (wk->type != IEEE80211_WORK_OFFCHANNEL_TX) - continue; - - if (cookie != (unsigned long) wk->offchan_tx.frame) - continue; - - wk->timeout = jiffies; - - ieee80211_queue_work(&local->hw, &local->work_work); - ret = 0; - break; - } - mutex_unlock(&local->mtx); - - return ret; + return ieee80211_cancel_roc(local, cookie, true); } static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8c026abcb8d9..e6cbf5b68c89 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -317,55 +317,30 @@ struct mesh_preq_queue { u8 flags; }; -enum ieee80211_work_type { - IEEE80211_WORK_ABORT, - IEEE80211_WORK_REMAIN_ON_CHANNEL, - IEEE80211_WORK_OFFCHANNEL_TX, -}; - -/** - * enum work_done_result - indicates what to do after work was done - * - * @WORK_DONE_DESTROY: This work item is no longer needed, destroy. - * @WORK_DONE_REQUEUE: This work item was reset to be reused, and - * should be requeued. - */ -enum work_done_result { - WORK_DONE_DESTROY, - WORK_DONE_REQUEUE, -}; +#if HZ/100 == 0 +#define IEEE80211_ROC_MIN_LEFT 1 +#else +#define IEEE80211_ROC_MIN_LEFT (HZ/100) +#endif -struct ieee80211_work { +struct ieee80211_roc_work { struct list_head list; + struct list_head dependents; - struct rcu_head rcu_head; + struct delayed_work work; struct ieee80211_sub_if_data *sdata; - enum work_done_result (*done)(struct ieee80211_work *wk, - struct sk_buff *skb); - struct ieee80211_channel *chan; enum nl80211_channel_type chan_type; - unsigned long timeout; - enum ieee80211_work_type type; + bool started, abort, hw_begun, notified; - bool started; + unsigned long hw_start_time; - union { - struct { - u32 duration; - } remain; - struct { - struct sk_buff *frame; - u32 wait; - bool status; - } offchan_tx; - }; - - size_t data_len; - u8 data[]; + u32 duration, req_duration; + struct sk_buff *frame; + u64 mgmt_tx_cookie; }; /* flags used in struct ieee80211_if_managed.flags */ @@ -847,13 +822,6 @@ struct ieee80211_local { const struct ieee80211_ops *ops; - /* - * work stuff, potentially off-channel (in the future) - */ - struct list_head work_list; - struct timer_list work_timer; - struct work_struct work_work; - /* * private workqueue to mac80211. mac80211 makes this accessible * via ieee80211_queue_work() @@ -1088,14 +1056,12 @@ struct ieee80211_local { } debugfs; #endif - struct ieee80211_channel *hw_roc_channel; - struct net_device *hw_roc_dev; - struct sk_buff *hw_roc_skb, *hw_roc_skb_for_status; + /* + * Remain-on-channel support + */ + struct list_head roc_list; struct work_struct hw_roc_start, hw_roc_done; - enum nl80211_channel_type hw_roc_channel_type; - unsigned int hw_roc_duration; - u32 hw_roc_cookie; - bool hw_roc_for_tx; + unsigned long hw_roc_start_time; struct idr ack_status_frames; spinlock_t ack_status_lock; @@ -1291,7 +1257,12 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, bool offchannel_ps_enable); void ieee80211_offchannel_return(struct ieee80211_local *local, bool offchannel_ps_disable); -void ieee80211_hw_roc_setup(struct ieee80211_local *local); +void ieee80211_roc_setup(struct ieee80211_local *local); +void ieee80211_start_next_roc(struct ieee80211_local *local); +void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata); +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc); +void ieee80211_sw_roc_work(struct work_struct *work); +void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); /* interface handling */ int ieee80211_iface_init(void); @@ -1501,18 +1472,6 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, enum nl80211_channel_type channel_type, u16 prot_mode); -/* internal work items */ -void ieee80211_work_init(struct ieee80211_local *local); -void ieee80211_add_work(struct ieee80211_work *wk); -void free_work(struct ieee80211_work *wk); -void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata); -int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - unsigned int duration, u64 *cookie); -int ieee80211_wk_cancel_remain_on_channel( - struct ieee80211_sub_if_data *sdata, u64 cookie); - /* channel management */ enum ieee80211_chan_mode { CHAN_MODE_UNDEFINED, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 968d71c50713..87aeb4f21ffd 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -528,10 +528,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, */ netif_tx_stop_all_queues(sdata->dev); - /* - * Purge work for this interface. - */ - ieee80211_work_purge(sdata); + ieee80211_roc_purge(sdata); /* * Remove all stations associated with this interface. @@ -637,18 +634,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_configure_filter(local); break; default: - mutex_lock(&local->mtx); - if (local->hw_roc_dev == sdata->dev && - local->hw_roc_channel) { - /* ignore return value since this is racy */ - drv_cancel_remain_on_channel(local); - ieee80211_queue_work(&local->hw, &local->hw_roc_done); - } - mutex_unlock(&local->mtx); - - flush_work(&local->hw_roc_start); - flush_work(&local->hw_roc_done); - flush_work(&sdata->work); /* * When we get here, the interface is marked down. @@ -1457,8 +1442,8 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; int count = 0; bool working = false, scanning = false; - struct ieee80211_work *wk; unsigned int led_trig_start = 0, led_trig_stop = 0; + struct ieee80211_roc_work *roc; #ifdef CONFIG_PROVE_LOCKING WARN_ON(debug_locks && !lockdep_rtnl_is_held() && @@ -1494,9 +1479,9 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) } if (!local->ops->remain_on_channel) { - list_for_each_entry(wk, &local->work_list, list) { + list_for_each_entry(roc, &local->roc_list, list) { working = true; - wk->sdata->vif.bss_conf.idle = false; + roc->sdata->vif.bss_conf.idle = false; } } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 779ac613ee57..d81c178c7712 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -625,8 +625,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); - ieee80211_work_init(local); - INIT_WORK(&local->restart_work, ieee80211_restart_work); INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); @@ -669,7 +667,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, ieee80211_led_names(local); - ieee80211_hw_roc_setup(local); + ieee80211_roc_setup(local); return &local->hw; } @@ -1016,12 +1014,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) rtnl_unlock(); - /* - * Now all work items will be gone, but the - * timer might still be armed, so delete it - */ - del_timer_sync(&local->work_timer); - cancel_work_sync(&local->restart_work); cancel_work_sync(&local->reconfig_filter); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 8f482b15bc51..abb226dc4753 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -16,6 +16,7 @@ #include #include "ieee80211_i.h" #include "driver-trace.h" +#include "driver-ops.h" /* * Tell our hardware to disable PS. @@ -181,32 +182,58 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, mutex_unlock(&local->iflist_mtx); } +void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) +{ + if (roc->notified) + return; + + if (roc->mgmt_tx_cookie) { + if (!WARN_ON(!roc->frame)) { + ieee80211_tx_skb(roc->sdata, roc->frame); + roc->frame = NULL; + } + } else { + cfg80211_ready_on_channel(roc->sdata->dev, (unsigned long)roc, + roc->chan, roc->chan_type, + roc->req_duration, GFP_KERNEL); + } + + roc->notified = true; +} + static void ieee80211_hw_roc_start(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, hw_roc_start); - struct ieee80211_sub_if_data *sdata; + struct ieee80211_roc_work *roc, *dep, *tmp; mutex_lock(&local->mtx); - if (!local->hw_roc_channel) { - mutex_unlock(&local->mtx); - return; - } + if (list_empty(&local->roc_list)) + goto out_unlock; - if (local->hw_roc_skb) { - sdata = IEEE80211_DEV_TO_SUB_IF(local->hw_roc_dev); - ieee80211_tx_skb(sdata, local->hw_roc_skb); - local->hw_roc_skb = NULL; - } else { - cfg80211_ready_on_channel(local->hw_roc_dev, - local->hw_roc_cookie, - local->hw_roc_channel, - local->hw_roc_channel_type, - local->hw_roc_duration, - GFP_KERNEL); - } + roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, + list); + + if (!roc->started) + goto out_unlock; + + roc->hw_begun = true; + roc->hw_start_time = local->hw_roc_start_time; + ieee80211_handle_roc_started(roc); + list_for_each_entry_safe(dep, tmp, &roc->dependents, list) { + ieee80211_handle_roc_started(dep); + + if (dep->duration > roc->duration) { + u32 dur = dep->duration; + dep->duration = dur - roc->duration; + roc->duration = dur; + list_del(&dep->list); + list_add(&dep->list, &roc->list); + } + } + out_unlock: mutex_unlock(&local->mtx); } @@ -214,50 +241,179 @@ void ieee80211_ready_on_channel(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); + local->hw_roc_start_time = jiffies; + trace_api_ready_on_channel(local); ieee80211_queue_work(hw, &local->hw_roc_start); } EXPORT_SYMBOL_GPL(ieee80211_ready_on_channel); -static void ieee80211_hw_roc_done(struct work_struct *work) +void ieee80211_start_next_roc(struct ieee80211_local *local) { - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, hw_roc_done); + struct ieee80211_roc_work *roc; - mutex_lock(&local->mtx); + lockdep_assert_held(&local->mtx); - if (!local->hw_roc_channel) { - mutex_unlock(&local->mtx); + if (list_empty(&local->roc_list)) { + ieee80211_run_deferred_scan(local); return; } - /* was never transmitted */ - if (local->hw_roc_skb) { - u64 cookie; + roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, + list); - cookie = local->hw_roc_cookie ^ 2; + if (local->ops->remain_on_channel) { + int ret, duration = roc->duration; - cfg80211_mgmt_tx_status(local->hw_roc_dev, cookie, - local->hw_roc_skb->data, - local->hw_roc_skb->len, false, - GFP_KERNEL); + /* XXX: duplicated, see ieee80211_start_roc_work() */ + if (!duration) + duration = 10; - kfree_skb(local->hw_roc_skb); - local->hw_roc_skb = NULL; - local->hw_roc_skb_for_status = NULL; + ret = drv_remain_on_channel(local, roc->chan, + roc->chan_type, + duration); + + roc->started = true; + + if (ret) { + wiphy_warn(local->hw.wiphy, + "failed to start next HW ROC (%d)\n", ret); + /* + * queue the work struct again to avoid recursion + * when multiple failures occur + */ + ieee80211_remain_on_channel_expired(&local->hw); + } + } else { + /* delay it a bit */ + ieee80211_queue_delayed_work(&local->hw, &roc->work, + round_jiffies_relative(HZ/2)); + } +} + +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) +{ + struct ieee80211_roc_work *dep, *tmp; + + /* was never transmitted */ + if (roc->frame) { + cfg80211_mgmt_tx_status(roc->sdata->dev, + (unsigned long)roc->frame, + roc->frame->data, roc->frame->len, + false, GFP_KERNEL); + kfree_skb(roc->frame); } - if (!local->hw_roc_for_tx) - cfg80211_remain_on_channel_expired(local->hw_roc_dev, - local->hw_roc_cookie, - local->hw_roc_channel, - local->hw_roc_channel_type, + if (!roc->mgmt_tx_cookie) + cfg80211_remain_on_channel_expired(roc->sdata->dev, + (unsigned long)roc, + roc->chan, roc->chan_type, GFP_KERNEL); - local->hw_roc_channel = NULL; - local->hw_roc_cookie = 0; + list_for_each_entry_safe(dep, tmp, &roc->dependents, list) + ieee80211_roc_notify_destroy(dep); + + kfree(roc); +} + +void ieee80211_sw_roc_work(struct work_struct *work) +{ + struct ieee80211_roc_work *roc = + container_of(work, struct ieee80211_roc_work, work.work); + struct ieee80211_sub_if_data *sdata = roc->sdata; + struct ieee80211_local *local = sdata->local; + + mutex_lock(&local->mtx); + + if (roc->abort) + goto finish; + + if (WARN_ON(list_empty(&local->roc_list))) + goto out_unlock; + + if (WARN_ON(roc != list_first_entry(&local->roc_list, + struct ieee80211_roc_work, + list))) + goto out_unlock; + + if (!roc->started) { + struct ieee80211_roc_work *dep; + + /* start this ROC */ + /* switch channel etc */ + ieee80211_recalc_idle(local); + + local->tmp_channel = roc->chan; + local->tmp_channel_type = roc->chan_type; + ieee80211_hw_config(local, 0); + + /* tell userspace or send frame */ + ieee80211_handle_roc_started(roc); + list_for_each_entry(dep, &roc->dependents, list) + ieee80211_handle_roc_started(dep); + + /* if it was pure TX, just finish right away */ + if (!roc->duration) + goto finish; + + roc->started = true; + ieee80211_queue_delayed_work(&local->hw, &roc->work, + msecs_to_jiffies(roc->duration)); + } else { + /* finish this ROC */ + finish: + list_del(&roc->list); + ieee80211_roc_notify_destroy(roc); + + if (roc->started) { + drv_flush(local, false); + + local->tmp_channel = NULL; + ieee80211_hw_config(local, 0); + + ieee80211_offchannel_return(local, true); + } + + ieee80211_recalc_idle(local); + + ieee80211_start_next_roc(local); + ieee80211_run_deferred_scan(local); + } + + out_unlock: + mutex_unlock(&local->mtx); +} + +static void ieee80211_hw_roc_done(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, hw_roc_done); + struct ieee80211_roc_work *roc; + + mutex_lock(&local->mtx); + + if (list_empty(&local->roc_list)) + goto out_unlock; + + roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, + list); + + if (!roc->started) + goto out_unlock; + + list_del(&roc->list); + + ieee80211_roc_notify_destroy(roc); + + /* if there's another roc, start it now */ + ieee80211_start_next_roc(local); + + /* or scan maybe */ + ieee80211_run_deferred_scan(local); + + out_unlock: mutex_unlock(&local->mtx); } @@ -271,8 +427,48 @@ void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(ieee80211_remain_on_channel_expired); -void ieee80211_hw_roc_setup(struct ieee80211_local *local) +void ieee80211_roc_setup(struct ieee80211_local *local) { INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start); INIT_WORK(&local->hw_roc_done, ieee80211_hw_roc_done); + INIT_LIST_HEAD(&local->roc_list); +} + +void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_roc_work *roc, *tmp; + LIST_HEAD(tmp_list); + + mutex_lock(&local->mtx); + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { + if (roc->sdata != sdata) + continue; + + if (roc->started && local->ops->remain_on_channel) { + /* can race, so ignore return value */ + drv_cancel_remain_on_channel(local); + } + + list_move_tail(&roc->list, &tmp_list); + roc->abort = true; + } + + ieee80211_start_next_roc(local); + ieee80211_run_deferred_scan(local); + mutex_unlock(&local->mtx); + + list_for_each_entry_safe(roc, tmp, &tmp_list, list) { + if (local->ops->remain_on_channel) { + list_del(&roc->list); + ieee80211_roc_notify_destroy(roc); + } else { + ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); + + /* work will clean up etc */ + flush_delayed_work(&roc->work); + } + } + + WARN_ON_ONCE(!list_empty(&tmp_list)); } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 169da0742c81..379f178eab5f 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -323,7 +323,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); ieee80211_mesh_notify_scan_completed(local); - ieee80211_queue_work(&local->hw, &local->work_work); + ieee80211_start_next_roc(local); } void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) @@ -376,7 +376,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) static bool ieee80211_can_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { - if (!list_empty(&local->work_list)) + if (!list_empty(&local->roc_list)) return false; if (sdata->vif.type == NL80211_IFTYPE_STATION && diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 63a769015068..6b4f42527887 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -520,36 +520,16 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { u64 cookie = (unsigned long)skb; + acked = info->flags & IEEE80211_TX_STAT_ACK; if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) { - acked = info->flags & IEEE80211_TX_STAT_ACK; - + ieee80211_is_qos_nullfunc(hdr->frame_control)) cfg80211_probe_status(skb->dev, hdr->addr1, cookie, acked, GFP_ATOMIC); - } else { - struct ieee80211_work *wk; - - rcu_read_lock(); - list_for_each_entry_rcu(wk, &local->work_list, list) { - if (wk->type != IEEE80211_WORK_OFFCHANNEL_TX) - continue; - if (wk->offchan_tx.frame != skb) - continue; - wk->offchan_tx.status = true; - break; - } - rcu_read_unlock(); - if (local->hw_roc_skb_for_status == skb) { - cookie = local->hw_roc_cookie ^ 2; - local->hw_roc_skb_for_status = NULL; - } - + else cfg80211_mgmt_tx_status( skb->dev, cookie, skb->data, skb->len, - !!(info->flags & IEEE80211_TX_STAT_ACK), - GFP_ATOMIC); - } + acked, GFP_ATOMIC); } if (unlikely(info->ack_frame_id)) { diff --git a/net/mac80211/work.c b/net/mac80211/work.c deleted file mode 100644 index b2650a9d45ff..000000000000 --- a/net/mac80211/work.c +++ /dev/null @@ -1,370 +0,0 @@ -/* - * mac80211 work implementation - * - * Copyright 2003-2008, Jouni Malinen - * Copyright 2004, Instant802 Networks, Inc. - * Copyright 2005, Devicescape Software, Inc. - * Copyright 2006-2007 Jiri Benc - * Copyright 2007, Michael Wu - * Copyright 2009, Johannes Berg - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ieee80211_i.h" -#include "rate.h" -#include "driver-ops.h" - -enum work_action { - WORK_ACT_NONE, - WORK_ACT_TIMEOUT, -}; - - -/* utils */ -static inline void ASSERT_WORK_MTX(struct ieee80211_local *local) -{ - lockdep_assert_held(&local->mtx); -} - -/* - * We can have multiple work items (and connection probing) - * scheduling this timer, but we need to take care to only - * reschedule it when it should fire _earlier_ than it was - * asked for before, or if it's not pending right now. This - * function ensures that. Note that it then is required to - * run this function for all timeouts after the first one - * has happened -- the work that runs from this timer will - * do that. - */ -static void run_again(struct ieee80211_local *local, - unsigned long timeout) -{ - ASSERT_WORK_MTX(local); - - if (!timer_pending(&local->work_timer) || - time_before(timeout, local->work_timer.expires)) - mod_timer(&local->work_timer, timeout); -} - -void free_work(struct ieee80211_work *wk) -{ - kfree_rcu(wk, rcu_head); -} - -static enum work_action __must_check -ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk) -{ - /* - * First time we run, do nothing -- the generic code will - * have switched to the right channel etc. - */ - if (!wk->started) { - wk->timeout = jiffies + msecs_to_jiffies(wk->remain.duration); - - cfg80211_ready_on_channel(wk->sdata->dev, (unsigned long) wk, - wk->chan, wk->chan_type, - wk->remain.duration, GFP_KERNEL); - - return WORK_ACT_NONE; - } - - return WORK_ACT_TIMEOUT; -} - -static enum work_action __must_check -ieee80211_offchannel_tx(struct ieee80211_work *wk) -{ - if (!wk->started) { - wk->timeout = jiffies + msecs_to_jiffies(wk->offchan_tx.wait); - - /* - * After this, offchan_tx.frame remains but now is no - * longer a valid pointer -- we still need it as the - * cookie for canceling this work/status matching. - */ - ieee80211_tx_skb(wk->sdata, wk->offchan_tx.frame); - - return WORK_ACT_NONE; - } - - return WORK_ACT_TIMEOUT; -} - -static void ieee80211_work_timer(unsigned long data) -{ - struct ieee80211_local *local = (void *) data; - - if (local->quiescing) - return; - - ieee80211_queue_work(&local->hw, &local->work_work); -} - -static void ieee80211_work_work(struct work_struct *work) -{ - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, work_work); - struct ieee80211_work *wk, *tmp; - LIST_HEAD(free_work); - enum work_action rma; - bool remain_off_channel = false; - - /* - * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the rest. - */ - if (WARN(local->suspended, "work scheduled while going to suspend\n")) - return; - - mutex_lock(&local->mtx); - - if (local->scanning) { - mutex_unlock(&local->mtx); - return; - } - - ieee80211_recalc_idle(local); - - list_for_each_entry_safe(wk, tmp, &local->work_list, list) { - bool started = wk->started; - - /* mark work as started if it's on the current off-channel */ - if (!started && local->tmp_channel && - wk->chan == local->tmp_channel && - wk->chan_type == local->tmp_channel_type) { - started = true; - wk->timeout = jiffies; - } - - if (!started && !local->tmp_channel) { - ieee80211_offchannel_stop_vifs(local, true); - - local->tmp_channel = wk->chan; - local->tmp_channel_type = wk->chan_type; - - ieee80211_hw_config(local, 0); - - started = true; - wk->timeout = jiffies; - } - - /* don't try to work with items that aren't started */ - if (!started) - continue; - - if (time_is_after_jiffies(wk->timeout)) { - /* - * This work item isn't supposed to be worked on - * right now, but take care to adjust the timer - * properly. - */ - run_again(local, wk->timeout); - continue; - } - - switch (wk->type) { - default: - WARN_ON(1); - /* nothing */ - rma = WORK_ACT_NONE; - break; - case IEEE80211_WORK_ABORT: - rma = WORK_ACT_TIMEOUT; - break; - case IEEE80211_WORK_REMAIN_ON_CHANNEL: - rma = ieee80211_remain_on_channel_timeout(wk); - break; - case IEEE80211_WORK_OFFCHANNEL_TX: - rma = ieee80211_offchannel_tx(wk); - break; - } - - wk->started = started; - - switch (rma) { - case WORK_ACT_NONE: - /* might have changed the timeout */ - run_again(local, wk->timeout); - break; - case WORK_ACT_TIMEOUT: - list_del_rcu(&wk->list); - synchronize_rcu(); - list_add(&wk->list, &free_work); - break; - default: - WARN(1, "unexpected: %d", rma); - } - } - - list_for_each_entry(wk, &local->work_list, list) { - if (!wk->started) - continue; - if (wk->chan != local->tmp_channel || - wk->chan_type != local->tmp_channel_type) - continue; - remain_off_channel = true; - } - - if (!remain_off_channel && local->tmp_channel) { - local->tmp_channel = NULL; - ieee80211_hw_config(local, 0); - - ieee80211_offchannel_return(local, true); - - /* give connection some time to breathe */ - run_again(local, jiffies + HZ/2); - } - - ieee80211_recalc_idle(local); - ieee80211_run_deferred_scan(local); - - mutex_unlock(&local->mtx); - - list_for_each_entry_safe(wk, tmp, &free_work, list) { - wk->done(wk, NULL); - list_del(&wk->list); - kfree(wk); - } -} - -void ieee80211_add_work(struct ieee80211_work *wk) -{ - struct ieee80211_local *local; - - if (WARN_ON(!wk->chan)) - return; - - if (WARN_ON(!wk->sdata)) - return; - - if (WARN_ON(!wk->done)) - return; - - if (WARN_ON(!ieee80211_sdata_running(wk->sdata))) - return; - - wk->started = false; - - local = wk->sdata->local; - mutex_lock(&local->mtx); - list_add_tail(&wk->list, &local->work_list); - mutex_unlock(&local->mtx); - - ieee80211_queue_work(&local->hw, &local->work_work); -} - -void ieee80211_work_init(struct ieee80211_local *local) -{ - INIT_LIST_HEAD(&local->work_list); - setup_timer(&local->work_timer, ieee80211_work_timer, - (unsigned long)local); - INIT_WORK(&local->work_work, ieee80211_work_work); -} - -void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_work *wk; - bool cleanup = false; - - mutex_lock(&local->mtx); - list_for_each_entry(wk, &local->work_list, list) { - if (wk->sdata != sdata) - continue; - cleanup = true; - wk->type = IEEE80211_WORK_ABORT; - wk->started = true; - wk->timeout = jiffies; - } - mutex_unlock(&local->mtx); - - /* run cleanups etc. */ - if (cleanup) - ieee80211_work_work(&local->work_work); - - mutex_lock(&local->mtx); - list_for_each_entry(wk, &local->work_list, list) { - if (wk->sdata != sdata) - continue; - WARN_ON(1); - break; - } - mutex_unlock(&local->mtx); -} - -static enum work_done_result ieee80211_remain_done(struct ieee80211_work *wk, - struct sk_buff *skb) -{ - /* - * We are done serving the remain-on-channel command. - */ - cfg80211_remain_on_channel_expired(wk->sdata->dev, (unsigned long) wk, - wk->chan, wk->chan_type, - GFP_KERNEL); - - return WORK_DONE_DESTROY; -} - -int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - unsigned int duration, u64 *cookie) -{ - struct ieee80211_work *wk; - - wk = kzalloc(sizeof(*wk), GFP_KERNEL); - if (!wk) - return -ENOMEM; - - wk->type = IEEE80211_WORK_REMAIN_ON_CHANNEL; - wk->chan = chan; - wk->chan_type = channel_type; - wk->sdata = sdata; - wk->done = ieee80211_remain_done; - - wk->remain.duration = duration; - - *cookie = (unsigned long) wk; - - ieee80211_add_work(wk); - - return 0; -} - -int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata, - u64 cookie) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_work *wk, *tmp; - bool found = false; - - mutex_lock(&local->mtx); - list_for_each_entry_safe(wk, tmp, &local->work_list, list) { - if ((unsigned long) wk == cookie) { - wk->timeout = jiffies; - found = true; - break; - } - } - mutex_unlock(&local->mtx); - - if (!found) - return -ENOENT; - - ieee80211_queue_work(&local->hw, &local->work_work); - - return 0; -} -- cgit v1.2.3 From 2c352f444ccfa966a1aa4fd8e9ee29381c467448 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:09 +0000 Subject: netfilter: nf_conntrack: prepare namespace support for l4 protocol trackers This patch prepares the namespace support for layer 4 protocol trackers. Basically, this modifies the following interfaces: * nf_ct_[un]register_sysctl * nf_conntrack_l4proto_[un]register to include the namespace parameter. We still use init_net in this patch to prepare the ground for follow-up patches for each layer 4 protocol tracker. We add a new net_id field to struct nf_conntrack_l4proto that is used to store the pernet_operations id for each layer 4 protocol tracker. Note that AF_INET6's protocols do not need to do sysctl compat. Thus, we only register compat sysctl when l4proto.l3proto != AF_INET6. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 11 +- include/net/netns/conntrack.h | 12 +++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 18 ++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 18 ++-- net/netfilter/nf_conntrack_proto.c | 143 ++++++++++++++++++------- net/netfilter/nf_conntrack_proto_dccp.c | 10 +- net/netfilter/nf_conntrack_proto_gre.c | 6 +- net/netfilter/nf_conntrack_proto_sctp.c | 10 +- net/netfilter/nf_conntrack_proto_udplite.c | 10 +- 9 files changed, 159 insertions(+), 79 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 3b572bb20aa2..d621c91de5c8 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -12,6 +12,7 @@ #include #include #include +#include struct seq_file; @@ -103,6 +104,10 @@ struct nf_conntrack_l4proto { struct ctl_table *ctl_compat_table; #endif #endif + int *net_id; + /* Init l4proto pernet data */ + int (*init_net)(struct net *net); + /* Protocol name */ const char *name; @@ -123,8 +128,10 @@ nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto); extern void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); /* Protocol registration. */ -extern int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *proto); -extern void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *proto); +extern int nf_conntrack_l4proto_register(struct net *net, + struct nf_conntrack_l4proto *proto); +extern void nf_conntrack_l4proto_unregister(struct net *net, + struct nf_conntrack_l4proto *proto); /* Generic netlink helpers */ extern int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index a053a19870cf..1f53038b0d1b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -8,6 +8,18 @@ struct ctl_table_header; struct nf_conntrack_ecache; +struct nf_proto_net { +#ifdef CONFIG_SYSCTL + struct ctl_table_header *ctl_table_header; + struct ctl_table *ctl_table; +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + struct ctl_table_header *ctl_compat_header; + struct ctl_table *ctl_compat_table; +#endif +#endif + unsigned int users; +}; + struct netns_ct { atomic_t count; unsigned int expect_count; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 91747d4ebc26..46ec515db129 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -391,19 +391,19 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) return ret; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_tcp4); if (ret < 0) { pr_err("nf_conntrack_ipv4: can't register tcp.\n"); goto cleanup_sockopt; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_udp4); if (ret < 0) { pr_err("nf_conntrack_ipv4: can't register udp.\n"); goto cleanup_tcp; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_icmp); if (ret < 0) { pr_err("nf_conntrack_ipv4: can't register icmp.\n"); goto cleanup_udp; @@ -434,11 +434,11 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) cleanup_ipv4: nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); cleanup_icmp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmp); cleanup_udp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udp4); cleanup_tcp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_tcp4); cleanup_sockopt: nf_unregister_sockopt(&so_getorigdst); return ret; @@ -452,9 +452,9 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) #endif nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmp); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udp4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_tcp4); nf_unregister_sockopt(&so_getorigdst); } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 3224ef90a21a..51ad9f104421 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -340,19 +340,19 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) need_conntrack(); nf_defrag_ipv6_enable(); - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_tcp6); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register tcp.\n"); return ret; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_udp6); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register udp.\n"); goto cleanup_tcp; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_icmpv6); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register icmpv6.\n"); goto cleanup_udp; @@ -376,11 +376,11 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) cleanup_ipv6: nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); cleanup_icmpv6: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmpv6); cleanup_udp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udp6); cleanup_tcp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_tcp6); return ret; } @@ -389,9 +389,9 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) synchronize_net(); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmpv6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udp6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_tcp6); } module_init(nf_conntrack_l3proto_ipv6_init); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 8b631b07a645..7ee31ac0a12c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -36,28 +36,35 @@ static DEFINE_MUTEX(nf_ct_proto_mutex); #ifdef CONFIG_SYSCTL static int -nf_ct_register_sysctl(struct ctl_table_header **header, const char *path, - struct ctl_table *table, unsigned int *users) +nf_ct_register_sysctl(struct net *net, + struct ctl_table_header **header, + const char *path, + struct ctl_table *table, + unsigned int *users) { if (*header == NULL) { - *header = register_net_sysctl(&init_net, path, table); + *header = register_net_sysctl(net, path, table); if (*header == NULL) return -ENOMEM; } if (users != NULL) (*users)++; + return 0; } static void nf_ct_unregister_sysctl(struct ctl_table_header **header, - struct ctl_table *table, unsigned int *users) + struct ctl_table **table, + unsigned int *users) { if (users != NULL && --*users > 0) return; unregister_net_sysctl_table(*header); + kfree(*table); *header = NULL; + *table = NULL; } #endif @@ -167,7 +174,8 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto) #ifdef CONFIG_SYSCTL if (l3proto->ctl_table != NULL) { - err = nf_ct_register_sysctl(&l3proto->ctl_table_header, + err = nf_ct_register_sysctl(&init_net, + &l3proto->ctl_table_header, l3proto->ctl_table_path, l3proto->ctl_table, NULL); } @@ -180,7 +188,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto #ifdef CONFIG_SYSCTL if (l3proto->ctl_table_header != NULL) nf_ct_unregister_sysctl(&l3proto->ctl_table_header, - l3proto->ctl_table, NULL); + &l3proto->ctl_table, NULL); #endif } @@ -243,29 +251,54 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); -static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto) +static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, + struct nf_conntrack_l4proto *l4proto) +{ + if (l4proto->net_id) + return net_generic(net, *l4proto->net_id); + else + return NULL; +} + +static +int nf_ct_l4proto_register_sysctl(struct net *net, + struct nf_conntrack_l4proto *l4proto) { int err = 0; + struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); + if (pn == NULL) + return 0; #ifdef CONFIG_SYSCTL - if (l4proto->ctl_table != NULL) { - err = nf_ct_register_sysctl(l4proto->ctl_table_header, + if (pn->ctl_table != NULL) { + err = nf_ct_register_sysctl(net, + &pn->ctl_table_header, "net/netfilter", - l4proto->ctl_table, - l4proto->ctl_table_users); - if (err < 0) + pn->ctl_table, + &pn->users); + if (err < 0) { + if (!pn->users) { + kfree(pn->ctl_table); + pn->ctl_table = NULL; + } goto out; + } } #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (l4proto->ctl_compat_table != NULL) { - err = nf_ct_register_sysctl(&l4proto->ctl_compat_table_header, + if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) { + err = nf_ct_register_sysctl(net, + &pn->ctl_compat_header, "net/ipv4/netfilter", - l4proto->ctl_compat_table, NULL); + pn->ctl_compat_table, + NULL); if (err == 0) goto out; - nf_ct_unregister_sysctl(l4proto->ctl_table_header, - l4proto->ctl_table, - l4proto->ctl_table_users); + + kfree(pn->ctl_compat_table); + pn->ctl_compat_table = NULL; + nf_ct_unregister_sysctl(&pn->ctl_table_header, + &pn->ctl_table, + &pn->users); } #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ out: @@ -273,25 +306,34 @@ out: return err; } -static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto) +static +void nf_ct_l4proto_unregister_sysctl(struct net *net, + struct nf_conntrack_l4proto *l4proto) { + struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); + if (pn == NULL) + return; #ifdef CONFIG_SYSCTL - if (l4proto->ctl_table_header != NULL && - *l4proto->ctl_table_header != NULL) - nf_ct_unregister_sysctl(l4proto->ctl_table_header, - l4proto->ctl_table, - l4proto->ctl_table_users); + if (pn->ctl_table_header != NULL) + nf_ct_unregister_sysctl(&pn->ctl_table_header, + &pn->ctl_table, + &pn->users); + #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (l4proto->ctl_compat_table_header != NULL) - nf_ct_unregister_sysctl(&l4proto->ctl_compat_table_header, - l4proto->ctl_compat_table, NULL); + if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL) + nf_ct_unregister_sysctl(&pn->ctl_compat_header, + &pn->ctl_compat_table, + NULL); #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ +#else + pn->users--; #endif /* CONFIG_SYSCTL */ } /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ -int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) +static int +nf_conntrack_l4proto_register_net(struct nf_conntrack_l4proto *l4proto) { int ret = 0; @@ -333,10 +375,6 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) goto out_unlock; } - ret = nf_ct_l4proto_register_sysctl(l4proto); - if (ret < 0) - goto out_unlock; - l4proto->nla_size = 0; if (l4proto->nlattr_size) l4proto->nla_size += l4proto->nlattr_size(); @@ -345,17 +383,34 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto); - out_unlock: mutex_unlock(&nf_ct_proto_mutex); return ret; } -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); -void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) +int nf_conntrack_l4proto_register(struct net *net, + struct nf_conntrack_l4proto *l4proto) { - struct net *net; + int ret = 0; + if (net == &init_net) + ret = nf_conntrack_l4proto_register_net(l4proto); + + if (ret < 0) + return ret; + + if (l4proto->init_net) + ret = l4proto->init_net(net); + if (ret < 0) + return ret; + + return nf_ct_l4proto_register_sysctl(net, l4proto); +} +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); + +static void +nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto) +{ BUG_ON(l4proto->l3proto >= PF_MAX); mutex_lock(&nf_ct_proto_mutex); @@ -365,15 +420,21 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) ) != l4proto); rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], &nf_conntrack_l4proto_generic); - nf_ct_l4proto_unregister_sysctl(l4proto); mutex_unlock(&nf_ct_proto_mutex); synchronize_rcu(); +} +void nf_conntrack_l4proto_unregister(struct net *net, + struct nf_conntrack_l4proto *l4proto) +{ + if (net == &init_net) + nf_conntrack_l4proto_unregister_net(l4proto); + + nf_ct_l4proto_unregister_sysctl(net, l4proto); /* Remove all contrack entries for this protocol */ rtnl_lock(); - for_each_net(net) - nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); + nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); @@ -383,7 +444,7 @@ int nf_conntrack_proto_init(void) unsigned int i; int err; - err = nf_ct_l4proto_register_sysctl(&nf_conntrack_l4proto_generic); + err = nf_ct_l4proto_register_sysctl(&init_net, &nf_conntrack_l4proto_generic); if (err < 0) return err; @@ -397,7 +458,7 @@ void nf_conntrack_proto_fini(void) { unsigned int i; - nf_ct_l4proto_unregister_sysctl(&nf_conntrack_l4proto_generic); + nf_ct_l4proto_unregister_sysctl(&init_net, &nf_conntrack_l4proto_generic); /* free l3proto protocol tables */ for (i = 0; i < PF_MAX; i++) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index ef706a485be1..5a8e03724289 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -945,17 +945,17 @@ static int __init nf_conntrack_proto_dccp_init(void) if (err < 0) goto err1; - err = nf_conntrack_l4proto_register(&dccp_proto4); + err = nf_conntrack_l4proto_register(&init_net, &dccp_proto4); if (err < 0) goto err2; - err = nf_conntrack_l4proto_register(&dccp_proto6); + err = nf_conntrack_l4proto_register(&init_net, &dccp_proto6); if (err < 0) goto err3; return 0; err3: - nf_conntrack_l4proto_unregister(&dccp_proto4); + nf_conntrack_l4proto_unregister(&init_net, &dccp_proto4); err2: unregister_pernet_subsys(&dccp_net_ops); err1: @@ -965,8 +965,8 @@ err1: static void __exit nf_conntrack_proto_dccp_fini(void) { unregister_pernet_subsys(&dccp_net_ops); - nf_conntrack_l4proto_unregister(&dccp_proto6); - nf_conntrack_l4proto_unregister(&dccp_proto4); + nf_conntrack_l4proto_unregister(&init_net, &dccp_proto6); + nf_conntrack_l4proto_unregister(&init_net, &dccp_proto4); } module_init(nf_conntrack_proto_dccp_init); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 4bf6b4e4b776..132f0d2d82cc 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -396,18 +396,18 @@ static int __init nf_ct_proto_gre_init(void) { int rv; - rv = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4); + rv = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_gre4); if (rv < 0) return rv; rv = register_pernet_subsys(&proto_gre_net_ops); if (rv < 0) - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_gre4); return rv; } static void __exit nf_ct_proto_gre_fini(void) { - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_gre4); unregister_pernet_subsys(&proto_gre_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 996db2fa21f7..97bbc20e1a2b 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -791,12 +791,12 @@ static int __init nf_conntrack_proto_sctp_init(void) { int ret; - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp4); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_sctp4); if (ret) { pr_err("nf_conntrack_l4proto_sctp4: protocol register failed\n"); goto out; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp6); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_sctp6); if (ret) { pr_err("nf_conntrack_l4proto_sctp6: protocol register failed\n"); goto cleanup_sctp4; @@ -805,15 +805,15 @@ static int __init nf_conntrack_proto_sctp_init(void) return ret; cleanup_sctp4: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_sctp4); out: return ret; } static void __exit nf_conntrack_proto_sctp_fini(void) { - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_sctp6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_sctp4); } module_init(nf_conntrack_proto_sctp_init); diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4d60a5376aa6..fa142a81496c 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -299,23 +299,23 @@ static int __init nf_conntrack_proto_udplite_init(void) { int err; - err = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udplite4); + err = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_udplite4); if (err < 0) goto err1; - err = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udplite6); + err = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_udplite6); if (err < 0) goto err2; return 0; err2: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udplite4); err1: return err; } static void __exit nf_conntrack_proto_udplite_exit(void) { - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udplite6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udplite4); } module_init(nf_conntrack_proto_udplite_init); -- cgit v1.2.3 From 524a53e5ad5f34f64ed34281e8b0eca19437db5b Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:10 +0000 Subject: netfilter: nf_conntrack: prepare namespace support for l3 protocol trackers This patch prepares the namespace support for layer 3 protocol trackers. Basically, this modifies the following interfaces: * nf_ct_l3proto_[un]register_sysctl. * nf_conntrack_l3proto_[un]register. We add a new nf_ct_l3proto_net is used to get the pernet data of l3proto. This adds rhe new struct nf_ip_net that is used to store the sysctl header and l3proto_ipv4,l4proto_tcp(6),l4proto_udp(6),l4proto_icmp(v6) because the protos such tcp and tcp6 use the same data,so making nf_ip_net as a field of netns_ct is the easiest way to manager it. This patch also adds init_net to struct nf_conntrack_l3proto to initial the layer 3 protocol pernet data. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 9 ++- include/net/netns/conntrack.h | 8 +++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 +- net/netfilter/nf_conntrack_proto.c | 92 ++++++++++++++++++++------ 5 files changed, 91 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 9699c028b74b..d6df8c71a7fe 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -69,6 +69,9 @@ struct nf_conntrack_l3proto { struct ctl_table *ctl_table; #endif /* CONFIG_SYSCTL */ + /* Init l3proto pernet data */ + int (*init_net)(struct net *net); + /* Module (if any) which this is connected to. */ struct module *me; }; @@ -76,8 +79,10 @@ struct nf_conntrack_l3proto { extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; /* Protocol registration. */ -extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); -extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto); +extern int nf_conntrack_l3proto_register(struct net *net, + struct nf_conntrack_l3proto *proto); +extern void nf_conntrack_l3proto_unregister(struct net *net, + struct nf_conntrack_l3proto *proto); extern struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); extern void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 1f53038b0d1b..b2dbcc5cd813 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -20,6 +20,13 @@ struct nf_proto_net { unsigned int users; }; +struct nf_ip_net { +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + struct ctl_table_header *ctl_table_header; + struct ctl_table *ctl_table; +#endif +}; + struct netns_ct { atomic_t count; unsigned int expect_count; @@ -40,6 +47,7 @@ struct netns_ct { unsigned int sysctl_log_invalid; /* Log invalid packets */ int sysctl_auto_assign_helper; bool auto_assign_helper_warned; + struct nf_ip_net nf_ct_proto; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; struct ctl_table_header *acct_sysctl_header; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 46ec515db129..0c0fb906c19d 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -409,7 +409,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) goto cleanup_udp; } - ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4); + ret = nf_conntrack_l3proto_register(&init_net, &nf_conntrack_l3proto_ipv4); if (ret < 0) { pr_err("nf_conntrack_ipv4: can't register ipv4\n"); goto cleanup_icmp; @@ -432,7 +432,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); #endif cleanup_ipv4: - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); + nf_conntrack_l3proto_unregister(&init_net, &nf_conntrack_l3proto_ipv4); cleanup_icmp: nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmp); cleanup_udp: @@ -451,7 +451,7 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) nf_conntrack_ipv4_compat_fini(); #endif nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); + nf_conntrack_l3proto_unregister(&init_net, &nf_conntrack_l3proto_ipv4); nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmp); nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udp4); nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_tcp4); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 51ad9f104421..7334cbfd6003 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -358,7 +358,7 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) goto cleanup_udp; } - ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6); + ret = nf_conntrack_l3proto_register(&init_net, &nf_conntrack_l3proto_ipv6); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register ipv6\n"); goto cleanup_icmpv6; @@ -374,7 +374,7 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) return ret; cleanup_ipv6: - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); + nf_conntrack_l3proto_unregister(&init_net, &nf_conntrack_l3proto_ipv6); cleanup_icmpv6: nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmpv6); cleanup_udp: @@ -388,7 +388,7 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) { synchronize_net(); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); + nf_conntrack_l3proto_unregister(&init_net, &nf_conntrack_l3proto_ipv6); nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmpv6); nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udp6); nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_tcp6); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 7ee31ac0a12c..a8daf0faadb7 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -168,31 +168,57 @@ static int kill_l4proto(struct nf_conn *i, void *data) nf_ct_l3num(i) == l4proto->l3proto; } -static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto) +static struct nf_ip_net *nf_ct_l3proto_net(struct net *net, + struct nf_conntrack_l3proto *l3proto) +{ + if (l3proto->l3proto == PF_INET) + return &net->ct.nf_ct_proto; + else + return NULL; +} + +static int nf_ct_l3proto_register_sysctl(struct net *net, + struct nf_conntrack_l3proto *l3proto) { int err = 0; + struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto); + /* nf_conntrack_l3proto_ipv6 doesn't support sysctl */ + if (in == NULL) + return 0; -#ifdef CONFIG_SYSCTL - if (l3proto->ctl_table != NULL) { - err = nf_ct_register_sysctl(&init_net, - &l3proto->ctl_table_header, +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + if (in->ctl_table != NULL) { + err = nf_ct_register_sysctl(net, + &in->ctl_table_header, l3proto->ctl_table_path, - l3proto->ctl_table, NULL); + in->ctl_table, + NULL); + if (err < 0) { + kfree(in->ctl_table); + in->ctl_table = NULL; + } } #endif return err; } -static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto) +static void nf_ct_l3proto_unregister_sysctl(struct net *net, + struct nf_conntrack_l3proto *l3proto) { -#ifdef CONFIG_SYSCTL - if (l3proto->ctl_table_header != NULL) - nf_ct_unregister_sysctl(&l3proto->ctl_table_header, - &l3proto->ctl_table, NULL); + struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto); + + if (in == NULL) + return; +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + if (in->ctl_table_header != NULL) + nf_ct_unregister_sysctl(&in->ctl_table_header, + &in->ctl_table, + NULL); #endif } -int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) +static int +nf_conntrack_l3proto_register_net(struct nf_conntrack_l3proto *proto) { int ret = 0; struct nf_conntrack_l3proto *old; @@ -211,10 +237,6 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) goto out_unlock; } - ret = nf_ct_l3proto_register_sysctl(proto); - if (ret < 0) - goto out_unlock; - if (proto->nlattr_tuple_size) proto->nla_size = 3 * proto->nlattr_tuple_size(); @@ -223,13 +245,32 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) out_unlock: mutex_unlock(&nf_ct_proto_mutex); return ret; + } -EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); -void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) +int nf_conntrack_l3proto_register(struct net *net, + struct nf_conntrack_l3proto *proto) { - struct net *net; + int ret = 0; + + if (net == &init_net) + ret = nf_conntrack_l3proto_register_net(proto); + if (ret < 0) + return ret; + + if (proto->init_net) { + ret = proto->init_net(net); + if (ret < 0) + return ret; + } + return nf_ct_l3proto_register_sysctl(net, proto); +} +EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); + +static void +nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto) +{ BUG_ON(proto->l3proto >= AF_MAX); mutex_lock(&nf_ct_proto_mutex); @@ -238,15 +279,22 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) ) != proto); rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], &nf_conntrack_l3proto_generic); - nf_ct_l3proto_unregister_sysctl(proto); mutex_unlock(&nf_ct_proto_mutex); synchronize_rcu(); +} + +void nf_conntrack_l3proto_unregister(struct net *net, + struct nf_conntrack_l3proto *proto) +{ + if (net == &init_net) + nf_conntrack_l3proto_unregister_net(proto); + + nf_ct_l3proto_unregister_sysctl(net, proto); /* Remove all contrack entries for this protocol */ rtnl_lock(); - for_each_net(net) - nf_ct_iterate_cleanup(net, kill_l3proto, proto); + nf_ct_iterate_cleanup(net, kill_l3proto, proto); rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); -- cgit v1.2.3 From 15f585bd76b6bd2974b23c9e69ff038a0826a0be Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:11 +0000 Subject: netfilter: nf_ct_generic: add namespace support This patch adds namespace support for the generic layer 4 protocol tracker. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 4 +-- include/net/netns/conntrack.h | 6 ++++ net/netfilter/nf_conntrack_core.c | 17 ++++------- net/netfilter/nf_conntrack_proto.c | 46 +++++++++++++++++++----------- net/netfilter/nf_conntrack_proto_generic.c | 38 ++++++++++++++++++++++-- 5 files changed, 78 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index aced085132e7..d8f5b9f52169 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -28,8 +28,8 @@ extern unsigned int nf_conntrack_in(struct net *net, extern int nf_conntrack_init(struct net *net); extern void nf_conntrack_cleanup(struct net *net); -extern int nf_conntrack_proto_init(void); -extern void nf_conntrack_proto_fini(void); +extern int nf_conntrack_proto_init(struct net *net); +extern void nf_conntrack_proto_fini(struct net *net); extern bool nf_ct_get_tuple(const struct sk_buff *skb, diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index b2dbcc5cd813..0ef8592d48bf 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -20,7 +20,13 @@ struct nf_proto_net { unsigned int users; }; +struct nf_generic_net { + struct nf_proto_net pn; + unsigned int timeout; +}; + struct nf_ip_net { + struct nf_generic_net generic; #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ac3af97cc468..068f2e0ec58e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1333,7 +1333,6 @@ static void nf_conntrack_cleanup_init_net(void) while (untrack_refs() > 0) schedule(); - nf_conntrack_proto_fini(); #ifdef CONFIG_NF_CONNTRACK_ZONES nf_ct_extend_unregister(&nf_ct_zone_extend); #endif @@ -1372,7 +1371,7 @@ void nf_conntrack_cleanup(struct net *net) netfilter framework. Roll on, two-stage module delete... */ synchronize_net(); - + nf_conntrack_proto_fini(net); nf_conntrack_cleanup_net(net); if (net_eq(net, &init_net)) { @@ -1496,11 +1495,6 @@ static int nf_conntrack_init_init_net(void) printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); - - ret = nf_conntrack_proto_init(); - if (ret < 0) - goto err_proto; - #ifdef CONFIG_NF_CONNTRACK_ZONES ret = nf_ct_extend_register(&nf_ct_zone_extend); if (ret < 0) @@ -1518,9 +1512,7 @@ static int nf_conntrack_init_init_net(void) #ifdef CONFIG_NF_CONNTRACK_ZONES err_extend: - nf_conntrack_proto_fini(); #endif -err_proto: return ret; } @@ -1583,9 +1575,7 @@ static int nf_conntrack_init_net(struct net *net) ret = nf_conntrack_helper_init(net); if (ret < 0) goto err_helper; - return 0; - err_helper: nf_conntrack_timeout_fini(net); err_timeout: @@ -1622,6 +1612,9 @@ int nf_conntrack_init(struct net *net) if (ret < 0) goto out_init_net; } + ret = nf_conntrack_proto_init(net); + if (ret < 0) + goto out_proto; ret = nf_conntrack_init_net(net); if (ret < 0) goto out_net; @@ -1637,6 +1630,8 @@ int nf_conntrack_init(struct net *net) return 0; out_net: + nf_conntrack_proto_fini(net); +out_proto: if (net_eq(net, &init_net)) nf_conntrack_cleanup_init_net(); out_init_net: diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a8daf0faadb7..b095b4aefd7c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -302,10 +302,16 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) { - if (l4proto->net_id) - return net_generic(net, *l4proto->net_id); - else - return NULL; + switch (l4proto->l4proto) { + case 255: /* l4proto_generic */ + return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; + default: + if (l4proto->net_id) + return net_generic(net, *l4proto->net_id); + else + return NULL; + } + return NULL; } static @@ -487,28 +493,34 @@ void nf_conntrack_l4proto_unregister(struct net *net, } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); -int nf_conntrack_proto_init(void) +int nf_conntrack_proto_init(struct net *net) { unsigned int i; int err; - - err = nf_ct_l4proto_register_sysctl(&init_net, &nf_conntrack_l4proto_generic); + err = nf_conntrack_l4proto_generic.init_net(net); + if (err < 0) + return err; + err = nf_ct_l4proto_register_sysctl(net, + &nf_conntrack_l4proto_generic); if (err < 0) return err; - for (i = 0; i < AF_MAX; i++) - rcu_assign_pointer(nf_ct_l3protos[i], - &nf_conntrack_l3proto_generic); + if (net == &init_net) { + for (i = 0; i < AF_MAX; i++) + rcu_assign_pointer(nf_ct_l3protos[i], + &nf_conntrack_l3proto_generic); + } return 0; } -void nf_conntrack_proto_fini(void) +void nf_conntrack_proto_fini(struct net *net) { unsigned int i; - - nf_ct_l4proto_unregister_sysctl(&init_net, &nf_conntrack_l4proto_generic); - - /* free l3proto protocol tables */ - for (i = 0; i < PF_MAX; i++) - kfree(nf_ct_protos[i]); + nf_ct_l4proto_unregister_sysctl(net, + &nf_conntrack_l4proto_generic); + if (net == &init_net) { + /* free l3proto protocol tables */ + for (i = 0; i < PF_MAX; i++) + kfree(nf_ct_protos[i]); + } } diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index d8923d54b358..19bc880eb4e2 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -14,6 +14,11 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; +static inline struct nf_generic_net *generic_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.generic; +} + static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) @@ -42,7 +47,7 @@ static int generic_print_tuple(struct seq_file *s, static unsigned int *generic_get_timeouts(struct net *net) { - return &nf_ct_generic_timeout; + return &(generic_pernet(net)->timeout); } /* Returns verdict for packet, or -1 for invalid. */ @@ -110,7 +115,6 @@ static struct ctl_table_header *generic_sysctl_header; static struct ctl_table generic_sysctl_table[] = { { .procname = "nf_conntrack_generic_timeout", - .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -121,7 +125,6 @@ static struct ctl_table generic_sysctl_table[] = { static struct ctl_table generic_compat_sysctl_table[] = { { .procname = "ip_conntrack_generic_timeout", - .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -131,6 +134,34 @@ static struct ctl_table generic_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ +static int generic_init_net(struct net *net) +{ + struct nf_generic_net *gn = generic_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)gn; + gn->timeout = nf_ct_generic_timeout; +#ifdef CONFIG_SYSCTL + pn->ctl_table = kmemdup(generic_sysctl_table, + sizeof(generic_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + pn->ctl_table[0].data = &gn->timeout; + +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table, + sizeof(generic_compat_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_compat_table) { + kfree(pn->ctl_table); + pn->ctl_table = NULL; + return -ENOMEM; + } + pn->ctl_compat_table[0].data = &gn->timeout; +#endif +#endif + return 0; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, @@ -158,4 +189,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = .ctl_compat_table = generic_compat_sysctl_table, #endif #endif + .init_net = generic_init_net, }; -- cgit v1.2.3 From d2ba1fde42af44fbce361202e9af13daff9e4381 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:12 +0000 Subject: netfilter: nf_ct_tcp: add namespace support This patch adds namespace support for TCP protocol tracker. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 10 ++ net/netfilter/nf_conntrack_proto.c | 2 + net/netfilter/nf_conntrack_proto_tcp.c | 162 +++++++++++++++++++++++++++------ 3 files changed, 145 insertions(+), 29 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 0ef8592d48bf..680d799ece8b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -4,6 +4,7 @@ #include #include #include +#include struct ctl_table_header; struct nf_conntrack_ecache; @@ -25,8 +26,17 @@ struct nf_generic_net { unsigned int timeout; }; +struct nf_tcp_net { + struct nf_proto_net pn; + unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX]; + unsigned int tcp_loose; + unsigned int tcp_be_liberal; + unsigned int tcp_max_retrans; +}; + struct nf_ip_net { struct nf_generic_net generic; + struct nf_tcp_net tcp; #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b095b4aefd7c..8a71e8bb0d6c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -303,6 +303,8 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) { switch (l4proto->l4proto) { + case IPPROTO_TCP: + return (struct nf_proto_net *)&net->ct.nf_ct_proto.tcp; case 255: /* l4proto_generic */ return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; default: diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 21ff1a99f534..a053f6786b3c 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -270,6 +270,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { } }; +static inline struct nf_tcp_net *tcp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.tcp; +} + static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { @@ -516,6 +521,7 @@ static bool tcp_in_window(const struct nf_conn *ct, u_int8_t pf) { struct net *net = nf_ct_net(ct); + struct nf_tcp_net *tn = tcp_pernet(net); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; @@ -720,7 +726,7 @@ static bool tcp_in_window(const struct nf_conn *ct, } else { res = false; if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || - nf_ct_tcp_be_liberal) + tn->tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, @@ -828,6 +834,7 @@ static int tcp_packet(struct nf_conn *ct, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); + struct nf_tcp_net *tn = tcp_pernet(net); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; @@ -1020,7 +1027,7 @@ static int tcp_packet(struct nf_conn *ct, && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; - if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && + if (ct->proto.tcp.retrans >= tn->tcp_max_retrans && timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) timeout = timeouts[TCP_CONNTRACK_RETRANS]; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & @@ -1065,6 +1072,8 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, enum tcp_conntrack new_state; const struct tcphdr *th; struct tcphdr _tcph; + struct net *net = nf_ct_net(ct); + struct nf_tcp_net *tn = tcp_pernet(net); const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; @@ -1093,7 +1102,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.tcp.seen[0].td_end; tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); - } else if (nf_ct_tcp_loose == 0) { + } else if (tn->tcp_loose == 0) { /* Don't try to pick up connections. */ return false; } else { @@ -1360,91 +1369,78 @@ static struct ctl_table_header *tcp_sysctl_header; static struct ctl_table tcp_sysctl_table[] = { { .procname = "nf_conntrack_tcp_timeout_syn_sent", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_syn_recv", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_established", - .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_fin_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_close_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_last_ack", - .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_time_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_close", - .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_max_retrans", - .data = &tcp_timeouts[TCP_CONNTRACK_RETRANS], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_unacknowledged", - .data = &tcp_timeouts[TCP_CONNTRACK_UNACK], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_loose", - .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "nf_conntrack_tcp_be_liberal", - .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "nf_conntrack_tcp_max_retrans", - .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, @@ -1456,91 +1452,78 @@ static struct ctl_table tcp_sysctl_table[] = { static struct ctl_table tcp_compat_sysctl_table[] = { { .procname = "ip_conntrack_tcp_timeout_syn_sent", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_syn_sent2", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_syn_recv", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_established", - .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_fin_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_close_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_last_ack", - .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_time_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_close", - .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_max_retrans", - .data = &tcp_timeouts[TCP_CONNTRACK_RETRANS], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_loose", - .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ip_conntrack_tcp_be_liberal", - .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ip_conntrack_tcp_max_retrans", - .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, @@ -1550,6 +1533,125 @@ static struct ctl_table tcp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ +static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn) +{ +#ifdef CONFIG_SYSCTL + struct nf_tcp_net *tn = (struct nf_tcp_net *)pn; + + if (pn->ctl_table) + return 0; + + pn->ctl_table = kmemdup(tcp_sysctl_table, + sizeof(tcp_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + + pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; + pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; + pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; + pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; + pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; + pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; + pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; + pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; + pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; + pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK]; + pn->ctl_table[10].data = &tn->tcp_loose; + pn->ctl_table[11].data = &tn->tcp_be_liberal; + pn->ctl_table[12].data = &tn->tcp_max_retrans; +#endif + return 0; +} + +static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) +{ +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + struct nf_tcp_net *tn = (struct nf_tcp_net *)pn; + pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table, + sizeof(tcp_compat_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_compat_table) + return -ENOMEM; + + pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; + pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2]; + pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; + pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; + pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; + pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; + pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; + pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; + pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; + pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; + pn->ctl_compat_table[10].data = &tn->tcp_loose; + pn->ctl_compat_table[11].data = &tn->tcp_be_liberal; + pn->ctl_compat_table[12].data = &tn->tcp_max_retrans; +#endif +#endif + return 0; +} + +static int tcpv4_init_net(struct net *net) +{ + int i; + int ret = 0; + struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)tn; + +#ifdef CONFIG_SYSCTL + if (!pn->ctl_table) { +#else + if (!pn->user++) { +#endif + for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) + tn->timeouts[i] = tcp_timeouts[i]; + + tn->tcp_loose = nf_ct_tcp_loose; + tn->tcp_be_liberal = nf_ct_tcp_be_liberal; + tn->tcp_max_retrans = nf_ct_tcp_max_retrans; + } + + ret = tcp_kmemdup_compat_sysctl_table(pn); + + if (ret < 0) + return ret; + + ret = tcp_kmemdup_sysctl_table(pn); + +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + if (ret < 0) { + kfree(pn->ctl_compat_table); + pn->ctl_compat_table = NULL; + } +#endif +#endif + return ret; +} + +static int tcpv6_init_net(struct net *net) +{ + int i; + struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)tn; + +#ifdef CONFIG_SYSCTL + if (!pn->ctl_table) { +#else + if (!pn->user++) { +#endif + for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) + tn->timeouts[i] = tcp_timeouts[i]; + tn->tcp_loose = nf_ct_tcp_loose; + tn->tcp_be_liberal = nf_ct_tcp_be_liberal; + tn->tcp_max_retrans = nf_ct_tcp_max_retrans; + } + + return tcp_kmemdup_sysctl_table(pn); +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = { .l3proto = PF_INET, @@ -1590,6 +1692,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .ctl_compat_table = tcp_compat_sysctl_table, #endif #endif + .init_net = tcpv4_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); @@ -1630,5 +1733,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .ctl_table_header = &tcp_sysctl_header, .ctl_table = tcp_sysctl_table, #endif + .init_net = tcpv6_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6); -- cgit v1.2.3 From 0ce490ad4387a67ee8ca5253476272d508fc0b6f Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:13 +0000 Subject: netfilter: nf_ct_udp: add namespace support This patch adds namespace support for UDP protocol tracker. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 12 ++++ net/netfilter/nf_conntrack_proto.c | 2 + net/netfilter/nf_conntrack_proto_udp.c | 100 +++++++++++++++++++++++++++++---- 3 files changed, 103 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 680d799ece8b..7bd14ab8ce1c 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -34,9 +34,21 @@ struct nf_tcp_net { unsigned int tcp_max_retrans; }; +enum udp_conntrack { + UDP_CT_UNREPLIED, + UDP_CT_REPLIED, + UDP_CT_MAX +}; + +struct nf_udp_net { + struct nf_proto_net pn; + unsigned int timeouts[UDP_CT_MAX]; +}; + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; + struct nf_udp_net udp; #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 8a71e8bb0d6c..9c6aee51dea2 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -305,6 +305,8 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, switch (l4proto->l4proto) { case IPPROTO_TCP: return (struct nf_proto_net *)&net->ct.nf_ct_proto.tcp; + case IPPROTO_UDP: + return (struct nf_proto_net *)&net->ct.nf_ct_proto.udp; case 255: /* l4proto_generic */ return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; default: diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 7259a6bdeb49..f56c8905ddfb 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -25,17 +25,16 @@ #include #include -enum udp_conntrack { - UDP_CT_UNREPLIED, - UDP_CT_REPLIED, - UDP_CT_MAX -}; - static unsigned int udp_timeouts[UDP_CT_MAX] = { [UDP_CT_UNREPLIED] = 30*HZ, [UDP_CT_REPLIED] = 180*HZ, }; +static inline struct nf_udp_net *udp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.udp; +} + static bool udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) @@ -73,7 +72,7 @@ static int udp_print_tuple(struct seq_file *s, static unsigned int *udp_get_timeouts(struct net *net) { - return udp_timeouts; + return udp_pernet(net)->timeouts; } /* Returns verdict for packet, and may modify conntracktype */ @@ -205,14 +204,12 @@ static struct ctl_table_header *udp_sysctl_header; static struct ctl_table udp_sysctl_table[] = { { .procname = "nf_conntrack_udp_timeout", - .data = &udp_timeouts[UDP_CT_UNREPLIED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_udp_timeout_stream", - .data = &udp_timeouts[UDP_CT_REPLIED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -223,14 +220,12 @@ static struct ctl_table udp_sysctl_table[] = { static struct ctl_table udp_compat_sysctl_table[] = { { .procname = "ip_conntrack_udp_timeout", - .data = &udp_timeouts[UDP_CT_UNREPLIED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_udp_timeout_stream", - .data = &udp_timeouts[UDP_CT_REPLIED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -240,6 +235,87 @@ static struct ctl_table udp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ +static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn) +{ +#ifdef CONFIG_SYSCTL + struct nf_udp_net *un = (struct nf_udp_net *)pn; + if (pn->ctl_table) + return 0; + pn->ctl_table = kmemdup(udp_sysctl_table, + sizeof(udp_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + pn->ctl_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; + pn->ctl_table[1].data = &un->timeouts[UDP_CT_REPLIED]; +#endif + return 0; +} + +static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) +{ +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + struct nf_udp_net *un = (struct nf_udp_net *)pn; + pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table, + sizeof(udp_compat_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_compat_table) + return -ENOMEM; + + pn->ctl_compat_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; + pn->ctl_compat_table[1].data = &un->timeouts[UDP_CT_REPLIED]; +#endif +#endif + return 0; +} + +static void udp_init_net_data(struct nf_udp_net *un) +{ + int i; +#ifdef CONFIG_SYSCTL + if (!un->pn.ctl_table) { +#else + if (!un->pn.user++) { +#endif + for (i = 0; i < UDP_CT_MAX; i++) + un->timeouts[i] = udp_timeouts[i]; + } +} + +static int udpv4_init_net(struct net *net) +{ + int ret; + struct nf_udp_net *un = udp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)un; + + udp_init_net_data(un); + + ret = udp_kmemdup_compat_sysctl_table(pn); + if (ret < 0) + return ret; + + ret = udp_kmemdup_sysctl_table(pn); +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + if (ret < 0) { + kfree(pn->ctl_compat_table); + pn->ctl_compat_table = NULL; + } +#endif +#endif + return ret; +} + +static int udpv6_init_net(struct net *net) +{ + struct nf_udp_net *un = udp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)un; + + udp_init_net_data(un); + return udp_kmemdup_sysctl_table(pn); +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = { .l3proto = PF_INET, @@ -275,6 +351,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .ctl_compat_table = udp_compat_sysctl_table, #endif #endif + .init_net = udpv4_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); @@ -310,5 +387,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .ctl_table_header = &udp_sysctl_header, .ctl_table = udp_sysctl_table, #endif + .init_net = udpv6_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); -- cgit v1.2.3 From 4b626b9c5d35b4f99b073dc5d6457abddcbcf429 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:14 +0000 Subject: netfilter: nf_ct_icmp: add namespace support This patch adds namespace support for ICMP protocol tracker. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 6 +++++ net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 38 +++++++++++++++++++++++++--- net/netfilter/nf_conntrack_proto.c | 2 ++ 3 files changed, 43 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 7bd14ab8ce1c..3d8e9e3b08a6 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -45,10 +45,16 @@ struct nf_udp_net { unsigned int timeouts[UDP_CT_MAX]; }; +struct nf_icmp_net { + struct nf_proto_net pn; + unsigned int timeout; +}; + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; struct nf_udp_net udp; + struct nf_icmp_net icmp; #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 0847e373d33c..a0eabeb36b9f 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -23,6 +23,11 @@ static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; +static inline struct nf_icmp_net *icmp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp; +} + static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { @@ -77,7 +82,7 @@ static int icmp_print_tuple(struct seq_file *s, static unsigned int *icmp_get_timeouts(struct net *net) { - return &nf_ct_icmp_timeout; + return &icmp_pernet(net)->timeout; } /* Returns verdict for packet, or -1 for invalid. */ @@ -312,7 +317,6 @@ static struct ctl_table_header *icmp_sysctl_header; static struct ctl_table icmp_sysctl_table[] = { { .procname = "nf_conntrack_icmp_timeout", - .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -323,7 +327,6 @@ static struct ctl_table icmp_sysctl_table[] = { static struct ctl_table icmp_compat_sysctl_table[] = { { .procname = "ip_conntrack_icmp_timeout", - .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -333,6 +336,34 @@ static struct ctl_table icmp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ +static int icmp_init_net(struct net *net) +{ + struct nf_icmp_net *in = icmp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)in; + in->timeout = nf_ct_icmp_timeout; + +#ifdef CONFIG_SYSCTL + pn->ctl_table = kmemdup(icmp_sysctl_table, + sizeof(icmp_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + pn->ctl_table[0].data = &in->timeout; +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table, + sizeof(icmp_compat_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_compat_table) { + kfree(pn->ctl_table); + pn->ctl_table = NULL; + return -ENOMEM; + } + pn->ctl_compat_table[0].data = &in->timeout; +#endif +#endif + return 0; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = { .l3proto = PF_INET, @@ -369,4 +400,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .ctl_compat_table = icmp_compat_sysctl_table, #endif #endif + .init_net = icmp_init_net, }; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 9c6aee51dea2..dbade5f2b1d3 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -307,6 +307,8 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, return (struct nf_proto_net *)&net->ct.nf_ct_proto.tcp; case IPPROTO_UDP: return (struct nf_proto_net *)&net->ct.nf_ct_proto.udp; + case IPPROTO_ICMP: + return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmp; case 255: /* l4proto_generic */ return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; default: -- cgit v1.2.3 From 7080ba0955438ecd2885c1b73fbd9760b1594a41 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:15 +0000 Subject: netfilter: nf_ct_icmp: add namespace support This patch adds namespace support for ICMPv6 protocol tracker. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 1 + net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 25 +++++++++++++++++++++++-- net/netfilter/nf_conntrack_proto.c | 2 ++ 3 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 3d8e9e3b08a6..3aecdc7a84fb 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -55,6 +55,7 @@ struct nf_ip_net { struct nf_tcp_net tcp; struct nf_udp_net udp; struct nf_icmp_net icmp; + struct nf_icmp_net icmpv6; #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 3e81904fbbcd..f606355200d8 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -29,6 +29,11 @@ static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ; +static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmpv6; +} + static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) @@ -90,7 +95,7 @@ static int icmpv6_print_tuple(struct seq_file *s, static unsigned int *icmpv6_get_timeouts(struct net *net) { - return &nf_ct_icmpv6_timeout; + return &icmpv6_pernet(net)->timeout; } /* Returns verdict for packet, or -1 for invalid. */ @@ -319,7 +324,6 @@ static struct ctl_table_header *icmpv6_sysctl_header; static struct ctl_table icmpv6_sysctl_table[] = { { .procname = "nf_conntrack_icmpv6_timeout", - .data = &nf_ct_icmpv6_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -328,6 +332,22 @@ static struct ctl_table icmpv6_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ +static int icmpv6_init_net(struct net *net) +{ + struct nf_icmp_net *in = icmpv6_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)in; + in->timeout = nf_ct_icmpv6_timeout; +#ifdef CONFIG_SYSCTL + pn->ctl_table = kmemdup(icmpv6_sysctl_table, + sizeof(icmpv6_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + pn->ctl_table[0].data = &in->timeout; +#endif + return 0; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = { .l3proto = PF_INET6, @@ -359,4 +379,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .ctl_table_header = &icmpv6_sysctl_header, .ctl_table = icmpv6_sysctl_table, #endif + .init_net = icmpv6_init_net, }; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index dbade5f2b1d3..1ea919450fc3 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -309,6 +309,8 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, return (struct nf_proto_net *)&net->ct.nf_ct_proto.udp; case IPPROTO_ICMP: return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmp; + case IPPROTO_ICMPV6: + return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmpv6; case 255: /* l4proto_generic */ return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; default: -- cgit v1.2.3 From e76d0af5e45f4152e3fdcc103b753a8aff93fcb5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 Jun 2012 18:35:48 +0200 Subject: netfilter: nf_conntrack: remove now unused sysctl for nf_conntrack_l[3|4]proto Since the sysctl data for l[3|4]proto now resides in pernet nf_proto_net. We can now remove this unused fields from struct nf_contrack_l[3,4]proto. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 2 -- include/net/netfilter/nf_conntrack_l4proto.h | 10 ---------- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 1 - net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 -------- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 5 ----- net/netfilter/nf_conntrack_proto_generic.c | 8 -------- net/netfilter/nf_conntrack_proto_sctp.c | 15 --------------- net/netfilter/nf_conntrack_proto_tcp.c | 15 --------------- net/netfilter/nf_conntrack_proto_udp.c | 15 --------------- net/netfilter/nf_conntrack_proto_udplite.c | 12 ------------ 10 files changed, 91 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index d6df8c71a7fe..6f7c13f4ac03 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -64,9 +64,7 @@ struct nf_conntrack_l3proto { size_t nla_size; #ifdef CONFIG_SYSCTL - struct ctl_table_header *ctl_table_header; const char *ctl_table_path; - struct ctl_table *ctl_table; #endif /* CONFIG_SYSCTL */ /* Init l3proto pernet data */ diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index d621c91de5c8..cfa2f89b031d 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -93,16 +93,6 @@ struct nf_conntrack_l4proto { unsigned int nlattr_max; const struct nla_policy *nla_policy; } ctnl_timeout; -#endif - -#ifdef CONFIG_SYSCTL - struct ctl_table_header **ctl_table_header; - struct ctl_table *ctl_table; - unsigned int *ctl_table_users; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - struct ctl_table_header *ctl_compat_table_header; - struct ctl_table *ctl_compat_table; -#endif #endif int *net_id; /* Init l4proto pernet data */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5c66203af51c..d79b961a8009 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -380,7 +380,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { #endif #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) .ctl_table_path = "net/ipv4/netfilter", - .ctl_table = ip_ct_sysctl_table, #endif .init_net = ipv4_init_net, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index a0eabeb36b9f..2bca7a5e422b 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -313,7 +313,6 @@ icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static struct ctl_table_header *icmp_sysctl_header; static struct ctl_table icmp_sysctl_table[] = { { .procname = "nf_conntrack_icmp_timeout", @@ -393,12 +392,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .nla_policy = icmp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_header = &icmp_sysctl_header, - .ctl_table = icmp_sysctl_table, -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - .ctl_compat_table = icmp_compat_sysctl_table, -#endif -#endif .init_net = icmp_init_net, }; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index f606355200d8..1b7818f15f3d 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -320,7 +320,6 @@ icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static struct ctl_table_header *icmpv6_sysctl_header; static struct ctl_table icmpv6_sysctl_table[] = { { .procname = "nf_conntrack_icmpv6_timeout", @@ -375,9 +374,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .nla_policy = icmpv6_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_header = &icmpv6_sysctl_header, - .ctl_table = icmpv6_sysctl_table, -#endif .init_net = icmpv6_init_net, }; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 19bc880eb4e2..e4e2d2a38d3f 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -111,7 +111,6 @@ generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static struct ctl_table_header *generic_sysctl_header; static struct ctl_table generic_sysctl_table[] = { { .procname = "nf_conntrack_generic_timeout", @@ -182,12 +181,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = .nla_policy = generic_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_header = &generic_sysctl_header, - .ctl_table = generic_sysctl_table, -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - .ctl_compat_table = generic_compat_sysctl_table, -#endif -#endif .init_net = generic_init_net, }; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 9e5738db34df..d785f2c4182b 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -610,8 +610,6 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = { #ifdef CONFIG_SYSCTL -static unsigned int sctp_sysctl_table_users; -static struct ctl_table_header *sctp_sysctl_header; static struct ctl_table sctp_sysctl_table[] = { { .procname = "nf_conntrack_sctp_timeout_closed", @@ -832,14 +830,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .nla_policy = sctp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &sctp_sysctl_table_users, - .ctl_table_header = &sctp_sysctl_header, - .ctl_table = sctp_sysctl_table, -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - .ctl_compat_table = sctp_compat_sysctl_table, -#endif -#endif .net_id = &sctp_net_id, .init_net = sctpv4_init_net, }; @@ -873,11 +863,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .nla_policy = sctp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#endif -#ifdef CONFIG_SYSCTL - .ctl_table_users = &sctp_sysctl_table_users, - .ctl_table_header = &sctp_sysctl_header, - .ctl_table = sctp_sysctl_table, #endif .net_id = &sctp_net_id, .init_net = sctpv6_init_net, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a053f6786b3c..e57f2a888dae 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1364,8 +1364,6 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static unsigned int tcp_sysctl_table_users; -static struct ctl_table_header *tcp_sysctl_header; static struct ctl_table tcp_sysctl_table[] = { { .procname = "nf_conntrack_tcp_timeout_syn_sent", @@ -1684,14 +1682,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .nla_policy = tcp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &tcp_sysctl_table_users, - .ctl_table_header = &tcp_sysctl_header, - .ctl_table = tcp_sysctl_table, -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - .ctl_compat_table = tcp_compat_sysctl_table, -#endif -#endif .init_net = tcpv4_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); @@ -1728,11 +1718,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .nla_policy = tcp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &tcp_sysctl_table_users, - .ctl_table_header = &tcp_sysctl_header, - .ctl_table = tcp_sysctl_table, -#endif .init_net = tcpv6_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index f56c8905ddfb..db7abad44bc5 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -199,8 +199,6 @@ udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static unsigned int udp_sysctl_table_users; -static struct ctl_table_header *udp_sysctl_header; static struct ctl_table udp_sysctl_table[] = { { .procname = "nf_conntrack_udp_timeout", @@ -343,14 +341,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .nla_policy = udp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &udp_sysctl_table_users, - .ctl_table_header = &udp_sysctl_header, - .ctl_table = udp_sysctl_table, -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - .ctl_compat_table = udp_compat_sysctl_table, -#endif -#endif .init_net = udpv4_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); @@ -382,11 +372,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .nla_policy = udp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &udp_sysctl_table_users, - .ctl_table_header = &udp_sysctl_header, - .ctl_table = udp_sysctl_table, -#endif .init_net = udpv6_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 7f85b0850a44..2e25e985e8cf 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -215,8 +215,6 @@ udplite_timeout_nla_policy[CTA_TIMEOUT_UDPLITE_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static unsigned int udplite_sysctl_table_users; -static struct ctl_table_header *udplite_sysctl_header; static struct ctl_table udplite_sysctl_table[] = { { .procname = "nf_conntrack_udplite_timeout", @@ -287,11 +285,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .nla_policy = udplite_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &udplite_sysctl_table_users, - .ctl_table_header = &udplite_sysctl_header, - .ctl_table = udplite_sysctl_table, -#endif .net_id = &udplite_net_id, .init_net = udplite_init_net, }; @@ -324,11 +317,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .nla_policy = udplite_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &udplite_sysctl_table_users, - .ctl_table_header = &udplite_sysctl_header, - .ctl_table = udplite_sysctl_table, -#endif .net_id = &udplite_net_id, .init_net = udplite_init_net, }; -- cgit v1.2.3 From 8264deb81853462da5cbcfb19b54c4fd9f3d88ba Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:23 +0000 Subject: netfilter: nf_conntrack: add namespace support for cttimeout This patch adds namespace support for cttimeout. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 3 ++- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 6 ++++-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 6 ++++-- net/netfilter/nf_conntrack_proto_dccp.c | 5 +++-- net/netfilter/nf_conntrack_proto_generic.c | 6 ++++-- net/netfilter/nf_conntrack_proto_gre.c | 8 +++++--- net/netfilter/nf_conntrack_proto_sctp.c | 6 ++++-- net/netfilter/nf_conntrack_proto_tcp.c | 6 ++++-- net/netfilter/nf_conntrack_proto_udp.c | 8 +++++--- net/netfilter/nf_conntrack_proto_udplite.c | 8 +++++--- net/netfilter/nfnetlink_cttimeout.c | 13 ++++++++----- 11 files changed, 48 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index cfa2f89b031d..81c52b5205f2 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -87,7 +87,8 @@ struct nf_conntrack_l4proto { #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) struct { size_t obj_size; - int (*nlattr_to_obj)(struct nlattr *tb[], void *data); + int (*nlattr_to_obj)(struct nlattr *tb[], + struct net *net, void *data); int (*obj_to_nlattr)(struct sk_buff *skb, const void *data); unsigned int nlattr_max; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 2bca7a5e422b..041923cb67ad 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -279,16 +279,18 @@ static int icmp_nlattr_tuple_size(void) #include #include -static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeout = data; + struct nf_icmp_net *in = icmp_pernet(net); if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { *timeout = ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; } else { /* Set default ICMP timeout. */ - *timeout = nf_ct_icmp_timeout; + *timeout = in->timeout; } return 0; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 1b7818f15f3d..63ed0121836c 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -286,16 +286,18 @@ static int icmpv6_nlattr_tuple_size(void) #include #include -static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeout = data; + struct nf_icmp_net *in = icmpv6_pernet(net); if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) { *timeout = ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ; } else { /* Set default ICMPv6 timeout. */ - *timeout = nf_ct_icmpv6_timeout; + *timeout = in->timeout; } return 0; } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 8d798a613e3f..c33f76af913f 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -712,9 +712,10 @@ static int dccp_nlattr_size(void) #include #include -static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { - struct dccp_net *dn = dccp_pernet(&init_net); + struct dccp_net *dn = dccp_pernet(net); unsigned int *timeouts = data; int i; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index e4e2d2a38d3f..bb0e74fe0fae 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -75,16 +75,18 @@ static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, #include #include -static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeout = data; + struct nf_generic_net *gn = generic_pernet(net); if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT]) *timeout = ntohl(nla_get_be32(tb[CTA_TIMEOUT_GENERIC_TIMEOUT])) * HZ; else { /* Set default generic timeout. */ - *timeout = nf_ct_generic_timeout; + *timeout = gn->timeout; } return 0; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index e36973f9ef59..25ba5a2f5edc 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -304,13 +304,15 @@ static void gre_destroy(struct nf_conn *ct) #include #include -static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeouts = data; + struct netns_proto_gre *net_gre = gre_pernet(net); /* set default timeouts for GRE. */ - timeouts[GRE_CT_UNREPLIED] = gre_timeouts[GRE_CT_UNREPLIED]; - timeouts[GRE_CT_REPLIED] = gre_timeouts[GRE_CT_REPLIED]; + timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED]; + timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED]; if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) { timeouts[GRE_CT_UNREPLIED] = diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index d785f2c4182b..8fb0582ad397 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -562,14 +562,16 @@ static int sctp_nlattr_size(void) #include #include -static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeouts = data; + struct sctp_net *sn = sctp_pernet(net); int i; /* set default SCTP timeouts. */ for (i=0; itimeouts[i]; /* there's a 1:1 mapping between attributes and protocol states. */ for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i #include -static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeouts = data; + struct nf_tcp_net *tn = tcp_pernet(net); int i; /* set default TCP timeouts. */ for (i=0; itimeouts[i]; if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) { timeouts[TCP_CONNTRACK_SYN_SENT] = diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index db7abad44bc5..360565a95de4 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -156,13 +156,15 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, #include #include -static int udp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int udp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeouts = data; + struct nf_udp_net *un = udp_pernet(net); /* set default timeouts for UDP. */ - timeouts[UDP_CT_UNREPLIED] = udp_timeouts[UDP_CT_UNREPLIED]; - timeouts[UDP_CT_REPLIED] = udp_timeouts[UDP_CT_REPLIED]; + timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED]; + timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED]; if (tb[CTA_TIMEOUT_UDP_UNREPLIED]) { timeouts[UDP_CT_UNREPLIED] = diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 2e25e985e8cf..b32e700f8dde 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -172,13 +172,15 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, #include #include -static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeouts = data; + struct udplite_net *un = udplite_pernet(net); /* set default timeouts for UDPlite. */ - timeouts[UDPLITE_CT_UNREPLIED] = udplite_timeouts[UDPLITE_CT_UNREPLIED]; - timeouts[UDPLITE_CT_REPLIED] = udplite_timeouts[UDPLITE_CT_REPLIED]; + timeouts[UDPLITE_CT_UNREPLIED] = un->timeouts[UDPLITE_CT_UNREPLIED]; + timeouts[UDPLITE_CT_REPLIED] = un->timeouts[UDPLITE_CT_REPLIED]; if (tb[CTA_TIMEOUT_UDPLITE_UNREPLIED]) { timeouts[UDPLITE_CT_UNREPLIED] = diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 3e655288d1d6..cdecbc8fe965 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -49,8 +49,9 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { static int ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, - struct nf_conntrack_l4proto *l4proto, - const struct nlattr *attr) + struct nf_conntrack_l4proto *l4proto, + struct net *net, + const struct nlattr *attr) { int ret = 0; @@ -60,7 +61,8 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr, l4proto->ctnl_timeout.nla_policy); - ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, &timeout->data); + ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, + &timeout->data); } return ret; } @@ -74,6 +76,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, __u8 l4num; struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout, *matching = NULL; + struct net *net = sock_net(skb->sk); char *name; int ret; @@ -117,7 +120,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(matching, l4proto, + ret = ctnl_timeout_parse_policy(matching, l4proto, net, cda[CTA_TIMEOUT_DATA]); return ret; } @@ -132,7 +135,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(timeout, l4proto, + ret = ctnl_timeout_parse_policy(timeout, l4proto, net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) goto err; -- cgit v1.2.3 From c8a627ed06d6d49bf65015a2185c519335c4c83f Mon Sep 17 00:00:00 2001 From: Gao feng Date: Fri, 8 Jun 2012 01:20:41 +0000 Subject: inetpeer: add namespace support for inetpeer now inetpeer doesn't support namespace,the information will be leaking across namespace. this patch move the global vars v4_peers and v6_peers to netns_ipv4 and netns_ipv6 as a field peers. add struct pernet_operations inetpeer_ops to initial pernet inetpeer data. and change family_to_base and inet_getpeer to support namespace. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- include/net/inetpeer.h | 10 ++++--- include/net/netns/ipv4.h | 2 +- include/net/netns/ipv6.h | 1 + net/ipv4/inetpeer.c | 68 +++++++++++++++++++++++++++++++++++------------- net/ipv4/route.c | 2 +- 5 files changed, 59 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 2040bff945d4..fef9dfab5d45 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -75,7 +75,9 @@ static inline bool inet_metrics_new(const struct inet_peer *p) } /* can be called with or without local BH being disabled */ -struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create); +struct inet_peer *inet_getpeer(struct net *net, + const struct inetpeer_addr *daddr, + int create); static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) { @@ -83,7 +85,7 @@ static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) daddr.addr.a4 = v4daddr; daddr.family = AF_INET; - return inet_getpeer(&daddr, create); + return inet_getpeer(&init_net, &daddr, create); } static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create) @@ -92,14 +94,14 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, *(struct in6_addr *)daddr.addr.a6 = *v6daddr; daddr.family = AF_INET6; - return inet_getpeer(&daddr, create); + return inet_getpeer(&init_net, &daddr, create); } /* can be called from BH context or outside */ extern void inet_putpeer(struct inet_peer *p); extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); -extern void inetpeer_invalidate_tree(int family); +extern void inetpeer_invalidate_tree(struct net *net, int family); /* * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index bbd023a1c9b9..227f0cd9d3f6 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -30,7 +30,7 @@ struct netns_ipv4 { struct sock **icmp_sk; struct sock *tcp_sock; - + struct inet_peer_base *peers; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index b42be53587ba..df0a5456a3fd 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -33,6 +33,7 @@ struct netns_ipv6 { struct netns_sysctl_ipv6 sysctl; struct ipv6_devconf *devconf_all; struct ipv6_devconf *devconf_dflt; + struct inet_peer_base *peers; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *ip6table_filter; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index dfba343b2509..1c8527349c86 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -88,18 +88,6 @@ struct inet_peer_base { int total; }; -static struct inet_peer_base v4_peers = { - .root = peer_avl_empty_rcu, - .lock = __SEQLOCK_UNLOCKED(v4_peers.lock), - .total = 0, -}; - -static struct inet_peer_base v6_peers = { - .root = peer_avl_empty_rcu, - .lock = __SEQLOCK_UNLOCKED(v6_peers.lock), - .total = 0, -}; - #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ /* Exported for sysctl_net_ipv4. */ @@ -153,6 +141,46 @@ static void inetpeer_gc_worker(struct work_struct *work) schedule_delayed_work(&gc_work, gc_delay); } +static int __net_init inetpeer_net_init(struct net *net) +{ + net->ipv4.peers = kzalloc(sizeof(struct inet_peer_base), + GFP_KERNEL); + if (net->ipv4.peers == NULL) + return -ENOMEM; + + net->ipv4.peers->root = peer_avl_empty_rcu; + seqlock_init(&net->ipv4.peers->lock); + + net->ipv6.peers = kzalloc(sizeof(struct inet_peer_base), + GFP_KERNEL); + if (net->ipv6.peers == NULL) + goto out_ipv6; + + net->ipv6.peers->root = peer_avl_empty_rcu; + seqlock_init(&net->ipv6.peers->lock); + + return 0; +out_ipv6: + kfree(net->ipv4.peers); + return -ENOMEM; +} + +static void __net_exit inetpeer_net_exit(struct net *net) +{ + inetpeer_invalidate_tree(net, AF_INET); + kfree(net->ipv4.peers); + net->ipv4.peers = NULL; + + inetpeer_invalidate_tree(net, AF_INET6); + kfree(net->ipv6.peers); + net->ipv6.peers = NULL; +} + +static struct pernet_operations inetpeer_ops = { + .init = inetpeer_net_init, + .exit = inetpeer_net_exit, +}; + /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { @@ -177,6 +205,7 @@ void __init inet_initpeers(void) NULL); INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); + register_pernet_subsys(&inetpeer_ops); } static int addr_compare(const struct inetpeer_addr *a, @@ -401,9 +430,10 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, call_rcu(&p->rcu, inetpeer_free_rcu); } -static struct inet_peer_base *family_to_base(int family) +static struct inet_peer_base *family_to_base(struct net *net, + int family) { - return family == AF_INET ? &v4_peers : &v6_peers; + return family == AF_INET ? net->ipv4.peers : net->ipv6.peers; } /* perform garbage collect on all items stacked during a lookup */ @@ -443,10 +473,12 @@ static int inet_peer_gc(struct inet_peer_base *base, return cnt; } -struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create) +struct inet_peer *inet_getpeer(struct net *net, + const struct inetpeer_addr *daddr, + int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer_base *base = family_to_base(daddr->family); + struct inet_peer_base *base = family_to_base(net, daddr->family); struct inet_peer *p; unsigned int sequence; int invalidated, gccnt = 0; @@ -571,10 +603,10 @@ static void inetpeer_inval_rcu(struct rcu_head *head) schedule_delayed_work(&gc_work, gc_delay); } -void inetpeer_invalidate_tree(int family) +void inetpeer_invalidate_tree(struct net *net, int family) { struct inet_peer *old, *new, *prev; - struct inet_peer_base *base = family_to_base(family); + struct inet_peer_base *base = family_to_base(net, family); write_seqlock_bh(&base->lock); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 98b30d08efe9..006c21cc2324 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -938,7 +938,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); - inetpeer_invalidate_tree(AF_INET); + inetpeer_invalidate_tree(net, AF_INET); } /* -- cgit v1.2.3 From 54db0cc2ba0d38166acc2d6bae21721405305537 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Fri, 8 Jun 2012 01:21:40 +0000 Subject: inetpeer: add parameter net for inet_getpeer_v4,v6 add struct net as a parameter of inet_getpeer_v[4,6], use net to replace &init_net. and modify some places to provide net for inet_getpeer_v[4,6] Signed-off-by: Gao feng Signed-off-by: David S. Miller --- include/net/inetpeer.h | 12 ++++++++---- net/ipv4/inet_fragment.c | 2 +- net/ipv4/ip_fragment.c | 6 +++++- net/ipv4/route.c | 8 +++++--- net/ipv4/tcp_ipv4.c | 6 ++++-- net/ipv6/route.c | 3 ++- net/ipv6/tcp_ipv6.c | 6 ++++-- 7 files changed, 29 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index fef9dfab5d45..20e67db69ac9 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -79,22 +79,26 @@ struct inet_peer *inet_getpeer(struct net *net, const struct inetpeer_addr *daddr, int create); -static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) +static inline struct inet_peer *inet_getpeer_v4(struct net *net, + __be32 v4daddr, + int create) { struct inetpeer_addr daddr; daddr.addr.a4 = v4daddr; daddr.family = AF_INET; - return inet_getpeer(&init_net, &daddr, create); + return inet_getpeer(net, &daddr, create); } -static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create) +static inline struct inet_peer *inet_getpeer_v6(struct net *net, + const struct in6_addr *v6daddr, + int create) { struct inetpeer_addr daddr; *(struct in6_addr *)daddr.addr.a6 = *v6daddr; daddr.family = AF_INET6; - return inet_getpeer(&init_net, &daddr, create); + return inet_getpeer(net, &daddr, create); } /* can be called from BH context or outside */ diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 5ff2a51b6d0c..85190e69297b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -243,12 +243,12 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, if (q == NULL) return NULL; + q->net = nf; f->constructor(q, arg); atomic_add(f->qsize, &nf->mem); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); atomic_set(&q->refcnt, 1); - q->net = nf; return q; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 9dbd3dd6022d..22c6bab9717a 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -171,6 +171,10 @@ static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) static void ip4_frag_init(struct inet_frag_queue *q, void *a) { struct ipq *qp = container_of(q, struct ipq, q); + struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, + frags); + struct net *net = container_of(ipv4, struct net, ipv4); + struct ip4_create_arg *arg = a; qp->protocol = arg->iph->protocol; @@ -180,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) qp->daddr = arg->iph->daddr; qp->user = arg->user; qp->peer = sysctl_ipfrag_max_dist ? - inet_getpeer_v4(arg->iph->saddr, 1) : NULL; + inet_getpeer_v4(net, arg->iph->saddr, 1) : NULL; } static __inline__ void ip4_frag_free(struct inet_frag_queue *q) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 006c21cc2324..2c9f73f3b7cc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1328,9 +1328,10 @@ static u32 rt_peer_genid(void) void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) { + struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; - peer = inet_getpeer_v4(daddr, create); + peer = inet_getpeer_v4(net, daddr, create); if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) inet_putpeer(peer); @@ -1694,7 +1695,7 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, unsigned short est_mtu = 0; struct inet_peer *peer; - peer = inet_getpeer_v4(iph->daddr, 1); + peer = inet_getpeer_v4(net, iph->daddr, 1); if (peer) { unsigned short mtu = new_mtu; @@ -1935,6 +1936,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, struct fib_info *fi) { + struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; int create = 0; @@ -1944,7 +1946,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; - rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); + rt->peer = peer = inet_getpeer_v4(net, rt->rt_dst, create); if (peer) { rt->rt_peer_genid = rt_peer_genid(); if (inet_metrics_new(peer)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3d9c1a4b8819..f485b451f928 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1824,11 +1824,12 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) { struct rtable *rt = (struct rtable *) __sk_dst_get(sk); struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); struct inet_peer *peer; if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr) { - peer = inet_getpeer_v4(inet->inet_daddr, 1); + peer = inet_getpeer_v4(net, inet->inet_daddr, 1); *release_it = true; } else { if (!rt->peer) @@ -1844,8 +1845,9 @@ EXPORT_SYMBOL(tcp_v4_get_peer); void *tcp_v4_tw_get_peer(struct sock *sk) { const struct inet_timewait_sock *tw = inet_twsk(sk); + struct net *net = sock_net(sk); - return inet_getpeer_v4(tw->tw_daddr, 1); + return inet_getpeer_v4(net, tw->tw_daddr, 1); } EXPORT_SYMBOL(tcp_v4_tw_get_peer); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 999a982ad3fd..4eca0130cce7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -306,9 +306,10 @@ static u32 rt6_peer_genid(void) void rt6_bind_peer(struct rt6_info *rt, int create) { + struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; - peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); + peer = inet_getpeer_v6(net, &rt->rt6i_dst.addr, create); if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) inet_putpeer(peer); else diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 80758255556c..1a9cdd09f11c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1736,11 +1736,12 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) { struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk); struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct inet_peer *peer; if (!rt || !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) { - peer = inet_getpeer_v6(&np->daddr, 1); + peer = inet_getpeer_v6(net, &np->daddr, 1); *release_it = true; } else { if (!rt->rt6i_peer) @@ -1756,11 +1757,12 @@ static void *tcp_v6_tw_get_peer(struct sock *sk) { const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); const struct inet_timewait_sock *tw = inet_twsk(sk); + struct net *net = sock_net(sk); if (tw->tw_family == AF_INET) return tcp_v4_tw_get_peer(sk); - return inet_getpeer_v6(&tw6->tw_v6_daddr, 1); + return inet_getpeer_v6(net, &tw6->tw_v6_daddr, 1); } static struct timewait_sock_ops tcp6_timewait_sock_ops = { -- cgit v1.2.3 From 7123aaa3a1416529ce461e98108e6b343b294643 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Jun 2012 05:03:21 +0000 Subject: af_unix: speedup /proc/net/unix /proc/net/unix has quadratic behavior, and can hold unix_table_lock for a while if high number of unix sockets are alive. (90 ms for 200k sockets...) We already have a hash table, so its quite easy to use it. Problem is unbound sockets are still hashed in a single hash slot (unix_socket_table[UNIX_HASH_TABLE]) This patch also spreads unbound sockets to 256 hash slots, to speedup both /proc/net/unix and unix_diag. Time to read /proc/net/unix with 200k unix sockets : (time dd if=/proc/net/unix of=/dev/null bs=4k) before : 520 secs after : 2 secs Signed-off-by: Eric Dumazet Cc: Steven Whitehouse Cc: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/af_unix.h | 3 +- net/unix/af_unix.c | 110 +++++++++++++++++++++++++++++--------------------- net/unix/diag.c | 6 ++- 3 files changed, 70 insertions(+), 49 deletions(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 2ee33da36a7a..b5f8988e4283 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -14,10 +14,11 @@ extern struct sock *unix_get_socket(struct file *filp); extern struct sock *unix_peer_get(struct sock *); #define UNIX_HASH_SIZE 256 +#define UNIX_HASH_BITS 8 extern unsigned int unix_tot_inflight; extern spinlock_t unix_table_lock; -extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { atomic_t refcnt; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 641f2e47f165..cf83f6b5ac91 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -115,15 +115,24 @@ #include #include -struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); DEFINE_SPINLOCK(unix_table_lock); EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks; -#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) -#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) +static struct hlist_head *unix_sockets_unbound(void *addr) +{ + unsigned long hash = (unsigned long)addr; + + hash ^= hash >> 16; + hash ^= hash >> 8; + hash %= UNIX_HASH_SIZE; + return &unix_socket_table[UNIX_HASH_SIZE + hash]; +} + +#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -645,7 +654,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); - unix_insert_socket(unix_sockets_unbound, sk); + unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) atomic_long_dec(&unix_nr_socks); @@ -2239,47 +2248,58 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, } #ifdef CONFIG_PROC_FS -static struct sock *first_unix_socket(int *i) -{ - for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; -} -static struct sock *next_unix_socket(int *i, struct sock *s) -{ - struct sock *next = sk_next(s); - /* More in this chain? */ - if (next) - return next; - /* Look for next non-empty chain. */ - for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; -} +#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) struct unix_iter_state { struct seq_net_private p; - int i; }; -static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos) +static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) { - struct unix_iter_state *iter = seq->private; - loff_t off = 0; - struct sock *s; + unsigned long offset = get_offset(*pos); + unsigned long bucket = get_bucket(*pos); + struct sock *sk; + unsigned long count = 0; - for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { - if (sock_net(s) != seq_file_net(seq)) + for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { + if (sock_net(sk) != seq_file_net(seq)) continue; - if (off == pos) - return s; - ++off; + if (++count == offset) + break; } + + return sk; +} + +static struct sock *unix_next_socket(struct seq_file *seq, + struct sock *sk, + loff_t *pos) +{ + unsigned long bucket; + + while (sk > (struct sock *)SEQ_START_TOKEN) { + sk = sk_next(sk); + if (!sk) + goto next_bucket; + if (sock_net(sk) == seq_file_net(seq)) + return sk; + } + + do { + sk = unix_from_bucket(seq, pos); + if (sk) + return sk; + +next_bucket: + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); + } while (bucket < ARRAY_SIZE(unix_socket_table)); + return NULL; } @@ -2287,22 +2307,20 @@ static void *unix_seq_start(struct seq_file *seq, loff_t *pos) __acquires(unix_table_lock) { spin_lock(&unix_table_lock); - return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN; + + if (!*pos) + return SEQ_START_TOKEN; + + if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) + return NULL; + + return unix_next_socket(seq, NULL, pos); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct unix_iter_state *iter = seq->private; - struct sock *sk = v; ++*pos; - - if (v == SEQ_START_TOKEN) - sk = first_unix_socket(&iter->i); - else - sk = next_unix_socket(&iter->i, sk); - while (sk && (sock_net(sk) != seq_file_net(seq))) - sk = next_unix_socket(&iter->i, sk); - return sk; + return unix_next_socket(seq, v, pos); } static void unix_seq_stop(struct seq_file *seq, void *v) diff --git a/net/unix/diag.c b/net/unix/diag.c index 47d3002737f5..7e8a24bff34a 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -195,7 +195,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = s_num = cb->args[1]; spin_lock(&unix_table_lock); - for (slot = s_slot; slot <= UNIX_HASH_SIZE; s_num = 0, slot++) { + for (slot = s_slot; + slot < ARRAY_SIZE(unix_socket_table); + s_num = 0, slot++) { struct sock *sk; struct hlist_node *node; @@ -228,7 +230,7 @@ static struct sock *unix_lookup_by_ino(int ino) struct sock *sk; spin_lock(&unix_table_lock); - for (i = 0; i <= UNIX_HASH_SIZE; i++) { + for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { struct hlist_node *node; sk_for_each(sk, node, &unix_socket_table[i]) -- cgit v1.2.3 From 1c2e004183178e1947882cd2e74f37826f45230e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 8 Jun 2012 23:31:13 +0800 Subject: Bluetooth: Add support for encryption key refresh With LE/SMP the completion of a security level elavation from medium to high is indicated by a HCI Encryption Key Refresh Complete event. The necessary behavior upon receiving this event is a mix of what's done for auth_complete and encryption_change, which is also where most of the event handling code has been copied from. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 6 ++++++ net/bluetooth/hci_event.c | 48 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 66a7b579e31c..3def64ba77fa 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1144,6 +1144,12 @@ struct extended_inquiry_info { __u8 data[240]; } __packed; +#define HCI_EV_KEY_REFRESH_COMPLETE 0x30 +struct hci_ev_key_refresh_complete { + __u8 status; + __le16 handle; +} __packed; + #define HCI_EV_IO_CAPA_REQUEST 0x31 struct hci_ev_io_capa_request { bdaddr_t bdaddr; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4eefb7f65cf6..94ad124a4ea3 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3043,6 +3043,50 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct hci_dev_unlock(hdev); } +static void hci_key_refresh_complete_evt(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_ev_key_refresh_complete *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %u handle %u", hdev->name, ev->status, + __le16_to_cpu(ev->handle)); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (!conn) + goto unlock; + + if (!ev->status) + conn->sec_level = conn->pending_sec_level; + + clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); + + if (ev->status && conn->state == BT_CONNECTED) { + hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); + hci_conn_put(conn); + goto unlock; + } + + if (conn->state == BT_CONFIG) { + if (!ev->status) + conn->state = BT_CONNECTED; + + hci_proto_connect_cfm(conn, ev->status); + hci_conn_put(conn); + } else { + hci_auth_cfm(conn, ev->status); + + hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; + hci_conn_put(conn); + } + +unlock: + hci_dev_unlock(hdev); +} + static inline u8 hci_get_auth_req(struct hci_conn *conn) { /* If remote requests dedicated bonding follow that lead */ @@ -3559,6 +3603,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_extended_inquiry_result_evt(hdev, skb); break; + case HCI_EV_KEY_REFRESH_COMPLETE: + hci_key_refresh_complete_evt(hdev, skb); + break; + case HCI_EV_IO_CAPA_REQUEST: hci_io_capa_request_evt(hdev, skb); break; -- cgit v1.2.3 From fbfe95a42e90b3dd079cc9019ba7d7700feee0f6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 8 Jun 2012 23:24:18 -0700 Subject: inet: Create and use rt{,6}_get_peer_create(). There's a lot of places that open-code rt{,6}_get_peer() only because they want to set 'create' to one. So add an rt{,6}_get_peer_create() for their sake. There were also a few spots open-coding plain rt{,6}_get_peer() and those are transformed here as well. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 17 +++++++++++++---- include/net/route.h | 14 ++++++++++++-- net/ipv4/icmp.c | 5 ++--- net/ipv4/route.c | 35 +++++++++-------------------------- net/ipv4/tcp_ipv4.c | 4 +--- net/ipv6/icmp.c | 6 +++--- net/ipv6/ip6_output.c | 11 ++++------- net/ipv6/ndisc.c | 6 +++--- net/ipv6/route.c | 5 +---- net/ipv6/tcp_ipv6.c | 4 +--- 10 files changed, 49 insertions(+), 58 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 37c1a1ed82c1..73d750288121 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -53,18 +53,27 @@ static inline unsigned int rt6_flags2srcprefs(int flags) return (flags >> 3) & 7; } -extern void rt6_bind_peer(struct rt6_info *rt, - int create); +extern void rt6_bind_peer(struct rt6_info *rt, int create); -static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt) +static inline struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) { if (rt->rt6i_peer) return rt->rt6i_peer; - rt6_bind_peer(rt, 0); + rt6_bind_peer(rt, create); return rt->rt6i_peer; } +static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt) +{ + return __rt6_get_peer(rt, 0); +} + +static inline struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) +{ + return __rt6_get_peer(rt, 1); +} + extern void ip6_route_input(struct sk_buff *skb); extern struct dst_entry * ip6_route_output(struct net *net, diff --git a/include/net/route.h b/include/net/route.h index ed2b78e2375d..433fc6c1d404 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -296,15 +296,25 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable extern void rt_bind_peer(struct rtable *rt, __be32 daddr, int create); -static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr) +static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, int create) { if (rt->peer) return rt->peer; - rt_bind_peer(rt, daddr, 0); + rt_bind_peer(rt, daddr, create); return rt->peer; } +static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr) +{ + return __rt_get_peer(rt, daddr, 0); +} + +static inline struct inet_peer *rt_get_peer_create(struct rtable *rt, __be32 daddr) +{ + return __rt_get_peer(rt, daddr, 1); +} + static inline int inet_iif(const struct sk_buff *skb) { return skb_rtable(skb)->rt_iif; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c75efbdc71cb..0c78ef1e5dde 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -253,9 +253,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, /* Limit if icmp type is enabled in ratemask. */ if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { - if (!rt->peer) - rt_bind_peer(rt, fl4->daddr, 1); - rc = inet_peer_xrlim_allow(rt->peer, + struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr); + rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); } out: diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2c9f73f3b7cc..7a4d724765e6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -162,10 +162,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) struct inet_peer *peer; u32 *p = NULL; - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); if (peer) { u32 *old_p = __DST_METRICS_PTR(old); unsigned long prev, new; @@ -1364,14 +1361,13 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) struct rtable *rt = (struct rtable *) dst; if (rt && !(rt->dst.flags & DST_NOPEER)) { - if (rt->peer == NULL) - rt_bind_peer(rt, rt->rt_dst, 1); + struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst); /* If peer is attached to destination, it is never detached, so that we need not to grab a lock to dereference it. */ - if (rt->peer) { - iph->id = htons(inet_getid(rt->peer, more)); + if (peer) { + iph->id = htons(inet_getid(peer, more)); return; } } else if (!rt) @@ -1481,10 +1477,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->rt_gateway != old_gw) continue; - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); if (peer) { if (peer->redirect_learned.a4 != new_gw) { peer->redirect_learned.a4 = new_gw; @@ -1579,9 +1572,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) log_martians = IN_DEV_LOG_MARTIANS(in_dev); rcu_read_unlock(); - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); if (!peer) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); return; @@ -1646,9 +1637,7 @@ static int ip_error(struct sk_buff *skb) break; } - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); send = true; if (peer) { @@ -1754,9 +1743,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst_confirm(dst); - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); if (peer) { unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); @@ -1782,12 +1769,8 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static void ipv4_validate_peer(struct rtable *rt) { if (rt->rt_peer_genid != rt_peer_genid()) { - struct inet_peer *peer; - - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 0); + struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst); - peer = rt->peer; if (peer) { check_peer_pmtu(&rt->dst, peer); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f485b451f928..833e8d96a636 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1832,9 +1832,7 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) peer = inet_getpeer_v4(net, inet->inet_daddr, 1); *release_it = true; } else { - if (!rt->peer) - rt_bind_peer(rt, inet->inet_daddr, 1); - peer = rt->peer; + peer = rt_get_peer_create(rt, inet->inet_daddr); *release_it = false; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 091a2971c7b7..ed89bba745a1 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -188,14 +188,14 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, } else { struct rt6_info *rt = (struct rt6_info *)dst; int tmo = net->ipv6.sysctl.icmpv6_time; + struct inet_peer *peer; /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo); + peer = rt6_get_peer_create(rt); + res = inet_peer_xrlim_allow(peer, tmo); } dst_release(dst); return res; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 17b8c67998bb..62fcf3e48aca 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -463,6 +463,7 @@ int ip6_forward(struct sk_buff *skb) */ if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; + struct inet_peer *peer; struct rt6_info *rt; /* @@ -476,13 +477,12 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); + peer = rt6_get_peer_create(rt); /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) */ - if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) + if (inet_peer_xrlim_allow(peer, 1*HZ)) ndisc_send_redirect(skb, target); } else { int addrtype = ipv6_addr_type(&hdr->saddr); @@ -602,11 +602,8 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) int old, new; if (rt && !(rt->dst.flags & DST_NOPEER)) { - struct inet_peer *peer; + struct inet_peer *peer = rt6_get_peer_create(rt); - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - peer = rt->rt6i_peer; if (peer) { fhdr->identification = htonl(inet_getid(peer, 0)); return; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 54f62d3b8dd6..69a6330dea91 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1472,6 +1472,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + struct inet_peer *peer; struct sk_buff *buff; struct icmp6hdr *icmph; struct in6_addr saddr_buf; @@ -1518,9 +1519,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) "Redirect: destination is not a neighbour\n"); goto release; } - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) + peer = rt6_get_peer_create(rt); + if (!inet_peer_xrlim_allow(peer, 1*HZ)) goto release; if (dev->addr_len) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4eca0130cce7..8a986be4aeda 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -99,10 +99,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) if (!(rt->dst.flags & DST_HOST)) return NULL; - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - - peer = rt->rt6i_peer; + peer = rt6_get_peer_create(rt); if (peer) { u32 *old_p = __DST_METRICS_PTR(old); unsigned long prev, new; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1a9cdd09f11c..218433cb9928 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1744,9 +1744,7 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) peer = inet_getpeer_v6(net, &np->daddr, 1); *release_it = true; } else { - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - peer = rt->rt6i_peer; + peer = rt6_get_peer_create(rt); *release_it = false; } -- cgit v1.2.3 From 4670fd819e7f47392c7c6fc6168ea2857c66d163 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 01:25:47 -0700 Subject: tcp: Get rid of inetpeer special cases. The get_peer method TCP uses is full of special cases that make no sense accommodating, and it also gets in the way of doing more reasonable things here. First of all, if the socket doesn't have a usable cached route, there is no sense in trying to optimize timewait recycling. Likewise for the case where we have IP options, such as SRR enabled, that make the IP header destination address (and thus the destination address of the route key) differ from that of the connection's destination address. Just return a NULL peer in these cases, and thus we're also able to get rid of the clumsy inetpeer release logic. Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 2 +- include/net/tcp.h | 2 +- net/ipv4/tcp_ipv4.c | 21 ++++++++------------- net/ipv4/tcp_minisocks.c | 5 +---- net/ipv6/tcp_ipv6.c | 21 ++++++++------------- 5 files changed, 19 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 7d83f90f203f..e1b7734c456f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -43,7 +43,7 @@ struct inet_connection_sock_af_ops { struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); - struct inet_peer *(*get_peer)(struct sock *sk, bool *release_it); + struct inet_peer *(*get_peer)(struct sock *sk); u16 net_header_len; u16 net_frag_header_len; u16 sockaddr_len; diff --git a/include/net/tcp.h b/include/net/tcp.h index e79aa48d9fc1..424591866037 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -327,7 +327,7 @@ extern void tcp_shutdown (struct sock *sk, int how); extern int tcp_v4_rcv(struct sk_buff *skb); -extern struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it); +extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); extern void *tcp_v4_tw_get_peer(struct sock *sk); extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 833e8d96a636..77f049d00dbb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1820,23 +1820,18 @@ do_time_wait: goto discard_it; } -struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) +struct inet_peer *tcp_v4_get_peer(struct sock *sk) { struct rtable *rt = (struct rtable *) __sk_dst_get(sk); struct inet_sock *inet = inet_sk(sk); - struct net *net = sock_net(sk); - struct inet_peer *peer; - - if (!rt || - inet->cork.fl.u.ip4.daddr != inet->inet_daddr) { - peer = inet_getpeer_v4(net, inet->inet_daddr, 1); - *release_it = true; - } else { - peer = rt_get_peer_create(rt, inet->inet_daddr); - *release_it = false; - } - return peer; + /* If we don't have a valid cached route, or we're doing IP + * options which make the IPv4 header destination address + * different from our peer's, do not bother with this. + */ + if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr) + return NULL; + return rt_get_peer_create(rt, inet->inet_daddr); } EXPORT_SYMBOL(tcp_v4_get_peer); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b85d9fe7d663..fef9dbf3af00 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -60,9 +60,8 @@ static bool tcp_remember_stamp(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct inet_peer *peer; - bool release_it; - peer = icsk->icsk_af_ops->get_peer(sk, &release_it); + peer = icsk->icsk_af_ops->get_peer(sk); if (peer) { if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && @@ -70,8 +69,6 @@ static bool tcp_remember_stamp(struct sock *sk) peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; peer->tcp_ts = tp->rx_opt.ts_recent; } - if (release_it) - inet_putpeer(peer); return true; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 218433cb9928..b5ecf37b61a6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1732,23 +1732,18 @@ do_time_wait: goto discard_it; } -static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) +static struct inet_peer *tcp_v6_get_peer(struct sock *sk) { struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct net *net = sock_net(sk); - struct inet_peer *peer; - - if (!rt || - !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) { - peer = inet_getpeer_v6(net, &np->daddr, 1); - *release_it = true; - } else { - peer = rt6_get_peer_create(rt); - *release_it = false; - } - return peer; + /* If we don't have a valid cached route, or we're doing IP + * options which make the IPv6 header destination address + * different from our peer's, do not bother with this. + */ + if (!rt || !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) + return NULL; + return rt6_get_peer_create(rt); } static void *tcp_v6_tw_get_peer(struct sock *sk) -- cgit v1.2.3 From d13e14148154e5ce58467e76321eef1dd912c416 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 9 Jun 2012 10:31:09 +0200 Subject: mac80211: add some missing kernel-doc Add a few kernel-doc descriptions that were missed during development. Reported-by: Randy Dunlap Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/sta_info.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 1937c7d98304..95e39b6a02ec 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1940,6 +1940,11 @@ enum ieee80211_rate_control_changed { * to also unregister the device. If it returns 1, then mac80211 * will also go through the regular complete restart on resume. * + * @set_wakeup: Enable or disable wakeup when WoWLAN configuration is + * modified. The reason is that device_set_wakeup_enable() is + * supposed to be called when the configuration changes, not only + * in suspend(). + * * @add_interface: Called when a netdevice attached to the hardware is * enabled. Because it is not called for monitor mode devices, @start * and @stop must be implemented. @@ -2966,6 +2971,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, * ieee80211_generic_frame_duration - Calculate the duration field for a frame * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @band: the band to calculate the frame duration on * @frame_len: the length of the frame. * @rate: the rate at which the frame is going to be transmitted. * diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 3bb24a121c95..525ce5077e1c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -278,6 +278,8 @@ struct sta_ampdu_mlme { * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) * @beacon_loss_count: number of times beacon loss has triggered + * @supports_40mhz: tracks whether the station advertised 40 MHz support + * as we overwrite its HT parameters with the currently used value */ struct sta_info { /* General information, mostly static */ -- cgit v1.2.3 From 2397849baa7c44c242e5d5142d5d16d1e7ed53d0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 14:56:12 -0700 Subject: [PATCH] tcp: Cache inetpeer in timewait socket, and only when necessary. Since it's guarenteed that we will access the inetpeer if we're trying to do timewait recycling and TCP options were enabled on the connection, just cache the peer in the timewait socket. In the future, inetpeer lookups will be context dependent (per routing realm), and this helps facilitate that as well. Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- include/net/tcp.h | 1 - include/net/timewait_sock.h | 8 -------- net/ipv4/tcp_ipv4.c | 10 ---------- net/ipv4/tcp_minisocks.c | 27 ++++++++++++++++++++------- net/ipv6/tcp_ipv6.c | 13 ------------- 6 files changed, 22 insertions(+), 40 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4c5b63283377..23e8234f75a5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -506,8 +506,9 @@ struct tcp_timewait_sock { u32 tw_rcv_wnd; u32 tw_ts_recent; long tw_ts_recent_stamp; + struct inet_peer *tw_peer; #ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *tw_md5_key; + struct tcp_md5sig_key *tw_md5_key; #endif /* Few sockets in timewait have cookies; in that case, then this * object holds a reference to them (tw_cookie_values->kref). diff --git a/include/net/tcp.h b/include/net/tcp.h index 424591866037..9332f342259a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -328,7 +328,6 @@ extern void tcp_shutdown (struct sock *sk, int how); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); -extern void *tcp_v4_tw_get_peer(struct sock *sk); extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 8d6689cb2c66..68f0ecad6c6e 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -22,7 +22,6 @@ struct timewait_sock_ops { int (*twsk_unique)(struct sock *sk, struct sock *sktw, void *twp); void (*twsk_destructor)(struct sock *sk); - void *(*twsk_getpeer)(struct sock *sk); }; static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -41,11 +40,4 @@ static inline void twsk_destructor(struct sock *sk) sk->sk_prot->twsk_prot->twsk_destructor(sk); } -static inline void *twsk_getpeer(struct sock *sk) -{ - if (sk->sk_prot->twsk_prot->twsk_getpeer) - return sk->sk_prot->twsk_prot->twsk_getpeer(sk); - return NULL; -} - #endif /* _TIMEWAIT_SOCK_H */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 77f049d00dbb..fda2ca17135e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1835,20 +1835,10 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk) } EXPORT_SYMBOL(tcp_v4_get_peer); -void *tcp_v4_tw_get_peer(struct sock *sk) -{ - const struct inet_timewait_sock *tw = inet_twsk(sk); - struct net *net = sock_net(sk); - - return inet_getpeer_v4(net, tw->tw_daddr, 1); -} -EXPORT_SYMBOL(tcp_v4_tw_get_peer); - static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp_timewait_sock), .twsk_unique = tcp_twsk_unique, .twsk_destructor= tcp_twsk_destructor, - .twsk_getpeer = tcp_v4_tw_get_peer, }; const struct inet_connection_sock_af_ops ipv4_specific = { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index fef9dbf3af00..cb015317c9f7 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -77,20 +77,19 @@ static bool tcp_remember_stamp(struct sock *sk) static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) { + const struct tcp_timewait_sock *tcptw; struct sock *sk = (struct sock *) tw; struct inet_peer *peer; - peer = twsk_getpeer(sk); + tcptw = tcp_twsk(sk); + peer = tcptw->tw_peer; if (peer) { - const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; peer->tcp_ts = tcptw->tw_ts_recent; } - inet_putpeer(peer); return true; } return false; @@ -314,9 +313,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); bool recycle_ok = false; + bool recycle_on = false; - if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) + if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) { recycle_ok = tcp_remember_stamp(sk); + recycle_on = true; + } if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) tw = inet_twsk_alloc(sk, state); @@ -324,8 +326,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (tw != NULL) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); + struct inet_sock *inet = inet_sk(sk); + struct inet_peer *peer = NULL; - tw->tw_transparent = inet_sk(sk)->transparent; + tw->tw_transparent = inet->transparent; tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; @@ -347,6 +351,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } #endif + if (recycle_on) + peer = icsk->icsk_af_ops->get_peer(sk); + tcptw->tw_peer = peer; + if (peer) + atomic_inc(&peer->refcnt); + #ifdef CONFIG_TCP_MD5SIG /* * The timewait bucket does not have the key DB from the @@ -398,8 +408,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) void tcp_twsk_destructor(struct sock *sk) { -#ifdef CONFIG_TCP_MD5SIG struct tcp_timewait_sock *twsk = tcp_twsk(sk); + + if (twsk->tw_peer) + inet_putpeer(twsk->tw_peer); +#ifdef CONFIG_TCP_MD5SIG if (twsk->tw_md5_key) { tcp_free_md5sig_pool(); kfree_rcu(twsk->tw_md5_key, rcu); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b5ecf37b61a6..f91b0bfd12d5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1746,23 +1746,10 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk) return rt6_get_peer_create(rt); } -static void *tcp_v6_tw_get_peer(struct sock *sk) -{ - const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); - const struct inet_timewait_sock *tw = inet_twsk(sk); - struct net *net = sock_net(sk); - - if (tw->tw_family == AF_INET) - return tcp_v4_tw_get_peer(sk); - - return inet_getpeer_v6(net, &tw6->tw_v6_daddr, 1); -} - static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, .twsk_destructor= tcp_twsk_destructor, - .twsk_getpeer = tcp_v6_tw_get_peer, }; static const struct inet_connection_sock_af_ops ipv6_specific = { -- cgit v1.2.3 From c3426b47190d7c6785230c91a706fd42208b4120 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 16:27:05 -0700 Subject: inet: Initialize per-netns inetpeer roots in net/ipv{4,6}/route.c Instead of net/ipv4/inetpeer.c Signed-off-by: David S. Miller --- include/net/inetpeer.h | 9 +++++++ net/ipv4/inetpeer.c | 64 +++++++++++++------------------------------------- net/ipv4/route.c | 25 ++++++++++++++++++++ net/ipv6/route.c | 34 ++++++++++++++++++++++++++- 4 files changed, 83 insertions(+), 49 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 20e67db69ac9..5a96c8edc526 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -65,6 +65,14 @@ struct inet_peer { atomic_t refcnt; }; +struct inet_peer_base { + struct inet_peer __rcu *root; + seqlock_t lock; + int total; +}; + +extern void inet_peer_base_init(struct inet_peer_base *); + void inet_initpeers(void) __init; #define INETPEER_METRICS_NEW (~(u32) 0) @@ -105,6 +113,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct net *net, extern void inet_putpeer(struct inet_peer *p); extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); +extern void __inetpeer_invalidate_tree(struct inet_peer_base *); extern void inetpeer_invalidate_tree(struct net *net, int family); /* diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 1c8527349c86..d0d72f89b279 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -82,11 +82,13 @@ static const struct inet_peer peer_fake_node = { .avl_height = 0 }; -struct inet_peer_base { - struct inet_peer __rcu *root; - seqlock_t lock; - int total; -}; +void inet_peer_base_init(struct inet_peer_base *bp) +{ + bp->root = peer_avl_empty_rcu; + seqlock_init(&bp->lock); + bp->total = 0; +} +EXPORT_SYMBOL_GPL(inet_peer_base_init); #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ @@ -141,46 +143,6 @@ static void inetpeer_gc_worker(struct work_struct *work) schedule_delayed_work(&gc_work, gc_delay); } -static int __net_init inetpeer_net_init(struct net *net) -{ - net->ipv4.peers = kzalloc(sizeof(struct inet_peer_base), - GFP_KERNEL); - if (net->ipv4.peers == NULL) - return -ENOMEM; - - net->ipv4.peers->root = peer_avl_empty_rcu; - seqlock_init(&net->ipv4.peers->lock); - - net->ipv6.peers = kzalloc(sizeof(struct inet_peer_base), - GFP_KERNEL); - if (net->ipv6.peers == NULL) - goto out_ipv6; - - net->ipv6.peers->root = peer_avl_empty_rcu; - seqlock_init(&net->ipv6.peers->lock); - - return 0; -out_ipv6: - kfree(net->ipv4.peers); - return -ENOMEM; -} - -static void __net_exit inetpeer_net_exit(struct net *net) -{ - inetpeer_invalidate_tree(net, AF_INET); - kfree(net->ipv4.peers); - net->ipv4.peers = NULL; - - inetpeer_invalidate_tree(net, AF_INET6); - kfree(net->ipv6.peers); - net->ipv6.peers = NULL; -} - -static struct pernet_operations inetpeer_ops = { - .init = inetpeer_net_init, - .exit = inetpeer_net_exit, -}; - /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { @@ -205,7 +167,6 @@ void __init inet_initpeers(void) NULL); INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); - register_pernet_subsys(&inetpeer_ops); } static int addr_compare(const struct inetpeer_addr *a, @@ -603,10 +564,9 @@ static void inetpeer_inval_rcu(struct rcu_head *head) schedule_delayed_work(&gc_work, gc_delay); } -void inetpeer_invalidate_tree(struct net *net, int family) +void __inetpeer_invalidate_tree(struct inet_peer_base *base) { struct inet_peer *old, *new, *prev; - struct inet_peer_base *base = family_to_base(net, family); write_seqlock_bh(&base->lock); @@ -625,4 +585,12 @@ void inetpeer_invalidate_tree(struct net *net, int family) out: write_sequnlock_bh(&base->lock); } +EXPORT_SYMBOL(__inetpeer_invalidate_tree); + +void inetpeer_invalidate_tree(struct net *net, int family) +{ + struct inet_peer_base *base = family_to_base(net, family); + + __inetpeer_invalidate_tree(base); +} EXPORT_SYMBOL(inetpeer_invalidate_tree); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7a4d724765e6..4cd35c3904d4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3385,6 +3385,30 @@ static __net_initdata struct pernet_operations rt_genid_ops = { .init = rt_genid_init, }; +static int __net_init ipv4_inetpeer_init(struct net *net) +{ + struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); + + if (!bp) + return -ENOMEM; + inet_peer_base_init(bp); + net->ipv4.peers = bp; + return 0; +} + +static void __net_exit ipv4_inetpeer_exit(struct net *net) +{ + struct inet_peer_base *bp = net->ipv4.peers; + + net->ipv4.peers = NULL; + __inetpeer_invalidate_tree(bp); + kfree(bp); +} + +static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { + .init = ipv4_inetpeer_init, + .exit = ipv4_inetpeer_exit, +}; #ifdef CONFIG_IP_ROUTE_CLASSID struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; @@ -3465,6 +3489,7 @@ int __init ip_rt_init(void) register_pernet_subsys(&sysctl_route_ops); #endif register_pernet_subsys(&rt_genid_ops); + register_pernet_subsys(&ipv4_inetpeer_ops); return rc; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8a986be4aeda..3e1e4e0da096 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2996,6 +2996,31 @@ static struct pernet_operations ip6_route_net_ops = { .exit = ip6_route_net_exit, }; +static int __net_init ipv6_inetpeer_init(struct net *net) +{ + struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); + + if (!bp) + return -ENOMEM; + inet_peer_base_init(bp); + net->ipv6.peers = bp; + return 0; +} + +static void __net_exit ipv6_inetpeer_exit(struct net *net) +{ + struct inet_peer_base *bp = net->ipv6.peers; + + net->ipv6.peers = NULL; + __inetpeer_invalidate_tree(bp); + kfree(bp); +} + +static __net_initdata struct pernet_operations ipv6_inetpeer_ops = { + .init = ipv6_inetpeer_init, + .exit = ipv6_inetpeer_exit, +}; + static struct notifier_block ip6_route_dev_notifier = { .notifier_call = ip6_route_dev_notify, .priority = 0, @@ -3020,6 +3045,10 @@ int __init ip6_route_init(void) if (ret) goto out_dst_entries; + ret = register_pernet_subsys(&ipv6_inetpeer_ops); + if (ret) + goto out_register_subsys; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; /* Registering of the loopback is done before this portion of code, @@ -3035,7 +3064,7 @@ int __init ip6_route_init(void) #endif ret = fib6_init(); if (ret) - goto out_register_subsys; + goto out_register_inetpeer; ret = xfrm6_init(); if (ret) @@ -3064,6 +3093,8 @@ xfrm6_init: xfrm6_fini(); out_fib6_init: fib6_gc_cleanup(); +out_register_inetpeer: + unregister_pernet_subsys(&ipv6_inetpeer_ops); out_register_subsys: unregister_pernet_subsys(&ip6_route_net_ops); out_dst_entries: @@ -3079,6 +3110,7 @@ void ip6_route_cleanup(void) fib6_rules_cleanup(); xfrm6_fini(); fib6_gc_cleanup(); + unregister_pernet_subsys(&ipv6_inetpeer_ops); unregister_pernet_subsys(&ip6_route_net_ops); dst_entries_destroy(&ip6_dst_blackhole_ops); kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); -- cgit v1.2.3 From 56a6b248eb345c1948ee60bf426de1ff7dd81509 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 16:32:41 -0700 Subject: inet: Consolidate inetpeer_invalidate_tree() interfaces. We only need one interface for this operation, since we always know which inetpeer root we want to flush. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 3 +-- net/ipv4/inetpeer.c | 10 +--------- net/ipv4/route.c | 4 ++-- net/ipv6/route.c | 2 +- 4 files changed, 5 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 5a96c8edc526..733edc641b76 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -113,8 +113,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct net *net, extern void inet_putpeer(struct inet_peer *p); extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); -extern void __inetpeer_invalidate_tree(struct inet_peer_base *); -extern void inetpeer_invalidate_tree(struct net *net, int family); +extern void inetpeer_invalidate_tree(struct inet_peer_base *); /* * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d0d72f89b279..9d89a381f0e1 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -564,7 +564,7 @@ static void inetpeer_inval_rcu(struct rcu_head *head) schedule_delayed_work(&gc_work, gc_delay); } -void __inetpeer_invalidate_tree(struct inet_peer_base *base) +void inetpeer_invalidate_tree(struct inet_peer_base *base) { struct inet_peer *old, *new, *prev; @@ -585,12 +585,4 @@ void __inetpeer_invalidate_tree(struct inet_peer_base *base) out: write_sequnlock_bh(&base->lock); } -EXPORT_SYMBOL(__inetpeer_invalidate_tree); - -void inetpeer_invalidate_tree(struct net *net, int family) -{ - struct inet_peer_base *base = family_to_base(net, family); - - __inetpeer_invalidate_tree(base); -} EXPORT_SYMBOL(inetpeer_invalidate_tree); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4cd35c3904d4..cf78343940de 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -935,7 +935,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); - inetpeer_invalidate_tree(net, AF_INET); + inetpeer_invalidate_tree(net->ipv4.peers); } /* @@ -3401,7 +3401,7 @@ static void __net_exit ipv4_inetpeer_exit(struct net *net) struct inet_peer_base *bp = net->ipv4.peers; net->ipv4.peers = NULL; - __inetpeer_invalidate_tree(bp); + inetpeer_invalidate_tree(bp); kfree(bp); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3e1e4e0da096..7f346d7492d2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3012,7 +3012,7 @@ static void __net_exit ipv6_inetpeer_exit(struct net *net) struct inet_peer_base *bp = net->ipv6.peers; net->ipv6.peers = NULL; - __inetpeer_invalidate_tree(bp); + inetpeer_invalidate_tree(bp); kfree(bp); } -- cgit v1.2.3 From c0efc887dcadbdbfe171f028acfab9c7c00e9dde Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 19:12:36 -0700 Subject: inet: Pass inetpeer root into inet_getpeer*() interfaces. Otherwise we reference potentially non-existing members when ipv6 is disabled. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 10 +++++----- net/ipv4/inetpeer.c | 9 +-------- net/ipv4/ip_fragment.c | 2 +- net/ipv4/route.c | 6 +++--- net/ipv6/route.c | 2 +- 5 files changed, 11 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 733edc641b76..b84b32fd5df1 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -83,11 +83,11 @@ static inline bool inet_metrics_new(const struct inet_peer *p) } /* can be called with or without local BH being disabled */ -struct inet_peer *inet_getpeer(struct net *net, +struct inet_peer *inet_getpeer(struct inet_peer_base *base, const struct inetpeer_addr *daddr, int create); -static inline struct inet_peer *inet_getpeer_v4(struct net *net, +static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base, __be32 v4daddr, int create) { @@ -95,10 +95,10 @@ static inline struct inet_peer *inet_getpeer_v4(struct net *net, daddr.addr.a4 = v4daddr; daddr.family = AF_INET; - return inet_getpeer(net, &daddr, create); + return inet_getpeer(base, &daddr, create); } -static inline struct inet_peer *inet_getpeer_v6(struct net *net, +static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base, const struct in6_addr *v6daddr, int create) { @@ -106,7 +106,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct net *net, *(struct in6_addr *)daddr.addr.a6 = *v6daddr; daddr.family = AF_INET6; - return inet_getpeer(net, &daddr, create); + return inet_getpeer(base, &daddr, create); } /* can be called from BH context or outside */ diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 9d89a381f0e1..e4cba56a5349 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -391,12 +391,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, call_rcu(&p->rcu, inetpeer_free_rcu); } -static struct inet_peer_base *family_to_base(struct net *net, - int family) -{ - return family == AF_INET ? net->ipv4.peers : net->ipv6.peers; -} - /* perform garbage collect on all items stacked during a lookup */ static int inet_peer_gc(struct inet_peer_base *base, struct inet_peer __rcu **stack[PEER_MAXDEPTH], @@ -434,12 +428,11 @@ static int inet_peer_gc(struct inet_peer_base *base, return cnt; } -struct inet_peer *inet_getpeer(struct net *net, +struct inet_peer *inet_getpeer(struct inet_peer_base *base, const struct inetpeer_addr *daddr, int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer_base *base = family_to_base(net, daddr->family); struct inet_peer *p; unsigned int sequence; int invalidated, gccnt = 0; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 22c6bab9717a..8d07c973409c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -184,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) qp->daddr = arg->iph->daddr; qp->user = arg->user; qp->peer = sysctl_ipfrag_max_dist ? - inet_getpeer_v4(net, arg->iph->saddr, 1) : NULL; + inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL; } static __inline__ void ip4_frag_free(struct inet_frag_queue *q) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cf78343940de..2aa663a6ae9e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1328,7 +1328,7 @@ void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; - peer = inet_getpeer_v4(net, daddr, create); + peer = inet_getpeer_v4(net->ipv4.peers, daddr, create); if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) inet_putpeer(peer); @@ -1684,7 +1684,7 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, unsigned short est_mtu = 0; struct inet_peer *peer; - peer = inet_getpeer_v4(net, iph->daddr, 1); + peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); if (peer) { unsigned short mtu = new_mtu; @@ -1929,7 +1929,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; - rt->peer = peer = inet_getpeer_v4(net, rt->rt_dst, create); + rt->peer = peer = inet_getpeer_v4(net->ipv4.peers, rt->rt_dst, create); if (peer) { rt->rt_peer_genid = rt_peer_genid(); if (inet_metrics_new(peer)) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9586c27e069c..8fc41d502bbd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -306,7 +306,7 @@ void rt6_bind_peer(struct rt6_info *rt, int create) struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; - peer = inet_getpeer_v6(net, &rt->rt6i_dst.addr, create); + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, create); if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) inet_putpeer(peer); else -- cgit v1.2.3 From 3ddd53f392c4f8e19e1110d1bdef770008b128b8 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Mon, 11 Jun 2012 11:59:10 +0800 Subject: cfg80211: add missing kernel-doc for mesh configuration structure Add the missing kernel-doc for mesh configuration parameters as pointed out by Johannes Berg. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 59 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7319f25250b6..a129ee2f5483 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -790,26 +790,69 @@ struct bss_parameters { int ht_opmode; }; -/* +/** * struct mesh_config - 802.11s mesh configuration * * These parameters can be changed while the mesh is active. + * + * @dot11MeshRetryTimeout: the initial retry timeout in millisecond units used + * by the Mesh Peering Open message + * @dot11MeshConfirmTimeout: the initial retry timeout in millisecond units + * used by the Mesh Peering Open message + * @dot11MeshHoldingTimeout: the confirm timeout in millisecond units used by + * the mesh peering management to close a mesh peering + * @dot11MeshMaxPeerLinks: the maximum number of peer links allowed on this + * mesh interface + * @dot11MeshMaxRetries: the maximum number of peer link open retries that can + * be sent to establish a new peer link instance in a mesh + * @dot11MeshTTL: the value of TTL field set at a source mesh STA + * @element_ttl: the value of TTL field set at a mesh STA for path selection + * elements + * @auto_open_plinks: whether we should automatically open peer links when we + * detect compatible mesh peers + * @dot11MeshNbrOffsetMaxNeighbor: the maximum number of neighbors to + * synchronize to for 11s default synchronization method + * @dot11MeshHWMPmaxPREQretries: the number of action frames containing a PREQ + * that an originator mesh STA can send to a particular path target + * @path_refresh_time: how frequently to refresh mesh paths in milliseconds + * @min_discovery_timeout: the minimum length of time to wait until giving up on + * a path discovery in milliseconds + * @dot11MeshHWMPactivePathTimeout: the time (in TUs) for which mesh STAs + * receiving a PREQ shall consider the forwarding information from the + * root to be valid. (TU = time unit) + * @dot11MeshHWMPpreqMinInterval: the minimum interval of time (in TUs) during + * which a mesh STA can send only one action frame containing a PREQ + * element + * @dot11MeshHWMPperrMinInterval: the minimum interval of time (in TUs) during + * which a mesh STA can send only one Action frame containing a PERR + * element + * @dot11MeshHWMPnetDiameterTraversalTime: the interval of time (in TUs) that + * it takes for an HWMP information element to propagate across the mesh + * @dot11MeshHWMPRootMode: the configuration of a mesh STA as root mesh STA + * @dot11MeshHWMPRannInterval: the interval of time (in TUs) between root + * announcements are transmitted + * @dot11MeshGateAnnouncementProtocol: whether to advertise that this mesh + * station has access to a broader network beyond the MBSS. (This is + * missnamed in draft 12.0: dot11MeshGateAnnouncementProtocol set to true + * only means that the station will announce others it's a mesh gate, but + * not necessarily using the gate announcement protocol. Still keeping the + * same nomenclature to be in sync with the spec) + * @dot11MeshForwarding: whether the Mesh STA is forwarding or non-forwarding + * entity (default is TRUE - forwarding entity) + * @rssi_threshold: the threshold for average signal strength of candidate + * station to establish a peer link + * @ht_opmode: mesh HT protection mode */ struct mesh_config { - /* Timeouts in ms */ - /* Mesh plink management parameters */ u16 dot11MeshRetryTimeout; u16 dot11MeshConfirmTimeout; u16 dot11MeshHoldingTimeout; u16 dot11MeshMaxPeerLinks; u8 dot11MeshMaxRetries; u8 dot11MeshTTL; - /* ttl used in path selection information elements */ u8 element_ttl; bool auto_open_plinks; - /* neighbor offset synchronization */ u32 dot11MeshNbrOffsetMaxNeighbor; - /* HWMP parameters */ u8 dot11MeshHWMPmaxPREQretries; u32 path_refresh_time; u16 min_discovery_timeout; @@ -819,10 +862,6 @@ struct mesh_config { u16 dot11MeshHWMPnetDiameterTraversalTime; u8 dot11MeshHWMPRootMode; u16 dot11MeshHWMPRannInterval; - /* This is missnamed in draft 12.0: dot11MeshGateAnnouncementProtocol - * set to true only means that the station will announce others it's a - * mesh gate, but not necessarily using the gate announcement protocol. - * Still keeping the same nomenclature to be in sync with the spec. */ bool dot11MeshGateAnnouncementProtocol; bool dot11MeshForwarding; s32 rssi_threshold; -- cgit v1.2.3 From a4f606ea73d56d15f28653d2242e54d58bb612e5 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Mon, 11 Jun 2012 11:59:36 +0800 Subject: {nl,cfg,mac}80211: fix the coding style related to mesh parameters fix the coding style related to mesh parameters, especially the indentation, as pointed out by Johannes Berg. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 6 +-- include/linux/nl80211.h | 98 ++++++++++++++++++++++--------------------- include/net/cfg80211.h | 12 +++--- net/mac80211/cfg.c | 3 +- net/mac80211/debugfs_netdev.c | 34 +++++++-------- net/wireless/nl80211.c | 97 ++++++++++++++++++++++-------------------- 6 files changed, 130 insertions(+), 120 deletions(-) (limited to 'include/net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ce9af8918514..f831078182de 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1443,7 +1443,7 @@ enum ieee80211_tdls_actioncode { * * @IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET: the default synchronization method * @IEEE80211_SYNC_METHOD_VENDOR: a vendor specific synchronization method - * that will be specified in a vendor specific information element + * that will be specified in a vendor specific information element */ enum { IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET = 1, @@ -1455,7 +1455,7 @@ enum { * * @IEEE80211_PATH_PROTOCOL_HWMP: the default path selection protocol * @IEEE80211_PATH_PROTOCOL_VENDOR: a vendor specific protocol that will - * be specified in a vendor specific information element + * be specified in a vendor specific information element */ enum { IEEE80211_PATH_PROTOCOL_HWMP = 1, @@ -1467,7 +1467,7 @@ enum { * * @IEEE80211_PATH_METRIC_AIRTIME: the default path selection metric * @IEEE80211_PATH_METRIC_VENDOR: a vendor specific metric that will be - * specified in a vendor specific information element + * specified in a vendor specific information element */ enum { IEEE80211_PATH_METRIC_AIRTIME = 1, diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 970afdf5a605..c61e1621822c 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -2090,78 +2090,80 @@ enum nl80211_mntr_flags { * @__NL80211_MESHCONF_INVALID: internal use * * @NL80211_MESHCONF_RETRY_TIMEOUT: specifies the initial retry timeout in - * millisecond units, used by the Peer Link Open message + * millisecond units, used by the Peer Link Open message * * @NL80211_MESHCONF_CONFIRM_TIMEOUT: specifies the initial confirm timeout, in - * millisecond units, used by the peer link management to close a peer link + * millisecond units, used by the peer link management to close a peer link * * @NL80211_MESHCONF_HOLDING_TIMEOUT: specifies the holding timeout, in - * millisecond units + * millisecond units * * @NL80211_MESHCONF_MAX_PEER_LINKS: maximum number of peer links allowed - * on this mesh interface + * on this mesh interface * * @NL80211_MESHCONF_MAX_RETRIES: specifies the maximum number of peer link - * open retries that can be sent to establish a new peer link instance in a - * mesh + * open retries that can be sent to establish a new peer link instance in a + * mesh * * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh - * point. + * point. * * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically - * open peer links when we detect compatible mesh peers. + * open peer links when we detect compatible mesh peers. * * @NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES: the number of action frames - * containing a PREQ that an MP can send to a particular destination (path - * target) + * containing a PREQ that an MP can send to a particular destination (path + * target) * * @NL80211_MESHCONF_PATH_REFRESH_TIME: how frequently to refresh mesh paths - * (in milliseconds) + * (in milliseconds) * * @NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT: minimum length of time to wait - * until giving up on a path discovery (in milliseconds) + * until giving up on a path discovery (in milliseconds) * * @NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT: The time (in TUs) for which mesh - * points receiving a PREQ shall consider the forwarding information from the - * root to be valid. (TU = time unit) + * points receiving a PREQ shall consider the forwarding information from + * the root to be valid. (TU = time unit) * * @NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL: The minimum interval of time (in - * TUs) during which an MP can send only one action frame containing a PREQ - * reference element + * TUs) during which an MP can send only one action frame containing a PREQ + * reference element * * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs) - * that it takes for an HWMP information element to propagate across the mesh + * that it takes for an HWMP information element to propagate across the + * mesh * * @NL80211_MESHCONF_HWMP_ROOTMODE: whether root mode is enabled or not * * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a - * source mesh point for path selection elements. + * source mesh point for path selection elements. * * @NL80211_MESHCONF_HWMP_RANN_INTERVAL: The interval of time (in TUs) between - * root announcements are transmitted. + * root announcements are transmitted. * * @NL80211_MESHCONF_GATE_ANNOUNCEMENTS: Advertise that this mesh station has - * access to a broader network beyond the MBSS. This is done via Root - * Announcement frames. + * access to a broader network beyond the MBSS. This is done via Root + * Announcement frames. * * @NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL: The minimum interval of time (in - * TUs) during which a mesh STA can send only one Action frame containing a - * PERR element. + * TUs) during which a mesh STA can send only one Action frame containing a + * PERR element. * * @NL80211_MESHCONF_FORWARDING: set Mesh STA as forwarding or non-forwarding - * or forwarding entity (default is TRUE - forwarding entity) + * or forwarding entity (default is TRUE - forwarding entity) * * @NL80211_MESHCONF_RSSI_THRESHOLD: RSSI threshold in dBm. This specifies the - * threshold for average signal strength of candidate station to establish - * a peer link. - * - * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute + * threshold for average signal strength of candidate station to establish + * a peer link. * * @NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR: maximum number of neighbors - * to synchronize to for 11s default synchronization method (see 11C.12.2.2) + * to synchronize to for 11s default synchronization method + * (see 11C.12.2.2) * * @NL80211_MESHCONF_HT_OPMODE: set mesh HT protection mode. * + * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -2203,34 +2205,36 @@ enum nl80211_meshconf_params { * @__NL80211_MESH_SETUP_INVALID: Internal use * * @NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL: Enable this option to use a - * vendor specific path selection algorithm or disable it to use the default - * HWMP. + * vendor specific path selection algorithm or disable it to use the + * default HWMP. * * @NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC: Enable this option to use a - * vendor specific path metric or disable it to use the default Airtime - * metric. + * vendor specific path metric or disable it to use the default Airtime + * metric. * * @NL80211_MESH_SETUP_IE: Information elements for this mesh, for instance, a - * robust security network ie, or a vendor specific information element that - * vendors will use to identify the path selection methods and metrics in use. + * robust security network ie, or a vendor specific information element + * that vendors will use to identify the path selection methods and + * metrics in use. * * @NL80211_MESH_SETUP_USERSPACE_AUTH: Enable this option if an authentication - * daemon will be authenticating mesh candidates. + * daemon will be authenticating mesh candidates. * * @NL80211_MESH_SETUP_USERSPACE_AMPE: Enable this option if an authentication - * daemon will be securing peer link frames. AMPE is a secured version of Mesh - * Peering Management (MPM) and is implemented with the assistance of a - * userspace daemon. When this flag is set, the kernel will send peer - * management frames to a userspace daemon that will implement AMPE - * functionality (security capabilities selection, key confirmation, and key - * management). When the flag is unset (default), the kernel can autonomously - * complete (unsecured) mesh peering without the need of a userspace daemon. - * - * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number + * daemon will be securing peer link frames. AMPE is a secured version of + * Mesh Peering Management (MPM) and is implemented with the assistance of + * a userspace daemon. When this flag is set, the kernel will send peer + * management frames to a userspace daemon that will implement AMPE + * functionality (security capabilities selection, key confirmation, and + * key management). When the flag is unset (default), the kernel can + * autonomously complete (unsecured) mesh peering without the need of a + * userspace daemon. * * @NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC: Enable this option to use a - * vendor specific synchronization method or disable it to use the default - * neighbor offset synchronization + * vendor specific synchronization method or disable it to use the default + * neighbor offset synchronization + * + * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number * * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a129ee2f5483..778e533a9734 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -848,21 +848,21 @@ struct mesh_config { u16 dot11MeshConfirmTimeout; u16 dot11MeshHoldingTimeout; u16 dot11MeshMaxPeerLinks; - u8 dot11MeshMaxRetries; - u8 dot11MeshTTL; - u8 element_ttl; + u8 dot11MeshMaxRetries; + u8 dot11MeshTTL; + u8 element_ttl; bool auto_open_plinks; u32 dot11MeshNbrOffsetMaxNeighbor; - u8 dot11MeshHWMPmaxPREQretries; + u8 dot11MeshHWMPmaxPREQretries; u32 path_refresh_time; u16 min_discovery_timeout; u32 dot11MeshHWMPactivePathTimeout; u16 dot11MeshHWMPpreqMinInterval; u16 dot11MeshHWMPperrMinInterval; u16 dot11MeshHWMPnetDiameterTraversalTime; - u8 dot11MeshHWMPRootMode; + u8 dot11MeshHWMPRootMode; u16 dot11MeshHWMPRannInterval; - bool dot11MeshGateAnnouncementProtocol; + bool dot11MeshGateAnnouncementProtocol; bool dot11MeshForwarding; s32 rssi_threshold; u16 ht_opmode; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 498c94e34427..f41f9bea242a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1571,10 +1571,9 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, conf->dot11MeshGateAnnouncementProtocol = nconf->dot11MeshGateAnnouncementProtocol; } - if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_RANN_INTERVAL, mask)) { + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_RANN_INTERVAL, mask)) conf->dot11MeshHWMPRannInterval = nconf->dot11MeshHWMPRannInterval; - } if (_chg_mesh_attr(NL80211_MESHCONF_FORWARDING, mask)) conf->dot11MeshForwarding = nconf->dot11MeshForwarding; if (_chg_mesh_attr(NL80211_MESHCONF_RSSI_THRESHOLD, mask)) { diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index d4272ff43f71..c429417e1322 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -468,45 +468,45 @@ IEEE80211_IF_FILE(fwded_unicast, u.mesh.mshstats.fwded_unicast, DEC); IEEE80211_IF_FILE(fwded_frames, u.mesh.mshstats.fwded_frames, DEC); IEEE80211_IF_FILE(dropped_frames_ttl, u.mesh.mshstats.dropped_frames_ttl, DEC); IEEE80211_IF_FILE(dropped_frames_congestion, - u.mesh.mshstats.dropped_frames_congestion, DEC); + u.mesh.mshstats.dropped_frames_congestion, DEC); IEEE80211_IF_FILE(dropped_frames_no_route, - u.mesh.mshstats.dropped_frames_no_route, DEC); + u.mesh.mshstats.dropped_frames_no_route, DEC); IEEE80211_IF_FILE(estab_plinks, u.mesh.mshstats.estab_plinks, ATOMIC); /* Mesh parameters */ IEEE80211_IF_FILE(dot11MeshMaxRetries, - u.mesh.mshcfg.dot11MeshMaxRetries, DEC); + u.mesh.mshcfg.dot11MeshMaxRetries, DEC); IEEE80211_IF_FILE(dot11MeshRetryTimeout, - u.mesh.mshcfg.dot11MeshRetryTimeout, DEC); + u.mesh.mshcfg.dot11MeshRetryTimeout, DEC); IEEE80211_IF_FILE(dot11MeshConfirmTimeout, - u.mesh.mshcfg.dot11MeshConfirmTimeout, DEC); + u.mesh.mshcfg.dot11MeshConfirmTimeout, DEC); IEEE80211_IF_FILE(dot11MeshHoldingTimeout, - u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC); + u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC); IEEE80211_IF_FILE(dot11MeshTTL, u.mesh.mshcfg.dot11MeshTTL, DEC); IEEE80211_IF_FILE(element_ttl, u.mesh.mshcfg.element_ttl, DEC); IEEE80211_IF_FILE(auto_open_plinks, u.mesh.mshcfg.auto_open_plinks, DEC); IEEE80211_IF_FILE(dot11MeshMaxPeerLinks, - u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC); + u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC); IEEE80211_IF_FILE(dot11MeshHWMPactivePathTimeout, - u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout, DEC); + u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout, DEC); IEEE80211_IF_FILE(dot11MeshHWMPpreqMinInterval, - u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval, DEC); + u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval, DEC); IEEE80211_IF_FILE(dot11MeshHWMPperrMinInterval, - u.mesh.mshcfg.dot11MeshHWMPperrMinInterval, DEC); + u.mesh.mshcfg.dot11MeshHWMPperrMinInterval, DEC); IEEE80211_IF_FILE(dot11MeshHWMPnetDiameterTraversalTime, - u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC); + u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC); IEEE80211_IF_FILE(dot11MeshHWMPmaxPREQretries, - u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries, DEC); + u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries, DEC); IEEE80211_IF_FILE(path_refresh_time, - u.mesh.mshcfg.path_refresh_time, DEC); + u.mesh.mshcfg.path_refresh_time, DEC); IEEE80211_IF_FILE(min_discovery_timeout, - u.mesh.mshcfg.min_discovery_timeout, DEC); + u.mesh.mshcfg.min_discovery_timeout, DEC); IEEE80211_IF_FILE(dot11MeshHWMPRootMode, - u.mesh.mshcfg.dot11MeshHWMPRootMode, DEC); + u.mesh.mshcfg.dot11MeshHWMPRootMode, DEC); IEEE80211_IF_FILE(dot11MeshGateAnnouncementProtocol, - u.mesh.mshcfg.dot11MeshGateAnnouncementProtocol, DEC); + u.mesh.mshcfg.dot11MeshGateAnnouncementProtocol, DEC); IEEE80211_IF_FILE(dot11MeshHWMPRannInterval, - u.mesh.mshcfg.dot11MeshHWMPRannInterval, DEC); + u.mesh.mshcfg.dot11MeshHWMPRannInterval, DEC); IEEE80211_IF_FILE(dot11MeshForwarding, u.mesh.mshcfg.dot11MeshForwarding, DEC); IEEE80211_IF_FILE(rssi_threshold, u.mesh.mshcfg.rssi_threshold, DEC); IEEE80211_IF_FILE(ht_opmode, u.mesh.mshcfg.ht_opmode, DEC); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7ae54b82291f..dd94ee5fb40a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -115,7 +115,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_STA_VLAN] = { .type = NLA_U32 }, [NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ }, [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, - .len = IEEE80211_MAX_MESH_ID_LEN }, + .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 }, [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, @@ -3492,7 +3492,6 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_ELEMENT_TTL] = { .type = NLA_U8 }, [NL80211_MESHCONF_AUTO_OPEN_PLINKS] = { .type = NLA_U8 }, [NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR] = { .type = NLA_U32 }, - [NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 }, [NL80211_MESHCONF_PATH_REFRESH_TIME] = { .type = NLA_U32 }, [NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = { .type = NLA_U16 }, @@ -3504,8 +3503,8 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_RANN_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_GATE_ANNOUNCEMENTS] = { .type = NLA_U8 }, [NL80211_MESHCONF_FORWARDING] = { .type = NLA_U8 }, - [NL80211_MESHCONF_RSSI_THRESHOLD] = { .type = NLA_U32}, - [NL80211_MESHCONF_HT_OPMODE] = { .type = NLA_U16}, + [NL80211_MESHCONF_RSSI_THRESHOLD] = { .type = NLA_U32 }, + [NL80211_MESHCONF_HT_OPMODE] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -3515,7 +3514,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, - .len = IEEE80211_MAX_DATA_LEN }, + .len = IEEE80211_MAX_DATA_LEN }, [NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG }, }; @@ -3548,63 +3547,71 @@ do {\ /* Fill in the params struct */ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, - mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16); + mask, NL80211_MESHCONF_RETRY_TIMEOUT, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, - mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, nla_get_u16); + mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, - mask, NL80211_MESHCONF_HOLDING_TIMEOUT, nla_get_u16); + mask, NL80211_MESHCONF_HOLDING_TIMEOUT, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, - mask, NL80211_MESHCONF_MAX_PEER_LINKS, nla_get_u16); + mask, NL80211_MESHCONF_MAX_PEER_LINKS, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, - mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8); + mask, NL80211_MESHCONF_MAX_RETRIES, + nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, - mask, NL80211_MESHCONF_TTL, nla_get_u8); + mask, NL80211_MESHCONF_TTL, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, - mask, NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8); + mask, NL80211_MESHCONF_ELEMENT_TTL, + nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, - mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, - mask, NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, - nla_get_u32); + mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, + nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, mask, + NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, + nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, - mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, - nla_get_u8); + mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, + nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, - mask, NL80211_MESHCONF_PATH_REFRESH_TIME, nla_get_u32); + mask, NL80211_MESHCONF_PATH_REFRESH_TIME, + nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, - mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, - nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, - mask, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, - nla_get_u32); + mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, + nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, mask, + NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, + nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, - mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, - nla_get_u16); + mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval, - mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, - nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPnetDiameterTraversalTime, - mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, - nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPRootMode, mask, - NL80211_MESHCONF_HWMP_ROOTMODE, - nla_get_u8); + mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPRannInterval, mask, - NL80211_MESHCONF_HWMP_RANN_INTERVAL, - nla_get_u16); + dot11MeshHWMPnetDiameterTraversalTime, mask, + NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, mask, + NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, mask, + NL80211_MESHCONF_HWMP_RANN_INTERVAL, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshGateAnnouncementProtocol, mask, - NL80211_MESHCONF_GATE_ANNOUNCEMENTS, - nla_get_u8); + dot11MeshGateAnnouncementProtocol, mask, + NL80211_MESHCONF_GATE_ANNOUNCEMENTS, + nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, - mask, NL80211_MESHCONF_FORWARDING, nla_get_u8); + mask, NL80211_MESHCONF_FORWARDING, + nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, - mask, NL80211_MESHCONF_RSSI_THRESHOLD, nla_get_u32); + mask, NL80211_MESHCONF_RSSI_THRESHOLD, + nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, - mask, NL80211_MESHCONF_HT_OPMODE, nla_get_u16); + mask, NL80211_MESHCONF_HT_OPMODE, + nla_get_u16); if (mask_out) *mask_out = mask; -- cgit v1.2.3 From 97bab73f987e2781129cd6f4b6379bf44d808cc6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 22:36:36 -0700 Subject: inet: Hide route peer accesses behind helpers. We encode the pointer(s) into an unsigned long with one state bit. The state bit is used so we can store the inetpeer tree root to use when resolving the peer later. Later the peer roots will be per-FIB table, and this change works to facilitate that. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++ include/net/ip6_fib.h | 32 +++++++++++++++++++++++++++- include/net/ip6_route.h | 6 +++--- include/net/route.h | 42 +++++++++++++++++++++++++++++++++---- net/ipv4/route.c | 56 +++++++++++++++++++++++++++++-------------------- net/ipv4/xfrm4_policy.c | 10 ++++----- net/ipv6/route.c | 42 +++++++++++++++++++++---------------- net/ipv6/xfrm6_policy.c | 10 ++++----- 8 files changed, 193 insertions(+), 59 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index b84b32fd5df1..d432489e7109 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -71,6 +71,60 @@ struct inet_peer_base { int total; }; +#define INETPEER_BASE_BIT 0x1UL + +static inline struct inet_peer *inetpeer_ptr(unsigned long val) +{ + BUG_ON(val & INETPEER_BASE_BIT); + return (struct inet_peer *) val; +} + +static inline struct inet_peer_base *inetpeer_base_ptr(unsigned long val) +{ + if (!(val & INETPEER_BASE_BIT)) + return NULL; + val &= ~INETPEER_BASE_BIT; + return (struct inet_peer_base *) val; +} + +static inline bool inetpeer_ptr_is_peer(unsigned long val) +{ + return !(val & INETPEER_BASE_BIT); +} + +static inline void __inetpeer_ptr_set_peer(unsigned long *val, struct inet_peer *peer) +{ + /* This implicitly clears INETPEER_BASE_BIT */ + *val = (unsigned long) peer; +} + +static inline bool inetpeer_ptr_set_peer(unsigned long *ptr, struct inet_peer *peer) +{ + unsigned long val = (unsigned long) peer; + unsigned long orig = *ptr; + + if (!(orig & INETPEER_BASE_BIT) || !val || + cmpxchg(ptr, orig, val) != orig) + return false; + return true; +} + +static inline void inetpeer_init_ptr(unsigned long *ptr, struct inet_peer_base *base) +{ + *ptr = (unsigned long) base | INETPEER_BASE_BIT; +} + +static inline void inetpeer_transfer_peer(unsigned long *to, unsigned long *from) +{ + unsigned long val = *from; + + *to = val; + if (inetpeer_ptr_is_peer(val)) { + struct inet_peer *peer = inetpeer_ptr(val); + atomic_inc(&peer->refcnt); + } +} + extern void inet_peer_base_init(struct inet_peer_base *); void inet_initpeers(void) __init; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0ae759a6c76e..3ac5f155c690 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -107,7 +107,7 @@ struct rt6_info { u32 rt6i_peer_genid; struct inet6_dev *rt6i_idev; - struct inet_peer *rt6i_peer; + unsigned long _rt6i_peer; #ifdef CONFIG_XFRM u32 rt6i_flow_cache_genid; @@ -118,6 +118,36 @@ struct rt6_info { u8 rt6i_protocol; }; +static inline struct inet_peer *rt6_peer_ptr(struct rt6_info *rt) +{ + return inetpeer_ptr(rt->_rt6i_peer); +} + +static inline bool rt6_has_peer(struct rt6_info *rt) +{ + return inetpeer_ptr_is_peer(rt->_rt6i_peer); +} + +static inline void __rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer) +{ + __inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer); +} + +static inline bool rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer) +{ + return inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer); +} + +static inline void rt6_init_peer(struct rt6_info *rt, struct inet_peer_base *base) +{ + inetpeer_init_ptr(&rt->_rt6i_peer, base); +} + +static inline void rt6_transfer_peer(struct rt6_info *rt, struct rt6_info *ort) +{ + inetpeer_transfer_peer(&rt->_rt6i_peer, &ort->_rt6i_peer); +} + static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { return ((struct rt6_info *)dst)->rt6i_idev; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 73d750288121..f88a85cf31c3 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -57,11 +57,11 @@ extern void rt6_bind_peer(struct rt6_info *rt, int create); static inline struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) { - if (rt->rt6i_peer) - return rt->rt6i_peer; + if (rt6_has_peer(rt)) + return rt6_peer_ptr(rt); rt6_bind_peer(rt, create); - return rt->rt6i_peer; + return rt6_peer_ptr(rt); } static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt) diff --git a/include/net/route.h b/include/net/route.h index 433fc6c1d404..6340c37677fc 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -67,10 +67,44 @@ struct rtable { /* Miscellaneous cached information */ __be32 rt_spec_dst; /* RFC1122 specific destination */ u32 rt_peer_genid; - struct inet_peer *peer; /* long-living peer info */ + unsigned long _peer; /* long-living peer info */ struct fib_info *fi; /* for client ref to shared metrics */ }; +static inline struct inet_peer *rt_peer_ptr(struct rtable *rt) +{ + return inetpeer_ptr(rt->_peer); +} + +static inline bool rt_has_peer(struct rtable *rt) +{ + return inetpeer_ptr_is_peer(rt->_peer); +} + +static inline void __rt_set_peer(struct rtable *rt, struct inet_peer *peer) +{ + __inetpeer_ptr_set_peer(&rt->_peer, peer); +} + +static inline bool rt_set_peer(struct rtable *rt, struct inet_peer *peer) +{ + return inetpeer_ptr_set_peer(&rt->_peer, peer); +} + +static inline void rt_init_peer(struct rtable *rt, struct inet_peer_base *base) +{ + inetpeer_init_ptr(&rt->_peer, base); +} + +static inline void rt_transfer_peer(struct rtable *rt, struct rtable *ort) +{ + rt->_peer = ort->_peer; + if (rt_has_peer(ort)) { + struct inet_peer *peer = rt_peer_ptr(ort); + atomic_inc(&peer->refcnt); + } +} + static inline bool rt_is_input_route(const struct rtable *rt) { return rt->rt_route_iif != 0; @@ -298,11 +332,11 @@ extern void rt_bind_peer(struct rtable *rt, __be32 daddr, int create); static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, int create) { - if (rt->peer) - return rt->peer; + if (rt_has_peer(rt)) + return rt_peer_ptr(rt); rt_bind_peer(rt, daddr, create); - return rt->peer; + return rt_peer_ptr(rt); } static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2aa663a6ae9e..03e5b614370e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -677,7 +677,7 @@ static inline int rt_fast_clean(struct rtable *rth) static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - (rth->peer && rth->peer->pmtu_expires); + (rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires); } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -1325,12 +1325,16 @@ static u32 rt_peer_genid(void) void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) { - struct net *net = dev_net(rt->dst.dev); + struct inet_peer_base *base; struct inet_peer *peer; - peer = inet_getpeer_v4(net->ipv4.peers, daddr, create); + base = inetpeer_base_ptr(rt->_peer); + if (!base) + return; + + peer = inet_getpeer_v4(base, daddr, create); - if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) + if (!rt_set_peer(rt, peer)) inet_putpeer(peer); else rt->rt_peer_genid = rt_peer_genid(); @@ -1533,8 +1537,10 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) rt_genid(dev_net(dst->dev))); rt_del(hash, rt); ret = NULL; - } else if (rt->peer && peer_pmtu_expired(rt->peer)) { - dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig); + } else if (rt_has_peer(rt)) { + struct inet_peer *peer = rt_peer_ptr(rt); + if (peer_pmtu_expired(peer)) + dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); } } return ret; @@ -1796,14 +1802,13 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_dst_destroy(struct dst_entry *dst) { struct rtable *rt = (struct rtable *) dst; - struct inet_peer *peer = rt->peer; if (rt->fi) { fib_info_put(rt->fi); rt->fi = NULL; } - if (peer) { - rt->peer = NULL; + if (rt_has_peer(rt)) { + struct inet_peer *peer = rt_peer_ptr(rt); inet_putpeer(peer); } } @@ -1816,8 +1821,11 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) - dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); + if (rt && rt_has_peer(rt)) { + struct inet_peer *peer = rt_peer_ptr(rt); + if (peer_pmtu_cleaned(peer)) + dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig); + } } static int ip_rt_bug(struct sk_buff *skb) @@ -1919,7 +1927,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, struct fib_info *fi) { - struct net *net = dev_net(rt->dst.dev); + struct inet_peer_base *base; struct inet_peer *peer; int create = 0; @@ -1929,8 +1937,12 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; - rt->peer = peer = inet_getpeer_v4(net->ipv4.peers, rt->rt_dst, create); + base = inetpeer_base_ptr(rt->_peer); + BUG_ON(!base); + + peer = inet_getpeer_v4(base, rt->rt_dst, create); if (peer) { + __rt_set_peer(rt, peer); rt->rt_peer_genid = rt_peer_genid(); if (inet_metrics_new(peer)) memcpy(peer->metrics, fi->fib_metrics, @@ -2046,7 +2058,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; - rth->peer = NULL; + rt_init_peer(rth, dev_net(dev)->ipv4.peers); rth->fi = NULL; if (our) { rth->dst.input= ip_local_deliver; @@ -2174,7 +2186,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; - rth->peer = NULL; + rt_init_peer(rth, dev_net(rth->dst.dev)->ipv4.peers); rth->fi = NULL; rth->dst.input = ip_forward; @@ -2357,7 +2369,7 @@ local_input: rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; - rth->peer = NULL; + rt_init_peer(rth, net->ipv4.peers); rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -2561,7 +2573,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_gateway = fl4->daddr; rth->rt_spec_dst= fl4->saddr; rth->rt_peer_genid = 0; - rth->peer = NULL; + rt_init_peer(rth, dev_net(dev_out)->ipv4.peers); rth->fi = NULL; RT_CACHE_STAT_INC(out_slow_tot); @@ -2898,9 +2910,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_src = ort->rt_src; rt->rt_gateway = ort->rt_gateway; rt->rt_spec_dst = ort->rt_spec_dst; - rt->peer = ort->peer; - if (rt->peer) - atomic_inc(&rt->peer->refcnt); + rt_transfer_peer(rt, ort); rt->fi = ort->fi; if (rt->fi) atomic_inc(&rt->fi->fib_clntref); @@ -2938,7 +2948,6 @@ static int rt_fill_info(struct net *net, struct rtmsg *r; struct nlmsghdr *nlh; unsigned long expires = 0; - const struct inet_peer *peer = rt->peer; u32 id = 0, ts = 0, tsage = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); @@ -2994,8 +3003,9 @@ static int rt_fill_info(struct net *net, goto nla_put_failure; error = rt->dst.error; - if (peer) { - inet_peer_refcheck(rt->peer); + if (rt_has_peer(rt)) { + const struct inet_peer *peer = rt_peer_ptr(rt); + inet_peer_refcheck(peer); id = atomic_read(&peer->ip_id_count) & 0xffff; if (peer->tcp_ts_stamp) { ts = peer->tcp_ts; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0d3426cb5c4f..8855d8268552 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -90,9 +90,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.dst.dev = dev; dev_hold(dev); - xdst->u.rt.peer = rt->peer; - if (rt->peer) - atomic_inc(&rt->peer->refcnt); + rt_transfer_peer(&xdst->u.rt, rt); /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ @@ -212,8 +210,10 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) dst_destroy_metrics_generic(dst); - if (likely(xdst->u.rt.peer)) - inet_putpeer(xdst->u.rt.peer); + if (rt_has_peer(&xdst->u.rt)) { + struct inet_peer *peer = rt_peer_ptr(&xdst->u.rt); + inet_putpeer(peer); + } xfrm_dst_destroy(xdst); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8fc41d502bbd..17a9b8687f29 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -258,16 +258,18 @@ static struct rt6_info ip6_blk_hole_entry_template = { #endif /* allocate dst with ip6_dst_ops */ -static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, +static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags) { - struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); + struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, + 0, 0, flags); - if (rt) + if (rt) { memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); - + rt6_init_peer(rt, net->ipv6.peers); + } return rt; } @@ -275,7 +277,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - struct inet_peer *peer = rt->rt6i_peer; if (!(rt->dst.flags & DST_HOST)) dst_destroy_metrics_generic(dst); @@ -288,8 +289,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) dst_release(dst->from); - if (peer) { - rt->rt6i_peer = NULL; + if (rt6_has_peer(rt)) { + struct inet_peer *peer = rt6_peer_ptr(rt); inet_putpeer(peer); } } @@ -303,11 +304,15 @@ static u32 rt6_peer_genid(void) void rt6_bind_peer(struct rt6_info *rt, int create) { - struct net *net = dev_net(rt->dst.dev); + struct inet_peer_base *base; struct inet_peer *peer; - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, create); - if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) + base = inetpeer_base_ptr(rt->_rt6i_peer); + if (!base) + return; + + peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); + if (!rt6_set_peer(rt, peer)) inet_putpeer(peer); else rt->rt6i_peer_genid = rt6_peer_genid(); @@ -950,6 +955,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0); if (rt) { memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); + rt6_init_peer(rt, net->ipv6.peers); new = &rt->dst; @@ -994,7 +1000,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { if (rt->rt6i_peer_genid != rt6_peer_genid()) { - if (!rt->rt6i_peer) + if (!rt6_has_peer(rt)) rt6_bind_peer(rt, 0); rt->rt6i_peer_genid = rt6_peer_genid(); } @@ -1108,7 +1114,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(!idev)) return ERR_PTR(-ENODEV); - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); + rt = ip6_dst_alloc(net, dev, 0); if (unlikely(!rt)) { in6_dev_put(idev); dst = ERR_PTR(-ENOMEM); @@ -1290,7 +1296,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); + rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT); if (!rt) { err = -ENOMEM; @@ -1812,8 +1818,7 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest) { struct net *net = dev_net(ort->dst.dev); - struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - ort->dst.dev, 0); + struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0); if (rt) { rt->dst.input = ort->dst.input; @@ -2097,8 +2102,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, bool anycast) { struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - net->loopback_dev, 0); + struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0); int err; if (!rt) { @@ -2519,7 +2523,9 @@ static int rt6_fill_node(struct net *net, else expires = INT_MAX; - peer = rt->rt6i_peer; + peer = NULL; + if (rt6_has_peer(rt)) + peer = rt6_peer_ptr(rt); ts = tsage = 0; if (peer && peer->tcp_ts_stamp) { ts = peer->tcp_ts; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8625fba96db9..d7494845efbf 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -99,9 +99,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, if (!xdst->u.rt6.rt6i_idev) return -ENODEV; - xdst->u.rt6.rt6i_peer = rt->rt6i_peer; - if (rt->rt6i_peer) - atomic_inc(&rt->rt6i_peer->refcnt); + rt6_transfer_peer(&xdst->u.rt6, rt); /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ @@ -223,8 +221,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); - if (likely(xdst->u.rt6.rt6i_peer)) - inet_putpeer(xdst->u.rt6.rt6i_peer); + if (rt6_has_peer(&xdst->u.rt6)) { + struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6); + inet_putpeer(peer); + } xfrm_dst_destroy(xdst); } -- cgit v1.2.3 From 46517008e1168dc926cf2c47d529efc07eca85c0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Jun 2012 00:04:12 -0700 Subject: ipv4: Kill ip_rt_frag_needed(). There is zero point to this function. It's only real substance is to perform an extremely outdated BSD4.2 ICMP check, which we can safely remove. If you really have a MTU limited link being routed by a BSD4.2 derived system, here's a nickel go buy yourself a real router. The other actions of ip_rt_frag_needed(), checking and conditionally updating the peer, are done by the per-protocol handlers of the ICMP event. TCP, UDP, et al. have a handler which will receive this event and transmit it back into the associated route via dst_ops->update_pmtu(). This simplification is important, because it eliminates the one place where we do not have a proper route context in which to make an inetpeer lookup. Signed-off-by: David S. Miller --- include/net/route.h | 2 -- net/ipv4/icmp.c | 4 +--- net/ipv4/route.c | 61 ---------------------------------------------------- net/rxrpc/ar-error.c | 4 ---- 4 files changed, 1 insertion(+), 70 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 6340c37677fc..cc693a5bb20d 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -215,8 +215,6 @@ static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 s return ip_route_input_common(skb, dst, src, tos, devin, true); } -extern unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, - unsigned short new_mtu, struct net_device *dev); extern void ip_rt_send_redirect(struct sk_buff *skb); extern unsigned int inet_addr_type(struct net *net, __be32 addr); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 0c78ef1e5dde..e1caa1abe5d1 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -673,9 +673,7 @@ static void icmp_unreach(struct sk_buff *skb) LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"), &iph->daddr); } else { - info = ip_rt_frag_needed(net, iph, - ntohs(icmph->un.frag.mtu), - skb->dev); + info = ntohs(icmph->un.frag.mtu); if (!info) goto out; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 03e5b614370e..4f5834c4a667 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1664,67 +1664,6 @@ out: kfree_skb(skb); return 0; } -/* - * The last two values are not from the RFC but - * are needed for AMPRnet AX.25 paths. - */ - -static const unsigned short mtu_plateau[] = -{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 }; - -static inline unsigned short guess_mtu(unsigned short old_mtu) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++) - if (old_mtu > mtu_plateau[i]) - return mtu_plateau[i]; - return 68; -} - -unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, - unsigned short new_mtu, - struct net_device *dev) -{ - unsigned short old_mtu = ntohs(iph->tot_len); - unsigned short est_mtu = 0; - struct inet_peer *peer; - - peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); - if (peer) { - unsigned short mtu = new_mtu; - - if (new_mtu < 68 || new_mtu >= old_mtu) { - /* BSD 4.2 derived systems incorrectly adjust - * tot_len by the IP header length, and report - * a zero MTU in the ICMP message. - */ - if (mtu == 0 && - old_mtu >= 68 + (iph->ihl << 2)) - old_mtu -= iph->ihl << 2; - mtu = guess_mtu(old_mtu); - } - - if (mtu < ip_rt_min_pmtu) - mtu = ip_rt_min_pmtu; - if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { - unsigned long pmtu_expires; - - pmtu_expires = jiffies + ip_rt_mtu_expires; - if (!pmtu_expires) - pmtu_expires = 1UL; - - est_mtu = mtu; - peer->pmtu_learned = mtu; - peer->pmtu_expires = pmtu_expires; - atomic_inc(&__rt_peer_genid); - } - - inet_putpeer(peer); - } - return est_mtu ? : new_mtu; -} - static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) { unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 5d6b572a6704..a9206087b4d7 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -81,10 +81,6 @@ void rxrpc_UDP_error_report(struct sock *sk) _net("I/F MTU %u", mtu); } - /* ip_rt_frag_needed() may have eaten the info */ - if (mtu == 0) - mtu = ntohs(icmp_hdr(skb)->un.frag.mtu); - if (mtu == 0) { /* they didn't give us a size, estimate one */ if (mtu > 1500) { -- cgit v1.2.3 From b48c80ece973e9eddb042f6685b482b261ff0d47 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Jun 2012 00:24:21 -0700 Subject: inet: Add family scope inetpeer flushes. This implementation can deal with having many inetpeer roots, which is a necessary prerequisite for per-FIB table rooted peer tables. Each family (AF_INET, AF_INET6) has a sequence number which we bump when we get a family invalidation request. Each peer lookup cheaply checks whether the flush sequence of the root we are using is out of date, and if so flushes it and updates the sequence number. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 2 ++ net/ipv4/inetpeer.c | 28 ++++++++++++++++++++++++++++ net/ipv4/route.c | 2 +- 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index d432489e7109..e15c0862a686 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -68,6 +68,7 @@ struct inet_peer { struct inet_peer_base { struct inet_peer __rcu *root; seqlock_t lock; + u32 flush_seq; int total; }; @@ -168,6 +169,7 @@ extern void inet_putpeer(struct inet_peer *p); extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); extern void inetpeer_invalidate_tree(struct inet_peer_base *); +extern void inetpeer_invalidate_family(int family); /* * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e4cba56a5349..cac02ad1425d 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -86,10 +86,36 @@ void inet_peer_base_init(struct inet_peer_base *bp) { bp->root = peer_avl_empty_rcu; seqlock_init(&bp->lock); + bp->flush_seq = ~0U; bp->total = 0; } EXPORT_SYMBOL_GPL(inet_peer_base_init); +static atomic_t v4_seq = ATOMIC_INIT(0); +static atomic_t v6_seq = ATOMIC_INIT(0); + +static atomic_t *inetpeer_seq_ptr(int family) +{ + return (family == AF_INET ? &v4_seq : &v6_seq); +} + +static inline void flush_check(struct inet_peer_base *base, int family) +{ + atomic_t *fp = inetpeer_seq_ptr(family); + + if (unlikely(base->flush_seq != atomic_read(fp))) { + inetpeer_invalidate_tree(base); + base->flush_seq = atomic_read(fp); + } +} + +void inetpeer_invalidate_family(int family) +{ + atomic_t *fp = inetpeer_seq_ptr(family); + + atomic_inc(fp); +} + #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ /* Exported for sysctl_net_ipv4. */ @@ -437,6 +463,8 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, unsigned int sequence; int invalidated, gccnt = 0; + flush_check(base, daddr->family); + /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4f5834c4a667..456a9470fb54 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -935,7 +935,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); - inetpeer_invalidate_tree(net->ipv4.peers); + inetpeer_invalidate_family(AF_INET); } /* -- cgit v1.2.3 From 8e77327783c753689a1a766ab9d301b81c2529f1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 11 Jun 2012 00:01:52 -0700 Subject: inet: Add inetpeer tree roots to the FIB tables. Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 1 + include/net/ip_fib.h | 12 +++++++----- net/ipv4/fib_trie.c | 3 +++ net/ipv6/ip6_fib.c | 5 +++++ 4 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 3ac5f155c690..a192f7807659 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -237,6 +237,7 @@ struct fib6_table { u32 tb6_id; rwlock_t tb6_lock; struct fib6_node tb6_root; + struct inet_peer_base tb6_peers; }; #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 78df0866cc38..4b347c0ca094 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -19,6 +19,7 @@ #include #include #include +#include struct fib_config { u8 fc_dst_len; @@ -157,11 +158,12 @@ extern __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); FIB_RES_SADDR(net, res)) struct fib_table { - struct hlist_node tb_hlist; - u32 tb_id; - int tb_default; - int tb_num_default; - unsigned long tb_data[0]; + struct hlist_node tb_hlist; + u32 tb_id; + int tb_default; + int tb_num_default; + struct inet_peer_base tb_peers; + unsigned long tb_data[0]; }; extern int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 18cbc15b20d5..9b0f25930fbc 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1843,6 +1843,8 @@ int fib_table_flush(struct fib_table *tb) if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll); + inetpeer_invalidate_tree(&tb->tb_peers); + pr_debug("trie_flush found=%d\n", found); return found; } @@ -1991,6 +1993,7 @@ struct fib_table *fib_trie_table(u32 id) tb->tb_id = id; tb->tb_default = -1; tb->tb_num_default = 0; + inet_peer_base_init(&tb->tb_peers); t = (struct trie *) tb->tb_data; memset(t, 0, sizeof(*t)); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 0c220a416626..7ef0743f06f0 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -197,6 +197,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) table->tb6_id = id; table->tb6_root.leaf = net->ipv6.ip6_null_entry; table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + inet_peer_base_init(&table->tb6_peers); } return table; @@ -1633,6 +1634,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), @@ -1643,6 +1645,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); #endif fib6_tables_init(net); @@ -1666,8 +1669,10 @@ static void fib6_net_exit(struct net *net) del_timer_sync(&net->ipv6.ip6_fib_timer); #ifdef CONFIG_IPV6_MULTIPLE_TABLES + inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers); kfree(net->ipv6.fib6_local_tbl); #endif + inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers); kfree(net->ipv6.fib6_main_tbl); kfree(net->ipv6.fib_table_hash); kfree(net->ipv6.rt6_stats); -- cgit v1.2.3 From 7b34ca2ac7063f4ebf07f85fd75253ed84d5c648 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 11 Jun 2012 04:13:57 -0700 Subject: inet: Avoid potential NULL peer dereference. We handle NULL in rt{,6}_set_peer but then our caller will try to pass that NULL pointer into inet_putpeer() which isn't ready for it. Fix this by moving the NULL check one level up, and then remove the now unnecessary NULL check from inetpeer_ptr_set_peer(). Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inetpeer.h | 2 +- net/ipv4/route.c | 11 ++++++----- net/ipv6/route.c | 10 ++++++---- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index e15c0862a686..c27c8f10ebdc 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -104,7 +104,7 @@ static inline bool inetpeer_ptr_set_peer(unsigned long *ptr, struct inet_peer *p unsigned long val = (unsigned long) peer; unsigned long orig = *ptr; - if (!(orig & INETPEER_BASE_BIT) || !val || + if (!(orig & INETPEER_BASE_BIT) || cmpxchg(ptr, orig, val) != orig) return false; return true; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4c33ce3000ed..842510d50453 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1333,11 +1333,12 @@ void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) return; peer = inet_getpeer_v4(base, daddr, create); - - if (!rt_set_peer(rt, peer)) - inet_putpeer(peer); - else - rt->rt_peer_genid = rt_peer_genid(); + if (peer) { + if (!rt_set_peer(rt, peer)) + inet_putpeer(peer); + else + rt->rt_peer_genid = rt_peer_genid(); + } } /* diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d9ba4808f26a..58a3ec23da2f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -313,10 +313,12 @@ void rt6_bind_peer(struct rt6_info *rt, int create) return; peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); - if (!rt6_set_peer(rt, peer)) - inet_putpeer(peer); - else - rt->rt6i_peer_genid = rt6_peer_genid(); + if (peer) { + if (!rt6_set_peer(rt, peer)) + inet_putpeer(peer); + else + rt->rt6i_peer_genid = rt6_peer_genid(); + } } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, -- cgit v1.2.3 From 55afabaa0df0dd139c8796a71beb43d1216fbe43 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 11 Jun 2012 15:52:29 -0700 Subject: inet: Fix BUG triggered by __rt{,6}_get_peer(). If no peer actually gets attached (either because create is zero or the peer allocation fails) we'll trigger a BUG because we unconditionally do an rt{,6}_peer_ptr() afterwards. Fix this by guarding it with the proper check. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- include/net/route.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f88a85cf31c3..a2cda240ca95 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -61,7 +61,7 @@ static inline struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) return rt6_peer_ptr(rt); rt6_bind_peer(rt, create); - return rt6_peer_ptr(rt); + return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); } static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt) diff --git a/include/net/route.h b/include/net/route.h index cc693a5bb20d..2bfbc9329ea9 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -334,7 +334,7 @@ static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, i return rt_peer_ptr(rt); rt_bind_peer(rt, daddr, create); - return rt_peer_ptr(rt); + return (rt_has_peer(rt) ? rt_peer_ptr(rt) : NULL); } static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr) -- cgit v1.2.3 From 5f246e890502fed387e0f959e2224ea680c03423 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Mon, 11 Jun 2012 11:13:07 +0300 Subject: Bluetooth: Update HCI timeouts constants to use msecs_to_jiffies The HCI constants are always used in form of jiffies. So just include the conversion from msecs in the define itself. This has the advantage of making the code where the timeout is used more readable and avoiding unnecessary conversions. The patch is similar to commit ba13ccd9 doing the same job for L2CAP Reported-by: Marcel Holtmann Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 10 +++++----- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_core.c | 25 +++++++++++-------------- 3 files changed, 17 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 2a6b0b8b7120..7dcd3495edde 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -139,11 +139,11 @@ enum { #define HCIINQUIRY _IOR('H', 240, int) /* HCI timeouts */ -#define HCI_DISCONN_TIMEOUT (2000) /* 2 seconds */ -#define HCI_PAIRING_TIMEOUT (60000) /* 60 seconds */ -#define HCI_INIT_TIMEOUT (10000) /* 10 seconds */ -#define HCI_CMD_TIMEOUT (1000) /* 1 seconds */ -#define HCI_ACL_TX_TIMEOUT (45000) /* 45 seconds */ +#define HCI_DISCONN_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ +#define HCI_PAIRING_TIMEOUT msecs_to_jiffies(60000) /* 60 seconds */ +#define HCI_INIT_TIMEOUT msecs_to_jiffies(10000) /* 10 seconds */ +#define HCI_CMD_TIMEOUT msecs_to_jiffies(1000) /* 1 seconds */ +#define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */ /* HCI data types */ #define HCI_COMMAND_PKT 0x01 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 20fd57367ddc..75766b7f0dc7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -598,7 +598,7 @@ static inline void hci_conn_put(struct hci_conn *conn) if (conn->type == ACL_LINK || conn->type == LE_LINK) { del_timer(&conn->idle_timer); if (conn->state == BT_CONNECTED) { - timeo = msecs_to_jiffies(conn->disc_timeout); + timeo = conn->disc_timeout; if (!conn->out) timeo *= 2; } else { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 471e4fb1b6e5..e91bf7e15666 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -690,12 +690,11 @@ int hci_dev_open(__u16 dev) set_bit(HCI_INIT, &hdev->flags); hdev->init_last_cmd = 0; - ret = __hci_request(hdev, hci_init_req, 0, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT); if (lmp_host_le_capable(hdev)) ret = __hci_request(hdev, hci_le_init_req, 0, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + HCI_INIT_TIMEOUT); clear_bit(HCI_INIT, &hdev->flags); } @@ -782,8 +781,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) if (!test_bit(HCI_RAW, &hdev->flags) && test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { set_bit(HCI_INIT, &hdev->flags); - __hci_request(hdev, hci_reset_req, 0, - msecs_to_jiffies(HCI_CMD_TIMEOUT)); + __hci_request(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); clear_bit(HCI_INIT, &hdev->flags); } @@ -872,8 +870,7 @@ int hci_dev_reset(__u16 dev) hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; if (!test_bit(HCI_RAW, &hdev->flags)) - ret = __hci_request(hdev, hci_reset_req, 0, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); done: hci_req_unlock(hdev); @@ -913,7 +910,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) switch (cmd) { case HCISETAUTH: err = hci_request(hdev, hci_auth_req, dr.dev_opt, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + HCI_INIT_TIMEOUT); break; case HCISETENCRYPT: @@ -925,23 +922,23 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ err = hci_request(hdev, hci_auth_req, dr.dev_opt, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + HCI_INIT_TIMEOUT); if (err) break; } err = hci_request(hdev, hci_encrypt_req, dr.dev_opt, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + HCI_INIT_TIMEOUT); break; case HCISETSCAN: err = hci_request(hdev, hci_scan_req, dr.dev_opt, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + HCI_INIT_TIMEOUT); break; case HCISETLINKPOL: err = hci_request(hdev, hci_linkpol_req, dr.dev_opt, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + HCI_INIT_TIMEOUT); break; case HCISETLINKMODE: @@ -2455,7 +2452,7 @@ static void __check_timeout(struct hci_dev *hdev, unsigned int cnt) /* ACL tx timeout must be longer than maximum * link supervision timeout (40.9 seconds) */ if (!cnt && time_after(jiffies, hdev->acl_last_tx + - msecs_to_jiffies(HCI_ACL_TX_TIMEOUT))) + HCI_ACL_TX_TIMEOUT)) hci_link_tx_to(hdev, ACL_LINK); } } @@ -2839,7 +2836,7 @@ static void hci_cmd_work(struct work_struct *work) del_timer(&hdev->cmd_timer); else mod_timer(&hdev->cmd_timer, - jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT)); + jiffies + HCI_CMD_TIMEOUT); } else { skb_queue_head(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); -- cgit v1.2.3 From af7985bf85840e3dc90ba108a679db044f91f00e Mon Sep 17 00:00:00 2001 From: Jefferson Delfes Date: Mon, 11 Jun 2012 09:18:51 -0400 Subject: Bluetooth: Fix flags of mgmt_device_found event Change flags field to matches userspace structure. This field needs to be converted to little endian before forward it. Signed-off-by: Jefferson Delfes Signed-off-by: Gustavo Padovan --- include/net/bluetooth/mgmt.h | 2 +- net/bluetooth/mgmt.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 23fd0546fccb..4348ee8bda69 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -444,7 +444,7 @@ struct mgmt_ev_auth_failed { struct mgmt_ev_device_found { struct mgmt_addr_info addr; __s8 rssi; - __u8 flags[4]; + __le32 flags; __le16 eir_len; __u8 eir[0]; } __packed; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c72307cc25fc..b4816632d724 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3546,9 +3546,9 @@ int mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, ev->addr.type = link_to_bdaddr(link_type, addr_type); ev->rssi = rssi; if (cfm_name) - ev->flags[0] |= MGMT_DEV_FOUND_CONFIRM_NAME; + ev->flags |= MGMT_DEV_FOUND_CONFIRM_NAME; if (!ssp) - ev->flags[0] |= MGMT_DEV_FOUND_LEGACY_PAIRING; + ev->flags |= MGMT_DEV_FOUND_LEGACY_PAIRING; if (eir_len > 0) memcpy(ev->eir, eir, eir_len); @@ -3558,6 +3558,7 @@ int mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, dev_class, 3); ev->eir_len = cpu_to_le16(eir_len); + ev->flags = cpu_to_le32(ev->flags); ev_size = sizeof(*ev) + eir_len; -- cgit v1.2.3 From 73c3df3ba3f2d7fe3ea47f944282f3cda31c5505 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Jun 2012 11:17:14 +0200 Subject: cfg80211/nl80211: fix kernel-doc Add missing entries to nl80211.h and fix the kernel-doc notation in cfg80211.h. Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 33 ++++++++++++++++++++++++++++----- include/net/cfg80211.h | 8 ++++---- 2 files changed, 32 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e7b1fc1fe26b..e4f41bdebc07 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -277,6 +277,12 @@ * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to * NL80211_CMD_GET_SURVEY and on the "scan" multicast group) * + * @NL80211_CMD_SET_PMKSA: Add a PMKSA cache entry, using %NL80211_ATTR_MAC + * (for the BSSID) and %NL80211_ATTR_PMKID. + * @NL80211_CMD_DEL_PMKSA: Delete a PMKSA cache entry, using %NL80211_ATTR_MAC + * (for the BSSID) and %NL80211_ATTR_PMKID. + * @NL80211_CMD_FLUSH_PMKSA: Flush all PMKSA cache entries. + * * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain * has been changed and provides details of the request information * that caused the change such as who initiated the regulatory request @@ -456,6 +462,10 @@ * the frame. * @NL80211_CMD_ACTION_TX_STATUS: Alias for @NL80211_CMD_FRAME_TX_STATUS for * backward compatibility. + * + * @NL80211_CMD_SET_POWER_SAVE: Set powersave, using %NL80211_ATTR_PS_STATE + * @NL80211_CMD_GET_POWER_SAVE: Get powersave status in %NL80211_ATTR_PS_STATE + * * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command * is used to configure connection quality monitoring notification trigger * levels. @@ -771,6 +781,13 @@ enum nl80211_commands { * section 7.3.2.25.1, e.g. 0x000FAC04) * @NL80211_ATTR_KEY_SEQ: transmit key sequence number (IV/PN) for TKIP and * CCMP keys, each six bytes in little endian + * @NL80211_ATTR_KEY_DEFAULT: Flag attribute indicating the key is default key + * @NL80211_ATTR_KEY_DEFAULT_MGMT: Flag attribute indicating the key is the + * default management key + * @NL80211_ATTR_CIPHER_SUITES_PAIRWISE: For crypto settings for connect or + * other commands, indicates which pairwise cipher suites are used + * @NL80211_ATTR_CIPHER_SUITE_GROUP: For crypto settings for connect or + * other commands, indicates which group cipher suite is used * * @NL80211_ATTR_BEACON_INTERVAL: beacon interval in TU * @NL80211_ATTR_DTIM_PERIOD: DTIM period for beaconing @@ -1006,6 +1023,8 @@ enum nl80211_commands { * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was * acknowledged by the recipient. * + * @NL80211_ATTR_PS_STATE: powersave state, using &enum nl80211_ps_state values. + * * @NL80211_ATTR_CQM: connection quality monitor configuration in a * nested attribute with %NL80211_ATTR_CQM_* sub-attributes. * @@ -1063,7 +1082,7 @@ enum nl80211_commands { * flag isn't set, the frame will be rejected. This is also used as an * nl80211 capability flag. * - * @NL80211_ATTR_BSS_HTOPMODE: HT operation mode (u16) + * @NL80211_ATTR_BSS_HT_OPMODE: HT operation mode (u16) * * @NL80211_ATTR_KEY_DEFAULT_TYPES: A nested attribute containing flags * attributes, specifying what a key should be set as default as. @@ -1087,10 +1106,10 @@ enum nl80211_commands { * indicate which WoW triggers should be enabled. This is also * used by %NL80211_CMD_GET_WOWLAN to get the currently enabled WoWLAN * triggers. - + * * @NL80211_ATTR_SCHED_SCAN_INTERVAL: Interval between scheduled scan * cycles, in msecs. - + * * @NL80211_ATTR_SCHED_SCAN_MATCH: Nested attribute with one or more * sets of attributes to match during scheduled scans. Only BSSs * that match any of the sets will be reported. These are @@ -1117,7 +1136,7 @@ enum nl80211_commands { * are managed in software: interfaces of these types aren't subject to * any restrictions in their number or combinations. * - * @%NL80211_ATTR_REKEY_DATA: nested attribute containing the information + * @NL80211_ATTR_REKEY_DATA: nested attribute containing the information * necessary for GTK rekeying in the device, see &enum nl80211_rekey_data. * * @NL80211_ATTR_SCAN_SUPP_RATES: rates per to be advertised as supported in scan, @@ -1184,7 +1203,6 @@ enum nl80211_commands { * @NL80211_ATTR_FEATURE_FLAGS: This u32 attribute contains flags from * &enum nl80211_feature_flags and is advertised in wiphy information. * @NL80211_ATTR_PROBE_RESP_OFFLOAD: Indicates that the HW responds to probe - * * requests while operating in AP-mode. * This attribute holds a bitmap of the supported protocols for * offloading (see &enum nl80211_probe_resp_offload_support_attr). @@ -2507,6 +2525,11 @@ enum nl80211_band { NL80211_BAND_5GHZ, }; +/** + * enum nl80211_ps_state - powersave state + * @NL80211_PS_DISABLED: powersave is disabled + * @NL80211_PS_ENABLED: powersave is enabled + */ enum nl80211_ps_state { NL80211_PS_DISABLED, NL80211_PS_ENABLED, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 778e533a9734..76d54725ea31 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -627,10 +627,10 @@ struct sta_bss_parameters { * @llid: mesh local link id * @plid: mesh peer link id * @plink_state: mesh peer link state - * @signal: the signal strength, type depends on the wiphy's signal_type - NOTE: For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. - * @signal_avg: avg signal strength, type depends on the wiphy's signal_type - NOTE: For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. + * @signal: The signal strength, type depends on the wiphy's signal_type. + * For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. + * @signal_avg: Average signal strength, type depends on the wiphy's signal_type. + * For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. * @txrate: current unicast bitrate from this station * @rxrate: current unicast bitrate to this station * @rx_packets: packets received from this station -- cgit v1.2.3 From ac1073a61d73b6277794d2efc872eb7e1b706b5c Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Thu, 14 Jun 2012 02:06:06 +0800 Subject: {nl,cfg,mac}80211: implement dot11MeshHWMProotInterval and dot11MeshHWMPactivePathToRootTimeout Add the mesh configuration parameters dot11MeshHWMProotInterval and dot11MeshHWMPactivePathToRootTimeout to be used by proactive PREQ mechanism. Signed-off-by: Chun-Yeow Yeoh [line-break commit log] Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 9 +++++++++ include/net/cfg80211.h | 9 +++++++++ net/mac80211/cfg.c | 6 ++++++ net/mac80211/debugfs_netdev.c | 6 ++++++ net/wireless/mesh.c | 4 ++++ net/wireless/nl80211.c | 15 ++++++++++++++- 6 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e4f41bdebc07..6936fabe8797 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -2185,6 +2185,13 @@ enum nl80211_mntr_flags { * * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute * + * @NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT: The time (in TUs) for + * which mesh STAs receiving a proactive PREQ shall consider the forwarding + * information to the root mesh STA to be valid. + * + * @NL80211_MESHCONF_HWMP_ROOT_INTERVAL: The interval of time (in TUs) between + * proactive PREQs are transmitted. + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -2211,6 +2218,8 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_RSSI_THRESHOLD, NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, NL80211_MESHCONF_HT_OPMODE, + NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, + NL80211_MESHCONF_HWMP_ROOT_INTERVAL, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 76d54725ea31..e52b38d7b1b6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -842,6 +842,13 @@ struct bss_parameters { * @rssi_threshold: the threshold for average signal strength of candidate * station to establish a peer link * @ht_opmode: mesh HT protection mode + * + * @dot11MeshHWMPactivePathToRootTimeout: The time (in TUs) for which mesh STAs + * receiving a proactive PREQ shall consider the forwarding information to + * the root mesh STA to be valid. + * + * @dot11MeshHWMProotInterval: The interval of time (in TUs) between proactive + * PREQs are transmitted. */ struct mesh_config { u16 dot11MeshRetryTimeout; @@ -866,6 +873,8 @@ struct mesh_config { bool dot11MeshForwarding; s32 rssi_threshold; u16 ht_opmode; + u32 dot11MeshHWMPactivePathToRootTimeout; + u16 dot11MeshHWMProotInterval; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index cd8b1fb05d42..d93cda1c4215 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1590,6 +1590,12 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, sdata->vif.bss_conf.ht_operation_mode = nconf->ht_opmode; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT); } + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, mask)) + conf->dot11MeshHWMPactivePathToRootTimeout = + nconf->dot11MeshHWMPactivePathToRootTimeout; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOT_INTERVAL, mask)) + conf->dot11MeshHWMProotInterval = + nconf->dot11MeshHWMProotInterval; return 0; } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index c429417e1322..a8cea70902e4 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -510,6 +510,10 @@ IEEE80211_IF_FILE(dot11MeshHWMPRannInterval, IEEE80211_IF_FILE(dot11MeshForwarding, u.mesh.mshcfg.dot11MeshForwarding, DEC); IEEE80211_IF_FILE(rssi_threshold, u.mesh.mshcfg.rssi_threshold, DEC); IEEE80211_IF_FILE(ht_opmode, u.mesh.mshcfg.ht_opmode, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPactivePathToRootTimeout, + u.mesh.mshcfg.dot11MeshHWMPactivePathToRootTimeout, DEC); +IEEE80211_IF_FILE(dot11MeshHWMProotInterval, + u.mesh.mshcfg.dot11MeshHWMProotInterval, DEC); #endif #define DEBUGFS_ADD_MODE(name, mode) \ @@ -611,6 +615,8 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(dot11MeshGateAnnouncementProtocol); MESHPARAMS_ADD(rssi_threshold); MESHPARAMS_ADD(ht_opmode); + MESHPARAMS_ADD(dot11MeshHWMPactivePathToRootTimeout); + MESHPARAMS_ADD(dot11MeshHWMProotInterval); #undef MESHPARAMS_ADD } #endif diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index b44c736bf9cf..2f141cfd581e 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -14,6 +14,8 @@ #define MESH_PATH_TIMEOUT 5000 #define MESH_RANN_INTERVAL 5000 +#define MESH_PATH_TO_ROOT_TIMEOUT 6000 +#define MESH_ROOT_INTERVAL 5000 /* * Minimum interval between two consecutive PREQs originated by the same @@ -62,6 +64,8 @@ const struct mesh_config default_mesh_config = { .dot11MeshForwarding = true, .rssi_threshold = MESH_RSSI_THRESHOLD, .ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED, + .dot11MeshHWMPactivePathToRootTimeout = MESH_PATH_TO_ROOT_TIMEOUT, + .dot11MeshHWMProotInterval = MESH_ROOT_INTERVAL, }; const struct mesh_setup default_mesh_setup = { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7db0aee8cd5b..f8930db613df 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3469,7 +3469,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u32(msg, NL80211_MESHCONF_RSSI_THRESHOLD, cur_params.rssi_threshold) || nla_put_u32(msg, NL80211_MESHCONF_HT_OPMODE, - cur_params.ht_opmode)) + cur_params.ht_opmode) || + nla_put_u32(msg, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, + cur_params.dot11MeshHWMPactivePathToRootTimeout) || + nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, + cur_params.dot11MeshHWMProotInterval)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -3505,6 +3509,8 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_FORWARDING] = { .type = NLA_U8 }, [NL80211_MESHCONF_RSSI_THRESHOLD] = { .type = NLA_U32 }, [NL80211_MESHCONF_HT_OPMODE] = { .type = NLA_U16 }, + [NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 }, + [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -3612,6 +3618,13 @@ do {\ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, mask, NL80211_MESHCONF_HT_OPMODE, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout, + mask, + NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, + nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, + mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, + nla_get_u16); if (mask_out) *mask_out = mask; -- cgit v1.2.3 From 36393395536064e483b73d173f6afc103eadfbc4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 14 Jun 2012 22:21:46 -0700 Subject: ipv4: Handle PMTU in all ICMP error handlers. With ip_rt_frag_needed() removed, we have to explicitly update PMTU information in every ICMP error handler. Create two helper functions to facilitate this. 1) ipv4_sk_update_pmtu() This updates the PMTU when we have a socket context to work with. 2) ipv4_update_pmtu() Raw version, used when no socket context is available. For this interface, we essentially just pass in explicit arguments for the flow identity information we would have extracted from the socket. And you'll notice that ipv4_sk_update_pmtu() is simply implemented in terms of ipv4_update_pmtu() Note that __ip_route_output_key() is used, rather than something like ip_route_output_flow() or ip_route_output_key(). This is because we absolutely do not want to end up with a route that does IPSEC encapsulation and the like. Instead, we only want the route that would get us to the node described by the outermost IP header. Reported-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/route.h | 5 ++++- net/ipv4/ah4.c | 1 + net/ipv4/esp4.c | 1 + net/ipv4/ip_gre.c | 14 ++++++++++---- net/ipv4/ipcomp.c | 1 + net/ipv4/ipip.c | 15 +++++++++++---- net/ipv4/ping.c | 1 + net/ipv4/raw.c | 3 +++ net/ipv4/route.c | 28 ++++++++++++++++++++++++++++ net/ipv4/udp.c | 1 + net/ipv6/sit.c | 15 +++++++++++---- 11 files changed, 72 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index a36ae429ed5d..47eb25ac1f7f 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -215,7 +215,10 @@ static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 s return ip_route_input_common(skb, dst, src, tos, devin, true); } -extern void ip_rt_send_redirect(struct sk_buff *skb); +extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, + int oif, u32 mark, u8 protocol, int flow_flags); +extern void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu); +extern void ip_rt_send_redirect(struct sk_buff *skb); extern unsigned int inet_addr_type(struct net *net, __be32 addr); extern unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index e8f2617ecd47..916d5ecaf6c6 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -408,6 +408,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) return; pr_debug("pmtu discovery on SA AH/%08x/%08x\n", ntohl(ah->spi), ntohl(iph->daddr)); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index cb982a61536f..7b95b49a36ce 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -494,6 +494,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", ntohl(esph->spi), ntohl(iph->daddr)); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f49047b79609..594cec35ac4d 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -516,9 +516,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return; - case ICMP_FRAG_NEEDED: - /* Soft state for pmtu is maintained by IP core. */ - return; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -538,7 +535,16 @@ static void ipgre_err(struct sk_buff *skb, u32 info) flags & GRE_KEY ? *(((__be32 *)p) + (grehlen / 4) - 1) : 0, p[1]); - if (t == NULL || t->parms.iph.daddr == 0 || + if (t == NULL) + goto out; + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + t->parms.link, 0, IPPROTO_GRE, 0); + goto out; + } + + if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) goto out; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 63b64c45a826..b91375482d84 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -42,6 +42,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n", spi, &iph->daddr); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2d0f99bf61b3..715338a1b205 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -348,9 +348,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; - case ICMP_FRAG_NEEDED: - /* Soft state for pmtu is maintained by IP core. */ - return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -369,7 +366,17 @@ static int ipip_err(struct sk_buff *skb, u32 info) rcu_read_lock(); t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); - if (t == NULL || t->parms.iph.daddr == 0) + if (t == NULL) + goto out; + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + t->dev->ifindex, 0, IPPROTO_IPIP, 0); + err = 0; + goto out; + } + + if (t->parms.iph.daddr == 0) goto out; err = 0; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2c00e8bf684d..340fcf29a966 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -371,6 +371,7 @@ void ping_err(struct sk_buff *skb, u32 info) break; case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + ipv4_sk_update_pmtu(skb, sk, info); if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4032b818f3e4..659ddfb10947 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -216,6 +216,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) int err = 0; int harderr = 0; + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) + ipv4_sk_update_pmtu(skb, sk, info); + /* Report error on raw socket, if: 1. User requested ip_recverr. 2. Socket is connected (otherwise the error indication diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 655506af47ca..41df5297a412 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1711,6 +1711,34 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } } +void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, + int oif, u32 mark, u8 protocol, int flow_flags) +{ + const struct iphdr *iph = (const struct iphdr *)skb->data; + struct flowi4 fl4; + struct rtable *rt; + + flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, + protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS, + iph->daddr, iph->saddr, 0, 0); + rt = __ip_route_output_key(net, &fl4); + if (!IS_ERR(rt)) { + ip_rt_update_pmtu(&rt->dst, mtu); + ip_rt_put(rt); + } +} +EXPORT_SYMBOL_GPL(ipv4_update_pmtu); + +void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) +{ + const struct inet_sock *inet = inet_sk(sk); + + return ipv4_update_pmtu(skb, sock_net(sk), mtu, + sk->sk_bound_dev_if, sk->sk_mark, + inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + inet_sk_flowi_flags(sk)); +} +EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); static void ipv4_validate_peer(struct rtable *rt) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index eaca73644e79..db017efb76ea 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -615,6 +615,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) break; case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + ipv4_sk_update_pmtu(skb, sk, info); if (inet->pmtudisc != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 60415711563f..49aea94c9be3 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -527,9 +527,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; - case ICMP_FRAG_NEEDED: - /* Soft state for pmtu is maintained by IP core. */ - return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -551,7 +548,17 @@ static int ipip6_err(struct sk_buff *skb, u32 info) skb->dev, iph->daddr, iph->saddr); - if (t == NULL || t->parms.iph.daddr == 0) + if (t == NULL) + goto out; + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + t->dev->ifindex, 0, IPPROTO_IPV6, 0); + err = 0; + goto out; + } + + if (t->parms.iph.daddr == 0) goto out; err = 0; -- cgit v1.2.3 From 81aded24675ebda5de8a68843250ad15584ac38a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 15 Jun 2012 14:54:11 -0700 Subject: ipv6: Handle PMTU in ICMP error handlers. One tricky issue on the ipv6 side vs. ipv4 is that the ICMP callouts to handle the error pass the 32-bit info cookie in network byte order whereas ipv4 passes it around in host byte order. Like the ipv4 side, we have two helper functions. One for when we have a socket context and one for when we do not. ip6ip6 tunnels are not handled here, because they handle PMTU events by essentially relaying another ICMP packet-too-big message back to the original sender. This patch allows us to get rid of rt6_do_pmtu_disc(). It handles all kinds of situations that simply cannot happen when we do the PMTU update directly using a fully resolved route. In fact, the "plen == 128" check in ip6_rt_update_pmtu() can very likely be removed or changed into a BUG_ON() check. We should never have a prefixed ipv6 route when we get there. Another piece of strange history here is that TCP and DCCP, unlike in ipv4, never invoke the update_pmtu() method from their ICMP error handlers. This is incredibly astonishing since this is the context where we have the most accurate context in which to make a PMTU update, namely we have a fully connected socket and associated cached socket route. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 8 +-- net/dccp/ipv6.c | 2 + net/ipv6/ah6.c | 3 +- net/ipv6/esp6.c | 2 + net/ipv6/icmp.c | 6 +- net/ipv6/ipcomp6.c | 2 + net/ipv6/raw.c | 5 +- net/ipv6/route.c | 143 +++++++++++------------------------------------- net/ipv6/tcp_ipv6.c | 2 + net/ipv6/udp.c | 3 + 10 files changed, 54 insertions(+), 122 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a2cda240ca95..58cb3fc34879 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -140,10 +140,10 @@ extern void rt6_redirect(const struct in6_addr *dest, u8 *lladdr, int on_link); -extern void rt6_pmtu_discovery(const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct net_device *dev, - u32 pmtu); +extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, + int oif, u32 mark); +extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, + __be32 mtu); struct netlink_callback; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index fa9512d86f3b..9991be083ad0 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -165,6 +165,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); + dst->ops->update_pmtu(dst, ntohl(info)); + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { dccp_sync_mss(sk, dst_mtu(dst)); } /* else let the usual retransmit timer handle it */ diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f1a4a2c28ed3..49d4d26bda88 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -621,7 +622,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", ntohl(ah->spi), &iph->daddr); - + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index db1521fcda5b..89a615ba84f8 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -442,6 +443,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n", ntohl(esph->spi), &iph->daddr); + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ed89bba745a1..5247d5c211f9 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -649,7 +649,6 @@ static int icmpv6_rcv(struct sk_buff *skb) struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; - const struct ipv6hdr *orig_hdr; struct icmp6hdr *hdr; u8 type; @@ -661,7 +660,7 @@ static int icmpv6_rcv(struct sk_buff *skb) XFRM_STATE_ICMP)) goto drop_no_count; - if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr))) + if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) goto drop_no_count; nh = skb_network_offset(skb); @@ -722,9 +721,6 @@ static int icmpv6_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto discard_it; hdr = icmp6_hdr(skb); - orig_hdr = (struct ipv6hdr *) (hdr + 1); - rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev, - ntohl(hdr->icmp6_mtu)); /* * Drop through to notify diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 5cb75bfe45b1..92832385a8ef 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n", spi, &iph->daddr); + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 93d69836fded..43b0042f15f4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -328,9 +328,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, return; harderr = icmpv6_err_convert(type, code, &err); - if (type == ICMPV6_PKT_TOOBIG) + if (type == ICMPV6_PKT_TOOBIG) { + ip6_sk_update_pmtu(skb, sk, info); harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); - + } if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 58a3ec23da2f..0d41f68daff2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1049,7 +1049,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info*)dst; + dst_confirm(dst); if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { + struct net *net = dev_net(dst->dev); + rt6->rt6i_flags |= RTF_MODIFIED; if (mtu < IPV6_MIN_MTU) { u32 features = dst_metric(dst, RTAX_FEATURES); @@ -1058,9 +1061,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst_metric_set(dst, RTAX_FEATURES, features); } dst_metric_set(dst, RTAX_MTU, mtu); + rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } } +void ip6_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, + int oif, __be32 mark) +{ + const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; + struct dst_entry *dst; + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; + fl6.flowi6_mark = mark; + fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS; + fl6.daddr = iph->daddr; + fl6.saddr = iph->saddr; + fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + + dst = ip6_route_output(net, NULL, &fl6); + if (!dst->error) + ip6_rt_update_pmtu(dst, ntohl(mtu)); + dst_release(dst); +} +EXPORT_SYMBOL_GPL(ip6_update_pmtu); + +void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) +{ + ip6_update_pmtu(skb, sock_net(sk), mtu, + sk->sk_bound_dev_if, sk->sk_mark); +} +EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); + static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; @@ -1703,116 +1736,6 @@ out: dst_release(&rt->dst); } -/* - * Handle ICMP "packet too big" messages - * i.e. Path MTU discovery - */ - -static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr, - struct net *net, u32 pmtu, int ifindex) -{ - struct rt6_info *rt, *nrt; - int allfrag = 0; -again: - rt = rt6_lookup(net, daddr, saddr, ifindex, 0); - if (!rt) - return; - - if (rt6_check_expired(rt)) { - ip6_del_rt(rt); - goto again; - } - - if (pmtu >= dst_mtu(&rt->dst)) - goto out; - - if (pmtu < IPV6_MIN_MTU) { - /* - * According to RFC2460, PMTU is set to the IPv6 Minimum Link - * MTU (1280) and a fragment header should always be included - * after a node receiving Too Big message reporting PMTU is - * less than the IPv6 Minimum Link MTU. - */ - pmtu = IPV6_MIN_MTU; - allfrag = 1; - } - - /* New mtu received -> path was valid. - They are sent only in response to data packets, - so that this nexthop apparently is reachable. --ANK - */ - dst_confirm(&rt->dst); - - /* Host route. If it is static, it would be better - not to override it, but add new one, so that - when cache entry will expire old pmtu - would return automatically. - */ - if (rt->rt6i_flags & RTF_CACHE) { - dst_metric_set(&rt->dst, RTAX_MTU, pmtu); - if (allfrag) { - u32 features = dst_metric(&rt->dst, RTAX_FEATURES); - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(&rt->dst, RTAX_FEATURES, features); - } - rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); - rt->rt6i_flags |= RTF_MODIFIED; - goto out; - } - - /* Network route. - Two cases are possible: - 1. It is connected route. Action: COW - 2. It is gatewayed route or NONEXTHOP route. Action: clone it. - */ - if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, daddr, saddr); - else - nrt = rt6_alloc_clone(rt, daddr); - - if (nrt) { - dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); - if (allfrag) { - u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(&nrt->dst, RTAX_FEATURES, features); - } - - /* According to RFC 1981, detecting PMTU increase shouldn't be - * happened within 5 mins, the recommended timer is 10 mins. - * Here this route expiration time is set to ip6_rt_mtu_expires - * which is 10 mins. After 10 mins the decreased pmtu is expired - * and detecting PMTU increase will be automatically happened. - */ - rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires); - nrt->rt6i_flags |= RTF_DYNAMIC; - ip6_ins_rt(nrt); - } -out: - dst_release(&rt->dst); -} - -void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr, - struct net_device *dev, u32 pmtu) -{ - struct net *net = dev_net(dev); - - /* - * RFC 1981 states that a node "MUST reduce the size of the packets it - * is sending along the path" that caused the Packet Too Big message. - * Since it's not possible in the general case to determine which - * interface was used to send the original packet, we update the MTU - * on the interface that will be used to send future packets. We also - * update the MTU on the interface that received the Packet Too Big in - * case the original packet was forced out that interface with - * SO_BINDTODEVICE or similar. This is the next best thing to the - * correct behaviour, which would be to update the MTU on all - * interfaces. - */ - rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); - rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); -} - /* * Misc support functions */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f91b0bfd12d5..26a88623940b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -415,6 +415,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); + dst->ops->update_pmtu(dst, ntohl(info)); + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f05099fc5901..051ad481973f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -479,6 +479,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk == NULL) return; + if (type == ICMPV6_PKT_TOOBIG) + ip6_sk_update_pmtu(skb, sk, info); + np = inet6_sk(sk); if (!icmpv6_err_convert(type, code, &err) && !np->recverr) -- cgit v1.2.3 From 2a0c451ade8e1783c5d453948289e4a978d417c9 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 14 Jun 2012 23:00:17 +0000 Subject: ipv6: Prevent access to uninitialized fib_table_hash via /proc/net/ipv6_route /proc/net/ipv6_route reflects the contents of fib_table_hash. The proc handler is installed in ip6_route_net_init() whereas fib_table_hash is allocated in fib6_net_init() _after_ the proc handler has been installed. This opens up a short time frame to access fib_table_hash with its pants down. fib6_init() as a whole can't be moved to an earlier position as it also registers the rtnetlink message handlers which should be registered at the end. Therefore split it into fib6_init() which is run early and fib6_init_late() to register the rtnetlink message handlers. Signed-off-by: Thomas Graf Reviewed-by: Neil Horman Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ net/ipv6/ip6_fib.c | 18 +++++++++++------- net/ipv6/route.c | 16 +++++++++++----- 3 files changed, 24 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0ae759a6c76e..209af13b0336 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -271,6 +271,8 @@ extern void fib6_run_gc(unsigned long expires, extern void fib6_gc_cleanup(void); extern int fib6_init(void); +extern int fib6_init_late(void); +extern void fib6_cleanup_late(void); #ifdef CONFIG_IPV6_MULTIPLE_TABLES extern int fib6_rules_init(void); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 74c21b924a79..fbd4afff05fa 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1692,21 +1692,25 @@ int __init fib6_init(void) ret = register_pernet_subsys(&fib6_net_ops); if (ret) goto out_kmem_cache_create; - - ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, - NULL); - if (ret) - goto out_unregister_subsys; out: return ret; -out_unregister_subsys: - unregister_pernet_subsys(&fib6_net_ops); out_kmem_cache_create: kmem_cache_destroy(fib6_node_kmem); goto out; } +int __init fib6_init_late(void) +{ + return __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, + NULL); +} + +void fib6_cleanup_late(void) +{ + rtnl_unregister(PF_INET6, RTM_GETROUTE); +} + void fib6_gc_cleanup(void) { unregister_pernet_subsys(&fib6_net_ops); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 999a982ad3fd..dc60bf585966 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3018,10 +3018,14 @@ int __init ip6_route_init(void) if (ret) goto out_kmem_cache; - ret = register_pernet_subsys(&ip6_route_net_ops); + ret = fib6_init(); if (ret) goto out_dst_entries; + ret = register_pernet_subsys(&ip6_route_net_ops); + if (ret) + goto out_fib6_init; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; /* Registering of the loopback is done before this portion of code, @@ -3035,13 +3039,13 @@ int __init ip6_route_init(void) init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #endif - ret = fib6_init(); + ret = fib6_init_late(); if (ret) goto out_register_subsys; ret = xfrm6_init(); if (ret) - goto out_fib6_init; + goto out_fib6_init_late; ret = fib6_rules_init(); if (ret) @@ -3064,10 +3068,12 @@ fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: xfrm6_fini(); -out_fib6_init: - fib6_gc_cleanup(); +out_fib6_init_late: + fib6_cleanup_late(); out_register_subsys: unregister_pernet_subsys(&ip6_route_net_ops); +out_fib6_init: + fib6_gc_cleanup(); out_dst_entries: dst_entries_destroy(&ip6_dst_blackhole_ops); out_kmem_cache: -- cgit v1.2.3 From e8803b6c387129059e04d9e14d49efda250a7361 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 16 Jun 2012 01:12:19 -0700 Subject: Revert "ipv6: Prevent access to uninitialized fib_table_hash via /proc/net/ipv6_route" This reverts commit 2a0c451ade8e1783c5d453948289e4a978d417c9. It causes crashes, because now ip6_null_entry is used before it is initialized. Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 -- net/ipv6/ip6_fib.c | 18 +++++++----------- net/ipv6/route.c | 16 +++++----------- 3 files changed, 12 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 209af13b0336..0ae759a6c76e 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -271,8 +271,6 @@ extern void fib6_run_gc(unsigned long expires, extern void fib6_gc_cleanup(void); extern int fib6_init(void); -extern int fib6_init_late(void); -extern void fib6_cleanup_late(void); #ifdef CONFIG_IPV6_MULTIPLE_TABLES extern int fib6_rules_init(void); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index fbd4afff05fa..74c21b924a79 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1692,25 +1692,21 @@ int __init fib6_init(void) ret = register_pernet_subsys(&fib6_net_ops); if (ret) goto out_kmem_cache_create; + + ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, + NULL); + if (ret) + goto out_unregister_subsys; out: return ret; +out_unregister_subsys: + unregister_pernet_subsys(&fib6_net_ops); out_kmem_cache_create: kmem_cache_destroy(fib6_node_kmem); goto out; } -int __init fib6_init_late(void) -{ - return __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, - NULL); -} - -void fib6_cleanup_late(void) -{ - rtnl_unregister(PF_INET6, RTM_GETROUTE); -} - void fib6_gc_cleanup(void) { unregister_pernet_subsys(&fib6_net_ops); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dc60bf585966..999a982ad3fd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3018,13 +3018,9 @@ int __init ip6_route_init(void) if (ret) goto out_kmem_cache; - ret = fib6_init(); - if (ret) - goto out_dst_entries; - ret = register_pernet_subsys(&ip6_route_net_ops); if (ret) - goto out_fib6_init; + goto out_dst_entries; ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; @@ -3039,13 +3035,13 @@ int __init ip6_route_init(void) init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #endif - ret = fib6_init_late(); + ret = fib6_init(); if (ret) goto out_register_subsys; ret = xfrm6_init(); if (ret) - goto out_fib6_init_late; + goto out_fib6_init; ret = fib6_rules_init(); if (ret) @@ -3068,12 +3064,10 @@ fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: xfrm6_fini(); -out_fib6_init_late: - fib6_cleanup_late(); -out_register_subsys: - unregister_pernet_subsys(&ip6_route_net_ops); out_fib6_init: fib6_gc_cleanup(); +out_register_subsys: + unregister_pernet_subsys(&ip6_route_net_ops); out_dst_entries: dst_entries_destroy(&ip6_dst_blackhole_ops); out_kmem_cache: -- cgit v1.2.3 From 3a8fc53a45c444400259e2e285ba414a87061e3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 15 Jan 2012 16:34:08 +0100 Subject: netfilter: nf_ct_helper: allocate 16 bytes for the helper and policy names This patch modifies the struct nf_conntrack_helper to allocate the room for the helper name. The maximum length is 16 bytes (this was already introduced in 2.6.24). For the maximum length for expectation policy names, I have also selected 16 bytes. This patch is required by the follow-up patch to support user-space connection tracking helpers. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_expect.h | 4 +++- include/net/netfilter/nf_conntrack_helper.h | 2 +- net/netfilter/nf_conntrack_ftp.c | 8 ++------ net/netfilter/nf_conntrack_irc.c | 8 ++------ net/netfilter/nf_conntrack_sane.c | 8 ++------ net/netfilter/nf_conntrack_sip.c | 7 ++----- net/netfilter/nf_conntrack_tftp.c | 8 ++------ 7 files changed, 14 insertions(+), 31 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 4619caadd9d1..983f00263243 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -59,10 +59,12 @@ static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) return nf_ct_net(exp->master); } +#define NF_CT_EXP_POLICY_NAME_LEN 16 + struct nf_conntrack_expect_policy { unsigned int max_expected; unsigned int timeout; - const char *name; + char name[NF_CT_EXP_POLICY_NAME_LEN]; }; #define NF_CT_EXPECT_CLASS_DEFAULT 0 diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 1d1889409b9e..5f5a4d9d4df5 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -19,7 +19,7 @@ struct module; struct nf_conntrack_helper { struct hlist_node hnode; /* Internal use. */ - const char *name; /* name of the module */ + char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */ struct module *me; /* pointer to self */ const struct nf_conntrack_expect_policy *expect_policy; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 8c5c95c6d34f..44e47c9e14fb 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -512,7 +512,6 @@ out_update_nl: } static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly; -static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")] __read_mostly; static const struct nf_conntrack_expect_policy ftp_exp_policy = { .max_expected = 1, @@ -541,7 +540,6 @@ static void nf_conntrack_ftp_fini(void) static int __init nf_conntrack_ftp_init(void) { int i, j = -1, ret = 0; - char *tmpname; ftp_buffer = kmalloc(65536, GFP_KERNEL); if (!ftp_buffer) @@ -561,12 +559,10 @@ static int __init nf_conntrack_ftp_init(void) ftp[i][j].expect_policy = &ftp_exp_policy; ftp[i][j].me = THIS_MODULE; ftp[i][j].help = help; - tmpname = &ftp_names[i][j][0]; if (ports[i] == FTP_PORT) - sprintf(tmpname, "ftp"); + sprintf(ftp[i][j].name, "ftp"); else - sprintf(tmpname, "ftp-%d", ports[i]); - ftp[i][j].name = tmpname; + sprintf(ftp[i][j].name, "ftp-%d", ports[i]); pr_debug("nf_ct_ftp: registering helper for pf: %d " "port: %d\n", diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 81366c118271..009c52cfd1ec 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -221,7 +221,6 @@ static int help(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly; -static char irc_names[MAX_PORTS][sizeof("irc-65535")] __read_mostly; static struct nf_conntrack_expect_policy irc_exp_policy; static void nf_conntrack_irc_fini(void); @@ -229,7 +228,6 @@ static void nf_conntrack_irc_fini(void); static int __init nf_conntrack_irc_init(void) { int i, ret; - char *tmpname; if (max_dcc_channels < 1) { printk(KERN_ERR "nf_ct_irc: max_dcc_channels must not be zero\n"); @@ -255,12 +253,10 @@ static int __init nf_conntrack_irc_init(void) irc[i].me = THIS_MODULE; irc[i].help = help; - tmpname = &irc_names[i][0]; if (ports[i] == IRC_PORT) - sprintf(tmpname, "irc"); + sprintf(irc[i].name, "irc"); else - sprintf(tmpname, "irc-%u", i); - irc[i].name = tmpname; + sprintf(irc[i].name, "irc-%u", i); ret = nf_conntrack_helper_register(&irc[i]); if (ret) { diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 8501823b3f9b..ec3fc18c4ef6 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -163,7 +163,6 @@ out: } static struct nf_conntrack_helper sane[MAX_PORTS][2] __read_mostly; -static char sane_names[MAX_PORTS][2][sizeof("sane-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sane_exp_policy = { .max_expected = 1, @@ -190,7 +189,6 @@ static void nf_conntrack_sane_fini(void) static int __init nf_conntrack_sane_init(void) { int i, j = -1, ret = 0; - char *tmpname; sane_buffer = kmalloc(65536, GFP_KERNEL); if (!sane_buffer) @@ -210,12 +208,10 @@ static int __init nf_conntrack_sane_init(void) sane[i][j].expect_policy = &sane_exp_policy; sane[i][j].me = THIS_MODULE; sane[i][j].help = help; - tmpname = &sane_names[i][j][0]; if (ports[i] == SANE_PORT) - sprintf(tmpname, "sane"); + sprintf(sane[i][j].name, "sane"); else - sprintf(tmpname, "sane-%d", ports[i]); - sane[i][j].name = tmpname; + sprintf(sane[i][j].name, "sane-%d", ports[i]); pr_debug("nf_ct_sane: registering helper for pf: %d " "port: %d\n", diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 93faf6a3a637..dfd3ff382243 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1556,7 +1556,6 @@ static void nf_conntrack_sip_fini(void) static int __init nf_conntrack_sip_init(void) { int i, j, ret; - char *tmpname; if (ports_c == 0) ports[ports_c++] = SIP_PORT; @@ -1584,12 +1583,10 @@ static int __init nf_conntrack_sip_init(void) sip[i][j].expect_class_max = SIP_EXPECT_MAX; sip[i][j].me = THIS_MODULE; - tmpname = &sip_names[i][j][0]; if (ports[i] == SIP_PORT) - sprintf(tmpname, "sip"); + sprintf(sip_names[i][j], "sip"); else - sprintf(tmpname, "sip-%u", i); - sip[i][j].name = tmpname; + sprintf(sip_names[i][j], "sip-%u", i); pr_debug("port #%u: %u\n", i, ports[i]); diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 75466fd72f4f..81fc61c05263 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -92,7 +92,6 @@ static int tftp_help(struct sk_buff *skb, } static struct nf_conntrack_helper tftp[MAX_PORTS][2] __read_mostly; -static char tftp_names[MAX_PORTS][2][sizeof("tftp-65535")] __read_mostly; static const struct nf_conntrack_expect_policy tftp_exp_policy = { .max_expected = 1, @@ -112,7 +111,6 @@ static void nf_conntrack_tftp_fini(void) static int __init nf_conntrack_tftp_init(void) { int i, j, ret; - char *tmpname; if (ports_c == 0) ports[ports_c++] = TFTP_PORT; @@ -129,12 +127,10 @@ static int __init nf_conntrack_tftp_init(void) tftp[i][j].me = THIS_MODULE; tftp[i][j].help = tftp_help; - tmpname = &tftp_names[i][j][0]; if (ports[i] == TFTP_PORT) - sprintf(tmpname, "tftp"); + sprintf(tftp[i][j].name, "tftp"); else - sprintf(tmpname, "tftp-%u", i); - tftp[i][j].name = tmpname; + sprintf(tftp[i][j].name, "tftp-%u", i); ret = nf_conntrack_helper_register(&tftp[i][j]); if (ret) { -- cgit v1.2.3 From 3cf4c7e381d9a98a44fd86207b950bd8fef55d20 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Feb 2012 16:18:31 +0100 Subject: netfilter: nf_ct_ext: support variable length extensions We can now define conntrack extensions of variable size. This patch is useful to get rid of these unions: union nf_conntrack_help union nf_conntrack_proto union nf_conntrack_nat_help Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_extend.h | 9 ++++++--- net/netfilter/nf_conntrack_extend.c | 16 +++++++++------- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 96755c3798a5..8b4d1fc29096 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -80,10 +80,13 @@ static inline void nf_ct_ext_free(struct nf_conn *ct) } /* Add this type, returns pointer to data or NULL. */ -void * -__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); +void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, + size_t var_alloc_len, gfp_t gfp); + #define nf_ct_ext_add(ct, id, gfp) \ - ((id##_TYPE *)__nf_ct_ext_add((ct), (id), (gfp))) + ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), 0, (gfp))) +#define nf_ct_ext_add_length(ct, id, len, gfp) \ + ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), (len), (gfp))) #define NF_CT_EXT_F_PREALLOC 0x0001 diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 641ff5f96718..1a9545965c0d 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -44,7 +44,8 @@ void __nf_ct_ext_destroy(struct nf_conn *ct) EXPORT_SYMBOL(__nf_ct_ext_destroy); static void * -nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) +nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, + size_t var_alloc_len, gfp_t gfp) { unsigned int off, len; struct nf_ct_ext_type *t; @@ -54,8 +55,8 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) t = rcu_dereference(nf_ct_ext_types[id]); BUG_ON(t == NULL); off = ALIGN(sizeof(struct nf_ct_ext), t->align); - len = off + t->len; - alloc_size = t->alloc_size; + len = off + t->len + var_alloc_len; + alloc_size = t->alloc_size + var_alloc_len; rcu_read_unlock(); *ext = kzalloc(alloc_size, gfp); @@ -68,7 +69,8 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) return (void *)(*ext) + off; } -void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) +void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, + size_t var_alloc_len, gfp_t gfp) { struct nf_ct_ext *old, *new; int i, newlen, newoff; @@ -79,7 +81,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) old = ct->ext; if (!old) - return nf_ct_ext_create(&ct->ext, id, gfp); + return nf_ct_ext_create(&ct->ext, id, var_alloc_len, gfp); if (__nf_ct_ext_exist(old, id)) return NULL; @@ -89,7 +91,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) BUG_ON(t == NULL); newoff = ALIGN(old->len, t->align); - newlen = newoff + t->len; + newlen = newoff + t->len + var_alloc_len; rcu_read_unlock(); new = __krealloc(old, newlen, gfp); @@ -117,7 +119,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) memset((void *)new + newoff, 0, newlen - newoff); return (void *)new + newoff; } -EXPORT_SYMBOL(__nf_ct_ext_add); +EXPORT_SYMBOL(__nf_ct_ext_add_length); static void update_alloc_size(struct nf_ct_ext_type *type) { -- cgit v1.2.3 From 1afc56794e03229fa53cfa3c5012704d226e1dec Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Jun 2012 12:11:50 +0200 Subject: netfilter: nf_ct_helper: implement variable length helper private data This patch uses the new variable length conntrack extensions. Instead of using union nf_conntrack_help that contain all the helper private data information, we allocate variable length area to store the private helper data. This patch includes the modification of all existing helpers. It also includes a couple of include header to avoid compilation warnings. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_sip.h | 2 ++ include/net/netfilter/nf_conntrack.h | 35 ++--------------------- include/net/netfilter/nf_conntrack_helper.h | 15 +++++++++- net/ipv4/netfilter/nf_nat_amanda.c | 4 +-- net/ipv4/netfilter/nf_nat_h323.c | 8 +++--- net/ipv4/netfilter/nf_nat_pptp.c | 6 ++-- net/ipv4/netfilter/nf_nat_tftp.c | 4 +-- net/netfilter/nf_conntrack_core.c | 3 +- net/netfilter/nf_conntrack_ftp.c | 3 +- net/netfilter/nf_conntrack_h323_main.c | 16 +++++++---- net/netfilter/nf_conntrack_helper.c | 11 +++++--- net/netfilter/nf_conntrack_netlink.c | 4 +-- net/netfilter/nf_conntrack_pptp.c | 17 +++++------ net/netfilter/nf_conntrack_proto_gre.c | 16 +++++------ net/netfilter/nf_conntrack_sane.c | 4 +-- net/netfilter/nf_conntrack_sip.c | 25 ++++++++-------- net/netfilter/xt_CT.c | 44 +++++++++++++++++------------ 17 files changed, 111 insertions(+), 106 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 0ce91d56a5f2..0dfc8b7210a3 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -2,6 +2,8 @@ #define __NF_CONNTRACK_SIP_H__ #ifdef __KERNEL__ +#include + #define SIP_PORT 5060 #define SIP_TIMEOUT 3600 diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cce7f6a798bf..f1494feba79f 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -39,36 +39,6 @@ union nf_conntrack_expect_proto { /* insert expect proto private data here */ }; -/* Add protocol helper include file here */ -#include -#include -#include -#include -#include - -/* per conntrack: application helper private data */ -union nf_conntrack_help { - /* insert conntrack helper private data (master) here */ -#if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE) - struct nf_ct_ftp_master ct_ftp_info; -#endif -#if defined(CONFIG_NF_CONNTRACK_PPTP) || \ - defined(CONFIG_NF_CONNTRACK_PPTP_MODULE) - struct nf_ct_pptp_master ct_pptp_info; -#endif -#if defined(CONFIG_NF_CONNTRACK_H323) || \ - defined(CONFIG_NF_CONNTRACK_H323_MODULE) - struct nf_ct_h323_master ct_h323_info; -#endif -#if defined(CONFIG_NF_CONNTRACK_SANE) || \ - defined(CONFIG_NF_CONNTRACK_SANE_MODULE) - struct nf_ct_sane_master ct_sane_info; -#endif -#if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE) - struct nf_ct_sip_master ct_sip_info; -#endif -}; - #include #include #include @@ -89,12 +59,13 @@ struct nf_conn_help { /* Helper. if any */ struct nf_conntrack_helper __rcu *helper; - union nf_conntrack_help help; - struct hlist_head expectations; /* Current number of expected connections */ u8 expecting[NF_CT_MAX_EXPECT_CLASSES]; + + /* private helper information. */ + char data[]; }; #include diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 5f5a4d9d4df5..061352f71a84 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -11,6 +11,7 @@ #define _NF_CONNTRACK_HELPER_H #include #include +#include struct module; @@ -23,6 +24,9 @@ struct nf_conntrack_helper { struct module *me; /* pointer to self */ const struct nf_conntrack_expect_policy *expect_policy; + /* length of internal data, ie. sizeof(struct nf_ct_*_master) */ + size_t data_len; + /* Tuple of things we will help (compared against server response) */ struct nf_conntrack_tuple tuple; @@ -48,7 +52,7 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); -extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); +extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp); extern int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags); @@ -60,6 +64,15 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); } +static inline void *nfct_help_data(const struct nf_conn *ct) +{ + struct nf_conn_help *help; + + help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER); + + return (void *)help->data; +} + extern int nf_conntrack_helper_init(struct net *net); extern void nf_conntrack_helper_fini(struct net *net); diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 7b22382ff0e9..3c04d24e2976 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -13,10 +13,10 @@ #include #include -#include -#include #include #include +#include +#include #include MODULE_AUTHOR("Brian J. Murrell "); diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index cad29c121318..c6784a18c1c4 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -95,7 +95,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, TransportAddress *taddr, int count) { - const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + const struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int i; __be16 port; @@ -178,7 +178,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int i; u_int16_t nated_port; @@ -330,7 +330,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); u_int16_t nated_port = ntohs(port); @@ -419,7 +419,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, TransportAddress *taddr, int idx, __be16 port, struct nf_conntrack_expect *exp) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); u_int16_t nated_port = ntohs(port); union nf_inet_addr addr; diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index c273d58980ae..388140881ebe 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -49,7 +49,7 @@ static void pptp_nat_expected(struct nf_conn *ct, const struct nf_nat_pptp *nat_pptp_info; struct nf_nat_ipv4_range range; - ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; + ct_pptp_info = nfct_help_data(master); nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; /* And here goes the grand finale of corrosion... */ @@ -123,7 +123,7 @@ pptp_outbound_pkt(struct sk_buff *skb, __be16 new_callid; unsigned int cid_off; - ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info; + ct_pptp_info = nfct_help_data(ct); nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; new_callid = ct_pptp_info->pns_call_id; @@ -192,7 +192,7 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig, struct nf_ct_pptp_master *ct_pptp_info; struct nf_nat_pptp *nat_pptp_info; - ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info; + ct_pptp_info = nfct_help_data(ct); nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; /* save original PAC call ID in nat_info */ diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index a2901bf829c0..9dbb8d284f99 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -8,10 +8,10 @@ #include #include -#include -#include #include #include +#include +#include #include MODULE_AUTHOR("Magnus Boden "); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1ee2082b81b5..cf4875565d67 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -819,7 +819,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, __set_bit(IPS_EXPECTED_BIT, &ct->status); ct->master = exp->master; if (exp->helper) { - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); + help = nf_ct_helper_ext_add(ct, exp->helper, + GFP_ATOMIC); if (help) rcu_assign_pointer(help->helper, exp->helper); } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 44e47c9e14fb..4bb771d1f57a 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -358,7 +358,7 @@ static int help(struct sk_buff *skb, u32 seq; int dir = CTINFO2DIR(ctinfo); unsigned int uninitialized_var(matchlen), uninitialized_var(matchoff); - struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info; + struct nf_ct_ftp_master *ct_ftp_info = nfct_help_data(ct); struct nf_conntrack_expect *exp; union nf_inet_addr *daddr; struct nf_conntrack_man cmd = {}; @@ -554,6 +554,7 @@ static int __init nf_conntrack_ftp_init(void) ftp[i][0].tuple.src.l3num = PF_INET; ftp[i][1].tuple.src.l3num = PF_INET6; for (j = 0; j < 2; j++) { + ftp[i][j].data_len = sizeof(struct nf_ct_ftp_master); ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]); ftp[i][j].tuple.dst.protonum = IPPROTO_TCP; ftp[i][j].expect_policy = &ftp_exp_policy; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 46d69d7f1bb4..ed2199280527 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -114,7 +114,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int *datalen, int *dataoff) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); const struct tcphdr *th; struct tcphdr _tcph; @@ -618,6 +618,7 @@ static const struct nf_conntrack_expect_policy h245_exp_policy = { static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { .name = "H.245", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_UNSPEC, .tuple.dst.protonum = IPPROTO_UDP, .help = h245_help, @@ -1170,6 +1171,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { { .name = "Q.931", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT), .tuple.dst.protonum = IPPROTO_TCP, @@ -1245,7 +1247,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, TransportAddress *taddr, int count) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int ret = 0; int i; @@ -1360,7 +1362,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationRequest *rrq) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int ret; typeof(set_ras_addr_hook) set_ras_addr; @@ -1395,7 +1397,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationConfirm *rcf) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int ret; struct nf_conntrack_expect *exp; @@ -1444,7 +1446,7 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, UnregistrationRequest *urq) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int ret; typeof(set_sig_addr_hook) set_sig_addr; @@ -1476,7 +1478,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, AdmissionRequest *arq) { - const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + const struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); __be16 port; union nf_inet_addr addr; @@ -1743,6 +1745,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { { .name = "RAS", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, @@ -1752,6 +1755,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { { .name = "RAS", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET6, .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 4fa2ff961f5a..9c18ecb0ab81 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -161,11 +161,14 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); -struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) +struct nf_conn_help * +nf_ct_helper_ext_add(struct nf_conn *ct, + struct nf_conntrack_helper *helper, gfp_t gfp) { struct nf_conn_help *help; - help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp); + help = nf_ct_ext_add_length(ct, NF_CT_EXT_HELPER, + helper->data_len, gfp); if (help) INIT_HLIST_HEAD(&help->expectations); else @@ -218,13 +221,13 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, } if (help == NULL) { - help = nf_ct_helper_ext_add(ct, flags); + help = nf_ct_helper_ext_add(ct, helper, flags); if (help == NULL) { ret = -ENOMEM; goto out; } } else { - memset(&help->help, 0, sizeof(help->help)); + memset(help->data, 0, helper->data_len); } rcu_assign_pointer(help->helper, helper); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6f4b00a8fc73..a08892048b46 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1218,7 +1218,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) if (help->helper) return -EBUSY; /* need to zero data of old helper */ - memset(&help->help, 0, sizeof(help->help)); + memset(help->data, 0, help->helper->data_len); } else { /* we cannot set a helper for an existing conntrack */ return -EOPNOTSUPP; @@ -1440,7 +1440,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, } else { struct nf_conn_help *help; - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); + help = nf_ct_helper_ext_add(ct, helper, GFP_ATOMIC); if (help == NULL) { err = -ENOMEM; goto err2; diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 31d56b23b9e9..6fed9ec35248 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -174,7 +174,7 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, static void pptp_destroy_siblings(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); - const struct nf_conn_help *help = nfct_help(ct); + const struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); struct nf_conntrack_tuple t; nf_ct_gre_keymap_destroy(ct); @@ -182,16 +182,16 @@ static void pptp_destroy_siblings(struct nf_conn *ct) /* try original (pns->pac) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; - t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; + t.src.u.gre.key = ct_pptp_info->pns_call_id; + t.dst.u.gre.key = ct_pptp_info->pac_call_id; if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; - t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; + t.src.u.gre.key = ct_pptp_info->pac_call_id; + t.dst.u.gre.key = ct_pptp_info->pns_call_id; if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout reply pac->pns ct/exp\n"); } @@ -269,7 +269,7 @@ pptp_inbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + struct nf_ct_pptp_master *info = nfct_help_data(ct); u_int16_t msg; __be16 cid = 0, pcid = 0; typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound; @@ -396,7 +396,7 @@ pptp_outbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + struct nf_ct_pptp_master *info = nfct_help_data(ct); u_int16_t msg; __be16 cid = 0, pcid = 0; typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound; @@ -506,7 +506,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, { int dir = CTINFO2DIR(ctinfo); - const struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + const struct nf_ct_pptp_master *info = nfct_help_data(ct); const struct tcphdr *tcph; struct tcphdr _tcph; const struct pptp_pkt_hdr *pptph; @@ -592,6 +592,7 @@ static const struct nf_conntrack_expect_policy pptp_exp_policy = { static struct nf_conntrack_helper pptp __read_mostly = { .name = "pptp", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_pptp_master), .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = cpu_to_be16(PPTP_CONTROL_PORT), .tuple.dst.protonum = IPPROTO_TCP, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 25ba5a2f5edc..5cac41c2fa09 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -117,10 +117,10 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, { struct net *net = nf_ct_net(ct); struct netns_proto_gre *net_gre = gre_pernet(net); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); struct nf_ct_gre_keymap **kmp, *km; - kmp = &help->help.ct_pptp_info.keymap[dir]; + kmp = &ct_pptp_info->keymap[dir]; if (*kmp) { /* check whether it's a retransmission */ read_lock_bh(&net_gre->keymap_lock); @@ -158,19 +158,19 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); struct netns_proto_gre *net_gre = gre_pernet(net); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); enum ip_conntrack_dir dir; pr_debug("entering for ct %p\n", ct); write_lock_bh(&net_gre->keymap_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { - if (help->help.ct_pptp_info.keymap[dir]) { + if (ct_pptp_info->keymap[dir]) { pr_debug("removing %p from list\n", - help->help.ct_pptp_info.keymap[dir]); - list_del(&help->help.ct_pptp_info.keymap[dir]->list); - kfree(help->help.ct_pptp_info.keymap[dir]); - help->help.ct_pptp_info.keymap[dir] = NULL; + ct_pptp_info->keymap[dir]); + list_del(&ct_pptp_info->keymap[dir]->list); + kfree(ct_pptp_info->keymap[dir]); + ct_pptp_info->keymap[dir] = NULL; } } write_unlock_bh(&net_gre->keymap_lock); diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index ec3fc18c4ef6..295429f39088 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -69,13 +69,12 @@ static int help(struct sk_buff *skb, void *sb_ptr; int ret = NF_ACCEPT; int dir = CTINFO2DIR(ctinfo); - struct nf_ct_sane_master *ct_sane_info; + struct nf_ct_sane_master *ct_sane_info = nfct_help_data(ct); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple *tuple; struct sane_request *req; struct sane_reply_net_start *reply; - ct_sane_info = &nfct_help(ct)->help.ct_sane_info; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) @@ -203,6 +202,7 @@ static int __init nf_conntrack_sane_init(void) sane[i][0].tuple.src.l3num = PF_INET; sane[i][1].tuple.src.l3num = PF_INET6; for (j = 0; j < 2; j++) { + sane[i][j].data_len = sizeof(struct nf_ct_sane_master); sane[i][j].tuple.src.u.tcp.port = htons(ports[i]); sane[i][j].tuple.dst.protonum = IPPROTO_TCP; sane[i][j].expect_policy = &sane_exp_policy; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index dfd3ff382243..758a1bacc126 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1075,12 +1075,12 @@ static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dataoff, dptr, datalen, cseq); - else if (help->help.ct_sip_info.invite_cseq == cseq) + else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } @@ -1091,12 +1091,12 @@ static int process_update_response(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dataoff, dptr, datalen, cseq); - else if (help->help.ct_sip_info.invite_cseq == cseq) + else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } @@ -1107,12 +1107,12 @@ static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dataoff, dptr, datalen, cseq); - else if (help->help.ct_sip_info.invite_cseq == cseq) + else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } @@ -1123,13 +1123,13 @@ static int process_invite_request(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); unsigned int ret; flush_expectations(ct, true); ret = process_sdp(skb, dataoff, dptr, datalen, cseq); if (ret == NF_ACCEPT) - help->help.ct_sip_info.invite_cseq = cseq; + ct_sip_info->invite_cseq = cseq; return ret; } @@ -1154,7 +1154,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int matchoff, matchlen; struct nf_conntrack_expect *exp; @@ -1235,7 +1235,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, store_cseq: if (ret == NF_ACCEPT) - help->help.ct_sip_info.register_cseq = cseq; + ct_sip_info->register_cseq = cseq; return ret; } @@ -1245,7 +1245,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); union nf_inet_addr addr; __be16 port; @@ -1262,7 +1262,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff, * responses, so we store the sequence number of the last valid * request and compare it here. */ - if (help->help.ct_sip_info.register_cseq != cseq) + if (ct_sip_info->register_cseq != cseq) return NF_ACCEPT; if (code >= 100 && code <= 199) @@ -1578,6 +1578,7 @@ static int __init nf_conntrack_sip_init(void) sip[i][3].help = sip_help_tcp; for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { + sip[i][j].data_len = sizeof(struct nf_ct_sip_master); sip[i][j].tuple.src.u.udp.port = htons(ports[i]); sip[i][j].expect_policy = sip_exp_policy; sip[i][j].expect_class_max = SIP_EXPECT_MAX; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index a51de9b052be..116018560c60 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -112,6 +112,8 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) goto err3; if (info->helper[0]) { + struct nf_conntrack_helper *helper; + ret = -ENOENT; proto = xt_ct_find_proto(par); if (!proto) { @@ -120,19 +122,21 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) goto err3; } - ret = -ENOMEM; - help = nf_ct_helper_ext_add(ct, GFP_KERNEL); - if (help == NULL) - goto err3; - ret = -ENOENT; - help->helper = nf_conntrack_helper_try_module_get(info->helper, - par->family, - proto); - if (help->helper == NULL) { + helper = nf_conntrack_helper_try_module_get(info->helper, + par->family, + proto); + if (helper == NULL) { pr_info("No such helper \"%s\"\n", info->helper); goto err3; } + + ret = -ENOMEM; + help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); + if (help == NULL) + goto err3; + + help->helper = helper; } __set_bit(IPS_TEMPLATE_BIT, &ct->status); @@ -202,6 +206,8 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) goto err3; if (info->helper[0]) { + struct nf_conntrack_helper *helper; + ret = -ENOENT; proto = xt_ct_find_proto(par); if (!proto) { @@ -210,19 +216,21 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) goto err3; } - ret = -ENOMEM; - help = nf_ct_helper_ext_add(ct, GFP_KERNEL); - if (help == NULL) - goto err3; - ret = -ENOENT; - help->helper = nf_conntrack_helper_try_module_get(info->helper, - par->family, - proto); - if (help->helper == NULL) { + helper = nf_conntrack_helper_try_module_get(info->helper, + par->family, + proto); + if (helper == NULL) { pr_info("No such helper \"%s\"\n", info->helper); goto err3; } + + ret = -ENOMEM; + help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); + if (help == NULL) + goto err3; + + help->helper = helper; } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -- cgit v1.2.3 From 8c88f87cb27ad09086940bdd3e6955e5325ec89a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Jun 2012 13:31:25 +0200 Subject: netfilter: nfnetlink_queue: add NAT TCP sequence adjustment if packet mangled User-space programs that receive traffic via NFQUEUE may mangle packets. If NAT is enabled, this usually puzzles sequence tracking, leading to traffic disruptions. With this patch, nfnl_queue will make the corresponding NAT TCP sequence adjustment if: 1) The packet has been mangled, 2) the NFQA_CFG_F_CONNTRACK flag has been set, and 3) NAT is detected. There are some records on the Internet complaning about this issue: http://stackoverflow.com/questions/260757/packet-mangling-utilities-besides-iptables By now, we only support TCP since we have no helpers for DCCP or SCTP. Better to add this if we ever have some helper over those layer 4 protocols. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 ++ include/net/netfilter/nf_nat_helper.h | 4 ++++ net/ipv4/netfilter/nf_nat_helper.c | 13 +++++++++++++ net/netfilter/nf_conntrack_netlink.c | 4 ++++ net/netfilter/nfnetlink_queue.c | 19 +++++++++++-------- 5 files changed, 34 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ba65bfbd7f74..dca19e61b30a 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -401,6 +401,8 @@ struct nfq_ct_hook { size_t (*build_size)(const struct nf_conn *ct); int (*build)(struct sk_buff *skb, struct nf_conn *ct); int (*parse)(const struct nlattr *attr, struct nf_conn *ct); + void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, + u32 ctinfo, int off); }; extern struct nfq_ct_hook *nfq_ct_hook; #else diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 02bb6c29dc3d..7d8fb7b46c44 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -54,4 +54,8 @@ extern void nf_nat_follow_master(struct nf_conn *ct, extern s16 nf_nat_get_offset(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); + +extern void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + u32 dir, int off); + #endif diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index af65958f6308..2e59ad0b90ca 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -153,6 +153,19 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, } EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); +void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + u32 ctinfo, int off) +{ + const struct tcphdr *th; + + if (nf_ct_protonum(ct) != IPPROTO_TCP) + return; + + th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb)); + nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); +} +EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); + static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data, int datalen, __sum16 *check, int oldlen) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d304d5917950..8be0ab9b4758 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -46,6 +46,7 @@ #ifdef CONFIG_NF_NAT_NEEDED #include #include +#include #endif #include @@ -1751,6 +1752,9 @@ static struct nfq_ct_hook ctnetlink_nfqueue_hook = { .build_size = ctnetlink_nfqueue_build_size, .build = ctnetlink_nfqueue_build, .parse = ctnetlink_nfqueue_parse, +#ifdef CONFIG_NF_NAT_NEEDED + .seq_adjust = nf_nat_tcp_seq_adjust, +#endif }; #endif /* CONFIG_NETFILTER_NETLINK_QUEUE */ diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 647923ae9230..ff82c7933dfd 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -502,12 +502,10 @@ err_out: } static int -nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e) +nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) { struct sk_buff *nskb; - int diff; - diff = data_len - e->skb->len; if (diff < 0) { if (pskb_trim(e->skb, data_len)) return -ENOMEM; @@ -767,6 +765,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, unsigned int verdict; struct nf_queue_entry *entry; struct nfq_ct_hook *nfq_ct; + enum ip_conntrack_info uninitialized_var(ctinfo); + struct nf_conn *ct = NULL; queue = instance_lookup(queue_num); if (!queue) @@ -789,20 +789,23 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, nfq_ct = rcu_dereference(nfq_ct_hook); if (nfq_ct != NULL && (queue->flags & NFQA_CFG_F_CONNTRACK) && nfqa[NFQA_CT]) { - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - ct = nf_ct_get(entry->skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) nfq_ct->parse(nfqa[NFQA_CT], ct); } - rcu_read_unlock(); if (nfqa[NFQA_PAYLOAD]) { + u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]); + int diff = payload_len - entry->skb->len; + if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), - nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0) + payload_len, entry, diff) < 0) verdict = NF_DROP; + + if (ct && (ct->status & IPS_NAT_MASK) && diff) + nfq_ct->seq_adjust(skb, ct, ctinfo, diff); } + rcu_read_unlock(); if (nfqa[NFQA_MARK]) entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); -- cgit v1.2.3 From ae243bee397102c51fbf9db440eca3b077e0e702 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Jun 2012 14:19:42 +0200 Subject: netfilter: ctnetlink: add CTA_HELP_INFO attribute This attribute can be used to modify and to dump the internal protocol information. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_conntrack.h | 1 + include/net/netfilter/nf_conntrack_helper.h | 1 + net/netfilter/nf_conntrack_netlink.c | 23 ++++++++++++++++++----- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index e58e4b93c108..768883370080 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -191,6 +191,7 @@ enum ctattr_expect_nat { enum ctattr_help { CTA_HELP_UNSPEC, CTA_HELP_NAME, + CTA_HELP_INFO, __CTA_HELP_MAX }; #define CTA_HELP_MAX (__CTA_HELP_MAX - 1) diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 061352f71a84..84b24c3a3834 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -39,6 +39,7 @@ struct nf_conntrack_helper { void (*destroy)(struct nf_conn *ct); + int (*from_nlattr)(struct nlattr *attr, struct nf_conn *ct); int (*to_nlattr)(struct sk_buff *skb, const struct nf_conn *ct); unsigned int expect_class_max; }; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 8be0ab9b4758..ae156dff4887 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -902,7 +902,8 @@ static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { }; static inline int -ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) +ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, + struct nlattr **helpinfo) { struct nlattr *tb[CTA_HELP_MAX+1]; @@ -913,6 +914,9 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) *helper_name = nla_data(tb[CTA_HELP_NAME]); + if (tb[CTA_HELP_INFO]) + *helpinfo = tb[CTA_HELP_INFO]; + return 0; } @@ -1173,13 +1177,14 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); char *helpname = NULL; + struct nlattr *helpinfo = NULL; int err; /* don't change helper of sibling connections */ if (ct->master) return -EBUSY; - err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); + err = ctnetlink_parse_help(cda[CTA_HELP], &helpname, &helpinfo); if (err < 0) return err; @@ -1214,8 +1219,12 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) } if (help) { - if (help->helper == helper) + if (help->helper == helper) { + /* update private helper data if allowed. */ + if (helper->from_nlattr && helpinfo) + helper->from_nlattr(helpinfo, ct); return 0; + } if (help->helper) return -EBUSY; /* need to zero data of old helper */ @@ -1411,8 +1420,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, rcu_read_lock(); if (cda[CTA_HELP]) { char *helpname = NULL; - - err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); + struct nlattr *helpinfo = NULL; + + err = ctnetlink_parse_help(cda[CTA_HELP], &helpname, &helpinfo); if (err < 0) goto err2; @@ -1446,6 +1456,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, err = -ENOMEM; goto err2; } + /* set private helper data if allowed. */ + if (helper->from_nlattr && helpinfo) + helper->from_nlattr(helpinfo, ct); /* not in hash table yet so not strictly necessary */ RCU_INIT_POINTER(help->helper, helper); -- cgit v1.2.3 From 12f7a505331e6b2754684b509f2ac8f0011ce644 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 13 May 2012 21:44:54 +0200 Subject: netfilter: add user-space connection tracking helper infrastructure There are good reasons to supports helpers in user-space instead: * Rapid connection tracking helper development, as developing code in user-space is usually faster. * Reliability: A buggy helper does not crash the kernel. Moreover, we can monitor the helper process and restart it in case of problems. * Security: Avoid complex string matching and mangling in kernel-space running in privileged mode. Going further, we can even think about running user-space helpers as a non-root process. * Extensibility: It allows the development of very specific helpers (most likely non-standard proprietary protocols) that are very likely not to be accepted for mainline inclusion in the form of kernel-space connection tracking helpers. This patch adds the infrastructure to allow the implementation of user-space conntrack helpers by means of the new nfnetlink subsystem `nfnetlink_cthelper' and the existing queueing infrastructure (nfnetlink_queue). I had to add the new hook NF_IP6_PRI_CONNTRACK_HELPER to register ipv[4|6]_helper which results from splitting ipv[4|6]_confirm into two pieces. This change is required not to break NAT sequence adjustment and conntrack confirmation for traffic that is enqueued to our user-space conntrack helpers. Basic operation, in a few steps: 1) Register user-space helper by means of `nfct': nfct helper add ftp inet tcp [ It must be a valid existing helper supported by conntrack-tools ] 2) Add rules to enable the FTP user-space helper which is used to track traffic going to TCP port 21. For locally generated packets: iptables -I OUTPUT -t raw -p tcp --dport 21 -j CT --helper ftp For non-locally generated packets: iptables -I PREROUTING -t raw -p tcp --dport 21 -j CT --helper ftp 3) Run the test conntrackd in helper mode (see example files under doc/helper/conntrackd.conf conntrackd 4) Generate FTP traffic going, if everything is OK, then conntrackd should create expectations (you can check that with `conntrack': conntrack -E expect [NEW] 301 proto=6 src=192.168.1.136 dst=130.89.148.12 sport=0 dport=54037 mask-src=255.255.255.255 mask-dst=255.255.255.255 sport=0 dport=65535 master-src=192.168.1.136 master-dst=130.89.148.12 sport=57127 dport=21 class=0 helper=ftp [DESTROY] 301 proto=6 src=192.168.1.136 dst=130.89.148.12 sport=0 dport=54037 mask-src=255.255.255.255 mask-dst=255.255.255.255 sport=0 dport=65535 master-src=192.168.1.136 master-dst=130.89.148.12 sport=57127 dport=21 class=0 helper=ftp This confirms that our test helper is receiving packets including the conntrack information, and adding expectations in kernel-space. The user-space helper can also store its private tracking information in the conntrack structure in the kernel via the CTA_HELP_INFO. The kernel will consider this a binary blob whose layout is unknown. This information will be included in the information that is transfered to user-space via glue code that integrates nfnetlink_queue and ctnetlink. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/nfnetlink.h | 3 +- include/linux/netfilter/nfnetlink_cthelper.h | 55 ++ include/linux/netfilter_ipv4.h | 1 + include/linux/netfilter_ipv6.h | 1 + include/net/netfilter/nf_conntrack_helper.h | 11 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 48 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 43 +- net/netfilter/Kconfig | 8 + net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_helper.c | 21 +- net/netfilter/nfnetlink_cthelper.c | 672 +++++++++++++++++++++++++ 12 files changed, 839 insertions(+), 26 deletions(-) create mode 100644 include/linux/netfilter/nfnetlink_cthelper.h create mode 100644 net/netfilter/nfnetlink_cthelper.c (limited to 'include/net') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 1697036336b6..874ae8f2706b 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -10,6 +10,7 @@ header-y += nfnetlink.h header-y += nfnetlink_acct.h header-y += nfnetlink_compat.h header-y += nfnetlink_conntrack.h +header-y += nfnetlink_cthelper.h header-y += nfnetlink_cttimeout.h header-y += nfnetlink_log.h header-y += nfnetlink_queue.h diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index a1048c1587d1..18341cdb2443 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -50,7 +50,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_IPSET 6 #define NFNL_SUBSYS_ACCT 7 #define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 -#define NFNL_SUBSYS_COUNT 9 +#define NFNL_SUBSYS_CTHELPER 9 +#define NFNL_SUBSYS_COUNT 10 #ifdef __KERNEL__ diff --git a/include/linux/netfilter/nfnetlink_cthelper.h b/include/linux/netfilter/nfnetlink_cthelper.h new file mode 100644 index 000000000000..33659f6fad3e --- /dev/null +++ b/include/linux/netfilter/nfnetlink_cthelper.h @@ -0,0 +1,55 @@ +#ifndef _NFNL_CTHELPER_H_ +#define _NFNL_CTHELPER_H_ + +#define NFCT_HELPER_STATUS_DISABLED 0 +#define NFCT_HELPER_STATUS_ENABLED 1 + +enum nfnl_acct_msg_types { + NFNL_MSG_CTHELPER_NEW, + NFNL_MSG_CTHELPER_GET, + NFNL_MSG_CTHELPER_DEL, + NFNL_MSG_CTHELPER_MAX +}; + +enum nfnl_cthelper_type { + NFCTH_UNSPEC, + NFCTH_NAME, + NFCTH_TUPLE, + NFCTH_QUEUE_NUM, + NFCTH_POLICY, + NFCTH_PRIV_DATA_LEN, + NFCTH_STATUS, + __NFCTH_MAX +}; +#define NFCTH_MAX (__NFCTH_MAX - 1) + +enum nfnl_cthelper_policy_type { + NFCTH_POLICY_SET_UNSPEC, + NFCTH_POLICY_SET_NUM, + NFCTH_POLICY_SET, + NFCTH_POLICY_SET1 = NFCTH_POLICY_SET, + NFCTH_POLICY_SET2, + NFCTH_POLICY_SET3, + NFCTH_POLICY_SET4, + __NFCTH_POLICY_SET_MAX +}; +#define NFCTH_POLICY_SET_MAX (__NFCTH_POLICY_SET_MAX - 1) + +enum nfnl_cthelper_pol_type { + NFCTH_POLICY_UNSPEC, + NFCTH_POLICY_NAME, + NFCTH_POLICY_EXPECT_MAX, + NFCTH_POLICY_EXPECT_TIMEOUT, + __NFCTH_POLICY_MAX +}; +#define NFCTH_POLICY_MAX (__NFCTH_POLICY_MAX - 1) + +enum nfnl_cthelper_tuple_type { + NFCTH_TUPLE_UNSPEC, + NFCTH_TUPLE_L3PROTONUM, + NFCTH_TUPLE_L4PROTONUM, + __NFCTH_TUPLE_MAX, +}; +#define NFCTH_TUPLE_MAX (__NFCTH_TUPLE_MAX - 1) + +#endif /* _NFNL_CTHELPER_H */ diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index fa0946c549d3..e2b12801378d 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -66,6 +66,7 @@ enum nf_ip_hook_priorities { NF_IP_PRI_SECURITY = 50, NF_IP_PRI_NAT_SRC = 100, NF_IP_PRI_SELINUX_LAST = 225, + NF_IP_PRI_CONNTRACK_HELPER = 300, NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX, NF_IP_PRI_LAST = INT_MAX, }; diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 57c025127f1d..7c8a513ce7a3 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -71,6 +71,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_SECURITY = 50, NF_IP6_PRI_NAT_SRC = 100, NF_IP6_PRI_SELINUX_LAST = 225, + NF_IP6_PRI_CONNTRACK_HELPER = 300, NF_IP6_PRI_LAST = INT_MAX, }; diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 84b24c3a3834..9aad956d1008 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -15,6 +15,11 @@ struct module; +enum nf_ct_helper_flags { + NF_CT_HELPER_F_USERSPACE = (1 << 0), + NF_CT_HELPER_F_CONFIGURED = (1 << 1), +}; + #define NF_CT_HELPER_NAME_LEN 16 struct nf_conntrack_helper { @@ -42,6 +47,9 @@ struct nf_conntrack_helper { int (*from_nlattr)(struct nlattr *attr, struct nf_conn *ct); int (*to_nlattr)(struct sk_buff *skb, const struct nf_conn *ct); unsigned int expect_class_max; + + unsigned int flags; + unsigned int queue_num; /* For user-space helpers. */ }; extern struct nf_conntrack_helper * @@ -96,4 +104,7 @@ nf_ct_helper_expectfn_find_by_name(const char *name); struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_symbol(const void *symbol); +extern struct hlist_head *nf_ct_helper_hash; +extern unsigned int nf_ct_helper_hsize; + #endif /*_NF_CONNTRACK_HELPER_H*/ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d79b961a8009..e7ff2dcab6ce 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -95,11 +95,11 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv4_confirm(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ipv4_helper(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -110,24 +110,38 @@ static unsigned int ipv4_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) - goto out; + return NF_ACCEPT; help = nfct_help(ct); if (!help) - goto out; + return NF_ACCEPT; /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); if (!helper) - goto out; + return NF_ACCEPT; ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), ct, ctinfo); - if (ret != NF_ACCEPT) { + if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL, "nf_ct_%s: dropping packet", helper->name); - return ret; } + return ret; +} + +static unsigned int ipv4_confirm(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + goto out; /* adjust seqs for loopback traffic only in outgoing direction */ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && @@ -184,6 +198,13 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK, }, + { + .hook = ipv4_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_HELPER, + }, { .hook = ipv4_confirm, .owner = THIS_MODULE, @@ -191,6 +212,13 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM, }, + { + .hook = ipv4_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_HELPER, + }, { .hook = ipv4_confirm, .owner = THIS_MODULE, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index fca10da80ea7..4794f96cf2e0 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -143,11 +143,11 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_confirm(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ipv6_helper(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { struct nf_conn *ct; const struct nf_conn_help *help; @@ -161,15 +161,15 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) - goto out; + return NF_ACCEPT; help = nfct_help(ct); if (!help) - goto out; + return NF_ACCEPT; /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); if (!helper) - goto out; + return NF_ACCEPT; protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff); @@ -179,12 +179,19 @@ static unsigned int ipv6_confirm(unsigned int hooknum, } ret = helper->help(skb, protoff, ct, ctinfo); - if (ret != NF_ACCEPT) { + if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, "nf_ct_%s: dropping packet", helper->name); - return ret; } -out: + return ret; +} + +static unsigned int ipv6_confirm(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); } @@ -253,6 +260,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK, }, + { + .hook = ipv6_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_CONNTRACK_HELPER, + }, { .hook = ipv6_confirm, .owner = THIS_MODULE, @@ -260,6 +274,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_LAST, }, + { + .hook = ipv6_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP6_PRI_CONNTRACK_HELPER, + }, { .hook = ipv6_confirm, .owner = THIS_MODULE, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 209c1ed43368..aae6c628991d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -12,6 +12,14 @@ tristate "Netfilter NFACCT over NFNETLINK interface" If this option is enabled, the kernel will include support for extended accounting via NFNETLINK. +config NETFILTER_NETLINK_CTHELPER +tristate "Netfilter CTHELPER over NFNETLINK interface" + depends on NETFILTER_ADVANCED + select NETFILTER_NETLINK + help + If this option is enabled, the kernel will include support + for user-space connection tracking helpers via NFNETLINK. + config NETFILTER_NETLINK_QUEUE tristate "Netfilter NFQUEUE over NFNETLINK interface" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 4e7960cc7b97..2f3bc0f647ba 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_NETFILTER) = netfilter.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o +obj-$(CONFIG_NETFILTER_NETLINK_CTHELPER) += nfnetlink_cthelper.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 9c18ecb0ab81..2918ec2e4509 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -30,8 +30,10 @@ #include static DEFINE_MUTEX(nf_ct_helper_mutex); -static struct hlist_head *nf_ct_helper_hash __read_mostly; -static unsigned int nf_ct_helper_hsize __read_mostly; +struct hlist_head *nf_ct_helper_hash __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_helper_hash); +unsigned int nf_ct_helper_hsize __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_helper_hsize); static unsigned int nf_ct_helper_count __read_mostly; static bool nf_ct_auto_assign_helper __read_mostly = true; @@ -322,6 +324,9 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { + int ret = 0; + struct nf_conntrack_helper *cur; + struct hlist_node *n; unsigned int h = helper_hash(&me->tuple); BUG_ON(me->expect_policy == NULL); @@ -329,11 +334,19 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); mutex_lock(&nf_ct_helper_mutex); + hlist_for_each_entry(cur, n, &nf_ct_helper_hash[h], hnode) { + if (strncmp(cur->name, me->name, NF_CT_HELPER_NAME_LEN) == 0 && + cur->tuple.src.l3num == me->tuple.src.l3num && + cur->tuple.dst.protonum == me->tuple.dst.protonum) { + ret = -EEXIST; + goto out; + } + } hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; +out: mutex_unlock(&nf_ct_helper_mutex); - - return 0; + return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c new file mode 100644 index 000000000000..d6836193d479 --- /dev/null +++ b/net/netfilter/nfnetlink_cthelper.c @@ -0,0 +1,672 @@ +/* + * (C) 2012 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation (or any later at your option). + * + * This software has been sponsored by Vyatta Inc. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); + +static int +nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + const struct nf_conn_help *help; + struct nf_conntrack_helper *helper; + + help = nfct_help(ct); + if (help == NULL) + return NF_DROP; + + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(help->helper); + if (helper == NULL) + return NF_DROP; + + /* This is an user-space helper not yet configured, skip. */ + if ((helper->flags & + (NF_CT_HELPER_F_USERSPACE | NF_CT_HELPER_F_CONFIGURED)) == + NF_CT_HELPER_F_USERSPACE) + return NF_ACCEPT; + + /* If the user-space helper is not available, don't block traffic. */ + return NF_QUEUE_NR(helper->queue_num) | NF_VERDICT_FLAG_QUEUE_BYPASS; +} + +static const struct nla_policy nfnl_cthelper_tuple_pol[NFCTH_TUPLE_MAX+1] = { + [NFCTH_TUPLE_L3PROTONUM] = { .type = NLA_U16, }, + [NFCTH_TUPLE_L4PROTONUM] = { .type = NLA_U8, }, +}; + +static int +nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_TUPLE_MAX+1]; + + nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + + if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) + return -EINVAL; + + tuple->src.l3num = ntohs(nla_get_u16(tb[NFCTH_TUPLE_L3PROTONUM])); + tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); + + return 0; +} + +static int +nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) +{ + const struct nf_conn_help *help = nfct_help(ct); + + if (help->helper->data_len == 0) + return -EINVAL; + + memcpy(&help->data, nla_data(attr), help->helper->data_len); + return 0; +} + +static int +nfnl_cthelper_to_nlattr(struct sk_buff *skb, const struct nf_conn *ct) +{ + const struct nf_conn_help *help = nfct_help(ct); + + if (help->helper->data_len && + nla_put(skb, CTA_HELP_INFO, help->helper->data_len, &help->data)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy nfnl_cthelper_expect_pol[NFCTH_POLICY_MAX+1] = { + [NFCTH_POLICY_NAME] = { .type = NLA_NUL_STRING, + .len = NF_CT_HELPER_NAME_LEN-1 }, + [NFCTH_POLICY_EXPECT_MAX] = { .type = NLA_U32, }, + [NFCTH_POLICY_EXPECT_TIMEOUT] = { .type = NLA_U32, }, +}; + +static int +nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_MAX+1]; + + nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + + if (!tb[NFCTH_POLICY_NAME] || + !tb[NFCTH_POLICY_EXPECT_MAX] || + !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) + return -EINVAL; + + strncpy(expect_policy->name, + nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN); + expect_policy->max_expected = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + expect_policy->timeout = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); + + return 0; +} + +static const struct nla_policy +nfnl_cthelper_expect_policy_set[NFCTH_POLICY_SET_MAX+1] = { + [NFCTH_POLICY_SET_NUM] = { .type = NLA_U32, }, +}; + +static int +nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, + const struct nlattr *attr) +{ + int i, ret; + struct nf_conntrack_expect_policy *expect_policy; + struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; + + nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + + if (!tb[NFCTH_POLICY_SET_NUM]) + return -EINVAL; + + helper->expect_class_max = + ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + + if (helper->expect_class_max != 0 && + helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES) + return -EOVERFLOW; + + expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) * + helper->expect_class_max, GFP_KERNEL); + if (expect_policy == NULL) + return -ENOMEM; + + for (i=0; iexpect_class_max; i++) { + if (!tb[NFCTH_POLICY_SET+i]) + goto err; + + ret = nfnl_cthelper_expect_policy(&expect_policy[i], + tb[NFCTH_POLICY_SET+i]); + if (ret < 0) + goto err; + } + helper->expect_policy = expect_policy; + return 0; +err: + kfree(expect_policy); + return -EINVAL; +} + +static int +nfnl_cthelper_create(const struct nlattr * const tb[], + struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_helper *helper; + int ret; + + if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) + return -EINVAL; + + helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL); + if (helper == NULL) + return -ENOMEM; + + ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); + if (ret < 0) + goto err; + + strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); + helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); + helper->flags |= NF_CT_HELPER_F_USERSPACE; + memcpy(&helper->tuple, tuple, sizeof(struct nf_conntrack_tuple)); + + helper->me = THIS_MODULE; + helper->help = nfnl_userspace_cthelper; + helper->from_nlattr = nfnl_cthelper_from_nlattr; + helper->to_nlattr = nfnl_cthelper_to_nlattr; + + /* Default to queue number zero, this can be updated at any time. */ + if (tb[NFCTH_QUEUE_NUM]) + helper->queue_num = ntohl(nla_get_be32(tb[NFCTH_QUEUE_NUM])); + + if (tb[NFCTH_STATUS]) { + int status = ntohl(nla_get_be32(tb[NFCTH_STATUS])); + + switch(status) { + case NFCT_HELPER_STATUS_ENABLED: + helper->flags |= NF_CT_HELPER_F_CONFIGURED; + break; + case NFCT_HELPER_STATUS_DISABLED: + helper->flags &= ~NF_CT_HELPER_F_CONFIGURED; + break; + } + } + + ret = nf_conntrack_helper_register(helper); + if (ret < 0) + goto err; + + return 0; +err: + kfree(helper); + return ret; +} + +static int +nfnl_cthelper_update(const struct nlattr * const tb[], + struct nf_conntrack_helper *helper) +{ + int ret; + + if (tb[NFCTH_PRIV_DATA_LEN]) + return -EBUSY; + + if (tb[NFCTH_POLICY]) { + ret = nfnl_cthelper_parse_expect_policy(helper, + tb[NFCTH_POLICY]); + if (ret < 0) + return ret; + } + if (tb[NFCTH_QUEUE_NUM]) + helper->queue_num = ntohl(nla_get_be32(tb[NFCTH_QUEUE_NUM])); + + if (tb[NFCTH_STATUS]) { + int status = ntohl(nla_get_be32(tb[NFCTH_STATUS])); + + switch(status) { + case NFCT_HELPER_STATUS_ENABLED: + helper->flags |= NF_CT_HELPER_F_CONFIGURED; + break; + case NFCT_HELPER_STATUS_DISABLED: + helper->flags &= ~NF_CT_HELPER_F_CONFIGURED; + break; + } + } + return 0; +} + +static int +nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + const char *helper_name; + struct nf_conntrack_helper *cur, *helper = NULL; + struct nf_conntrack_tuple tuple; + struct hlist_node *n; + int ret = 0, i; + + if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) + return -EINVAL; + + helper_name = nla_data(tb[NFCTH_NAME]); + + ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); + if (ret < 0) + return ret; + + rcu_read_lock(); + for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { + hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + if (strncmp(cur->name, helper_name, + NF_CT_HELPER_NAME_LEN) != 0) + continue; + + if ((tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; + + if (nlh->nlmsg_flags & NLM_F_EXCL) { + ret = -EEXIST; + goto err; + } + helper = cur; + break; + } + } + rcu_read_unlock(); + + if (helper == NULL) + ret = nfnl_cthelper_create(tb, &tuple); + else + ret = nfnl_cthelper_update(tb, helper); + + return ret; +err: + rcu_read_unlock(); + return ret; +} + +static int +nfnl_cthelper_dump_tuple(struct sk_buff *skb, + struct nf_conntrack_helper *helper) +{ + struct nlattr *nest_parms; + + nest_parms = nla_nest_start(skb, NFCTH_TUPLE | NLA_F_NESTED); + if (nest_parms == NULL) + goto nla_put_failure; + + if (nla_put_be16(skb, NFCTH_TUPLE_L3PROTONUM, + htons(helper->tuple.src.l3num))) + goto nla_put_failure; + + if (nla_put_u8(skb, NFCTH_TUPLE_L4PROTONUM, helper->tuple.dst.protonum)) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + return 0; + +nla_put_failure: + return -1; +} + +static int +nfnl_cthelper_dump_policy(struct sk_buff *skb, + struct nf_conntrack_helper *helper) +{ + int i; + struct nlattr *nest_parms1, *nest_parms2; + + nest_parms1 = nla_nest_start(skb, NFCTH_POLICY | NLA_F_NESTED); + if (nest_parms1 == NULL) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM, + htonl(helper->expect_class_max))) + goto nla_put_failure; + + for (i=0; iexpect_class_max; i++) { + nest_parms2 = nla_nest_start(skb, + (NFCTH_POLICY_SET+i) | NLA_F_NESTED); + if (nest_parms2 == NULL) + goto nla_put_failure; + + if (nla_put_string(skb, NFCTH_POLICY_NAME, + helper->expect_policy[i].name)) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_POLICY_EXPECT_MAX, + htonl(helper->expect_policy[i].max_expected))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_POLICY_EXPECT_TIMEOUT, + htonl(helper->expect_policy[i].timeout))) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms2); + } + nla_nest_end(skb, nest_parms1); + return 0; + +nla_put_failure: + return -1; +} + +static int +nfnl_cthelper_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, + int event, struct nf_conntrack_helper *helper) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = pid ? NLM_F_MULTI : 0; + int status; + + event |= NFNL_SUBSYS_CTHELPER << 8; + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFCTH_NAME, helper->name)) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_QUEUE_NUM, htonl(helper->queue_num))) + goto nla_put_failure; + + if (nfnl_cthelper_dump_tuple(skb, helper) < 0) + goto nla_put_failure; + + if (nfnl_cthelper_dump_policy(skb, helper) < 0) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_PRIV_DATA_LEN, htonl(helper->data_len))) + goto nla_put_failure; + + if (helper->flags & NF_CT_HELPER_F_CONFIGURED) + status = NFCT_HELPER_STATUS_ENABLED; + else + status = NFCT_HELPER_STATUS_DISABLED; + + if (nla_put_be32(skb, NFCTH_STATUS, htonl(status))) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conntrack_helper *cur, *last; + struct hlist_node *n; + + rcu_read_lock(); + last = (struct nf_conntrack_helper *)cb->args[1]; + for (; cb->args[0] < nf_ct_helper_hsize; cb->args[0]++) { +restart: + hlist_for_each_entry_rcu(cur, n, + &nf_ct_helper_hash[cb->args[0]], hnode) { + + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + if (cb->args[1]) { + if (cur != last) + continue; + cb->args[1] = 0; + } + if (nfnl_cthelper_fill_info(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur) < 0) { + cb->args[1] = (unsigned long)cur; + goto out; + } + } + } + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } +out: + rcu_read_unlock(); + return skb->len; +} + +static int +nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + int ret = -ENOENT, i; + struct nf_conntrack_helper *cur; + struct hlist_node *n; + struct sk_buff *skb2; + char *helper_name = NULL; + struct nf_conntrack_tuple tuple; + bool tuple_set = false; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nfnl_cthelper_dump_table, + }; + return netlink_dump_start(nfnl, skb, nlh, &c); + } + + if (tb[NFCTH_NAME]) + helper_name = nla_data(tb[NFCTH_NAME]); + + if (tb[NFCTH_TUPLE]) { + ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); + if (ret < 0) + return ret; + + tuple_set = true; + } + + for (i = 0; i < nf_ct_helper_hsize; i++) { + hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + if (helper_name && strncmp(cur->name, helper_name, + NF_CT_HELPER_NAME_LEN) != 0) { + continue; + } + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; + } + + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } + + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; + } + } + return ret; +} + +static int +nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + char *helper_name = NULL; + struct nf_conntrack_helper *cur; + struct hlist_node *n, *tmp; + struct nf_conntrack_tuple tuple; + bool tuple_set = false, found = false; + int i, j = 0, ret; + + if (tb[NFCTH_NAME]) + helper_name = nla_data(tb[NFCTH_NAME]); + + if (tb[NFCTH_TUPLE]) { + ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); + if (ret < 0) + return ret; + + tuple_set = true; + } + + for (i = 0; i < nf_ct_helper_hsize; i++) { + hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i], + hnode) { + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + j++; + + if (helper_name && strncmp(cur->name, helper_name, + NF_CT_HELPER_NAME_LEN) != 0) { + continue; + } + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; + + found = true; + nf_conntrack_helper_unregister(cur); + } + } + /* Make sure we return success if we flush and there is no helpers */ + return (found || j == 0) ? 0 : -ENOENT; +} + +static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { + [NFCTH_NAME] = { .type = NLA_NUL_STRING, + .len = NF_CT_HELPER_NAME_LEN-1 }, + [NFCTH_QUEUE_NUM] = { .type = NLA_U32, }, +}; + +static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = { + [NFNL_MSG_CTHELPER_NEW] = { .call = nfnl_cthelper_new, + .attr_count = NFCTH_MAX, + .policy = nfnl_cthelper_policy }, + [NFNL_MSG_CTHELPER_GET] = { .call = nfnl_cthelper_get, + .attr_count = NFCTH_MAX, + .policy = nfnl_cthelper_policy }, + [NFNL_MSG_CTHELPER_DEL] = { .call = nfnl_cthelper_del, + .attr_count = NFCTH_MAX, + .policy = nfnl_cthelper_policy }, +}; + +static const struct nfnetlink_subsystem nfnl_cthelper_subsys = { + .name = "cthelper", + .subsys_id = NFNL_SUBSYS_CTHELPER, + .cb_count = NFNL_MSG_CTHELPER_MAX, + .cb = nfnl_cthelper_cb, +}; + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTHELPER); + +static int __init nfnl_cthelper_init(void) +{ + int ret; + + ret = nfnetlink_subsys_register(&nfnl_cthelper_subsys); + if (ret < 0) { + pr_err("nfnl_cthelper: cannot register with nfnetlink.\n"); + goto err_out; + } + return 0; +err_out: + return ret; +} + +static void __exit nfnl_cthelper_exit(void) +{ + struct nf_conntrack_helper *cur; + struct hlist_node *n, *tmp; + int i; + + nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); + + for (i=0; iflags & NF_CT_HELPER_F_USERSPACE)) + continue; + + nf_conntrack_helper_unregister(cur); + } + } +} + +module_init(nfnl_cthelper_init); +module_exit(nfnl_cthelper_exit); -- cgit v1.2.3 From 7f95e1880e70bb351b992b01ef70ff083fc00d30 Mon Sep 17 00:00:00 2001 From: Eldad Zack Date: Sat, 16 Jun 2012 15:14:49 +0200 Subject: include/net/dst.h: neaten asterisk placement Fix code style - place the asterisk where it belongs. Signed-off-by: Eldad Zack Signed-off-by: David S. Miller --- include/net/dst.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 8197eadca819..f0bf3b8d5911 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -48,8 +48,8 @@ struct dst_entry { #else void *__pad1; #endif - int (*input)(struct sk_buff*); - int (*output)(struct sk_buff*); + int (*input)(struct sk_buff *); + int (*output)(struct sk_buff *); int flags; #define DST_HOST 0x0001 @@ -241,7 +241,7 @@ dst_metric_locked(const struct dst_entry *dst, int metric) return dst_metric(dst, RTAX_LOCK) & (1<lastuse = time; } -static inline -struct dst_entry * dst_clone(struct dst_entry * dst) +static inline struct dst_entry *dst_clone(struct dst_entry *dst) { if (dst) atomic_inc(&dst->__refcnt); @@ -371,12 +370,12 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) } extern int dst_discard(struct sk_buff *skb); -extern void *dst_alloc(struct dst_ops * ops, struct net_device *dev, +extern void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, int flags); -extern void __dst_free(struct dst_entry * dst); -extern struct dst_entry *dst_destroy(struct dst_entry * dst); +extern void __dst_free(struct dst_entry *dst); +extern struct dst_entry *dst_destroy(struct dst_entry *dst); -static inline void dst_free(struct dst_entry * dst) +static inline void dst_free(struct dst_entry *dst) { if (dst->obsolete > 1) return; -- cgit v1.2.3 From 31fdc5553b42abd7e29bb7b89f6ba07514eb4763 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 13 Jun 2012 22:29:03 +0000 Subject: net: remove my future former mail address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Cc: Sakari Ailus Signed-off-by: David S. Miller --- include/net/phonet/gprs.h | 2 +- net/caif/caif_dev.c | 3 +-- net/phonet/af_phonet.c | 4 ++-- net/phonet/datagram.c | 4 ++-- net/phonet/pep-gprs.c | 2 +- net/phonet/pep.c | 2 +- net/phonet/pn_dev.c | 4 ++-- net/phonet/pn_netlink.c | 4 ++-- net/phonet/socket.c | 4 ++-- net/phonet/sysctl.c | 2 +- 10 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/phonet/gprs.h b/include/net/phonet/gprs.h index 928daf595beb..bcd525e39a0b 100644 --- a/include/net/phonet/gprs.h +++ b/include/net/phonet/gprs.h @@ -5,7 +5,7 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Author: Rémi Denis-Courmont + * Author: Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index aa6f716524fd..554b31289607 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -4,8 +4,7 @@ * Author: Sjur Brendeland/sjur.brandeland@stericsson.com * License terms: GNU General Public License (GPL) version 2 * - * Borrowed heavily from file: pn_dev.c. Thanks to - * Remi Denis-Courmont + * Borrowed heavily from file: pn_dev.c. Thanks to Remi Denis-Courmont * and Sakari Ailus */ diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 779ce4ff92ec..5a940dbd74a3 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -5,8 +5,8 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Contact: Remi Denis-Courmont - * Original author: Sakari Ailus + * Authors: Sakari Ailus + * Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index bf35b4e1a14c..12c30f3e643e 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -5,8 +5,8 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Contact: Remi Denis-Courmont - * Original author: Sakari Ailus + * Authors: Sakari Ailus + * Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index d01208968c83..a2fba7edfd1f 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -5,7 +5,7 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Author: Rémi Denis-Courmont + * Author: Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 9dd4f926f7d1..576f22c9c76e 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -5,7 +5,7 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Author: Rémi Denis-Courmont + * Author: Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 36f75a9e2c3d..5bf6341e2dd4 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -5,8 +5,8 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Contact: Remi Denis-Courmont - * Original author: Sakari Ailus + * Authors: Sakari Ailus + * Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index cfdf135fcd69..7dd762a464e5 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -5,8 +5,8 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Contact: Remi Denis-Courmont - * Original author: Sakari Ailus + * Authors: Sakari Ailus + * Remi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 89cfa9ce4939..0acc943f713a 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -5,8 +5,8 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Contact: Remi Denis-Courmont - * Original author: Sakari Ailus + * Authors: Sakari Ailus + * Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 696348fd31a1..d6bbbbd0af18 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -5,7 +5,7 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Contact: Remi Denis-Courmont + * Author: Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License -- cgit v1.2.3 From 728b19e5fb9bbebbd580784a092b786fe379ed8e Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Thu, 14 Jun 2012 02:06:10 +0800 Subject: {nl,cfg,mac}80211: implement dot11MeshHWMPconfirmationInterval As defined in section 13.10.9.3 Case D (802.11-2012), this control variable is used to limit the mesh STA to send only one PREQ to a root mesh STA within this interval of time (in TUs). The default value for this variable is set to 2000 TUs. However, for current implementation, the maximum configurable of dot11MeshHWMPconfirmationInterval is restricted by dot11MeshHWMPactivePathTimeout. Signed-off-by: Chun-Yeow Yeoh [line-break commit log] Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 5 +++++ include/net/cfg80211.h | 4 ++++ net/mac80211/cfg.c | 3 +++ net/mac80211/debugfs_netdev.c | 3 +++ net/mac80211/mesh.h | 2 ++ net/mac80211/mesh_hwmp.c | 7 ++++++- net/wireless/mesh.c | 2 ++ net/wireless/nl80211.c | 9 ++++++++- 8 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 6936fabe8797..b7c3b737ddde 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -2192,6 +2192,10 @@ enum nl80211_mntr_flags { * @NL80211_MESHCONF_HWMP_ROOT_INTERVAL: The interval of time (in TUs) between * proactive PREQs are transmitted. * + * @NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL: The minimum interval of time + * (in TUs) during which a mesh STA can send only one Action frame + * containing a PREQ element for root path confirmation. + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -2220,6 +2224,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_HT_OPMODE, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, + NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e52b38d7b1b6..f0163a10b8ce 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -849,6 +849,9 @@ struct bss_parameters { * * @dot11MeshHWMProotInterval: The interval of time (in TUs) between proactive * PREQs are transmitted. + * @dot11MeshHWMPconfirmationInterval: The minimum interval of time (in TUs) + * during which a mesh STA can send only one Action frame containing + * a PREQ element for root path confirmation. */ struct mesh_config { u16 dot11MeshRetryTimeout; @@ -875,6 +878,7 @@ struct mesh_config { u16 ht_opmode; u32 dot11MeshHWMPactivePathToRootTimeout; u16 dot11MeshHWMProotInterval; + u16 dot11MeshHWMPconfirmationInterval; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5bd316c0a63d..6e25ac4873c7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1596,6 +1596,9 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOT_INTERVAL, mask)) conf->dot11MeshHWMProotInterval = nconf->dot11MeshHWMProotInterval; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, mask)) + conf->dot11MeshHWMPconfirmationInterval = + nconf->dot11MeshHWMPconfirmationInterval; return 0; } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index a8cea70902e4..512c894893d6 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -514,6 +514,8 @@ IEEE80211_IF_FILE(dot11MeshHWMPactivePathToRootTimeout, u.mesh.mshcfg.dot11MeshHWMPactivePathToRootTimeout, DEC); IEEE80211_IF_FILE(dot11MeshHWMProotInterval, u.mesh.mshcfg.dot11MeshHWMProotInterval, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPconfirmationInterval, + u.mesh.mshcfg.dot11MeshHWMPconfirmationInterval, DEC); #endif #define DEBUGFS_ADD_MODE(name, mode) \ @@ -617,6 +619,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(ht_opmode); MESHPARAMS_ADD(dot11MeshHWMPactivePathToRootTimeout); MESHPARAMS_ADD(dot11MeshHWMProotInterval); + MESHPARAMS_ADD(dot11MeshHWMPconfirmationInterval); #undef MESHPARAMS_ADD } #endif diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index c7400a23b64b..faaa39bcfd10 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -104,6 +104,7 @@ enum mesh_deferred_task_flags { * an mpath to a hash bucket on a path table. * @rann_snd_addr: the RANN sender address * @rann_metric: the aggregated path metric towards the root node + * @last_preq_to_root: Timestamp of last PREQ sent to root * @is_root: the destination station of this path is a root node * @is_gate: the destination station of this path is a mesh gate * @@ -131,6 +132,7 @@ struct mesh_path { spinlock_t state_lock; u8 rann_snd_addr[ETH_ALEN]; u32 rann_metric; + unsigned long last_preq_to_root; bool is_root; bool is_gate; }; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 35e3acbe2262..bea52479e3aa 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -98,6 +98,8 @@ static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae) #define max_preq_retries(s) (s->u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries) #define disc_timeout_jiff(s) \ msecs_to_jiffies(sdata->u.mesh.mshcfg.min_discovery_timeout) +#define root_path_confirmation_jiffies(s) \ + msecs_to_jiffies(sdata->u.mesh.mshcfg.dot11MeshHWMPconfirmationInterval) enum mpath_frame_type { MPATH_PREQ = 0, @@ -811,11 +813,14 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } if ((!(mpath->flags & (MESH_PATH_ACTIVE | MESH_PATH_RESOLVING)) || - time_after(jiffies, mpath->exp_time - 1*HZ)) && + (time_after(jiffies, mpath->last_preq_to_root + + root_path_confirmation_jiffies(sdata)) || + time_before(jiffies, mpath->last_preq_to_root))) && !(mpath->flags & MESH_PATH_FIXED)) { mhwmp_dbg("%s time to refresh root mpath %pM", sdata->name, orig_addr); mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); + mpath->last_preq_to_root = jiffies; } if ((SN_LT(mpath->sn, orig_sn) || (mpath->sn == orig_sn && diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 2f141cfd581e..3b73b07486cf 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -16,6 +16,7 @@ #define MESH_RANN_INTERVAL 5000 #define MESH_PATH_TO_ROOT_TIMEOUT 6000 #define MESH_ROOT_INTERVAL 5000 +#define MESH_ROOT_CONFIRMATION_INTERVAL 2000 /* * Minimum interval between two consecutive PREQs originated by the same @@ -66,6 +67,7 @@ const struct mesh_config default_mesh_config = { .ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED, .dot11MeshHWMPactivePathToRootTimeout = MESH_PATH_TO_ROOT_TIMEOUT, .dot11MeshHWMProotInterval = MESH_ROOT_INTERVAL, + .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, }; const struct mesh_setup default_mesh_setup = { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f8930db613df..a363ca17bfc5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3473,7 +3473,9 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u32(msg, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, cur_params.dot11MeshHWMPactivePathToRootTimeout) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, - cur_params.dot11MeshHWMProotInterval)) + cur_params.dot11MeshHWMProotInterval) || + nla_put_u16(msg, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, + cur_params.dot11MeshHWMPconfirmationInterval)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -3511,6 +3513,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HT_OPMODE] = { .type = NLA_U16 }, [NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -3625,6 +3628,10 @@ do {\ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, + dot11MeshHWMPconfirmationInterval, mask, + NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, + nla_get_u16); if (mask_out) *mask_out = mask; -- cgit v1.2.3 From 7c62234547255ce4c385a218915965bc2f14fe45 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Jun 2012 02:10:57 +0200 Subject: netfilter: nfnetlink_queue: fix compilation with NF_CONNTRACK disabled In "9cb0176 netfilter: add glue code to integrate nfnetlink_queue and ctnetlink" the compilation with NF_CONNTRACK disabled is broken. This patch fixes this issue. I have moved the conntrack part into nfnetlink_queue_ct.c to avoid peppering the entire nfnetlink_queue.c code with ifdefs. I also needed to rename nfnetlink_queue.c to nfnetlink_queue_pkt.c to update the net/netfilter/Makefile to support conditional compilation of the conntrack integration. This patch also adds CONFIG_NETFILTER_QUEUE_CT in case you want to explicitly disable the integration between nf_conntrack and nfnetlink_queue. Reported-by: Andrew Morton Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nfnetlink_queue.h | 43 ++ net/netfilter/Kconfig | 9 + net/netfilter/Makefile | 2 + net/netfilter/nf_conntrack_netlink.c | 11 +- net/netfilter/nfnetlink_queue.c | 1121 ------------------------------- net/netfilter/nfnetlink_queue_core.c | 1090 ++++++++++++++++++++++++++++++ net/netfilter/nfnetlink_queue_ct.c | 97 +++ 7 files changed, 1245 insertions(+), 1128 deletions(-) create mode 100644 include/net/netfilter/nfnetlink_queue.h delete mode 100644 net/netfilter/nfnetlink_queue.c create mode 100644 net/netfilter/nfnetlink_queue_core.c create mode 100644 net/netfilter/nfnetlink_queue_ct.c (limited to 'include/net') diff --git a/include/net/netfilter/nfnetlink_queue.h b/include/net/netfilter/nfnetlink_queue.h new file mode 100644 index 000000000000..9f8095c108e4 --- /dev/null +++ b/include/net/netfilter/nfnetlink_queue.h @@ -0,0 +1,43 @@ +#ifndef _NET_NFNL_QUEUE_H_ +#define _NET_NFNL_QUEUE_H_ + +#include + +struct nf_conn; + +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size, + enum ip_conntrack_info *ctinfo); +struct nf_conn *nfqnl_ct_parse(const struct sk_buff *skb, + const struct nlattr *attr, + enum ip_conntrack_info *ctinfo); +int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo); +void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int diff); +#else +inline struct nf_conn * +nfqnl_ct_get(struct sk_buff *entskb, size_t *size, enum ip_conntrack_info *ctinfo) +{ + return NULL; +} + +inline struct nf_conn *nfqnl_ct_parse(const struct sk_buff *skb, + const struct nlattr *attr, + enum ip_conntrack_info *ctinfo) +{ + return NULL; +} + +inline int +nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + return 0; +} + +inline void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int diff) +{ +} +#endif /* NF_CONNTRACK */ +#endif diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f1a52ba3e4c5..c19b214ffd57 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -340,6 +340,7 @@ config NF_CT_NETLINK_HELPER select NETFILTER_NETLINK depends on NF_CT_NETLINK depends on NETFILTER_NETLINK_QUEUE + depends on NETFILTER_NETLINK_QUEUE_CT depends on NETFILTER_ADVANCED help This option enables the user-space connection tracking helpers @@ -347,6 +348,14 @@ config NF_CT_NETLINK_HELPER If unsure, say `N'. +config NETFILTER_NETLINK_QUEUE_CT + bool "NFQUEUE integration with Connection Tracking" + default n + depends on NETFILTER_NETLINK_QUEUE + help + If this option is enabled, NFQUEUE can include Connection Tracking + information together with the packet is the enqueued via NFNETLINK. + endif # NF_CONNTRACK # transparent proxy support diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 7cc20199fe8c..1c5160f2278e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -9,6 +9,8 @@ obj-$(CONFIG_NETFILTER) = netfilter.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o +nfnetlink_queue-y := nfnetlink_queue_core.o +nfnetlink_queue-$(CONFIG_NETFILTER_NETLINK_QUEUE_CT) += nfnetlink_queue_ct.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 76271a1301a5..31d1d8f3a6ce 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1627,8 +1627,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; } -#if defined(CONFIG_NETFILTER_NETLINK_QUEUE) || \ - defined(CONFIG_NETFILTER_NETLINK_QUEUE_MODULE) +#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT static size_t ctnetlink_nfqueue_build_size(const struct nf_conn *ct) { @@ -1762,7 +1761,7 @@ static struct nfq_ct_hook ctnetlink_nfqueue_hook = { .seq_adjust = nf_nat_tcp_seq_adjust, #endif }; -#endif /* CONFIG_NETFILTER_NETLINK_QUEUE */ +#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */ /*********************************************************************** * EXPECT @@ -2568,8 +2567,7 @@ static int __init ctnetlink_init(void) pr_err("ctnetlink_init: cannot register pernet operations\n"); goto err_unreg_exp_subsys; } -#if defined(CONFIG_NETFILTER_NETLINK_QUEUE) || \ - defined(CONFIG_NETFILTER_NETLINK_QUEUE_MODULE) +#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT /* setup interaction between nf_queue and nf_conntrack_netlink. */ RCU_INIT_POINTER(nfq_ct_hook, &ctnetlink_nfqueue_hook); #endif @@ -2590,8 +2588,7 @@ static void __exit ctnetlink_exit(void) unregister_pernet_subsys(&ctnetlink_net_ops); nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); -#if defined(CONFIG_NETFILTER_NETLINK_QUEUE) || \ - defined(CONFIG_NETFILTER_NETLINK_QUEUE_MODULE) +#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT RCU_INIT_POINTER(nfq_ct_hook, NULL); #endif } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c deleted file mode 100644 index ff82c7933dfd..000000000000 --- a/net/netfilter/nfnetlink_queue.c +++ /dev/null @@ -1,1121 +0,0 @@ -/* - * This is a module which is used for queueing packets and communicating with - * userspace via nfnetlink. - * - * (C) 2005 by Harald Welte - * (C) 2007 by Patrick McHardy - * - * Based on the old ipv4-only ip_queue.c: - * (C) 2000-2002 James Morris - * (C) 2003-2005 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef CONFIG_BRIDGE_NETFILTER -#include "../bridge/br_private.h" -#endif - -#define NFQNL_QMAX_DEFAULT 1024 - -struct nfqnl_instance { - struct hlist_node hlist; /* global list of queues */ - struct rcu_head rcu; - - int peer_pid; - unsigned int queue_maxlen; - unsigned int copy_range; - unsigned int queue_dropped; - unsigned int queue_user_dropped; - - - u_int16_t queue_num; /* number of this queue */ - u_int8_t copy_mode; - u_int32_t flags; /* Set using NFQA_CFG_FLAGS */ -/* - * Following fields are dirtied for each queued packet, - * keep them in same cache line if possible. - */ - spinlock_t lock; - unsigned int queue_total; - unsigned int id_sequence; /* 'sequence' of pkt ids */ - struct list_head queue_list; /* packets in queue */ -}; - -typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); - -static DEFINE_SPINLOCK(instances_lock); - -#define INSTANCE_BUCKETS 16 -static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; - -static inline u_int8_t instance_hashfn(u_int16_t queue_num) -{ - return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; -} - -static struct nfqnl_instance * -instance_lookup(u_int16_t queue_num) -{ - struct hlist_head *head; - struct hlist_node *pos; - struct nfqnl_instance *inst; - - head = &instance_table[instance_hashfn(queue_num)]; - hlist_for_each_entry_rcu(inst, pos, head, hlist) { - if (inst->queue_num == queue_num) - return inst; - } - return NULL; -} - -static struct nfqnl_instance * -instance_create(u_int16_t queue_num, int pid) -{ - struct nfqnl_instance *inst; - unsigned int h; - int err; - - spin_lock(&instances_lock); - if (instance_lookup(queue_num)) { - err = -EEXIST; - goto out_unlock; - } - - inst = kzalloc(sizeof(*inst), GFP_ATOMIC); - if (!inst) { - err = -ENOMEM; - goto out_unlock; - } - - inst->queue_num = queue_num; - inst->peer_pid = pid; - inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xfffff; - inst->copy_mode = NFQNL_COPY_NONE; - spin_lock_init(&inst->lock); - INIT_LIST_HEAD(&inst->queue_list); - - if (!try_module_get(THIS_MODULE)) { - err = -EAGAIN; - goto out_free; - } - - h = instance_hashfn(queue_num); - hlist_add_head_rcu(&inst->hlist, &instance_table[h]); - - spin_unlock(&instances_lock); - - return inst; - -out_free: - kfree(inst); -out_unlock: - spin_unlock(&instances_lock); - return ERR_PTR(err); -} - -static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, - unsigned long data); - -static void -instance_destroy_rcu(struct rcu_head *head) -{ - struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, - rcu); - - nfqnl_flush(inst, NULL, 0); - kfree(inst); - module_put(THIS_MODULE); -} - -static void -__instance_destroy(struct nfqnl_instance *inst) -{ - hlist_del_rcu(&inst->hlist); - call_rcu(&inst->rcu, instance_destroy_rcu); -} - -static void -instance_destroy(struct nfqnl_instance *inst) -{ - spin_lock(&instances_lock); - __instance_destroy(inst); - spin_unlock(&instances_lock); -} - -static inline void -__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) -{ - list_add_tail(&entry->list, &queue->queue_list); - queue->queue_total++; -} - -static void -__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) -{ - list_del(&entry->list); - queue->queue_total--; -} - -static struct nf_queue_entry * -find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) -{ - struct nf_queue_entry *entry = NULL, *i; - - spin_lock_bh(&queue->lock); - - list_for_each_entry(i, &queue->queue_list, list) { - if (i->id == id) { - entry = i; - break; - } - } - - if (entry) - __dequeue_entry(queue, entry); - - spin_unlock_bh(&queue->lock); - - return entry; -} - -static void -nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) -{ - struct nf_queue_entry *entry, *next; - - spin_lock_bh(&queue->lock); - list_for_each_entry_safe(entry, next, &queue->queue_list, list) { - if (!cmpfn || cmpfn(entry, data)) { - list_del(&entry->list); - queue->queue_total--; - nf_reinject(entry, NF_DROP); - } - } - spin_unlock_bh(&queue->lock); -} - -static struct sk_buff * -nfqnl_build_packet_message(struct nfqnl_instance *queue, - struct nf_queue_entry *entry, - __be32 **packet_id_ptr) -{ - sk_buff_data_t old_tail; - size_t size; - size_t data_len = 0; - struct sk_buff *skb; - struct nlattr *nla; - struct nfqnl_msg_packet_hdr *pmsg; - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; - struct sk_buff *entskb = entry->skb; - struct net_device *indev; - struct net_device *outdev; - struct nfq_ct_hook *nfq_ct; - struct nf_conn *ct = NULL; - enum ip_conntrack_info uninitialized_var(ctinfo); - - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) - + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) - + nla_total_size(sizeof(u_int32_t)) /* ifindex */ - + nla_total_size(sizeof(u_int32_t)) /* ifindex */ -#ifdef CONFIG_BRIDGE_NETFILTER - + nla_total_size(sizeof(u_int32_t)) /* ifindex */ - + nla_total_size(sizeof(u_int32_t)) /* ifindex */ -#endif - + nla_total_size(sizeof(u_int32_t)) /* mark */ - + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); - - outdev = entry->outdev; - - switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { - case NFQNL_COPY_META: - case NFQNL_COPY_NONE: - break; - - case NFQNL_COPY_PACKET: - if (entskb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(entskb)) - return NULL; - - data_len = ACCESS_ONCE(queue->copy_range); - if (data_len == 0 || data_len > entskb->len) - data_len = entskb->len; - - size += nla_total_size(data_len); - break; - } - - /* rcu_read_lock()ed by __nf_queue already. */ - nfq_ct = rcu_dereference(nfq_ct_hook); - if (nfq_ct != NULL && (queue->flags & NFQA_CFG_F_CONNTRACK)) { - ct = nf_ct_get(entskb, &ctinfo); - if (ct) { - if (!nf_ct_is_untracked(ct)) - size += nfq_ct->build_size(ct); - else - ct = NULL; - } - } - - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) - goto nlmsg_failure; - - old_tail = skb->tail; - nlh = NLMSG_PUT(skb, 0, 0, - NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, - sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); - nfmsg->nfgen_family = entry->pf; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(queue->queue_num); - - nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg)); - pmsg = nla_data(nla); - pmsg->hw_protocol = entskb->protocol; - pmsg->hook = entry->hook; - *packet_id_ptr = &pmsg->packet_id; - - indev = entry->indev; - if (indev) { -#ifndef CONFIG_BRIDGE_NETFILTER - if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex))) - goto nla_put_failure; -#else - if (entry->pf == PF_BRIDGE) { - /* Case 1: indev is physical input device, we need to - * look for bridge group (when called from - * netfilter_bridge) */ - if (nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, - htonl(indev->ifindex)) || - /* this is the bridge group "brX" */ - /* rcu_read_lock()ed by __nf_queue */ - nla_put_be32(skb, NFQA_IFINDEX_INDEV, - htonl(br_port_get_rcu(indev)->br->dev->ifindex))) - goto nla_put_failure; - } else { - /* Case 2: indev is bridge group, we need to look for - * physical device (when called from ipv4) */ - if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, - htonl(indev->ifindex))) - goto nla_put_failure; - if (entskb->nf_bridge && entskb->nf_bridge->physindev && - nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, - htonl(entskb->nf_bridge->physindev->ifindex))) - goto nla_put_failure; - } -#endif - } - - if (outdev) { -#ifndef CONFIG_BRIDGE_NETFILTER - if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) - goto nla_put_failure; -#else - if (entry->pf == PF_BRIDGE) { - /* Case 1: outdev is physical output device, we need to - * look for bridge group (when called from - * netfilter_bridge) */ - if (nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, - htonl(outdev->ifindex)) || - /* this is the bridge group "brX" */ - /* rcu_read_lock()ed by __nf_queue */ - nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, - htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) - goto nla_put_failure; - } else { - /* Case 2: outdev is bridge group, we need to look for - * physical output device (when called from ipv4) */ - if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, - htonl(outdev->ifindex))) - goto nla_put_failure; - if (entskb->nf_bridge && entskb->nf_bridge->physoutdev && - nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, - htonl(entskb->nf_bridge->physoutdev->ifindex))) - goto nla_put_failure; - } -#endif - } - - if (entskb->mark && - nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark))) - goto nla_put_failure; - - if (indev && entskb->dev && - entskb->mac_header != entskb->network_header) { - struct nfqnl_msg_packet_hw phw; - int len = dev_parse_header(entskb, phw.hw_addr); - if (len) { - phw.hw_addrlen = htons(len); - if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw)) - goto nla_put_failure; - } - } - - if (entskb->tstamp.tv64) { - struct nfqnl_msg_packet_timestamp ts; - struct timeval tv = ktime_to_timeval(entskb->tstamp); - ts.sec = cpu_to_be64(tv.tv_sec); - ts.usec = cpu_to_be64(tv.tv_usec); - - if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts)) - goto nla_put_failure; - } - - if (data_len) { - struct nlattr *nla; - int sz = nla_attr_size(data_len); - - if (skb_tailroom(skb) < nla_total_size(data_len)) { - printk(KERN_WARNING "nf_queue: no tailroom!\n"); - goto nlmsg_failure; - } - - nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); - nla->nla_type = NFQA_PAYLOAD; - nla->nla_len = sz; - - if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) - BUG(); - } - - if (ct) { - struct nlattr *nest_parms; - u_int32_t tmp; - - nest_parms = nla_nest_start(skb, NFQA_CT | NLA_F_NESTED); - if (!nest_parms) - goto nla_put_failure; - - if (nfq_ct->build(skb, ct) < 0) - goto nla_put_failure; - - nla_nest_end(skb, nest_parms); - - tmp = ctinfo; - if (nla_put_u32(skb, NFQA_CT_INFO, htonl(ctinfo))) - goto nla_put_failure; - } - - nlh->nlmsg_len = skb->tail - old_tail; - return skb; - -nlmsg_failure: -nla_put_failure: - if (skb) - kfree_skb(skb); - net_err_ratelimited("nf_queue: error creating packet message\n"); - return NULL; -} - -static int -nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) -{ - struct sk_buff *nskb; - struct nfqnl_instance *queue; - int err = -ENOBUFS; - __be32 *packet_id_ptr; - int failopen = 0; - - /* rcu_read_lock()ed by nf_hook_slow() */ - queue = instance_lookup(queuenum); - if (!queue) { - err = -ESRCH; - goto err_out; - } - - if (queue->copy_mode == NFQNL_COPY_NONE) { - err = -EINVAL; - goto err_out; - } - - nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); - if (nskb == NULL) { - err = -ENOMEM; - goto err_out; - } - spin_lock_bh(&queue->lock); - - if (!queue->peer_pid) { - err = -EINVAL; - goto err_out_free_nskb; - } - if (queue->queue_total >= queue->queue_maxlen) { - if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { - failopen = 1; - err = 0; - } else { - queue->queue_dropped++; - net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n", - queue->queue_total); - } - goto err_out_free_nskb; - } - entry->id = ++queue->id_sequence; - *packet_id_ptr = htonl(entry->id); - - /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); - if (err < 0) { - queue->queue_user_dropped++; - goto err_out_unlock; - } - - __enqueue_entry(queue, entry); - - spin_unlock_bh(&queue->lock); - return 0; - -err_out_free_nskb: - kfree_skb(nskb); -err_out_unlock: - spin_unlock_bh(&queue->lock); - if (failopen) - nf_reinject(entry, NF_ACCEPT); -err_out: - return err; -} - -static int -nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) -{ - struct sk_buff *nskb; - - if (diff < 0) { - if (pskb_trim(e->skb, data_len)) - return -ENOMEM; - } else if (diff > 0) { - if (data_len > 0xFFFF) - return -EINVAL; - if (diff > skb_tailroom(e->skb)) { - nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), - diff, GFP_ATOMIC); - if (!nskb) { - printk(KERN_WARNING "nf_queue: OOM " - "in mangle, dropping packet\n"); - return -ENOMEM; - } - kfree_skb(e->skb); - e->skb = nskb; - } - skb_put(e->skb, diff); - } - if (!skb_make_writable(e->skb, data_len)) - return -ENOMEM; - skb_copy_to_linear_data(e->skb, data, data_len); - e->skb->ip_summed = CHECKSUM_NONE; - return 0; -} - -static int -nfqnl_set_mode(struct nfqnl_instance *queue, - unsigned char mode, unsigned int range) -{ - int status = 0; - - spin_lock_bh(&queue->lock); - switch (mode) { - case NFQNL_COPY_NONE: - case NFQNL_COPY_META: - queue->copy_mode = mode; - queue->copy_range = 0; - break; - - case NFQNL_COPY_PACKET: - queue->copy_mode = mode; - /* we're using struct nlattr which has 16bit nla_len */ - if (range > 0xffff) - queue->copy_range = 0xffff; - else - queue->copy_range = range; - break; - - default: - status = -EINVAL; - - } - spin_unlock_bh(&queue->lock); - - return status; -} - -static int -dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) -{ - if (entry->indev) - if (entry->indev->ifindex == ifindex) - return 1; - if (entry->outdev) - if (entry->outdev->ifindex == ifindex) - return 1; -#ifdef CONFIG_BRIDGE_NETFILTER - if (entry->skb->nf_bridge) { - if (entry->skb->nf_bridge->physindev && - entry->skb->nf_bridge->physindev->ifindex == ifindex) - return 1; - if (entry->skb->nf_bridge->physoutdev && - entry->skb->nf_bridge->physoutdev->ifindex == ifindex) - return 1; - } -#endif - return 0; -} - -/* drop all packets with either indev or outdev == ifindex from all queue - * instances */ -static void -nfqnl_dev_drop(int ifindex) -{ - int i; - - rcu_read_lock(); - - for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp; - struct nfqnl_instance *inst; - struct hlist_head *head = &instance_table[i]; - - hlist_for_each_entry_rcu(inst, tmp, head, hlist) - nfqnl_flush(inst, dev_cmp, ifindex); - } - - rcu_read_unlock(); -} - -#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) - -static int -nfqnl_rcv_dev_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - /* Drop any packets associated with the downed device */ - if (event == NETDEV_DOWN) - nfqnl_dev_drop(dev->ifindex); - return NOTIFY_DONE; -} - -static struct notifier_block nfqnl_dev_notifier = { - .notifier_call = nfqnl_rcv_dev_event, -}; - -static int -nfqnl_rcv_nl_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct netlink_notify *n = ptr; - - if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { - int i; - - /* destroy all instances for this pid */ - spin_lock(&instances_lock); - for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp, *t2; - struct nfqnl_instance *inst; - struct hlist_head *head = &instance_table[i]; - - hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { - if ((n->net == &init_net) && - (n->pid == inst->peer_pid)) - __instance_destroy(inst); - } - } - spin_unlock(&instances_lock); - } - return NOTIFY_DONE; -} - -static struct notifier_block nfqnl_rtnl_notifier = { - .notifier_call = nfqnl_rcv_nl_event, -}; - -static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { - [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, - [NFQA_MARK] = { .type = NLA_U32 }, - [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, - [NFQA_CT] = { .type = NLA_UNSPEC }, -}; - -static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { - [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, - [NFQA_MARK] = { .type = NLA_U32 }, -}; - -static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) -{ - struct nfqnl_instance *queue; - - queue = instance_lookup(queue_num); - if (!queue) - return ERR_PTR(-ENODEV); - - if (queue->peer_pid != nlpid) - return ERR_PTR(-EPERM); - - return queue; -} - -static struct nfqnl_msg_verdict_hdr* -verdicthdr_get(const struct nlattr * const nfqa[]) -{ - struct nfqnl_msg_verdict_hdr *vhdr; - unsigned int verdict; - - if (!nfqa[NFQA_VERDICT_HDR]) - return NULL; - - vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); - verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK; - if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN) - return NULL; - return vhdr; -} - -static int nfq_id_after(unsigned int id, unsigned int max) -{ - return (int)(id - max) > 0; -} - -static int -nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) -{ - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); - struct nf_queue_entry *entry, *tmp; - unsigned int verdict, maxid; - struct nfqnl_msg_verdict_hdr *vhdr; - struct nfqnl_instance *queue; - LIST_HEAD(batch_list); - u16 queue_num = ntohs(nfmsg->res_id); - - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); - if (IS_ERR(queue)) - return PTR_ERR(queue); - - vhdr = verdicthdr_get(nfqa); - if (!vhdr) - return -EINVAL; - - verdict = ntohl(vhdr->verdict); - maxid = ntohl(vhdr->id); - - spin_lock_bh(&queue->lock); - - list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) { - if (nfq_id_after(entry->id, maxid)) - break; - __dequeue_entry(queue, entry); - list_add_tail(&entry->list, &batch_list); - } - - spin_unlock_bh(&queue->lock); - - if (list_empty(&batch_list)) - return -ENOENT; - - list_for_each_entry_safe(entry, tmp, &batch_list, list) { - if (nfqa[NFQA_MARK]) - entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); - nf_reinject(entry, verdict); - } - return 0; -} - -static int -nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) -{ - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); - u_int16_t queue_num = ntohs(nfmsg->res_id); - - struct nfqnl_msg_verdict_hdr *vhdr; - struct nfqnl_instance *queue; - unsigned int verdict; - struct nf_queue_entry *entry; - struct nfq_ct_hook *nfq_ct; - enum ip_conntrack_info uninitialized_var(ctinfo); - struct nf_conn *ct = NULL; - - queue = instance_lookup(queue_num); - if (!queue) - - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); - if (IS_ERR(queue)) - return PTR_ERR(queue); - - vhdr = verdicthdr_get(nfqa); - if (!vhdr) - return -EINVAL; - - verdict = ntohl(vhdr->verdict); - - entry = find_dequeue_entry(queue, ntohl(vhdr->id)); - if (entry == NULL) - return -ENOENT; - - rcu_read_lock(); - nfq_ct = rcu_dereference(nfq_ct_hook); - if (nfq_ct != NULL && - (queue->flags & NFQA_CFG_F_CONNTRACK) && nfqa[NFQA_CT]) { - ct = nf_ct_get(entry->skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) - nfq_ct->parse(nfqa[NFQA_CT], ct); - } - - if (nfqa[NFQA_PAYLOAD]) { - u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]); - int diff = payload_len - entry->skb->len; - - if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), - payload_len, entry, diff) < 0) - verdict = NF_DROP; - - if (ct && (ct->status & IPS_NAT_MASK) && diff) - nfq_ct->seq_adjust(skb, ct, ctinfo, diff); - } - rcu_read_unlock(); - - if (nfqa[NFQA_MARK]) - entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); - - nf_reinject(entry, verdict); - return 0; -} - -static int -nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) -{ - return -ENOTSUPP; -} - -static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { - [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, - [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, -}; - -static const struct nf_queue_handler nfqh = { - .name = "nf_queue", - .outfn = &nfqnl_enqueue_packet, -}; - -static int -nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) -{ - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); - u_int16_t queue_num = ntohs(nfmsg->res_id); - struct nfqnl_instance *queue; - struct nfqnl_msg_config_cmd *cmd = NULL; - int ret = 0; - - if (nfqa[NFQA_CFG_CMD]) { - cmd = nla_data(nfqa[NFQA_CFG_CMD]); - - /* Commands without queue context - might sleep */ - switch (cmd->command) { - case NFQNL_CFG_CMD_PF_BIND: - return nf_register_queue_handler(ntohs(cmd->pf), - &nfqh); - case NFQNL_CFG_CMD_PF_UNBIND: - return nf_unregister_queue_handler(ntohs(cmd->pf), - &nfqh); - } - } - - rcu_read_lock(); - queue = instance_lookup(queue_num); - if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { - ret = -EPERM; - goto err_out_unlock; - } - - if (cmd != NULL) { - switch (cmd->command) { - case NFQNL_CFG_CMD_BIND: - if (queue) { - ret = -EBUSY; - goto err_out_unlock; - } - queue = instance_create(queue_num, NETLINK_CB(skb).pid); - if (IS_ERR(queue)) { - ret = PTR_ERR(queue); - goto err_out_unlock; - } - break; - case NFQNL_CFG_CMD_UNBIND: - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - instance_destroy(queue); - break; - case NFQNL_CFG_CMD_PF_BIND: - case NFQNL_CFG_CMD_PF_UNBIND: - break; - default: - ret = -ENOTSUPP; - break; - } - } - - if (nfqa[NFQA_CFG_PARAMS]) { - struct nfqnl_msg_config_params *params; - - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - params = nla_data(nfqa[NFQA_CFG_PARAMS]); - nfqnl_set_mode(queue, params->copy_mode, - ntohl(params->copy_range)); - } - - if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { - __be32 *queue_maxlen; - - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); - spin_lock_bh(&queue->lock); - queue->queue_maxlen = ntohl(*queue_maxlen); - spin_unlock_bh(&queue->lock); - } - - if (nfqa[NFQA_CFG_FLAGS]) { - __u32 flags, mask; - - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - - if (!nfqa[NFQA_CFG_MASK]) { - /* A mask is needed to specify which flags are being - * changed. - */ - ret = -EINVAL; - goto err_out_unlock; - } - - flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS])); - mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK])); - - spin_lock_bh(&queue->lock); - queue->flags &= ~mask; - queue->flags |= flags & mask; - spin_unlock_bh(&queue->lock); - } - -err_out_unlock: - rcu_read_unlock(); - return ret; -} - -static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { - [NFQNL_MSG_PACKET] = { .call_rcu = nfqnl_recv_unsupp, - .attr_count = NFQA_MAX, }, - [NFQNL_MSG_VERDICT] = { .call_rcu = nfqnl_recv_verdict, - .attr_count = NFQA_MAX, - .policy = nfqa_verdict_policy }, - [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, - .attr_count = NFQA_CFG_MAX, - .policy = nfqa_cfg_policy }, - [NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch, - .attr_count = NFQA_MAX, - .policy = nfqa_verdict_batch_policy }, -}; - -static const struct nfnetlink_subsystem nfqnl_subsys = { - .name = "nf_queue", - .subsys_id = NFNL_SUBSYS_QUEUE, - .cb_count = NFQNL_MSG_MAX, - .cb = nfqnl_cb, -}; - -#ifdef CONFIG_PROC_FS -struct iter_state { - unsigned int bucket; -}; - -static struct hlist_node *get_first(struct seq_file *seq) -{ - struct iter_state *st = seq->private; - - if (!st) - return NULL; - - for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { - if (!hlist_empty(&instance_table[st->bucket])) - return instance_table[st->bucket].first; - } - return NULL; -} - -static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) -{ - struct iter_state *st = seq->private; - - h = h->next; - while (!h) { - if (++st->bucket >= INSTANCE_BUCKETS) - return NULL; - - h = instance_table[st->bucket].first; - } - return h; -} - -static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) -{ - struct hlist_node *head; - head = get_first(seq); - - if (head) - while (pos && (head = get_next(seq, head))) - pos--; - return pos ? NULL : head; -} - -static void *seq_start(struct seq_file *seq, loff_t *pos) - __acquires(instances_lock) -{ - spin_lock(&instances_lock); - return get_idx(seq, *pos); -} - -static void *seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - (*pos)++; - return get_next(s, v); -} - -static void seq_stop(struct seq_file *s, void *v) - __releases(instances_lock) -{ - spin_unlock(&instances_lock); -} - -static int seq_show(struct seq_file *s, void *v) -{ - const struct nfqnl_instance *inst = v; - - return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", - inst->queue_num, - inst->peer_pid, inst->queue_total, - inst->copy_mode, inst->copy_range, - inst->queue_dropped, inst->queue_user_dropped, - inst->id_sequence, 1); -} - -static const struct seq_operations nfqnl_seq_ops = { - .start = seq_start, - .next = seq_next, - .stop = seq_stop, - .show = seq_show, -}; - -static int nfqnl_open(struct inode *inode, struct file *file) -{ - return seq_open_private(file, &nfqnl_seq_ops, - sizeof(struct iter_state)); -} - -static const struct file_operations nfqnl_file_ops = { - .owner = THIS_MODULE, - .open = nfqnl_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - -#endif /* PROC_FS */ - -static int __init nfnetlink_queue_init(void) -{ - int i, status = -ENOMEM; - - for (i = 0; i < INSTANCE_BUCKETS; i++) - INIT_HLIST_HEAD(&instance_table[i]); - - netlink_register_notifier(&nfqnl_rtnl_notifier); - status = nfnetlink_subsys_register(&nfqnl_subsys); - if (status < 0) { - printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); - goto cleanup_netlink_notifier; - } - -#ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_queue", 0440, - proc_net_netfilter, &nfqnl_file_ops)) - goto cleanup_subsys; -#endif - - register_netdevice_notifier(&nfqnl_dev_notifier); - return status; - -#ifdef CONFIG_PROC_FS -cleanup_subsys: - nfnetlink_subsys_unregister(&nfqnl_subsys); -#endif -cleanup_netlink_notifier: - netlink_unregister_notifier(&nfqnl_rtnl_notifier); - return status; -} - -static void __exit nfnetlink_queue_fini(void) -{ - nf_unregister_queue_handlers(&nfqh); - unregister_netdevice_notifier(&nfqnl_dev_notifier); -#ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_queue", proc_net_netfilter); -#endif - nfnetlink_subsys_unregister(&nfqnl_subsys); - netlink_unregister_notifier(&nfqnl_rtnl_notifier); - - rcu_barrier(); /* Wait for completion of call_rcu()'s */ -} - -MODULE_DESCRIPTION("netfilter packet queue handler"); -MODULE_AUTHOR("Harald Welte "); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE); - -module_init(nfnetlink_queue_init); -module_exit(nfnetlink_queue_fini); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c new file mode 100644 index 000000000000..d36b95ea8ca3 --- /dev/null +++ b/net/netfilter/nfnetlink_queue_core.c @@ -0,0 +1,1090 @@ +/* + * This is a module which is used for queueing packets and communicating with + * userspace via nfnetlink. + * + * (C) 2005 by Harald Welte + * (C) 2007 by Patrick McHardy + * + * Based on the old ipv4-only ip_queue.c: + * (C) 2000-2002 James Morris + * (C) 2003-2005 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_BRIDGE_NETFILTER +#include "../bridge/br_private.h" +#endif + +#define NFQNL_QMAX_DEFAULT 1024 + +struct nfqnl_instance { + struct hlist_node hlist; /* global list of queues */ + struct rcu_head rcu; + + int peer_pid; + unsigned int queue_maxlen; + unsigned int copy_range; + unsigned int queue_dropped; + unsigned int queue_user_dropped; + + + u_int16_t queue_num; /* number of this queue */ + u_int8_t copy_mode; + u_int32_t flags; /* Set using NFQA_CFG_FLAGS */ +/* + * Following fields are dirtied for each queued packet, + * keep them in same cache line if possible. + */ + spinlock_t lock; + unsigned int queue_total; + unsigned int id_sequence; /* 'sequence' of pkt ids */ + struct list_head queue_list; /* packets in queue */ +}; + +typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); + +static DEFINE_SPINLOCK(instances_lock); + +#define INSTANCE_BUCKETS 16 +static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; + +static inline u_int8_t instance_hashfn(u_int16_t queue_num) +{ + return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; +} + +static struct nfqnl_instance * +instance_lookup(u_int16_t queue_num) +{ + struct hlist_head *head; + struct hlist_node *pos; + struct nfqnl_instance *inst; + + head = &instance_table[instance_hashfn(queue_num)]; + hlist_for_each_entry_rcu(inst, pos, head, hlist) { + if (inst->queue_num == queue_num) + return inst; + } + return NULL; +} + +static struct nfqnl_instance * +instance_create(u_int16_t queue_num, int pid) +{ + struct nfqnl_instance *inst; + unsigned int h; + int err; + + spin_lock(&instances_lock); + if (instance_lookup(queue_num)) { + err = -EEXIST; + goto out_unlock; + } + + inst = kzalloc(sizeof(*inst), GFP_ATOMIC); + if (!inst) { + err = -ENOMEM; + goto out_unlock; + } + + inst->queue_num = queue_num; + inst->peer_pid = pid; + inst->queue_maxlen = NFQNL_QMAX_DEFAULT; + inst->copy_range = 0xfffff; + inst->copy_mode = NFQNL_COPY_NONE; + spin_lock_init(&inst->lock); + INIT_LIST_HEAD(&inst->queue_list); + + if (!try_module_get(THIS_MODULE)) { + err = -EAGAIN; + goto out_free; + } + + h = instance_hashfn(queue_num); + hlist_add_head_rcu(&inst->hlist, &instance_table[h]); + + spin_unlock(&instances_lock); + + return inst; + +out_free: + kfree(inst); +out_unlock: + spin_unlock(&instances_lock); + return ERR_PTR(err); +} + +static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, + unsigned long data); + +static void +instance_destroy_rcu(struct rcu_head *head) +{ + struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, + rcu); + + nfqnl_flush(inst, NULL, 0); + kfree(inst); + module_put(THIS_MODULE); +} + +static void +__instance_destroy(struct nfqnl_instance *inst) +{ + hlist_del_rcu(&inst->hlist); + call_rcu(&inst->rcu, instance_destroy_rcu); +} + +static void +instance_destroy(struct nfqnl_instance *inst) +{ + spin_lock(&instances_lock); + __instance_destroy(inst); + spin_unlock(&instances_lock); +} + +static inline void +__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) +{ + list_add_tail(&entry->list, &queue->queue_list); + queue->queue_total++; +} + +static void +__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) +{ + list_del(&entry->list); + queue->queue_total--; +} + +static struct nf_queue_entry * +find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) +{ + struct nf_queue_entry *entry = NULL, *i; + + spin_lock_bh(&queue->lock); + + list_for_each_entry(i, &queue->queue_list, list) { + if (i->id == id) { + entry = i; + break; + } + } + + if (entry) + __dequeue_entry(queue, entry); + + spin_unlock_bh(&queue->lock); + + return entry; +} + +static void +nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) +{ + struct nf_queue_entry *entry, *next; + + spin_lock_bh(&queue->lock); + list_for_each_entry_safe(entry, next, &queue->queue_list, list) { + if (!cmpfn || cmpfn(entry, data)) { + list_del(&entry->list); + queue->queue_total--; + nf_reinject(entry, NF_DROP); + } + } + spin_unlock_bh(&queue->lock); +} + +static struct sk_buff * +nfqnl_build_packet_message(struct nfqnl_instance *queue, + struct nf_queue_entry *entry, + __be32 **packet_id_ptr) +{ + sk_buff_data_t old_tail; + size_t size; + size_t data_len = 0; + struct sk_buff *skb; + struct nlattr *nla; + struct nfqnl_msg_packet_hdr *pmsg; + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct sk_buff *entskb = entry->skb; + struct net_device *indev; + struct net_device *outdev; + struct nf_conn *ct = NULL; + enum ip_conntrack_info uninitialized_var(ctinfo); + + size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ +#ifdef CONFIG_BRIDGE_NETFILTER + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ +#endif + + nla_total_size(sizeof(u_int32_t)) /* mark */ + + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) + + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); + + outdev = entry->outdev; + + switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { + case NFQNL_COPY_META: + case NFQNL_COPY_NONE: + break; + + case NFQNL_COPY_PACKET: + if (entskb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(entskb)) + return NULL; + + data_len = ACCESS_ONCE(queue->copy_range); + if (data_len == 0 || data_len > entskb->len) + data_len = entskb->len; + + size += nla_total_size(data_len); + break; + } + + if (queue->flags & NFQA_CFG_F_CONNTRACK) + ct = nfqnl_ct_get(entskb, &size, &ctinfo); + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + goto nlmsg_failure; + + old_tail = skb->tail; + nlh = NLMSG_PUT(skb, 0, 0, + NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, + sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + nfmsg->nfgen_family = entry->pf; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(queue->queue_num); + + nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg)); + pmsg = nla_data(nla); + pmsg->hw_protocol = entskb->protocol; + pmsg->hook = entry->hook; + *packet_id_ptr = &pmsg->packet_id; + + indev = entry->indev; + if (indev) { +#ifndef CONFIG_BRIDGE_NETFILTER + if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex))) + goto nla_put_failure; +#else + if (entry->pf == PF_BRIDGE) { + /* Case 1: indev is physical input device, we need to + * look for bridge group (when called from + * netfilter_bridge) */ + if (nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, + htonl(indev->ifindex)) || + /* this is the bridge group "brX" */ + /* rcu_read_lock()ed by __nf_queue */ + nla_put_be32(skb, NFQA_IFINDEX_INDEV, + htonl(br_port_get_rcu(indev)->br->dev->ifindex))) + goto nla_put_failure; + } else { + /* Case 2: indev is bridge group, we need to look for + * physical device (when called from ipv4) */ + if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, + htonl(indev->ifindex))) + goto nla_put_failure; + if (entskb->nf_bridge && entskb->nf_bridge->physindev && + nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, + htonl(entskb->nf_bridge->physindev->ifindex))) + goto nla_put_failure; + } +#endif + } + + if (outdev) { +#ifndef CONFIG_BRIDGE_NETFILTER + if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) + goto nla_put_failure; +#else + if (entry->pf == PF_BRIDGE) { + /* Case 1: outdev is physical output device, we need to + * look for bridge group (when called from + * netfilter_bridge) */ + if (nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, + htonl(outdev->ifindex)) || + /* this is the bridge group "brX" */ + /* rcu_read_lock()ed by __nf_queue */ + nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, + htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) + goto nla_put_failure; + } else { + /* Case 2: outdev is bridge group, we need to look for + * physical output device (when called from ipv4) */ + if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, + htonl(outdev->ifindex))) + goto nla_put_failure; + if (entskb->nf_bridge && entskb->nf_bridge->physoutdev && + nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, + htonl(entskb->nf_bridge->physoutdev->ifindex))) + goto nla_put_failure; + } +#endif + } + + if (entskb->mark && + nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark))) + goto nla_put_failure; + + if (indev && entskb->dev && + entskb->mac_header != entskb->network_header) { + struct nfqnl_msg_packet_hw phw; + int len = dev_parse_header(entskb, phw.hw_addr); + if (len) { + phw.hw_addrlen = htons(len); + if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw)) + goto nla_put_failure; + } + } + + if (entskb->tstamp.tv64) { + struct nfqnl_msg_packet_timestamp ts; + struct timeval tv = ktime_to_timeval(entskb->tstamp); + ts.sec = cpu_to_be64(tv.tv_sec); + ts.usec = cpu_to_be64(tv.tv_usec); + + if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts)) + goto nla_put_failure; + } + + if (data_len) { + struct nlattr *nla; + int sz = nla_attr_size(data_len); + + if (skb_tailroom(skb) < nla_total_size(data_len)) { + printk(KERN_WARNING "nf_queue: no tailroom!\n"); + goto nlmsg_failure; + } + + nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); + nla->nla_type = NFQA_PAYLOAD; + nla->nla_len = sz; + + if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) + BUG(); + } + + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) + goto nla_put_failure; + + nlh->nlmsg_len = skb->tail - old_tail; + return skb; + +nlmsg_failure: +nla_put_failure: + if (skb) + kfree_skb(skb); + net_err_ratelimited("nf_queue: error creating packet message\n"); + return NULL; +} + +static int +nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) +{ + struct sk_buff *nskb; + struct nfqnl_instance *queue; + int err = -ENOBUFS; + __be32 *packet_id_ptr; + int failopen = 0; + + /* rcu_read_lock()ed by nf_hook_slow() */ + queue = instance_lookup(queuenum); + if (!queue) { + err = -ESRCH; + goto err_out; + } + + if (queue->copy_mode == NFQNL_COPY_NONE) { + err = -EINVAL; + goto err_out; + } + + nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); + if (nskb == NULL) { + err = -ENOMEM; + goto err_out; + } + spin_lock_bh(&queue->lock); + + if (!queue->peer_pid) { + err = -EINVAL; + goto err_out_free_nskb; + } + if (queue->queue_total >= queue->queue_maxlen) { + if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { + failopen = 1; + err = 0; + } else { + queue->queue_dropped++; + net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n", + queue->queue_total); + } + goto err_out_free_nskb; + } + entry->id = ++queue->id_sequence; + *packet_id_ptr = htonl(entry->id); + + /* nfnetlink_unicast will either free the nskb or add it to a socket */ + err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); + if (err < 0) { + queue->queue_user_dropped++; + goto err_out_unlock; + } + + __enqueue_entry(queue, entry); + + spin_unlock_bh(&queue->lock); + return 0; + +err_out_free_nskb: + kfree_skb(nskb); +err_out_unlock: + spin_unlock_bh(&queue->lock); + if (failopen) + nf_reinject(entry, NF_ACCEPT); +err_out: + return err; +} + +static int +nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) +{ + struct sk_buff *nskb; + + if (diff < 0) { + if (pskb_trim(e->skb, data_len)) + return -ENOMEM; + } else if (diff > 0) { + if (data_len > 0xFFFF) + return -EINVAL; + if (diff > skb_tailroom(e->skb)) { + nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), + diff, GFP_ATOMIC); + if (!nskb) { + printk(KERN_WARNING "nf_queue: OOM " + "in mangle, dropping packet\n"); + return -ENOMEM; + } + kfree_skb(e->skb); + e->skb = nskb; + } + skb_put(e->skb, diff); + } + if (!skb_make_writable(e->skb, data_len)) + return -ENOMEM; + skb_copy_to_linear_data(e->skb, data, data_len); + e->skb->ip_summed = CHECKSUM_NONE; + return 0; +} + +static int +nfqnl_set_mode(struct nfqnl_instance *queue, + unsigned char mode, unsigned int range) +{ + int status = 0; + + spin_lock_bh(&queue->lock); + switch (mode) { + case NFQNL_COPY_NONE: + case NFQNL_COPY_META: + queue->copy_mode = mode; + queue->copy_range = 0; + break; + + case NFQNL_COPY_PACKET: + queue->copy_mode = mode; + /* we're using struct nlattr which has 16bit nla_len */ + if (range > 0xffff) + queue->copy_range = 0xffff; + else + queue->copy_range = range; + break; + + default: + status = -EINVAL; + + } + spin_unlock_bh(&queue->lock); + + return status; +} + +static int +dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) +{ + if (entry->indev) + if (entry->indev->ifindex == ifindex) + return 1; + if (entry->outdev) + if (entry->outdev->ifindex == ifindex) + return 1; +#ifdef CONFIG_BRIDGE_NETFILTER + if (entry->skb->nf_bridge) { + if (entry->skb->nf_bridge->physindev && + entry->skb->nf_bridge->physindev->ifindex == ifindex) + return 1; + if (entry->skb->nf_bridge->physoutdev && + entry->skb->nf_bridge->physoutdev->ifindex == ifindex) + return 1; + } +#endif + return 0; +} + +/* drop all packets with either indev or outdev == ifindex from all queue + * instances */ +static void +nfqnl_dev_drop(int ifindex) +{ + int i; + + rcu_read_lock(); + + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp; + struct nfqnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry_rcu(inst, tmp, head, hlist) + nfqnl_flush(inst, dev_cmp, ifindex); + } + + rcu_read_unlock(); +} + +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) + +static int +nfqnl_rcv_dev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + /* Drop any packets associated with the downed device */ + if (event == NETDEV_DOWN) + nfqnl_dev_drop(dev->ifindex); + return NOTIFY_DONE; +} + +static struct notifier_block nfqnl_dev_notifier = { + .notifier_call = nfqnl_rcv_dev_event, +}; + +static int +nfqnl_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + + if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { + int i; + + /* destroy all instances for this pid */ + spin_lock(&instances_lock); + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp, *t2; + struct nfqnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) + __instance_destroy(inst); + } + } + spin_unlock(&instances_lock); + } + return NOTIFY_DONE; +} + +static struct notifier_block nfqnl_rtnl_notifier = { + .notifier_call = nfqnl_rcv_nl_event, +}; + +static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { + [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, + [NFQA_MARK] = { .type = NLA_U32 }, + [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, + [NFQA_CT] = { .type = NLA_UNSPEC }, +}; + +static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { + [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, + [NFQA_MARK] = { .type = NLA_U32 }, +}; + +static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) +{ + struct nfqnl_instance *queue; + + queue = instance_lookup(queue_num); + if (!queue) + return ERR_PTR(-ENODEV); + + if (queue->peer_pid != nlpid) + return ERR_PTR(-EPERM); + + return queue; +} + +static struct nfqnl_msg_verdict_hdr* +verdicthdr_get(const struct nlattr * const nfqa[]) +{ + struct nfqnl_msg_verdict_hdr *vhdr; + unsigned int verdict; + + if (!nfqa[NFQA_VERDICT_HDR]) + return NULL; + + vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); + verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK; + if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN) + return NULL; + return vhdr; +} + +static int nfq_id_after(unsigned int id, unsigned int max) +{ + return (int)(id - max) > 0; +} + +static int +nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nf_queue_entry *entry, *tmp; + unsigned int verdict, maxid; + struct nfqnl_msg_verdict_hdr *vhdr; + struct nfqnl_instance *queue; + LIST_HEAD(batch_list); + u16 queue_num = ntohs(nfmsg->res_id); + + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + if (IS_ERR(queue)) + return PTR_ERR(queue); + + vhdr = verdicthdr_get(nfqa); + if (!vhdr) + return -EINVAL; + + verdict = ntohl(vhdr->verdict); + maxid = ntohl(vhdr->id); + + spin_lock_bh(&queue->lock); + + list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) { + if (nfq_id_after(entry->id, maxid)) + break; + __dequeue_entry(queue, entry); + list_add_tail(&entry->list, &batch_list); + } + + spin_unlock_bh(&queue->lock); + + if (list_empty(&batch_list)) + return -ENOENT; + + list_for_each_entry_safe(entry, tmp, &batch_list, list) { + if (nfqa[NFQA_MARK]) + entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); + nf_reinject(entry, verdict); + } + return 0; +} + +static int +nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t queue_num = ntohs(nfmsg->res_id); + + struct nfqnl_msg_verdict_hdr *vhdr; + struct nfqnl_instance *queue; + unsigned int verdict; + struct nf_queue_entry *entry; + enum ip_conntrack_info uninitialized_var(ctinfo); + struct nf_conn *ct = NULL; + + queue = instance_lookup(queue_num); + if (!queue) + + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + if (IS_ERR(queue)) + return PTR_ERR(queue); + + vhdr = verdicthdr_get(nfqa); + if (!vhdr) + return -EINVAL; + + verdict = ntohl(vhdr->verdict); + + entry = find_dequeue_entry(queue, ntohl(vhdr->id)); + if (entry == NULL) + return -ENOENT; + + rcu_read_lock(); + if (nfqa[NFQA_CT] && (queue->flags & NFQA_CFG_F_CONNTRACK)) + ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo); + + if (nfqa[NFQA_PAYLOAD]) { + u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]); + int diff = payload_len - entry->skb->len; + + if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), + payload_len, entry, diff) < 0) + verdict = NF_DROP; + + if (ct) + nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff); + } + rcu_read_unlock(); + + if (nfqa[NFQA_MARK]) + entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); + + nf_reinject(entry, verdict); + return 0; +} + +static int +nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) +{ + return -ENOTSUPP; +} + +static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { + [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, + [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, +}; + +static const struct nf_queue_handler nfqh = { + .name = "nf_queue", + .outfn = &nfqnl_enqueue_packet, +}; + +static int +nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t queue_num = ntohs(nfmsg->res_id); + struct nfqnl_instance *queue; + struct nfqnl_msg_config_cmd *cmd = NULL; + int ret = 0; + + if (nfqa[NFQA_CFG_CMD]) { + cmd = nla_data(nfqa[NFQA_CFG_CMD]); + + /* Commands without queue context - might sleep */ + switch (cmd->command) { + case NFQNL_CFG_CMD_PF_BIND: + return nf_register_queue_handler(ntohs(cmd->pf), + &nfqh); + case NFQNL_CFG_CMD_PF_UNBIND: + return nf_unregister_queue_handler(ntohs(cmd->pf), + &nfqh); + } + } + + rcu_read_lock(); + queue = instance_lookup(queue_num); + if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { + ret = -EPERM; + goto err_out_unlock; + } + + if (cmd != NULL) { + switch (cmd->command) { + case NFQNL_CFG_CMD_BIND: + if (queue) { + ret = -EBUSY; + goto err_out_unlock; + } + queue = instance_create(queue_num, NETLINK_CB(skb).pid); + if (IS_ERR(queue)) { + ret = PTR_ERR(queue); + goto err_out_unlock; + } + break; + case NFQNL_CFG_CMD_UNBIND: + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + instance_destroy(queue); + break; + case NFQNL_CFG_CMD_PF_BIND: + case NFQNL_CFG_CMD_PF_UNBIND: + break; + default: + ret = -ENOTSUPP; + break; + } + } + + if (nfqa[NFQA_CFG_PARAMS]) { + struct nfqnl_msg_config_params *params; + + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + params = nla_data(nfqa[NFQA_CFG_PARAMS]); + nfqnl_set_mode(queue, params->copy_mode, + ntohl(params->copy_range)); + } + + if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { + __be32 *queue_maxlen; + + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); + spin_lock_bh(&queue->lock); + queue->queue_maxlen = ntohl(*queue_maxlen); + spin_unlock_bh(&queue->lock); + } + + if (nfqa[NFQA_CFG_FLAGS]) { + __u32 flags, mask; + + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + + if (!nfqa[NFQA_CFG_MASK]) { + /* A mask is needed to specify which flags are being + * changed. + */ + ret = -EINVAL; + goto err_out_unlock; + } + + flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS])); + mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK])); + + spin_lock_bh(&queue->lock); + queue->flags &= ~mask; + queue->flags |= flags & mask; + spin_unlock_bh(&queue->lock); + } + +err_out_unlock: + rcu_read_unlock(); + return ret; +} + +static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { + [NFQNL_MSG_PACKET] = { .call_rcu = nfqnl_recv_unsupp, + .attr_count = NFQA_MAX, }, + [NFQNL_MSG_VERDICT] = { .call_rcu = nfqnl_recv_verdict, + .attr_count = NFQA_MAX, + .policy = nfqa_verdict_policy }, + [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, + .attr_count = NFQA_CFG_MAX, + .policy = nfqa_cfg_policy }, + [NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch, + .attr_count = NFQA_MAX, + .policy = nfqa_verdict_batch_policy }, +}; + +static const struct nfnetlink_subsystem nfqnl_subsys = { + .name = "nf_queue", + .subsys_id = NFNL_SUBSYS_QUEUE, + .cb_count = NFQNL_MSG_MAX, + .cb = nfqnl_cb, +}; + +#ifdef CONFIG_PROC_FS +struct iter_state { + unsigned int bucket; +}; + +static struct hlist_node *get_first(struct seq_file *seq) +{ + struct iter_state *st = seq->private; + + if (!st) + return NULL; + + for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { + if (!hlist_empty(&instance_table[st->bucket])) + return instance_table[st->bucket].first; + } + return NULL; +} + +static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) +{ + struct iter_state *st = seq->private; + + h = h->next; + while (!h) { + if (++st->bucket >= INSTANCE_BUCKETS) + return NULL; + + h = instance_table[st->bucket].first; + } + return h; +} + +static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) +{ + struct hlist_node *head; + head = get_first(seq); + + if (head) + while (pos && (head = get_next(seq, head))) + pos--; + return pos ? NULL : head; +} + +static void *seq_start(struct seq_file *seq, loff_t *pos) + __acquires(instances_lock) +{ + spin_lock(&instances_lock); + return get_idx(seq, *pos); +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + return get_next(s, v); +} + +static void seq_stop(struct seq_file *s, void *v) + __releases(instances_lock) +{ + spin_unlock(&instances_lock); +} + +static int seq_show(struct seq_file *s, void *v) +{ + const struct nfqnl_instance *inst = v; + + return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", + inst->queue_num, + inst->peer_pid, inst->queue_total, + inst->copy_mode, inst->copy_range, + inst->queue_dropped, inst->queue_user_dropped, + inst->id_sequence, 1); +} + +static const struct seq_operations nfqnl_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nfqnl_open(struct inode *inode, struct file *file) +{ + return seq_open_private(file, &nfqnl_seq_ops, + sizeof(struct iter_state)); +} + +static const struct file_operations nfqnl_file_ops = { + .owner = THIS_MODULE, + .open = nfqnl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +#endif /* PROC_FS */ + +static int __init nfnetlink_queue_init(void) +{ + int i, status = -ENOMEM; + + for (i = 0; i < INSTANCE_BUCKETS; i++) + INIT_HLIST_HEAD(&instance_table[i]); + + netlink_register_notifier(&nfqnl_rtnl_notifier); + status = nfnetlink_subsys_register(&nfqnl_subsys); + if (status < 0) { + printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); + goto cleanup_netlink_notifier; + } + +#ifdef CONFIG_PROC_FS + if (!proc_create("nfnetlink_queue", 0440, + proc_net_netfilter, &nfqnl_file_ops)) + goto cleanup_subsys; +#endif + + register_netdevice_notifier(&nfqnl_dev_notifier); + return status; + +#ifdef CONFIG_PROC_FS +cleanup_subsys: + nfnetlink_subsys_unregister(&nfqnl_subsys); +#endif +cleanup_netlink_notifier: + netlink_unregister_notifier(&nfqnl_rtnl_notifier); + return status; +} + +static void __exit nfnetlink_queue_fini(void) +{ + nf_unregister_queue_handlers(&nfqh); + unregister_netdevice_notifier(&nfqnl_dev_notifier); +#ifdef CONFIG_PROC_FS + remove_proc_entry("nfnetlink_queue", proc_net_netfilter); +#endif + nfnetlink_subsys_unregister(&nfqnl_subsys); + netlink_unregister_notifier(&nfqnl_rtnl_notifier); + + rcu_barrier(); /* Wait for completion of call_rcu()'s */ +} + +MODULE_DESCRIPTION("netfilter packet queue handler"); +MODULE_AUTHOR("Harald Welte "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE); + +module_init(nfnetlink_queue_init); +module_exit(nfnetlink_queue_fini); diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c new file mode 100644 index 000000000000..68ef550066f5 --- /dev/null +++ b/net/netfilter/nfnetlink_queue_ct.c @@ -0,0 +1,97 @@ +/* + * (C) 2012 by Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include + +struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size, + enum ip_conntrack_info *ctinfo) +{ + struct nfq_ct_hook *nfq_ct; + struct nf_conn *ct; + + /* rcu_read_lock()ed by __nf_queue already. */ + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return NULL; + + ct = nf_ct_get(entskb, ctinfo); + if (ct) { + if (!nf_ct_is_untracked(ct)) + *size += nfq_ct->build_size(ct); + else + ct = NULL; + } + return ct; +} + +struct nf_conn * +nfqnl_ct_parse(const struct sk_buff *skb, const struct nlattr *attr, + enum ip_conntrack_info *ctinfo) +{ + struct nfq_ct_hook *nfq_ct; + struct nf_conn *ct; + + /* rcu_read_lock()ed by __nf_queue already. */ + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return NULL; + + ct = nf_ct_get(skb, ctinfo); + if (ct && !nf_ct_is_untracked(ct)) + nfq_ct->parse(attr, ct); + + return ct; +} + +int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + struct nfq_ct_hook *nfq_ct; + struct nlattr *nest_parms; + u_int32_t tmp; + + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return 0; + + nest_parms = nla_nest_start(skb, NFQA_CT | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + if (nfq_ct->build(skb, ct) < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + + tmp = ctinfo; + if (nla_put_be32(skb, NFQA_CT_INFO, htonl(tmp))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int diff) +{ + struct nfq_ct_hook *nfq_ct; + + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return; + + if ((ct->status & IPS_NAT_MASK) && diff) + nfq_ct->seq_adjust(skb, ct, ctinfo, diff); +} -- cgit v1.2.3 From 9345d40c580d0f3dfc040add0e6371b1a629c1cc Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Fri, 15 Jun 2012 10:36:42 +0300 Subject: Bluetooth: Use AUTO_OFF constant in jiffies Move AUTO_OFF_TIMEOUT to other constants changing name to HCI_AUTO_OFF_TIMEOUT and convert to jiffies. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 3 ++- net/bluetooth/hci_core.c | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 7dcd3495edde..ccd723e0f783 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -142,8 +142,9 @@ enum { #define HCI_DISCONN_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ #define HCI_PAIRING_TIMEOUT msecs_to_jiffies(60000) /* 60 seconds */ #define HCI_INIT_TIMEOUT msecs_to_jiffies(10000) /* 10 seconds */ -#define HCI_CMD_TIMEOUT msecs_to_jiffies(1000) /* 1 seconds */ +#define HCI_CMD_TIMEOUT msecs_to_jiffies(1000) /* 1 second */ #define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */ +#define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ /* HCI data types */ #define HCI_COMMAND_PKT 0x01 diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a2bdf936ed46..32dcb09cdb5d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -33,8 +33,6 @@ #include #include -#define AUTO_OFF_TIMEOUT 2000 - static void hci_rx_work(struct work_struct *work); static void hci_cmd_work(struct work_struct *work); static void hci_tx_work(struct work_struct *work); @@ -1083,8 +1081,7 @@ static void hci_power_on(struct work_struct *work) return; if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) - schedule_delayed_work(&hdev->power_off, - msecs_to_jiffies(AUTO_OFF_TIMEOUT)); + schedule_delayed_work(&hdev->power_off, HCI_AUTO_OFF_TIMEOUT); if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) mgmt_index_added(hdev); -- cgit v1.2.3 From 674147e21195a496164e1c7ff70d0f0b45235f25 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Jun 2012 05:25:46 +0200 Subject: netfilter: fix missing symbols if CONFIG_NETFILTER_NETLINK_QUEUE_CT unset ERROR: "nfqnl_ct_parse" [net/netfilter/nfnetlink_queue.ko] undefined! ERROR: "nfqnl_ct_seq_adjust" [net/netfilter/nfnetlink_queue.ko] undefined! ERROR: "nfqnl_ct_put" [net/netfilter/nfnetlink_queue.ko] undefined! ERROR: "nfqnl_ct_get" [net/netfilter/nfnetlink_queue.ko] undefined! We have to use CONFIG_NETFILTER_NETLINK_QUEUE_CT in include/net/netfilter/nfnetlink_queue.h, not CONFIG_NF_CONNTRACK. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/net/netfilter/nfnetlink_queue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nfnetlink_queue.h b/include/net/netfilter/nfnetlink_queue.h index 9f8095c108e4..86267a529514 100644 --- a/include/net/netfilter/nfnetlink_queue.h +++ b/include/net/netfilter/nfnetlink_queue.h @@ -5,7 +5,7 @@ struct nf_conn; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size, enum ip_conntrack_info *ctinfo); struct nf_conn *nfqnl_ct_parse(const struct sk_buff *skb, -- cgit v1.2.3 From f9242b6b28d61295f2bf7e8adfb1060b382e5381 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Jun 2012 18:56:21 -0700 Subject: inet: Sanitize inet{,6} protocol demux. Don't pretend that inet_protos[] and inet6_protos[] are hashes, thay are just a straight arrays. Remove all unnecessary hash masking. Document MAX_INET_PROTOS. Use RAW_HTABLE_SIZE when appropriate. Reported-by: Ben Hutchings Signed-off-by: David S. Miller --- include/net/protocol.h | 7 +++++-- net/ipv4/af_inet.c | 26 ++++++++++++-------------- net/ipv4/icmp.c | 9 ++++----- net/ipv4/ip_input.c | 5 ++--- net/ipv4/protocol.c | 8 +++----- net/ipv6/icmp.c | 7 ++----- net/ipv6/ip6_input.c | 9 +++------ net/ipv6/protocol.c | 8 +++----- net/ipv6/raw.c | 4 ++-- 9 files changed, 36 insertions(+), 47 deletions(-) (limited to 'include/net') diff --git a/include/net/protocol.h b/include/net/protocol.h index 875f4895b033..a1b1b530c338 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -29,8 +29,11 @@ #include #endif -#define MAX_INET_PROTOS 256 /* Must be a power of 2 */ - +/* This is one larger than the largest protocol value that can be + * found in an ipv4 or ipv6 header. Since in both cases the protocol + * value is presented in a __u8, this is defined to be 256. + */ +#define MAX_INET_PROTOS 256 /* This is used to register protocols. */ struct net_protocol { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e4e8e00a2c91..85a3b1763136 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -242,20 +242,18 @@ void build_ehash_secret(void) } EXPORT_SYMBOL(build_ehash_secret); -static inline int inet_netns_ok(struct net *net, int protocol) +static inline int inet_netns_ok(struct net *net, __u8 protocol) { - int hash; const struct net_protocol *ipprot; if (net_eq(net, &init_net)) return 1; - hash = protocol & (MAX_INET_PROTOS - 1); - ipprot = rcu_dereference(inet_protos[hash]); - - if (ipprot == NULL) + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot == NULL) { /* raw IP is OK */ return 1; + } return ipprot->netns_ok; } @@ -1216,8 +1214,8 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); static int inet_gso_send_check(struct sk_buff *skb) { - const struct iphdr *iph; const struct net_protocol *ops; + const struct iphdr *iph; int proto; int ihl; int err = -EINVAL; @@ -1236,7 +1234,7 @@ static int inet_gso_send_check(struct sk_buff *skb) __skb_pull(skb, ihl); skb_reset_transport_header(skb); iph = ip_hdr(skb); - proto = iph->protocol & (MAX_INET_PROTOS - 1); + proto = iph->protocol; err = -EPROTONOSUPPORT; rcu_read_lock(); @@ -1253,8 +1251,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - struct iphdr *iph; const struct net_protocol *ops; + struct iphdr *iph; int proto; int ihl; int id; @@ -1286,7 +1284,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb_reset_transport_header(skb); iph = ip_hdr(skb); id = ntohs(iph->id); - proto = iph->protocol & (MAX_INET_PROTOS - 1); + proto = iph->protocol; segs = ERR_PTR(-EPROTONOSUPPORT); rcu_read_lock(); @@ -1340,7 +1338,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, goto out; } - proto = iph->protocol & (MAX_INET_PROTOS - 1); + proto = iph->protocol; rcu_read_lock(); ops = rcu_dereference(inet_protos[proto]); @@ -1398,11 +1396,11 @@ out: static int inet_gro_complete(struct sk_buff *skb) { - const struct net_protocol *ops; + __be16 newlen = htons(skb->len - skb_network_offset(skb)); struct iphdr *iph = ip_hdr(skb); - int proto = iph->protocol & (MAX_INET_PROTOS - 1); + const struct net_protocol *ops; + int proto = iph->protocol; int err = -ENOSYS; - __be16 newlen = htons(skb->len - skb_network_offset(skb)); csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e1caa1abe5d1..49a74cc79dc8 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -637,12 +637,12 @@ EXPORT_SYMBOL(icmp_send); static void icmp_unreach(struct sk_buff *skb) { + const struct net_protocol *ipprot; const struct iphdr *iph; struct icmphdr *icmph; - int hash, protocol; - const struct net_protocol *ipprot; - u32 info = 0; struct net *net; + u32 info = 0; + int protocol; net = dev_net(skb_dst(skb)->dev); @@ -731,9 +731,8 @@ static void icmp_unreach(struct sk_buff *skb) */ raw_icmp_error(skb, protocol, info); - hash = protocol & (MAX_INET_PROTOS - 1); rcu_read_lock(); - ipprot = rcu_dereference(inet_protos[hash]); + ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, info); rcu_read_unlock(); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 8590144ca330..c4fe1d271131 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -198,14 +198,13 @@ static int ip_local_deliver_finish(struct sk_buff *skb) rcu_read_lock(); { int protocol = ip_hdr(skb)->protocol; - int hash, raw; const struct net_protocol *ipprot; + int raw; resubmit: raw = raw_local_deliver(skb, protocol); - hash = protocol & (MAX_INET_PROTOS - 1); - ipprot = rcu_dereference(inet_protos[hash]); + ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot != NULL) { int ret; diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 9ae5c01cd0b2..8918eff1426d 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -36,9 +36,7 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { - int hash = protocol & (MAX_INET_PROTOS - 1); - - return !cmpxchg((const struct net_protocol **)&inet_protos[hash], + return !cmpxchg((const struct net_protocol **)&inet_protos[protocol], NULL, prot) ? 0 : -1; } EXPORT_SYMBOL(inet_add_protocol); @@ -49,9 +47,9 @@ EXPORT_SYMBOL(inet_add_protocol); int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) { - int ret, hash = protocol & (MAX_INET_PROTOS - 1); + int ret; - ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash], + ret = (cmpxchg((const struct net_protocol **)&inet_protos[protocol], prot, NULL) == prot) ? 0 : -1; synchronize_net(); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 5247d5c211f9..c7da1422cbde 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -600,9 +600,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { const struct inet6_protocol *ipprot; int inner_offset; - int hash; - u8 nexthdr; __be16 frag_off; + u8 nexthdr; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) return; @@ -629,10 +628,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) --ANK (980726) */ - hash = nexthdr & (MAX_INET_PROTOS - 1); - rcu_read_lock(); - ipprot = rcu_dereference(inet6_protos[hash]); + ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, NULL, type, code, inner_offset, info); rcu_read_unlock(); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 21a15dfe4a9e..5ab923e51af3 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -168,13 +168,12 @@ drop: static int ip6_input_finish(struct sk_buff *skb) { + struct net *net = dev_net(skb_dst(skb)->dev); const struct inet6_protocol *ipprot; + struct inet6_dev *idev; unsigned int nhoff; int nexthdr; bool raw; - u8 hash; - struct inet6_dev *idev; - struct net *net = dev_net(skb_dst(skb)->dev); /* * Parse extension headers @@ -189,9 +188,7 @@ resubmit: nexthdr = skb_network_header(skb)[nhoff]; raw = raw6_local_deliver(skb, nexthdr); - - hash = nexthdr & (MAX_INET_PROTOS - 1); - if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { + if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) { int ret; if (ipprot->flags & INET6_PROTO_FINAL) { diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 9a7978fdc02a..053082dfc93e 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -29,9 +29,7 @@ const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) { - int hash = protocol & (MAX_INET_PROTOS - 1); - - return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash], + return !cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], NULL, prot) ? 0 : -1; } EXPORT_SYMBOL(inet6_add_protocol); @@ -42,9 +40,9 @@ EXPORT_SYMBOL(inet6_add_protocol); int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol) { - int ret, hash = protocol & (MAX_INET_PROTOS - 1); + int ret; - ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash], + ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], prot, NULL) == prot) ? 0 : -1; synchronize_net(); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 43b0042f15f4..b5c1dcb27737 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -165,7 +165,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) saddr = &ipv6_hdr(skb)->saddr; daddr = saddr + 1; - hash = nexthdr & (MAX_INET_PROTOS - 1); + hash = nexthdr & (RAW_HTABLE_SIZE - 1); read_lock(&raw_v6_hashinfo.lock); sk = sk_head(&raw_v6_hashinfo.ht[hash]); @@ -229,7 +229,7 @@ bool raw6_local_deliver(struct sk_buff *skb, int nexthdr) { struct sock *raw_sk; - raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]); + raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]); if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) raw_sk = NULL; -- cgit v1.2.3 From 41063e9dd11956f2d285e12e4342e1d232ba0ea2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Jun 2012 21:22:05 -0700 Subject: ipv4: Early TCP socket demux. Input packet processing for local sockets involves two major demuxes. One for the route and one for the socket. But we can optimize this down to one demux for certain kinds of local sockets. Currently we only do this for established TCP sockets, but it could at least in theory be expanded to other kinds of connections. If a TCP socket is established then it's identity is fully specified. This means that whatever input route was used during the three-way handshake must work equally well for the rest of the connection since the keys will not change. Once we move to established state, we cache the receive packet's input route to use later. Like the existing cached route in sk->sk_dst_cache used for output packets, we have to check for route invalidations using dst->obsolete and dst->ops->check(). Early demux occurs outside of a socket locked section, so when a route invalidation occurs we defer the fixup of sk->sk_rx_dst until we are actually inside of established state packet processing and thus have the socket locked. Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 4 ++-- include/net/protocol.h | 1 + include/net/sock.h | 2 ++ include/net/tcp.h | 1 + net/core/sock.c | 5 +++++ net/ipv4/af_inet.c | 18 +++++++++-------- net/ipv4/ip_input.c | 39 ++++++++++++++++++++++++------------ net/ipv4/tcp_input.c | 16 ++++++++++++++- net/ipv4/tcp_ipv4.c | 46 +++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_minisocks.c | 2 ++ 10 files changed, 110 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 808fc5f76b03..54be0287eb98 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -379,10 +379,10 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, const __be16 sport, const __be16 dport) { - struct sock *sk; + struct sock *sk = skb_steal_sock(skb); const struct iphdr *iph = ip_hdr(skb); - if (unlikely(sk = skb_steal_sock(skb))) + if (sk) return sk; else return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, diff --git a/include/net/protocol.h b/include/net/protocol.h index a1b1b530c338..967b926cbfb1 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -37,6 +37,7 @@ /* This is used to register protocols. */ struct net_protocol { + int (*early_demux)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); diff --git a/include/net/sock.h b/include/net/sock.h index 4a4521699563..87b424ae750a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -319,6 +319,7 @@ struct sock { unsigned long sk_flags; struct dst_entry *sk_dst_cache; spinlock_t sk_dst_lock; + struct dst_entry *sk_rx_dst; atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; @@ -1426,6 +1427,7 @@ extern struct sk_buff *sock_rmalloc(struct sock *sk, gfp_t priority); extern void sock_wfree(struct sk_buff *skb); extern void sock_rfree(struct sk_buff *skb); +extern void sock_edemux(struct sk_buff *skb); extern int sock_setsockopt(struct socket *sock, int level, int op, char __user *optval, diff --git a/include/net/tcp.h b/include/net/tcp.h index 9332f342259a..6660ffc4963d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -325,6 +325,7 @@ extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); +extern int tcp_v4_early_demux(struct sk_buff *skb); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); diff --git a/net/core/sock.c b/net/core/sock.c index 9e5b71fda6ec..929bdcc2383b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1465,6 +1465,11 @@ void sock_rfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_rfree); +void sock_edemux(struct sk_buff *skb) +{ + sock_put(skb->sk); +} +EXPORT_SYMBOL(sock_edemux); int sock_i_uid(struct sock *sk) { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 85a3b1763136..07a02f6e9696 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -157,6 +157,7 @@ void inet_sock_destruct(struct sock *sk) kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); + dst_release(sk->sk_rx_dst); sk_refcnt_debug_dec(sk); } EXPORT_SYMBOL(inet_sock_destruct); @@ -1518,14 +1519,15 @@ static const struct net_protocol igmp_protocol = { #endif static const struct net_protocol tcp_protocol = { - .handler = tcp_v4_rcv, - .err_handler = tcp_v4_err, - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, - .no_policy = 1, - .netns_ok = 1, + .early_demux = tcp_v4_early_demux, + .handler = tcp_v4_rcv, + .err_handler = tcp_v4_err, + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, + .no_policy = 1, + .netns_ok = 1, }; static const struct net_protocol udp_protocol = { diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c4fe1d271131..93b092c9a394 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -323,19 +323,32 @@ static int ip_rcv_finish(struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (skb_dst(skb) == NULL) { - int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); - if (unlikely(err)) { - if (err == -EHOSTUNREACH) - IP_INC_STATS_BH(dev_net(skb->dev), - IPSTATS_MIB_INADDRERRORS); - else if (err == -ENETUNREACH) - IP_INC_STATS_BH(dev_net(skb->dev), - IPSTATS_MIB_INNOROUTES); - else if (err == -EXDEV) - NET_INC_STATS_BH(dev_net(skb->dev), - LINUX_MIB_IPRPFILTER); - goto drop; + const struct net_protocol *ipprot; + int protocol = iph->protocol; + int err; + + rcu_read_lock(); + ipprot = rcu_dereference(inet_protos[protocol]); + err = -ENOENT; + if (ipprot && ipprot->early_demux) + err = ipprot->early_demux(skb); + rcu_read_unlock(); + + if (err) { + err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev); + if (unlikely(err)) { + if (err == -EHOSTUNREACH) + IP_INC_STATS_BH(dev_net(skb->dev), + IPSTATS_MIB_INADDRERRORS); + else if (err == -ENETUNREACH) + IP_INC_STATS_BH(dev_net(skb->dev), + IPSTATS_MIB_INNOROUTES); + else if (err == -EXDEV) + NET_INC_STATS_BH(dev_net(skb->dev), + LINUX_MIB_IPRPFILTER); + goto drop; + } } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b224eb8bce8b..8416f8a68e65 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5518,6 +5518,18 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); int res; + if (sk->sk_rx_dst) { + struct dst_entry *dst = sk->sk_rx_dst; + if (unlikely(dst->obsolete)) { + if (dst->ops->check(dst, 0) == NULL) { + dst_release(dst); + sk->sk_rx_dst = NULL; + } + } + } + if (unlikely(sk->sk_rx_dst == NULL)) + sk->sk_rx_dst = dst_clone(skb_dst(skb)); + /* * Header prediction. * The code loosely follows the one in the famous @@ -5729,8 +5741,10 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_ESTABLISHED); - if (skb != NULL) + if (skb != NULL) { + sk->sk_rx_dst = dst_clone(skb_dst(skb)); security_inet_conn_established(sk, skb); + } /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fda2ca17135e..13857df1dae1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1671,6 +1671,52 @@ csum_err: } EXPORT_SYMBOL(tcp_v4_do_rcv); +int tcp_v4_early_demux(struct sk_buff *skb) +{ + struct net *net = dev_net(skb->dev); + const struct iphdr *iph; + const struct tcphdr *th; + struct sock *sk; + int err; + + err = -ENOENT; + if (skb->pkt_type != PACKET_HOST) + goto out_err; + + if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) + goto out_err; + + iph = ip_hdr(skb); + th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); + + if (th->doff < sizeof(struct tcphdr) / 4) + goto out_err; + + if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4)) + goto out_err; + + sk = __inet_lookup_established(net, &tcp_hashinfo, + iph->saddr, th->source, + iph->daddr, th->dest, + skb->dev->ifindex); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk->sk_state != TCP_TIME_WAIT) { + struct dst_entry *dst = sk->sk_rx_dst; + if (dst) + dst = dst_check(dst, 0); + if (dst) { + skb_dst_set_noref(skb, dst); + err = 0; + } + } + } + +out_err: + return err; +} + /* * From tcp_input.c */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index cb015317c9f7..72b7c63b1a39 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -445,6 +445,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; + newsk->sk_rx_dst = dst_clone(skb_dst(skb)); + /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to * copy any s_data_payload stored at the original socket. -- cgit v1.2.3 From 66572cfc30a4b764150c83ee5d842a3ce17991c9 Mon Sep 17 00:00:00 2001 From: Victor Goldenshtein Date: Thu, 21 Jun 2012 10:56:46 +0300 Subject: mac80211: add command to get current rssi Get current rssi (in dBm) from the driver/FW. Instead of reporting the signal received in the last rx packet, which might be inaccurate if rx traffic is low and beacon filtering is enabled, get the signal from the driver/FW. Signed-off-by: Victor Goldenshtein Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++++ net/mac80211/cfg.c | 21 +++++++++++++-------- net/mac80211/driver-ops.h | 15 +++++++++++++++ net/mac80211/driver-trace.h | 26 ++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d152f54064fd..f11c2f8b00c9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2249,6 +2249,9 @@ enum ieee80211_rate_control_changed { * @get_et_strings: Ethtool API to get a set of strings to describe stats * and perhaps other supported types of ethtool data-sets. * + * @get_rssi: Get current signal strength in dBm, the function is optional + * and can sleep. + * */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -2388,6 +2391,8 @@ struct ieee80211_ops { void (*get_et_strings)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 sset, u8 *data); + int (*get_rssi)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_sta *sta, s8 *rssi_dbm); }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 03aff23c70fd..d0c8f78115cb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -353,6 +353,7 @@ void sta_set_rate_info_tx(struct sta_info *sta, static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; struct timespec uptime; sinfo->generation = sdata->local->sta_generation; @@ -388,7 +389,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; - sinfo->signal = (s8)sta->last_signal; + if (!local->ops->get_rssi || + drv_get_rssi(local, sdata, &sta->sta, &sinfo->signal)) + sinfo->signal = (s8)sta->last_signal; sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } @@ -517,7 +520,7 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, * network device. */ - rcu_read_lock(); + mutex_lock(&local->sta_mtx); if (sdata->vif.type == NL80211_IFTYPE_STATION) { sta = sta_info_get_bss(sdata, sdata->u.mgd.bssid); @@ -546,7 +549,7 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, data[i] = (u8)sinfo.signal_avg; i++; } else { - list_for_each_entry_rcu(sta, &local->sta_list, list) { + list_for_each_entry(sta, &local->sta_list, list) { /* Make sure this station belongs to the proper dev */ if (sta->sdata->dev != dev) continue; @@ -603,7 +606,7 @@ do_survey: else data[i++] = -1LL; - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); if (WARN_ON(i != STA_STATS_LEN)) return; @@ -629,10 +632,11 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get_by_idx(sdata, idx); if (sta) { @@ -641,7 +645,7 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, sta_set_sinfo(sta, sinfo); } - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); return ret; } @@ -658,10 +662,11 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mac); if (sta) { @@ -669,7 +674,7 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, sta_set_sinfo(sta, sinfo); } - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); return ret; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 6d33a0c743ab..933026949df9 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -845,4 +845,19 @@ drv_allow_buffered_frames(struct ieee80211_local *local, more_data); trace_drv_return_void(local); } + +static inline int drv_get_rssi(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + s8 *rssi_dbm) +{ + int ret; + + might_sleep(); + + ret = local->ops->get_rssi(&local->hw, &sdata->vif, sta, rssi_dbm); + trace_drv_get_rssi(local, sta, *rssi_dbm, ret); + + return ret; +} #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 6de00b2c268c..a0f7d357884d 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -1218,6 +1218,32 @@ DEFINE_EVENT(release_evt, drv_allow_buffered_frames, TP_ARGS(local, sta, tids, num_frames, reason, more_data) ); +TRACE_EVENT(drv_get_rssi, + TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, + s8 rssi, int ret), + + TP_ARGS(local, sta, rssi, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(s8, rssi) + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->rssi = rssi; + __entry->ret = ret; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT " rssi:%d ret:%d", + LOCAL_PR_ARG, STA_PR_ARG, __entry->rssi, __entry->ret + ) +); + /* * Tracing for API calls that drivers call. */ -- cgit v1.2.3 From 6648bd7e0e62c0c8c03b15e00c9e7015e232feff Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 21 Jun 2012 13:58:31 +0000 Subject: ipv4: Add sysctl knob to control early socket demux This change is meant to add a control for disabling early socket demux. The main motivation behind this patch is to provide an option to disable the feature as it adds an additional cost to routing that reduces overall throughput by up to 5%. For example one of my systems went from 12.1Mpps to 11.6 after the early socket demux was added. It looks like the reason for the regression is that we are now having to perform two lookups, first the one for an established socket, and then the one for the routing table. By adding this patch and toggling the value for ip_early_demux to 0 I am able to get back to the 12.1Mpps I was previously seeing. [ Move local variables in ip_rcv_finish() down into the basic block in which they are actually used. -DaveM ] Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/sysctl.h | 1 + include/net/ip.h | 3 +++ kernel/sysctl_binary.c | 2 ++ net/ipv4/ip_input.c | 22 +++++++++++++--------- net/ipv4/sysctl_net_ipv4.c | 7 +++++++ 5 files changed, 26 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index c34b4c82b0dc..20825e5f433f 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -425,6 +425,7 @@ enum NET_TCP_ALLOWED_CONG_CONTROL=123, NET_TCP_MAX_SSTHRESH=124, NET_TCP_FRTO_RESPONSE=125, + NET_IPV4_EARLY_DEMUX=126, }; enum { diff --git a/include/net/ip.h b/include/net/ip.h index 83e0619f59d0..50841bd6f10e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -210,6 +210,9 @@ extern int inet_peer_threshold; extern int inet_peer_minttl; extern int inet_peer_maxttl; +/* From ip_input.c */ +extern int sysctl_ip_early_demux; + /* From ip_output.c */ extern int sysctl_ip_dynaddr; diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index a650694883a1..6a3cf8253aae 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -415,6 +415,8 @@ static const struct bin_table bin_net_ipv4_table[] = { { CTL_INT, NET_IPV4_IPFRAG_SECRET_INTERVAL, "ipfrag_secret_interval" }, /* NET_IPV4_IPFRAG_MAX_DIST "ipfrag_max_dist" no longer used */ + { CTL_INT, NET_IPV4_EARLY_DEMUX, "ip_early_demux" }, + { CTL_INT, 2088 /* NET_IPQ_QMAX */, "ip_queue_maxlen" }, /* NET_TCP_DEFAULT_WIN_SCALE unused */ diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 93b092c9a394..bca25179cdb9 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -313,6 +313,8 @@ drop: return true; } +int sysctl_ip_early_demux __read_mostly = 1; + static int ip_rcv_finish(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); @@ -323,16 +325,18 @@ static int ip_rcv_finish(struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (skb_dst(skb) == NULL) { - const struct net_protocol *ipprot; - int protocol = iph->protocol; - int err; + int err = -ENOENT; - rcu_read_lock(); - ipprot = rcu_dereference(inet_protos[protocol]); - err = -ENOENT; - if (ipprot && ipprot->early_demux) - err = ipprot->early_demux(skb); - rcu_read_unlock(); + if (sysctl_ip_early_demux) { + const struct net_protocol *ipprot; + int protocol = iph->protocol; + + rcu_read_lock(); + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot && ipprot->early_demux) + err = ipprot->early_demux(skb); + rcu_read_unlock(); + } if (err) { err = ip_route_input_noref(skb, iph->daddr, iph->saddr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ef32956ed655..12aa0c5867c4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -300,6 +300,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "ip_early_demux", + .data = &sysctl_ip_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "ip_dynaddr", .data = &sysctl_ip_dynaddr, -- cgit v1.2.3 From 7586eceb0abc0ea1c2b023e3e5d4dfd4ff40930a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Jun 2012 05:02:19 +0000 Subject: ipv4: tcp: dont cache output dst for syncookies Don't cache output dst for syncookies, as this adds pressure on IP route cache and rcu subsystem for no gain. Signed-off-by: Eric Dumazet Cc: Hans Schillstrom Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/flow.h | 1 + include/net/inet_connection_sock.h | 3 ++- net/dccp/ipv4.c | 2 +- net/ipv4/inet_connection_sock.c | 8 ++++++-- net/ipv4/route.c | 5 ++++- net/ipv4/tcp_ipv4.c | 12 +++++++----- 6 files changed, 21 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 6c469dbdb917..bd524f598561 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -22,6 +22,7 @@ struct flowi_common { #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_PRECOW_METRICS 0x02 #define FLOWI_FLAG_CAN_SLEEP 0x04 +#define FLOWI_FLAG_RT_NOCACHE 0x08 __u32 flowic_secid; }; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index e1b7734c456f..af3c743a40e4 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -251,7 +251,8 @@ extern int inet_csk_get_port(struct sock *sk, unsigned short snum); extern struct dst_entry* inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req); + const struct request_sock *req, + bool nocache); extern struct dst_entry* inet_csk_route_child_sock(struct sock *sk, struct sock *newsk, const struct request_sock *req); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 07f5579ca756..3eb76b5f221a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -504,7 +504,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, struct dst_entry *dst; struct flowi4 fl4; - dst = inet_csk_route_req(sk, &fl4, req); + dst = inet_csk_route_req(sk, &fl4, req, false); if (dst == NULL) goto out; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f9ee7417f6a0..034ddbe42adf 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -368,17 +368,21 @@ EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); struct dst_entry *inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req) + const struct request_sock *req, + bool nocache) { struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options_rcu *opt = inet_rsk(req)->opt; struct net *net = sock_net(sk); + int flags = inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS; + if (nocache) + flags |= FLOWI_FLAG_RT_NOCACHE; flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS, + flags, (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(fl4)); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a91f6d33804c..8d62d85e68dc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1156,7 +1156,7 @@ restart: candp = NULL; now = jiffies; - if (!rt_caching(dev_net(rt->dst.dev))) { + if (!rt_caching(dev_net(rt->dst.dev)) || (rt->dst.flags & DST_NOCACHE)) { /* * If we're not caching, just tell the caller we * were successful and don't touch the route. The @@ -2582,6 +2582,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rt_set_nexthop(rth, fl4, res, fi, type, 0); + if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) + rth->dst.flags |= DST_NOCACHE; + return rth; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 21e22a00481a..b52934f5334e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -825,7 +825,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct request_values *rvp, - u16 queue_mapping) + u16 queue_mapping, + bool nocache) { const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; @@ -833,7 +834,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct sk_buff * skb; /* First, grab a route. */ - if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) + if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL) return -1; skb = tcp_make_synack(sk, dst, req, rvp); @@ -855,7 +856,7 @@ static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - return tcp_v4_send_synack(sk, NULL, req, rvp, 0); + return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); } /* @@ -1388,7 +1389,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && + (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && fl4.daddr == saddr && (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { inet_peer_refcheck(peer); @@ -1424,7 +1425,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (tcp_v4_send_synack(sk, dst, req, (struct request_values *)&tmp_ext, - skb_get_queue_mapping(skb)) || + skb_get_queue_mapping(skb), + want_cookie) || want_cookie) goto drop_and_free; -- cgit v1.2.3 From bdcbd8e0e3ffdad32b14b6373e67bfcf5fd3f002 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Jun 2012 11:29:50 +0200 Subject: mac80211: clean up debugging There are a few things that make the logging and debugging in mac80211 less useful than it should be right now: * a lot of messages should be pr_info, not pr_debug * wholesale use of pr_debug makes it require *both* Kconfig and dynamic configuration * there are still a lot of ifdefs * the style is very inconsistent, sometimes the sdata->name is printed in front Clean up everything, introducing new macros and separating out the station MLME debugging into a new Kconfig symbol. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 24 ----- net/mac80211/Kconfig | 32 +++++-- net/mac80211/Makefile | 2 +- net/mac80211/agg-rx.c | 34 +++---- net/mac80211/agg-tx.c | 72 ++++++++------- net/mac80211/cfg.c | 9 +- net/mac80211/debug.h | 152 +++++++++++++++++++++++++++++++ net/mac80211/debugfs_netdev.c | 5 +- net/mac80211/ht.c | 10 +-- net/mac80211/ibss.c | 89 +++++++++--------- net/mac80211/ieee80211_i.h | 1 + net/mac80211/iface.c | 13 +-- net/mac80211/key.c | 4 +- net/mac80211/mesh.c | 4 - net/mac80211/mesh_hwmp.c | 42 ++++----- net/mac80211/mesh_pathtbl.c | 30 +++---- net/mac80211/mesh_plink.c | 61 +++++++------ net/mac80211/mesh_sync.c | 47 +++++----- net/mac80211/mlme.c | 203 +++++++++++++++++++----------------------- net/mac80211/rx.c | 40 +++------ net/mac80211/sta_info.c | 44 ++++----- net/mac80211/status.c | 11 +-- net/mac80211/tx.c | 43 ++++----- 23 files changed, 519 insertions(+), 453 deletions(-) create mode 100644 net/mac80211/debug.h (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f11c2f8b00c9..510d852d5222 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3842,28 +3842,4 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_vif *vif, */ int ieee80211_ave_rssi(struct ieee80211_vif *vif); -/* Extra debugging macros */ - -#ifdef CONFIG_MAC80211_HT_DEBUG -#define ht_vdbg(fmt, ...) \ - pr_debug(fmt, ##__VA_ARGS__) -#else -#define ht_vdbg(fmt, ...) \ -do { \ - if (0) \ - pr_debug(fmt, ##__VA_ARGS__); \ -} while (0) -#endif - -#ifdef CONFIG_MAC80211_IBSS_DEBUG -#define ibss_vdbg(fmt, ...) \ - pr_debug(fmt, ##__VA_ARGS__) -#else -#define ibss_vdbg(fmt, ...) \ -do { \ - if (0) \ - pr_debug(fmt, ##__VA_ARGS__); \ -} while (0) -#endif - #endif /* MAC80211_H */ diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 323aa19a39d5..7475e266eb4e 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -140,6 +140,26 @@ config MAC80211_VERBOSE_DEBUG Do not select this option. +config MAC80211_MLME_DEBUG + bool "Verbose managed MLME output" + depends on MAC80211_DEBUG_MENU + ---help--- + Selecting this option causes mac80211 to print out + debugging messages for the managed-mode MLME. It + should not be selected on production systems as some + of the messages are remotely triggerable. + + Do not select this option. + +config MAC80211_STA_DEBUG + bool "Verbose station debugging" + depends on MAC80211_DEBUG_MENU + ---help--- + Selecting this option causes mac80211 to print out + debugging messages for station addition/removal. + + Do not select this option. + config MAC80211_HT_DEBUG bool "Verbose HT debugging" depends on MAC80211_DEBUG_MENU @@ -163,7 +183,7 @@ config MAC80211_IBSS_DEBUG Do not select this option. -config MAC80211_VERBOSE_PS_DEBUG +config MAC80211_PS_DEBUG bool "Verbose powersave mode debugging" depends on MAC80211_DEBUG_MENU ---help--- @@ -175,7 +195,7 @@ config MAC80211_VERBOSE_PS_DEBUG Do not select this option. -config MAC80211_VERBOSE_MPL_DEBUG +config MAC80211_MPL_DEBUG bool "Verbose mesh peer link debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH @@ -188,7 +208,7 @@ config MAC80211_VERBOSE_MPL_DEBUG Do not select this option. -config MAC80211_VERBOSE_MPATH_DEBUG +config MAC80211_MPATH_DEBUG bool "Verbose mesh path debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH @@ -201,7 +221,7 @@ config MAC80211_VERBOSE_MPATH_DEBUG Do not select this option. -config MAC80211_VERBOSE_MHWMP_DEBUG +config MAC80211_MHWMP_DEBUG bool "Verbose mesh HWMP routing debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH @@ -214,7 +234,7 @@ config MAC80211_VERBOSE_MHWMP_DEBUG Do not select this option. -config MAC80211_VERBOSE_MESH_SYNC_DEBUG +config MAC80211_MESH_SYNC_DEBUG bool "Verbose mesh mesh synchronization debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH @@ -225,7 +245,7 @@ config MAC80211_VERBOSE_MESH_SYNC_DEBUG Do not select this option. -config MAC80211_VERBOSE_TDLS_DEBUG +config MAC80211_TDLS_DEBUG bool "Verbose TDLS debugging" depends on MAC80211_DEBUG_MENU ---help--- diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 2b1470bac178..231ffa02e496 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -58,4 +58,4 @@ mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y) mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y) -ccflags-y += -D__CHECK_ENDIAN__ +ccflags-y += -D__CHECK_ENDIAN__ -DDEBUG diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 32ef11d69798..186d9919b043 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -74,15 +74,17 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL); - ht_vdbg("Rx BA session stop requested for %pM tid %u %s reason: %d\n", - sta->sta.addr, tid, - initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", - (int)reason); + ht_dbg(sta->sdata, + "Rx BA session stop requested for %pM tid %u %s reason: %d\n", + sta->sta.addr, tid, + initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", + (int)reason); if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, &sta->sta, tid, NULL, 0)) - pr_debug("HW problem - can not stop rx aggregation for tid %d\n", - tid); + sdata_info(sta->sdata, + "HW problem - can not stop rx aggregation for tid %d\n", + tid); /* check if this is a self generated aggregation halt */ if (initiator == WLAN_BACK_RECIPIENT && tx) @@ -157,7 +159,7 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) } rcu_read_unlock(); - ht_vdbg("rx session timer expired on tid %d\n", (u16)*ptid); + ht_dbg(sta->sdata, "rx session timer expired on tid %d\n", (u16)*ptid); set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); @@ -245,7 +247,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_REQUEST_DECLINED; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { - ht_vdbg("Suspend in progress - Denying ADDBA request\n"); + ht_dbg(sta->sdata, "Suspend in progress - Denying ADDBA request\n"); goto end_no_lock; } @@ -257,10 +259,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || (buf_size > IEEE80211_MAX_AMPDU_BUF)) { status = WLAN_STATUS_INVALID_QOS_PARAM; -#ifdef CONFIG_MAC80211_HT_DEBUG - net_dbg_ratelimited("AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n", - mgmt->sa, tid, ba_policy, buf_size); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_dbg_ratelimited(sta->sdata, + "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n", + mgmt->sa, tid, ba_policy, buf_size); goto end_no_lock; } /* determine default buffer size */ @@ -275,10 +276,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, mutex_lock(&sta->ampdu_mlme.mtx); if (sta->ampdu_mlme.tid_rx[tid]) { -#ifdef CONFIG_MAC80211_HT_DEBUG - net_dbg_ratelimited("unexpected AddBA Req from %pM on tid %u\n", - mgmt->sa, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_dbg_ratelimited(sta->sdata, + "unexpected AddBA Req from %pM on tid %u\n", + mgmt->sa, tid); /* delete existing Rx BA session on the same tid */ ___ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, @@ -317,7 +317,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, &sta->sta, tid, &start_seq_num, 0); - ht_vdbg("Rx A-MPDU request on tid %d result %d\n", tid, ret); + ht_dbg(sta->sdata, "Rx A-MPDU request on tid %d result %d\n", tid, ret); if (ret) { kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index da07f01cfe4d..5cc1bf7d8033 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -184,8 +184,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, spin_unlock_bh(&sta->lock); - ht_vdbg("Tx BA session stop requested for %pM tid %u\n", - sta->sta.addr, tid); + ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n", + sta->sta.addr, tid); del_timer_sync(&tid_tx->addba_resp_timer); del_timer_sync(&tid_tx->session_timer); @@ -251,12 +251,13 @@ static void sta_addba_resp_timer_expired(unsigned long data) if (!tid_tx || test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { rcu_read_unlock(); - ht_vdbg("timer expired on tid %d but we are not (or no longer) expecting addBA response there\n", - tid); + ht_dbg(sta->sdata, + "timer expired on tid %d but we are not (or no longer) expecting addBA response there\n", + tid); return; } - ht_vdbg("addBA response timer expired on tid %d\n", tid); + ht_dbg(sta->sdata, "addBA response timer expired on tid %d\n", tid); ieee80211_stop_tx_ba_session(&sta->sta, tid); rcu_read_unlock(); @@ -316,8 +317,9 @@ ieee80211_agg_splice_packets(struct ieee80211_sub_if_data *sdata, ieee80211_stop_queue_agg(sdata, tid); - if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" - " from the pending queue\n", tid)) + if (WARN(!tid_tx, + "TID %d gone but expected when splicing aggregates from the pending queue\n", + tid)) return; if (!skb_queue_empty(&tid_tx->pending)) { @@ -365,7 +367,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, &sta->sta, tid, &start_seq_num, 0); if (ret) { - ht_vdbg("BA request denied - HW unavailable for tid %d\n", tid); + ht_dbg(sdata, + "BA request denied - HW unavailable for tid %d\n", tid); spin_lock_bh(&sta->lock); ieee80211_agg_splice_packets(sdata, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); @@ -378,7 +381,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); - ht_vdbg("activated addBA response timer on tid %d\n", tid); + ht_dbg(sdata, "activated addBA response timer on tid %d\n", tid); spin_lock_bh(&sta->lock); sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; @@ -425,7 +428,7 @@ static void sta_tx_agg_session_timer_expired(unsigned long data) rcu_read_unlock(); - ht_vdbg("tx session timer expired on tid %d\n", (u16)*ptid); + ht_dbg(sta->sdata, "tx session timer expired on tid %d\n", (u16)*ptid); ieee80211_stop_tx_ba_session(&sta->sta, *ptid); } @@ -449,8 +452,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) return -EINVAL; - ht_vdbg("Open BA session requested for %pM tid %u\n", - pubsta->addr, tid); + ht_dbg(sdata, "Open BA session requested for %pM tid %u\n", + pubsta->addr, tid); if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_MESH_POINT && @@ -460,7 +463,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { - ht_vdbg("BA sessions blocked - Denying BA session request\n"); + ht_dbg(sdata, + "BA sessions blocked - Denying BA session request\n"); return -EINVAL; } @@ -478,8 +482,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, */ if (sta->sdata->vif.type == NL80211_IFTYPE_ADHOC && !sta->sta.ht_cap.ht_supported) { - ht_vdbg("BA request denied - IBSS STA %pM does not advertise HT support\n", - pubsta->addr); + ht_dbg(sdata, + "BA request denied - IBSS STA %pM does not advertise HT support\n", + pubsta->addr); return -EINVAL; } @@ -499,8 +504,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_BURST_RETRIES && time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] + HT_AGG_RETRIES_PERIOD)) { - ht_vdbg("BA request denied - waiting a grace period after %d failed requests on tid %u\n", - sta->ampdu_mlme.addba_req_num[tid], tid); + ht_dbg(sdata, + "BA request denied - waiting a grace period after %d failed requests on tid %u\n", + sta->ampdu_mlme.addba_req_num[tid], tid); ret = -EBUSY; goto err_unlock_sta; } @@ -508,8 +514,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, tid_tx = rcu_dereference_protected_tid_tx(sta, tid); /* check if the TID is not in aggregation flow already */ if (tid_tx || sta->ampdu_mlme.tid_start_tx[tid]) { - ht_vdbg("BA request denied - session is not idle on tid %u\n", - tid); + ht_dbg(sdata, + "BA request denied - session is not idle on tid %u\n", + tid); ret = -EAGAIN; goto err_unlock_sta; } @@ -564,7 +571,7 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, tid_tx = rcu_dereference_protected_tid_tx(sta, tid); - ht_vdbg("Aggregation is on for tid %d\n", tid); + ht_dbg(sta->sdata, "Aggregation is on for tid %d\n", tid); drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, @@ -598,7 +605,8 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) trace_api_start_tx_ba_cb(sdata, ra, tid); if (tid >= STA_TID_NUM) { - ht_vdbg("Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); + ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n", + tid, STA_TID_NUM); return; } @@ -606,7 +614,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) sta = sta_info_get_bss(sdata, ra); if (!sta) { mutex_unlock(&local->sta_mtx); - ht_vdbg("Could not find station: %pM\n", ra); + ht_dbg(sdata, "Could not find station: %pM\n", ra); return; } @@ -614,7 +622,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (WARN_ON(!tid_tx)) { - ht_vdbg("addBA was not requested!\n"); + ht_dbg(sdata, "addBA was not requested!\n"); goto unlock; } @@ -714,17 +722,18 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) trace_api_stop_tx_ba_cb(sdata, ra, tid); if (tid >= STA_TID_NUM) { - ht_vdbg("Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); + ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n", + tid, STA_TID_NUM); return; } - ht_vdbg("Stopping Tx BA session for %pM tid %d\n", ra, tid); + ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", ra, tid); mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, ra); if (!sta) { - ht_vdbg("Could not find station: %pM\n", ra); + ht_dbg(sdata, "Could not find station: %pM\n", ra); goto unlock; } @@ -733,7 +742,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { - ht_vdbg("unexpected callback to A-MPDU stop\n"); + ht_dbg(sdata, "unexpected callback to A-MPDU stop\n"); goto unlock_sta; } @@ -809,13 +818,13 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, goto out; if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { - ht_vdbg("wrong addBA response token, tid %d\n", tid); + ht_dbg(sta->sdata, "wrong addBA response token, tid %d\n", tid); goto out; } del_timer_sync(&tid_tx->addba_resp_timer); - ht_vdbg("switched off addBA timer for tid %d\n", tid); + ht_dbg(sta->sdata, "switched off addBA timer for tid %d\n", tid); /* * addba_resp_timer may have fired before we got here, and @@ -824,8 +833,9 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, */ if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { - ht_vdbg("got addBA resp for tid %d but we already gave up\n", - tid); + ht_dbg(sta->sdata, + "got addBA resp for tid %d but we already gave up\n", + tid); goto out; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d0c8f78115cb..7722a7336a58 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2771,9 +2771,8 @@ static int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, !sdata->u.mgd.associated) return -EINVAL; -#ifdef CONFIG_MAC80211_VERBOSE_TDLS_DEBUG - pr_debug("TDLS mgmt action %d peer %pM\n", action_code, peer); -#endif + tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n", + action_code, peer); skb = dev_alloc_skb(local->hw.extra_tx_headroom + max(sizeof(struct ieee80211_mgmt), @@ -2882,9 +2881,7 @@ static int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EINVAL; -#ifdef CONFIG_MAC80211_VERBOSE_TDLS_DEBUG - pr_debug("TDLS oper %d peer %pM\n", oper, peer); -#endif + tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer); switch (oper) { case NL80211_TDLS_ENABLE_LINK: diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h new file mode 100644 index 000000000000..6e6bbb9a9d41 --- /dev/null +++ b/net/mac80211/debug.h @@ -0,0 +1,152 @@ +#ifndef __MAC80211_DEBUG_H +#define __MAC80211_DEBUG_H + +#ifdef CONFIG_MAC80211_IBSS_DEBUG +#define MAC80211_IBSS_DEBUG 1 +#else +#define MAC80211_IBSS_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_PS_DEBUG +#define MAC80211_PS_DEBUG 1 +#else +#define MAC80211_PS_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_HT_DEBUG +#define MAC80211_HT_DEBUG 1 +#else +#define MAC80211_HT_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_MPL_DEBUG +#define MAC80211_MPL_DEBUG 1 +#else +#define MAC80211_MPL_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_MPATH_DEBUG +#define MAC80211_MPATH_DEBUG 1 +#else +#define MAC80211_MPATH_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_MHWMP_DEBUG +#define MAC80211_MHWMP_DEBUG 1 +#else +#define MAC80211_MHWMP_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_MESH_SYNC_DEBUG +#define MAC80211_MESH_SYNC_DEBUG 1 +#else +#define MAC80211_MESH_SYNC_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_TDLS_DEBUG +#define MAC80211_TDLS_DEBUG 1 +#else +#define MAC80211_TDLS_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_STA_DEBUG +#define MAC80211_STA_DEBUG 1 +#else +#define MAC80211_STA_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_MLME_DEBUG +#define MAC80211_MLME_DEBUG 1 +#else +#define MAC80211_MLME_DEBUG 0 +#endif + +#define _sdata_info(sdata, fmt, ...) \ +do { \ + pr_info("%s: " fmt, \ + (sdata)->name, ##__VA_ARGS__); \ +} while (0) + +#define _sdata_dbg(print, sdata, fmt, ...) \ +do { \ + if (print) \ + pr_debug("%s: " fmt, \ + (sdata)->name, ##__VA_ARGS__); \ +} while (0) + +#define _sdata_err(sdata, fmt, ...) \ +do { \ + pr_err("%s: " fmt, \ + (sdata)->name, ##__VA_ARGS__); \ +} while (0) + +#define _wiphy_dbg(print, wiphy, fmt, ...) \ +do { \ + if (print) \ + wiphy_dbg((wiphy), fmt, ##__VA_ARGS__); \ +} while (0) + +#define sdata_info(sdata, fmt, ...) \ + _sdata_info(sdata, fmt, ##__VA_ARGS__) +#define sdata_err(sdata, fmt, ...) \ + _sdata_err(sdata, fmt, ##__VA_ARGS__) +#define sdata_dbg(sdata, fmt, ...) \ + _sdata_dbg(1, sdata, fmt, ##__VA_ARGS__) + +#define ht_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_HT_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define ht_dbg_ratelimited(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_HT_DEBUG && net_ratelimit(), \ + sdata, fmt, ##__VA_ARGS__) + +#define ibss_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_IBSS_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define ps_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_PS_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define ps_dbg_hw(hw, fmt, ...) \ + _wiphy_dbg(MAC80211_PS_DEBUG, \ + (hw)->wiphy, fmt, ##__VA_ARGS__) + +#define ps_dbg_ratelimited(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_PS_DEBUG && net_ratelimit(), \ + sdata, fmt, ##__VA_ARGS__) + +#define mpl_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MPL_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define mpath_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MPATH_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define mhwmp_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MHWMP_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define msync_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MESH_SYNC_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define tdls_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_TDLS_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define sta_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_STA_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define mlme_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MLME_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define mlme_dbg_ratelimited(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MLME_DEBUG && net_ratelimit(), \ + sdata, fmt, ##__VA_ARGS__) + +#endif /* __MAC80211_DEBUG_H */ diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 512c894893d6..6d5aec9418ee 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -695,6 +695,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) sprintf(buf, "netdev:%s", sdata->name); if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf)) - pr_err("mac80211: debugfs: failed to rename debugfs " - "dir to %s\n", buf); + sdata_err(sdata, + "debugfs: failed to rename debugfs dir to %s\n", + buf); } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 6f8615c54b22..4b4538d63925 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -305,12 +305,10 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12; initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11; -#ifdef CONFIG_MAC80211_HT_DEBUG - net_dbg_ratelimited("delba from %pM (%s) tid %d reason code %d\n", - mgmt->sa, initiator ? "initiator" : "recipient", - tid, - le16_to_cpu(mgmt->u.action.u.delba.reason_code)); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_dbg_ratelimited(sdata, "delba from %pM (%s) tid %d reason code %d\n", + mgmt->sa, initiator ? "initiator" : "recipient", + tid, + le16_to_cpu(mgmt->u.action.u.delba.reason_code)); if (initiator == WLAN_BACK_INITIATOR) __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 8931110b8433..5746d62faba1 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -261,11 +261,7 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, memcpy(addr, sta->sta.addr, ETH_ALEN); -#ifdef CONFIG_MAC80211_IBSS_DEBUG - wiphy_debug(sdata->local->hw.wiphy, - "Adding new IBSS station %pM (dev=%s)\n", - addr, sdata->name); -#endif + ibss_dbg(sdata, "Adding new IBSS station %pM\n", addr); sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); @@ -280,8 +276,9 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, if (sta_info_insert_rcu(sta)) return sta_info_get(sdata, addr); if (auth && !sdata->u.ibss.auth_frame_registrations) { - ibss_vdbg("TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", - sdata->vif.addr, sdata->u.ibss.bssid, addr); + ibss_dbg(sdata, + "TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", + sdata->vif.addr, sdata->u.ibss.bssid, addr); ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, NULL, 0, addr, sdata->u.ibss.bssid, NULL, 0, 0); } @@ -304,7 +301,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, * allow new one to be added. */ if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { - net_dbg_ratelimited("%s: No room for a new IBSS STA entry %pM\n", + net_info_ratelimited("%s: No room for a new IBSS STA entry %pM\n", sdata->name, addr); rcu_read_lock(); return NULL; @@ -351,9 +348,9 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; - ibss_vdbg("%s: RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", - sdata->name, mgmt->sa, mgmt->da, mgmt->bssid, - auth_transaction); + ibss_dbg(sdata, + "RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", + mgmt->sa, mgmt->da, mgmt->bssid, auth_transaction); sta_info_destroy_addr(sdata, mgmt->sa); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false); rcu_read_unlock(); @@ -416,10 +413,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, ieee80211_mandatory_rates(local, band); if (sta->sta.supp_rates[band] != prev_rates) { - ibss_vdbg("%s: updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", - sdata->name, sta->sta.addr, - prev_rates, - sta->sta.supp_rates[band]); + ibss_dbg(sdata, + "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", + sta->sta.addr, prev_rates, + sta->sta.supp_rates[band]); rates_updated = true; } } else { @@ -534,16 +531,18 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, rx_timestamp = drv_get_tsf(local, sdata); } - ibss_vdbg("RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", - mgmt->sa, mgmt->bssid, - (unsigned long long)rx_timestamp, - (unsigned long long)beacon_timestamp, - (unsigned long long)(rx_timestamp - beacon_timestamp), - jiffies); + ibss_dbg(sdata, + "RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", + mgmt->sa, mgmt->bssid, + (unsigned long long)rx_timestamp, + (unsigned long long)beacon_timestamp, + (unsigned long long)(rx_timestamp - beacon_timestamp), + jiffies); if (beacon_timestamp > rx_timestamp) { - ibss_vdbg("%s: beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n", - sdata->name, mgmt->bssid); + ibss_dbg(sdata, + "beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n", + mgmt->bssid); ieee80211_sta_join_ibss(sdata, bss); supp_rates = ieee80211_sta_get_rates(local, elems, band, NULL); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, @@ -569,7 +568,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, * allow new one to be added. */ if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { - net_dbg_ratelimited("%s: No room for a new IBSS STA entry %pM\n", + net_info_ratelimited("%s: No room for a new IBSS STA entry %pM\n", sdata->name, addr); return; } @@ -645,8 +644,8 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) if (ifibss->fixed_channel) return; - pr_debug("%s: No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n", - sdata->name); + sdata_info(sdata, + "No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n"); ieee80211_request_internal_scan(sdata, ifibss->ssid, ifibss->ssid_len, NULL); @@ -674,8 +673,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) bssid[0] |= 0x02; } - pr_debug("%s: Creating new IBSS network, BSSID %pM\n", - sdata->name, bssid); + sdata_info(sdata, "Creating new IBSS network, BSSID %pM\n", bssid); capability = WLAN_CAPABILITY_IBSS; @@ -706,8 +704,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&ifibss->mtx); active_ibss = ieee80211_sta_active_ibss(sdata); - ibss_vdbg("%s: sta_find_ibss (active_ibss=%d)\n", - sdata->name, active_ibss); + ibss_dbg(sdata, "sta_find_ibss (active_ibss=%d)\n", active_ibss); if (active_ibss) return; @@ -730,23 +727,24 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) struct ieee80211_bss *bss; bss = (void *)cbss->priv; - ibss_vdbg(" sta_find_ibss: selected %pM current %pM\n", - cbss->bssid, ifibss->bssid); - pr_debug("%s: Selected IBSS BSSID %pM based on configured SSID\n", - sdata->name, cbss->bssid); + ibss_dbg(sdata, + "sta_find_ibss: selected %pM current %pM\n", + cbss->bssid, ifibss->bssid); + sdata_info(sdata, + "Selected IBSS BSSID %pM based on configured SSID\n", + cbss->bssid); ieee80211_sta_join_ibss(sdata, bss); ieee80211_rx_bss_put(local, bss); return; } - ibss_vdbg(" did not try to join ibss\n"); + ibss_dbg(sdata, "sta_find_ibss: did not try to join ibss\n"); /* Selected IBSS not found in current scan results - try to scan */ if (time_after(jiffies, ifibss->last_scan_completed + IEEE80211_SCAN_INTERVAL)) { - pr_debug("%s: Trigger new scan to find an IBSS to join\n", - sdata->name); + sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); ieee80211_request_internal_scan(sdata, ifibss->ssid, ifibss->ssid_len, @@ -760,9 +758,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) ieee80211_sta_create_ibss(sdata); return; } - pr_debug("%s: IBSS not allowed on %d MHz\n", - sdata->name, - local->hw.conf.channel->center_freq); + sdata_info(sdata, "IBSS not allowed on %d MHz\n", + local->hw.conf.channel->center_freq); /* No IBSS found - decrease scan interval and continue * scanning. */ @@ -797,9 +794,9 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, tx_last_beacon = drv_tx_last_beacon(local); - ibss_vdbg("%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n", - sdata->name, mgmt->sa, mgmt->da, - mgmt->bssid, tx_last_beacon); + ibss_dbg(sdata, + "RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n", + mgmt->sa, mgmt->da, mgmt->bssid, tx_last_beacon); if (!tx_last_beacon && is_multicast_ether_addr(mgmt->da)) return; @@ -812,8 +809,8 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, pos = mgmt->u.probe_req.variable; if (pos[0] != WLAN_EID_SSID || pos + 2 + pos[1] > end) { - ibss_vdbg("%s: Invalid SSID IE in ProbeReq from %pM\n", - sdata->name, mgmt->sa); + ibss_dbg(sdata, "Invalid SSID IE in ProbeReq from %pM\n", + mgmt->sa); return; } if (pos[1] != 0 && @@ -830,7 +827,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, resp = (struct ieee80211_mgmt *) skb->data; memcpy(resp->da, mgmt->sa, ETH_ALEN); - ibss_vdbg("%s: Sending ProbeResp to %pM\n", sdata->name, resp->da); + ibss_dbg(sdata, "Sending ProbeResp to %pM\n", resp->da); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 36ce2bb066bf..f834a005e1c5 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -30,6 +30,7 @@ #include #include "key.h" #include "sta_info.h" +#include "debug.h" struct ieee80211_local; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 728d3eac1f59..576880317d0e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -57,9 +57,6 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - pr_debug("%s: setting MTU %d\n", dev->name, new_mtu); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ dev->mtu = new_mtu; return 0; } @@ -1223,7 +1220,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, if (__ffs64(mask) + hweight64(mask) != fls64(mask)) { /* not a contiguous mask ... not handled now! */ - pr_debug("not contiguous\n"); + pr_info("not contiguous\n"); break; } @@ -1414,10 +1411,6 @@ static u32 ieee80211_idle_off(struct ieee80211_local *local, if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) return 0; -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "device no longer idle - %s\n", reason); -#endif - local->hw.conf.flags &= ~IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; } @@ -1427,10 +1420,6 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local) if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "device now idle\n"); -#endif - drv_flush(local, false); local->hw.conf.flags |= IEEE80211_CONF_IDLE; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 5bb600d93d77..b3b7e526e245 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -139,7 +139,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) } if (ret != -ENOSPC && ret != -EOPNOTSUPP) - wiphy_err(key->local->hw.wiphy, + sdata_err(sdata, "failed to set key (%d, %pM) to hardware (%d)\n", key->conf.keyidx, sta ? sta->sta.addr : bcast_addr, ret); @@ -186,7 +186,7 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sta ? &sta->sta : NULL, &key->conf); if (ret) - wiphy_err(key->local->hw.wiphy, + sdata_err(sdata, "failed to remove key (%d, %pM) from hardware (%d)\n", key->conf.keyidx, sta ? sta->sta.addr : bcast_addr, ret); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index ae40a83675e9..764593d65fc3 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -523,10 +523,6 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, { bool free_plinks; -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - pr_debug("%s: running mesh housekeeping\n", sdata->name); -#endif - ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); mesh_path_expire(sdata); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index aed1821bd6f1..fb7b6a11d0ba 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -13,13 +13,6 @@ #include "wme.h" #include "mesh.h" -#ifdef CONFIG_MAC80211_VERBOSE_MHWMP_DEBUG -#define mhwmp_dbg(fmt, args...) \ - pr_debug("Mesh HWMP (%s): " fmt "\n", sdata->name, ##args) -#else -#define mhwmp_dbg(fmt, args...) do { (void)(0); } while (0) -#endif - #define TEST_FRAME_LEN 8192 #define MAX_METRIC 0xffffffff #define ARITH_SHIFT 8 @@ -144,19 +137,19 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, switch (action) { case MPATH_PREQ: - mhwmp_dbg("sending PREQ to %pM", target); + mhwmp_dbg(sdata, "sending PREQ to %pM\n", target); ie_len = 37; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREQ; break; case MPATH_PREP: - mhwmp_dbg("sending PREP to %pM", target); + mhwmp_dbg(sdata, "sending PREP to %pM\n", target); ie_len = 31; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREP; break; case MPATH_RANN: - mhwmp_dbg("sending RANN from %pM", orig_addr); + mhwmp_dbg(sdata, "sending RANN from %pM\n", orig_addr); ie_len = sizeof(struct ieee80211_rann_ie); pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_RANN; @@ -535,10 +528,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, flags = PREQ_IE_FLAGS(preq_elem); root_is_gate = !!(flags & RANN_FLAG_IS_GATE); - mhwmp_dbg("received PREQ from %pM", orig_addr); + mhwmp_dbg(sdata, "received PREQ from %pM\n", orig_addr); if (ether_addr_equal(target_addr, sdata->vif.addr)) { - mhwmp_dbg("PREQ is for us"); + mhwmp_dbg(sdata, "PREQ is for us\n"); forward = false; reply = true; metric = 0; @@ -590,7 +583,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, lifetime = PREQ_IE_LIFETIME(preq_elem); ttl = ifmsh->mshcfg.element_ttl; if (ttl != 0) { - mhwmp_dbg("replying to the PREQ"); + mhwmp_dbg(sdata, "replying to the PREQ\n"); mesh_path_sel_frame_tx(MPATH_PREP, 0, orig_addr, cpu_to_le32(orig_sn), 0, target_addr, cpu_to_le32(target_sn), mgmt->sa, 0, ttl, @@ -611,7 +604,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, ifmsh->mshstats.dropped_frames_ttl++; return; } - mhwmp_dbg("forwarding the PREQ from %pM", orig_addr); + mhwmp_dbg(sdata, "forwarding the PREQ from %pM\n", orig_addr); --ttl; preq_id = PREQ_IE_PREQ_ID(preq_elem); hopcount = PREQ_IE_HOPCOUNT(preq_elem) + 1; @@ -658,7 +651,8 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, u8 next_hop[ETH_ALEN]; u32 target_sn, orig_sn, lifetime; - mhwmp_dbg("received PREP from %pM", PREP_IE_ORIG_ADDR(prep_elem)); + mhwmp_dbg(sdata, "received PREP from %pM\n", + PREP_IE_ORIG_ADDR(prep_elem)); orig_addr = PREP_IE_ORIG_ADDR(prep_elem); if (ether_addr_equal(orig_addr, sdata->vif.addr)) @@ -784,8 +778,9 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, if (ether_addr_equal(orig_addr, sdata->vif.addr)) return; - mhwmp_dbg("received RANN from %pM via neighbour %pM (is_gate=%d)", - orig_addr, mgmt->sa, root_is_gate); + mhwmp_dbg(sdata, + "received RANN from %pM via neighbour %pM (is_gate=%d)\n", + orig_addr, mgmt->sa, root_is_gate); rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); @@ -818,8 +813,9 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, root_path_confirmation_jiffies(sdata)) || time_before(jiffies, mpath->last_preq_to_root))) && !(mpath->flags & MESH_PATH_FIXED) && (ttl != 0)) { - mhwmp_dbg("%s time to refresh root mpath %pM", sdata->name, - orig_addr); + mhwmp_dbg(sdata, + "time to refresh root mpath %pM\n", + orig_addr); mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); mpath->last_preq_to_root = jiffies; } @@ -926,7 +922,7 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) preq_node = kmalloc(sizeof(struct mesh_preq_queue), GFP_ATOMIC); if (!preq_node) { - mhwmp_dbg("could not allocate PREQ node"); + mhwmp_dbg(sdata, "could not allocate PREQ node\n"); return; } @@ -935,7 +931,7 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); kfree(preq_node); if (printk_ratelimit()) - mhwmp_dbg("PREQ node queue full"); + mhwmp_dbg(sdata, "PREQ node queue full\n"); return; } @@ -1183,7 +1179,7 @@ void mesh_path_timer(unsigned long data) if (!mpath->is_gate && mesh_gate_num(sdata) > 0) { ret = mesh_path_send_to_gates(mpath); if (ret) - mhwmp_dbg("no gate was reachable"); + mhwmp_dbg(sdata, "no gate was reachable\n"); } else mesh_path_flush_pending(mpath); } @@ -1221,7 +1217,7 @@ mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) 0, cpu_to_le32(ifmsh->preq_id++), sdata); break; default: - mhwmp_dbg("Proactive mechanism not supported"); + mhwmp_dbg(sdata, "Proactive mechanism not supported\n"); return; } } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 572f706fd65b..c9ae931dd693 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -18,12 +18,6 @@ #include "ieee80211_i.h" #include "mesh.h" -#ifdef CONFIG_MAC80211_VERBOSE_MPATH_DEBUG -#define mpath_dbg(fmt, args...) pr_debug(fmt, ##args) -#else -#define mpath_dbg(fmt, args...) do { (void)(0); } while (0) -#endif - /* There will be initially 2^INIT_PATHS_SIZE_ORDER buckets */ #define INIT_PATHS_SIZE_ORDER 2 @@ -322,9 +316,8 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, spin_lock_irqsave(&gate_mpath->frame_queue.lock, flags); skb_queue_splice(&gateq, &gate_mpath->frame_queue); - mpath_dbg("Mpath queue for gate %pM has %d frames\n", - gate_mpath->dst, - skb_queue_len(&gate_mpath->frame_queue)); + mpath_dbg(gate_mpath->sdata, "Mpath queue for gate %pM has %d frames\n", + gate_mpath->dst, skb_queue_len(&gate_mpath->frame_queue)); spin_unlock_irqrestore(&gate_mpath->frame_queue.lock, flags); if (!copy) @@ -446,9 +439,9 @@ int mesh_path_add_gate(struct mesh_path *mpath) hlist_add_head_rcu(&new_gate->list, tbl->known_gates); spin_unlock_bh(&tbl->gates_lock); rcu_read_unlock(); - mpath_dbg("Mesh path (%s): Recorded new gate: %pM. %d known gates\n", - mpath->sdata->name, mpath->dst, - mpath->sdata->u.mesh.num_gates); + mpath_dbg(mpath->sdata, + "Mesh path: Recorded new gate: %pM. %d known gates\n", + mpath->dst, mpath->sdata->u.mesh.num_gates); return 0; err_rcu: rcu_read_unlock(); @@ -477,8 +470,8 @@ static int mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) spin_unlock_bh(&tbl->gates_lock); mpath->sdata->u.mesh.num_gates--; mpath->is_gate = false; - mpath_dbg("Mesh path (%s): Deleted gate: %pM. " - "%d known gates\n", mpath->sdata->name, + mpath_dbg(mpath->sdata, + "Mesh path: Deleted gate: %pM. %d known gates\n", mpath->dst, mpath->sdata->u.mesh.num_gates); break; } @@ -946,19 +939,20 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) continue; if (gate->mpath->flags & MESH_PATH_ACTIVE) { - mpath_dbg("Forwarding to %pM\n", gate->mpath->dst); + mpath_dbg(sdata, "Forwarding to %pM\n", gate->mpath->dst); mesh_path_move_to_queue(gate->mpath, from_mpath, copy); from_mpath = gate->mpath; copy = true; } else { - mpath_dbg("Not forwarding %p\n", gate->mpath); - mpath_dbg("flags %x\n", gate->mpath->flags); + mpath_dbg(sdata, + "Not forwarding %p (flags %#x)\n", + gate->mpath, gate->mpath->flags); } } hlist_for_each_entry_rcu(gate, n, known_gates, list) if (gate->mpath->sdata == sdata) { - mpath_dbg("Sending to %pM\n", gate->mpath->dst); + mpath_dbg(sdata, "Sending to %pM\n", gate->mpath->dst); mesh_path_tx_pending(gate->mpath); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index be4fad128c34..a1dbd1540276 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -13,12 +13,6 @@ #include "rate.h" #include "mesh.h" -#ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG -#define mpl_dbg(fmt, args...) pr_debug(fmt, ##args) -#else -#define mpl_dbg(fmt, args...) do { (void)(0); } while (0) -#endif - #define PLINK_GET_LLID(p) (p + 2) #define PLINK_GET_PLID(p) (p + 4) @@ -134,12 +128,14 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) switch (sta->ch_type) { case NL80211_CHAN_NO_HT: - mpl_dbg("mesh_plink %pM: nonHT sta (%pM) is present", + mpl_dbg(sdata, + "mesh_plink %pM: nonHT sta (%pM) is present\n", sdata->vif.addr, sta->sta.addr); non_ht_sta = true; goto out; case NL80211_CHAN_HT20: - mpl_dbg("mesh_plink %pM: HT20 sta (%pM) is present", + mpl_dbg(sdata, + "mesh_plink %pM: HT20 sta (%pM) is present\n", sdata->vif.addr, sta->sta.addr); ht20_sta = true; default: @@ -160,7 +156,8 @@ out: sdata->vif.bss_conf.ht_operation_mode = ht_opmode; sdata->u.mesh.mshcfg.ht_opmode = ht_opmode; changed = BSS_CHANGED_HT; - mpl_dbg("mesh_plink %pM: protection mode changed to %d", + mpl_dbg(sdata, + "mesh_plink %pM: protection mode changed to %d\n", sdata->vif.addr, ht_opmode); } @@ -437,7 +434,8 @@ static void mesh_plink_timer(unsigned long data) spin_unlock_bh(&sta->lock); return; } - mpl_dbg("Mesh plink timer for %pM fired on state %d\n", + mpl_dbg(sta->sdata, + "Mesh plink timer for %pM fired on state %d\n", sta->sta.addr, sta->plink_state); reason = 0; llid = sta->llid; @@ -450,7 +448,8 @@ static void mesh_plink_timer(unsigned long data) /* retry timer */ if (sta->plink_retries < dot11MeshMaxRetries(sdata)) { u32 rand; - mpl_dbg("Mesh plink for %pM (retry, timeout): %d %d\n", + mpl_dbg(sta->sdata, + "Mesh plink for %pM (retry, timeout): %d %d\n", sta->sta.addr, sta->plink_retries, sta->plink_timeout); get_random_bytes(&rand, sizeof(u32)); @@ -530,7 +529,8 @@ int mesh_plink_open(struct sta_info *sta) sta->plink_state = NL80211_PLINK_OPN_SNT; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); spin_unlock_bh(&sta->lock); - mpl_dbg("Mesh plink: starting establishment with %pM\n", + mpl_dbg(sdata, + "Mesh plink: starting establishment with %pM\n", sta->sta.addr); return mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, @@ -565,7 +565,6 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m u8 *baseaddr; u32 changed = 0; __le16 plid, llid, reason; -#ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG static const char *mplstates[] = { [NL80211_PLINK_LISTEN] = "LISTEN", [NL80211_PLINK_OPN_SNT] = "OPN-SNT", @@ -575,14 +574,14 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m [NL80211_PLINK_HOLDING] = "HOLDING", [NL80211_PLINK_BLOCKED] = "BLOCKED" }; -#endif /* need action_code, aux */ if (len < IEEE80211_MIN_ACTION_SIZE + 3) return; if (is_multicast_ether_addr(mgmt->da)) { - mpl_dbg("Mesh plink: ignore frame from multicast address"); + mpl_dbg(sdata, + "Mesh plink: ignore frame from multicast address\n"); return; } @@ -595,12 +594,14 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } ieee802_11_parse_elems(baseaddr, len - baselen, &elems); if (!elems.peering) { - mpl_dbg("Mesh plink: missing necessary peer link ie\n"); + mpl_dbg(sdata, + "Mesh plink: missing necessary peer link ie\n"); return; } if (elems.rsn_len && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { - mpl_dbg("Mesh plink: can't establish link with secure peer\n"); + mpl_dbg(sdata, + "Mesh plink: can't establish link with secure peer\n"); return; } @@ -610,14 +611,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m (ftype == WLAN_SP_MESH_PEERING_CONFIRM && ie_len != 6) || (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len != 6 && ie_len != 8)) { - mpl_dbg("Mesh plink: incorrect plink ie length %d %d\n", - ftype, ie_len); + mpl_dbg(sdata, + "Mesh plink: incorrect plink ie length %d %d\n", + ftype, ie_len); return; } if (ftype != WLAN_SP_MESH_PEERING_CLOSE && (!elems.mesh_id || !elems.mesh_config)) { - mpl_dbg("Mesh plink: missing necessary ie\n"); + mpl_dbg(sdata, "Mesh plink: missing necessary ie\n"); return; } /* Note the lines below are correct, the llid in the frame is the plid @@ -632,21 +634,21 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta = sta_info_get(sdata, mgmt->sa); if (!sta && ftype != WLAN_SP_MESH_PEERING_OPEN) { - mpl_dbg("Mesh plink: cls or cnf from unknown peer\n"); + mpl_dbg(sdata, "Mesh plink: cls or cnf from unknown peer\n"); rcu_read_unlock(); return; } if (ftype == WLAN_SP_MESH_PEERING_OPEN && !rssi_threshold_check(sta, sdata)) { - mpl_dbg("Mesh plink: %pM does not meet rssi threshold\n", + mpl_dbg(sdata, "Mesh plink: %pM does not meet rssi threshold\n", mgmt->sa); rcu_read_unlock(); return; } if (sta && !test_sta_flag(sta, WLAN_STA_AUTH)) { - mpl_dbg("Mesh plink: Action frame from non-authed peer\n"); + mpl_dbg(sdata, "Mesh plink: Action frame from non-authed peer\n"); rcu_read_unlock(); return; } @@ -683,7 +685,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } else if (!sta) { /* ftype == WLAN_SP_MESH_PEERING_OPEN */ if (!mesh_plink_free_count(sdata)) { - mpl_dbg("Mesh plink error: no more free plinks\n"); + mpl_dbg(sdata, "Mesh plink error: no more free plinks\n"); rcu_read_unlock(); return; } @@ -724,7 +726,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m event = CLS_ACPT; break; default: - mpl_dbg("Mesh plink: unknown frame subtype\n"); + mpl_dbg(sdata, "Mesh plink: unknown frame subtype\n"); rcu_read_unlock(); return; } @@ -734,13 +736,14 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m /* allocate sta entry if necessary and update info */ sta = mesh_peer_init(sdata, mgmt->sa, &elems); if (!sta) { - mpl_dbg("Mesh plink: failed to init peer!\n"); + mpl_dbg(sdata, "Mesh plink: failed to init peer!\n"); rcu_read_unlock(); return; } } - mpl_dbg("Mesh plink (peer, state, llid, plid, event): %pM %s %d %d %d\n", + mpl_dbg(sdata, + "Mesh plink (peer, state, llid, plid, event): %pM %s %d %d %d\n", mgmt->sa, mplstates[sta->plink_state], le16_to_cpu(sta->llid), le16_to_cpu(sta->plid), event); @@ -851,7 +854,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); changed |= BSS_CHANGED_BEACON; - mpl_dbg("Mesh plink with %pM ESTABLISHED\n", + mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); break; default: @@ -887,7 +890,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); changed |= BSS_CHANGED_BEACON; - mpl_dbg("Mesh plink with %pM ESTABLISHED\n", + mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CONFIRM, diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 0ccdad49f987..accfa00ffcdf 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -12,13 +12,6 @@ #include "mesh.h" #include "driver-ops.h" -#ifdef CONFIG_MAC80211_VERBOSE_MESH_SYNC_DEBUG -#define msync_dbg(fmt, args...) \ - pr_debug("Mesh sync (%s): " fmt "\n", sdata->name, ##args) -#else -#define msync_dbg(fmt, args...) do { (void)(0); } while (0) -#endif - /* This is not in the standard. It represents a tolerable tbtt drift below * which we do no TSF adjustment. */ @@ -65,14 +58,14 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) spin_lock_bh(&ifmsh->sync_offset_lock); if (ifmsh->sync_offset_clockdrift_max < beacon_int_fraction) { - msync_dbg("TBTT : max clockdrift=%lld; adjusting", - (long long) ifmsh->sync_offset_clockdrift_max); + msync_dbg(sdata, "TBTT : max clockdrift=%lld; adjusting\n", + (long long) ifmsh->sync_offset_clockdrift_max); tsfdelta = -ifmsh->sync_offset_clockdrift_max; ifmsh->sync_offset_clockdrift_max = 0; } else { - msync_dbg("TBTT : max clockdrift=%lld; adjusting by %llu", - (long long) ifmsh->sync_offset_clockdrift_max, - (unsigned long long) beacon_int_fraction); + msync_dbg(sdata, "TBTT : max clockdrift=%lld; adjusting by %llu\n", + (long long) ifmsh->sync_offset_clockdrift_max, + (unsigned long long) beacon_int_fraction); tsfdelta = -beacon_int_fraction; ifmsh->sync_offset_clockdrift_max -= beacon_int_fraction; } @@ -120,7 +113,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) { clear_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); - msync_dbg("STA %pM : is adjusting TBTT", sta->sta.addr); + msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", sta->sta.addr); goto no_sync; } @@ -169,7 +162,8 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { s64 t_clockdrift = sta->t_offset_setpoint - sta->t_offset; - msync_dbg("STA %pM : sta->t_offset=%lld, sta->t_offset_setpoint=%lld, t_clockdrift=%lld", + msync_dbg(sdata, + "STA %pM : sta->t_offset=%lld, sta->t_offset_setpoint=%lld, t_clockdrift=%lld\n", sta->sta.addr, (long long) sta->t_offset, (long long) @@ -178,7 +172,8 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (t_clockdrift > TOFFSET_MAXIMUM_ADJUSTMENT || t_clockdrift < -TOFFSET_MAXIMUM_ADJUSTMENT) { - msync_dbg("STA %pM : t_clockdrift=%lld too large, setpoint reset", + msync_dbg(sdata, + "STA %pM : t_clockdrift=%lld too large, setpoint reset\n", sta->sta.addr, (long long) t_clockdrift); clear_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); @@ -197,8 +192,8 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, } else { sta->t_offset_setpoint = sta->t_offset - TOFFSET_SET_MARGIN; set_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); - msync_dbg("STA %pM : offset was invalid, " - " sta->t_offset=%lld", + msync_dbg(sdata, + "STA %pM : offset was invalid, sta->t_offset=%lld\n", sta->sta.addr, (long long) sta->t_offset); rcu_read_unlock(); @@ -226,17 +221,15 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) * to the driver tsf setter, we punt * the tsf adjustment to the mesh tasklet */ - msync_dbg("TBTT : kicking off TBTT " - "adjustment with " - "clockdrift_max=%lld", - ifmsh->sync_offset_clockdrift_max); + msync_dbg(sdata, + "TBTT : kicking off TBTT adjustment with clockdrift_max=%lld\n", + ifmsh->sync_offset_clockdrift_max); set_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags); } else { - msync_dbg("TBTT : max clockdrift=%lld; " - "too small to adjust", - (long long) - ifmsh->sync_offset_clockdrift_max); + msync_dbg(sdata, + "TBTT : max clockdrift=%lld; too small to adjust\n", + (long long)ifmsh->sync_offset_clockdrift_max); ifmsh->sync_offset_clockdrift_max = 0; } spin_unlock_bh(&ifmsh->sync_offset_lock); @@ -268,7 +261,7 @@ static void mesh_sync_vendor_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, const u8 *oui; WARN_ON(sdata->u.mesh.mesh_sp_id != IEEE80211_SYNC_METHOD_VENDOR); - msync_dbg("called mesh_sync_vendor_rx_bcn_presp"); + msync_dbg(sdata, "called mesh_sync_vendor_rx_bcn_presp\n"); oui = mesh_get_vendor_oui(sdata); /* here you would implement the vendor offset tracking for this oui */ } @@ -278,7 +271,7 @@ static void mesh_sync_vendor_adjust_tbtt(struct ieee80211_sub_if_data *sdata) const u8 *oui; WARN_ON(sdata->u.mesh.mesh_sp_id != IEEE80211_SYNC_METHOD_VENDOR); - msync_dbg("called mesh_sync_vendor_adjust_tbtt"); + msync_dbg(sdata, "called mesh_sync_vendor_adjust_tbtt\n"); oui = mesh_get_vendor_oui(sdata); /* here you would implement the vendor tsf adjustment for this oui */ } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2b450f541993..e11cd0e033ef 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1186,19 +1186,16 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, params.txop = get_unaligned_le16(pos + 2); params.uapsd = uapsd; -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, - "WMM queue=%d aci=%d acm=%d aifs=%d " - "cWmin=%d cWmax=%d txop=%d uapsd=%d\n", - queue, aci, acm, - params.aifs, params.cw_min, params.cw_max, - params.txop, params.uapsd); -#endif + mlme_dbg(sdata, + "WMM queue=%d aci=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d\n", + queue, aci, acm, + params.aifs, params.cw_min, params.cw_max, + params.txop, params.uapsd); sdata->tx_conf[queue] = params; if (drv_conf_tx(local, sdata, queue, ¶ms)) - wiphy_debug(local->hw.wiphy, - "failed to set TX queue parameters for queue %d\n", - queue); + sdata_err(sdata, + "failed to set TX queue parameters for queue %d\n", + queue); } /* enable WMM or activate new settings */ @@ -1565,11 +1562,10 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, goto out; } -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (beacon) - net_dbg_ratelimited("%s: detected beacon loss from AP - sending probe request\n", - sdata->name); -#endif + mlme_dbg_ratelimited(sdata, + "detected beacon loss from AP - sending probe request\n"); + ieee80211_cqm_rssi_notify(&sdata->vif, NL80211_CQM_RSSI_BEACON_LOSS_EVENT, GFP_KERNEL); @@ -1654,7 +1650,7 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); - pr_debug("%s: Connection to AP %pM lost\n", sdata->name, bssid); + sdata_info(sdata, "Connection to AP %pM lost\n", bssid); ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, @@ -1788,8 +1784,8 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, return RX_MGMT_NONE; if (status_code != WLAN_STATUS_SUCCESS) { - pr_debug("%s: %pM denied authentication (status %d)\n", - sdata->name, mgmt->sa, status_code); + sdata_info(sdata, "%pM denied authentication (status %d)\n", + mgmt->sa, status_code); ieee80211_destroy_auth_data(sdata, false); return RX_MGMT_CFG80211_RX_AUTH; } @@ -1812,7 +1808,7 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, return RX_MGMT_NONE; } - pr_debug("%s: authenticated\n", sdata->name); + sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC; run_again(ifmgd, ifmgd->auth_data->timeout); @@ -1825,7 +1821,7 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, goto out_err; } if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) { - pr_debug("%s: failed moving %pM to auth\n", sdata->name, bssid); + sdata_info(sdata, "failed moving %pM to auth\n", bssid); goto out_err; } mutex_unlock(&sdata->local->sta_mtx); @@ -1859,8 +1855,8 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); - pr_debug("%s: deauthenticated from %pM (Reason: %u)\n", - sdata->name, bssid, reason_code); + sdata_info(sdata, "deauthenticated from %pM (Reason: %u)\n", + bssid, reason_code); ieee80211_set_disassoc(sdata, 0, 0, false, NULL); @@ -1890,8 +1886,8 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); - pr_debug("%s: disassociated from %pM (Reason: %u)\n", - sdata->name, mgmt->sa, reason_code); + sdata_info(sdata, "disassociated from %pM (Reason: %u)\n", + mgmt->sa, reason_code); ieee80211_set_disassoc(sdata, 0, 0, false, NULL); @@ -1983,15 +1979,15 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) - pr_debug("%s: invalid AID value 0x%x; bits 15:14 not set\n", - sdata->name, aid); + sdata_info(sdata, "invalid AID value 0x%x; bits 15:14 not set\n", + aid); aid &= ~(BIT(15) | BIT(14)); ifmgd->broken_ap = false; if (aid == 0 || aid > IEEE80211_MAX_AID) { - pr_debug("%s: invalid AID value %d (out of range), turn off PS\n", - sdata->name, aid); + sdata_info(sdata, "invalid AID value %d (out of range), turn off PS\n", + aid); aid = 0; ifmgd->broken_ap = true; } @@ -2000,8 +1996,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); if (!elems.supp_rates) { - pr_debug("%s: no SuppRates element in AssocResp\n", - sdata->name); + sdata_info(sdata, "no SuppRates element in AssocResp\n"); return false; } @@ -2041,8 +2036,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (!err && !(ifmgd->flags & IEEE80211_STA_CONTROL_PORT)) err = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); if (err) { - pr_debug("%s: failed to move station %pM to desired state\n", - sdata->name, sta->sta.addr); + sdata_info(sdata, + "failed to move station %pM to desired state\n", + sta->sta.addr); WARN_ON(__sta_info_destroy(sta)); mutex_unlock(&sdata->local->sta_mtx); return false; @@ -2125,9 +2121,10 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); aid = le16_to_cpu(mgmt->u.assoc_resp.aid); - pr_debug("%s: RX %sssocResp from %pM (capab=0x%x status=%d aid=%d)\n", - sdata->name, reassoc ? "Rea" : "A", mgmt->sa, - capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); + sdata_info(sdata, + "RX %sssocResp from %pM (capab=0x%x status=%d aid=%d)\n", + reassoc ? "Rea" : "A", mgmt->sa, + capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); pos = mgmt->u.assoc_resp.variable; ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); @@ -2138,8 +2135,9 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, u32 tu, ms; tu = get_unaligned_le32(elems.timeout_int + 1); ms = tu * 1024 / 1000; - pr_debug("%s: %pM rejected association temporarily; comeback duration %u TU (%u ms)\n", - sdata->name, mgmt->sa, tu, ms); + sdata_info(sdata, + "%pM rejected association temporarily; comeback duration %u TU (%u ms)\n", + mgmt->sa, tu, ms); assoc_data->timeout = jiffies + msecs_to_jiffies(ms); if (ms > IEEE80211_ASSOC_TIMEOUT) run_again(ifmgd, assoc_data->timeout); @@ -2149,11 +2147,11 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, *bss = assoc_data->bss; if (status_code != WLAN_STATUS_SUCCESS) { - pr_debug("%s: %pM denied association (code=%d)\n", - sdata->name, mgmt->sa, status_code); + sdata_info(sdata, "%pM denied association (code=%d)\n", + mgmt->sa, status_code); ieee80211_destroy_assoc_data(sdata, false); } else { - pr_debug("%s: associated\n", sdata->name); + sdata_info(sdata, "associated\n"); if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ @@ -2261,7 +2259,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, if (ifmgd->auth_data && !ifmgd->auth_data->bss->proberesp_ies && ether_addr_equal(mgmt->bssid, ifmgd->auth_data->bss->bssid)) { /* got probe response, continue with auth */ - pr_debug("%s: direct probe responded\n", sdata->name); + sdata_info(sdata, "direct probe responded\n"); ifmgd->auth_data->tries = 0; ifmgd->auth_data->timeout = jiffies; run_again(ifmgd, ifmgd->auth_data->timeout); @@ -2397,10 +2395,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } if (ifmgd->flags & IEEE80211_STA_BEACON_POLL) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - net_dbg_ratelimited("%s: cancelling probereq poll due to a received beacon\n", - sdata->name); -#endif + mlme_dbg_ratelimited(sdata, + "cancelling probereq poll due to a received beacon\n"); mutex_lock(&local->mtx); ifmgd->flags &= ~IEEE80211_STA_BEACON_POLL; ieee80211_run_deferred_scan(local); @@ -2625,8 +2621,8 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) auth_data->tries++; if (auth_data->tries > IEEE80211_AUTH_MAX_TRIES) { - pr_debug("%s: authentication with %pM timed out\n", - sdata->name, auth_data->bss->bssid); + sdata_info(sdata, "authentication with %pM timed out\n", + auth_data->bss->bssid); /* * Most likely AP is not in the range so remove the @@ -2638,9 +2634,9 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) } if (auth_data->bss->proberesp_ies) { - pr_debug("%s: send auth to %pM (try %d/%d)\n", - sdata->name, auth_data->bss->bssid, auth_data->tries, - IEEE80211_AUTH_MAX_TRIES); + sdata_info(sdata, "send auth to %pM (try %d/%d)\n", + auth_data->bss->bssid, auth_data->tries, + IEEE80211_AUTH_MAX_TRIES); auth_data->expected_transaction = 2; ieee80211_send_auth(sdata, 1, auth_data->algorithm, @@ -2650,9 +2646,9 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) } else { const u8 *ssidie; - pr_debug("%s: direct probe to %pM (try %d/%i)\n", - sdata->name, auth_data->bss->bssid, auth_data->tries, - IEEE80211_AUTH_MAX_TRIES); + sdata_info(sdata, "direct probe to %pM (try %d/%i)\n", + auth_data->bss->bssid, auth_data->tries, + IEEE80211_AUTH_MAX_TRIES); ssidie = ieee80211_bss_get_ie(auth_data->bss, WLAN_EID_SSID); if (!ssidie) @@ -2680,8 +2676,8 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) assoc_data->tries++; if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) { - pr_debug("%s: association with %pM timed out\n", - sdata->name, assoc_data->bss->bssid); + sdata_info(sdata, "association with %pM timed out\n", + assoc_data->bss->bssid); /* * Most likely AP is not in the range so remove the @@ -2692,9 +2688,9 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) return -ETIMEDOUT; } - pr_debug("%s: associate with %pM (try %d/%d)\n", - sdata->name, assoc_data->bss->bssid, assoc_data->tries, - IEEE80211_ASSOC_MAX_TRIES); + sdata_info(sdata, "associate with %pM (try %d/%d)\n", + assoc_data->bss->bssid, assoc_data->tries, + IEEE80211_ASSOC_MAX_TRIES); ieee80211_send_assoc(sdata); assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; @@ -2767,45 +2763,31 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_reset_ap_probe(sdata); else if (ifmgd->nullfunc_failed) { if (ifmgd->probe_send_count < max_tries) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, - "%s: No ack for nullfunc frame to" - " AP %pM, try %d/%i\n", - sdata->name, bssid, - ifmgd->probe_send_count, max_tries); -#endif + mlme_dbg(sdata, + "No ack for nullfunc frame to AP %pM, try %d/%i\n", + bssid, ifmgd->probe_send_count, + max_tries); ieee80211_mgd_probe_ap_send(sdata); } else { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, - "%s: No ack for nullfunc frame to" - " AP %pM, disconnecting.\n", - sdata->name, bssid); -#endif + mlme_dbg(sdata, + "No ack for nullfunc frame to AP %pM, disconnecting.\n", + bssid); ieee80211_sta_connection_lost(sdata, bssid, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); } } else if (time_is_after_jiffies(ifmgd->probe_timeout)) run_again(ifmgd, ifmgd->probe_timeout); else if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, - "%s: Failed to send nullfunc to AP %pM" - " after %dms, disconnecting.\n", - sdata->name, - bssid, probe_wait_ms); -#endif + mlme_dbg(sdata, + "Failed to send nullfunc to AP %pM after %dms, disconnecting\n", + bssid, probe_wait_ms); ieee80211_sta_connection_lost(sdata, bssid, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); } else if (ifmgd->probe_send_count < max_tries) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, - "%s: No probe response from AP %pM" - " after %dms, try %d/%i\n", - sdata->name, - bssid, probe_wait_ms, - ifmgd->probe_send_count, max_tries); -#endif + mlme_dbg(sdata, + "No probe response from AP %pM after %dms, try %d/%i\n", + bssid, probe_wait_ms, + ifmgd->probe_send_count, max_tries); ieee80211_mgd_probe_ap_send(sdata); } else { /* @@ -2920,11 +2902,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; mutex_lock(&ifmgd->mtx); if (ifmgd->associated) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(sdata->local->hw.wiphy, - "%s: driver requested disconnect after resume.\n", - sdata->name); -#endif + mlme_dbg(sdata, + "driver requested disconnect after resume\n"); ieee80211_sta_connection_lost(sdata, ifmgd->associated->bssid, WLAN_REASON_UNSPECIFIED); @@ -3065,10 +3044,11 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, * since we look at probe response/beacon data here * it should be OK. */ - pr_debug("%s: Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", - sdata->name, cbss->channel->center_freq, - ht_cfreq, ht_oper->primary_chan, - cbss->channel->band); + sdata_info(sdata, + "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", + cbss->channel->center_freq, + ht_cfreq, ht_oper->primary_chan, + cbss->channel->band); ht_oper = NULL; } } @@ -3092,8 +3072,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, if (!ieee80211_set_channel_type(local, sdata, channel_type)) { /* can only fail due to HT40+/- mismatch */ channel_type = NL80211_CHAN_HT20; - pr_debug("%s: disabling 40 MHz due to multi-vif mismatch\n", - sdata->name); + sdata_info(sdata, + "disabling 40 MHz due to multi-vif mismatch\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_40MHZ; WARN_ON(!ieee80211_set_channel_type(local, sdata, channel_type)); @@ -3122,8 +3102,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, * we can connect -- with a warning. */ if (!basic_rates && min_rate_index >= 0) { - pr_debug("%s: No basic rates, using min rate instead\n", - sdata->name); + sdata_info(sdata, + "No basic rates, using min rate instead\n"); basic_rates = BIT(min_rate_index); } @@ -3149,8 +3129,9 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, err = sta_info_insert(sta); sta = NULL; if (err) { - pr_debug("%s: failed to insert STA entry for the AP (error %d)\n", - sdata->name, err); + sdata_info(sdata, + "failed to insert STA entry for the AP (error %d)\n", + err); return err; } } else @@ -3228,7 +3209,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (ifmgd->associated) ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - pr_debug("%s: authenticate with %pM\n", sdata->name, req->bss->bssid); + sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); err = ieee80211_prep_connection(sdata, req->bss, false); if (err) @@ -3410,8 +3391,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, * Wait up to one beacon interval ... * should this be more if we miss one? */ - pr_debug("%s: waiting for beacon from %pM\n", - sdata->name, ifmgd->bssid); + sdata_info(sdata, "waiting for beacon from %pM\n", + ifmgd->bssid); assoc_data->timeout = TU_TO_EXP_TIME(req->bss->beacon_interval); } else { assoc_data->have_beacon = true; @@ -3430,8 +3411,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, corrupt_type = "beacon"; } else if (bss->corrupt_data & IEEE80211_BSS_CORRUPT_PROBE_RESP) corrupt_type = "probe response"; - pr_debug("%s: associating with AP with corrupt %s\n", - sdata->name, corrupt_type); + sdata_info(sdata, "associating with AP with corrupt %s\n", + corrupt_type); } err = 0; @@ -3460,8 +3441,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, return 0; } - pr_debug("%s: deauthenticating from %pM by local choice (reason=%d)\n", - sdata->name, req->bssid, req->reason_code); + sdata_info(sdata, + "deauthenticating from %pM by local choice (reason=%d)\n", + req->bssid, req->reason_code); if (ifmgd->associated && ether_addr_equal(ifmgd->associated->bssid, req->bssid)) @@ -3503,8 +3485,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, return -ENOLINK; } - pr_debug("%s: disassociating from %pM by local choice (reason=%d)\n", - sdata->name, req->bss->bssid, req->reason_code); + sdata_info(sdata, + "disassociating from %pM by local choice (reason=%d)\n", + req->bss->bssid, req->reason_code); memcpy(bssid, req->bss->bssid, ETH_ALEN); ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DISASSOC, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9f1bd692589b..ab5185054e6c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -632,11 +632,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, HT_RX_REORDER_BUF_TIMEOUT)) goto set_release_timer; -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - wiphy_debug(sdata->local->hw.wiphy, - "release an RX reorder frame due to timeout on earlier frames\n"); -#endif + ht_dbg_ratelimited(sdata, + "release an RX reorder frame due to timeout on earlier frames\n"); ieee80211_release_reorder_frame(sdata, tid_agg_rx, j); /* @@ -1136,24 +1133,18 @@ static void ap_sta_ps_start(struct sta_info *sta) set_sta_flag(sta, WLAN_STA_PS_STA); if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - pr_debug("%s: STA %pM aid %d enters power save mode\n", - sdata->name, sta->sta.addr, sta->sta.aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + ps_dbg(sdata, "STA %pM aid %d enters power save mode\n", + sta->sta.addr, sta->sta.aid); } static void ap_sta_ps_end(struct sta_info *sta) { -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - pr_debug("%s: STA %pM aid %d exits power save mode\n", - sta->sdata->name, sta->sta.addr, sta->sta.aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + ps_dbg(sta->sdata, "STA %pM aid %d exits power save mode\n", + sta->sta.addr, sta->sta.aid); if (test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - pr_debug("%s: STA %pM aid %d driver-ps-blocked\n", - sta->sdata->name, sta->sta.addr, sta->sta.aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + ps_dbg(sta->sdata, "STA %pM aid %d driver-ps-blocked\n", + sta->sta.addr, sta->sta.aid); return; } @@ -1383,17 +1374,8 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX) sdata->fragment_next = 0; - if (!skb_queue_empty(&entry->skb_list)) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) entry->skb_list.next->data; - pr_debug("%s: RX reassembly removed oldest fragment entry (idx=%d age=%lu seq=%d last_frag=%d addr1=%pM addr2=%pM\n", - sdata->name, idx, - jiffies - entry->first_frag_time, entry->seq, - entry->last_frag, hdr->addr1, hdr->addr2); -#endif + if (!skb_queue_empty(&entry->skb_list)) __skb_queue_purge(&entry->skb_list); - } __skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */ *skb = NULL; @@ -1751,7 +1733,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) */ xmit_skb = skb_copy(skb, GFP_ATOMIC); if (!xmit_skb) - net_dbg_ratelimited("%s: failed to clone multicast frame\n", + net_info_ratelimited("%s: failed to clone multicast frame\n", dev->name); } else { dsta = sta_info_get(sdata, skb->data); @@ -1955,7 +1937,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) fwd_skb = skb_copy(skb, GFP_ATOMIC); if (!fwd_skb) { - net_dbg_ratelimited("%s: failed to clone mesh frame\n", + net_info_ratelimited("%s: failed to clone mesh frame\n", sdata->name); goto out; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 77dcf2f89d42..06fa75ceb025 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -169,9 +169,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) if (sta->rate_ctrl) rate_control_free_sta(sta); -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Destroyed STA %pM\n", sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); kfree(sta); } @@ -278,9 +276,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < NUM_RX_DATA_QUEUES; i++) sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Allocated STA %pM\n", sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); #ifdef CONFIG_MAC80211_MESH sta->plink_state = NL80211_PLINK_LISTEN; @@ -333,8 +329,9 @@ static int sta_info_insert_drv_state(struct ieee80211_local *local, } if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { - pr_debug("%s: failed to move IBSS STA %pM to state %d (%d) - keeping it anyway\n", - sdata->name, sta->sta.addr, state + 1, err); + sdata_info(sdata, + "failed to move IBSS STA %pM to state %d (%d) - keeping it anyway\n", + sta->sta.addr, state + 1, err); err = 0; } @@ -389,9 +386,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) sinfo.generation = local->sta_generation; cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Inserted STA %pM\n", sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr); /* move reference to rcu-protected */ rcu_read_lock(); @@ -617,9 +612,8 @@ static bool sta_info_cleanup_expire_buffered_ac(struct ieee80211_local *local, break; local->total_ps_buffered--; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - pr_debug("Buffered frame expired (STA %pM)\n", sta->sta.addr); -#endif + ps_dbg(sta->sdata, "Buffered frame expired (STA %pM)\n", + sta->sta.addr); dev_kfree_skb(skb); } @@ -745,9 +739,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta) mesh_accept_plinks_update(sdata); #endif -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Removed STA %pM\n", sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); + cancel_work_sync(&sta->drv_unblock_wk); cfg80211_del_sta(sdata->dev, sta->sta.addr, GFP_KERNEL); @@ -887,8 +880,8 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, continue; if (time_after(jiffies, sta->last_rx + exp_time)) { - ibss_vdbg("%s: expiring inactive STA %pM\n", - sdata->name, sta->sta.addr); + ibss_dbg(sdata, "expiring inactive STA %pM\n", + sta->sta.addr); WARN_ON(__sta_info_destroy(sta)); } } @@ -986,10 +979,9 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) sta_info_recalc_tim(sta); -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - pr_debug("%s: STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n", - sdata->name, sta->sta.addr, sta->sta.aid, filtered, buffered); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + ps_dbg(sdata, + "STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n", + sta->sta.addr, sta->sta.aid, filtered, buffered); } static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, @@ -1379,10 +1371,8 @@ int sta_info_move_state(struct sta_info *sta, return -EINVAL; } -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - pr_debug("%s: moving STA %pM to state %d\n", - sta->sdata->name, sta->sta.addr, new_state); -#endif + sta_dbg(sta->sdata, "moving STA %pM to state %d\n", + sta->sta.addr, new_state); /* * notify the driver before the actual changes so it can diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 51a6d1e6e250..2ed2f27fe8a7 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -155,13 +155,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, return; } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - if (net_ratelimit()) - wiphy_debug(local->hw.wiphy, - "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n", - skb_queue_len(&sta->tx_filtered[ac]), - !!test_sta_flag(sta, WLAN_STA_PS_STA), jiffies); -#endif + ps_dbg_ratelimited(sta->sdata, + "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n", + skb_queue_len(&sta->tx_filtered[ac]), + !!test_sta_flag(sta, WLAN_STA_PS_STA), jiffies); dev_kfree_skb(skb); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index af25c4e7ec5c..37eda7e00e03 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -297,9 +297,10 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (unlikely(!assoc && ieee80211_is_data(hdr->frame_control))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - pr_debug("%s: dropped data frame to not associated station %pM\n", - tx->sdata->name, hdr->addr1); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + sdata_info(tx->sdata, + "dropped data frame to not associated station %pM\n", + hdr->addr1); +#endif I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); return TX_DROP; } @@ -366,10 +367,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) rcu_read_unlock(); local->total_ps_buffered = total; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - wiphy_debug(local->hw.wiphy, "PS buffers full - purged %d frames\n", - purged); -#endif + ps_dbg_hw(&local->hw, "PS buffers full - purged %d frames\n", purged); } static ieee80211_tx_result @@ -411,10 +409,8 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) purge_old_ps_buffers(tx->local); if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= AP_MAX_BC_BUFFER) { -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - net_dbg_ratelimited("%s: BC TX buffer full - dropping the oldest frame\n", - tx->sdata->name); -#endif + ps_dbg(tx->sdata, + "BC TX buffer full - dropping the oldest frame\n"); dev_kfree_skb(skb_dequeue(&tx->sdata->bss->ps_bc_buf)); } else tx->local->total_ps_buffered++; @@ -465,18 +461,15 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) return TX_CONTINUE; } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - pr_debug("STA %pM aid %d: PS buffer for AC %d\n", - sta->sta.addr, sta->sta.aid, ac); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + ps_dbg(sta->sdata, "STA %pM aid %d: PS buffer for AC %d\n", + sta->sta.addr, sta->sta.aid, ac); if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) purge_old_ps_buffers(tx->local); if (skb_queue_len(&sta->ps_tx_buf[ac]) >= STA_MAX_TX_BUFFER) { struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf[ac]); -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - net_dbg_ratelimited("%s: STA %pM TX buffer for AC %d full - dropping oldest frame\n", - tx->sdata->name, sta->sta.addr, ac); -#endif + ps_dbg(tx->sdata, + "STA %pM TX buffer for AC %d full - dropping oldest frame\n", + sta->sta.addr, ac); dev_kfree_skb(old); } else tx->local->total_ps_buffered++; @@ -498,13 +491,11 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) sta_info_recalc_tim(sta); return TX_QUEUED; + } else if (unlikely(test_sta_flag(sta, WLAN_STA_PS_STA))) { + ps_dbg(tx->sdata, + "STA %pM in PS mode, but polling/in SP -> send frame\n", + sta->sta.addr); } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - else if (unlikely(test_sta_flag(sta, WLAN_STA_PS_STA))) { - pr_debug("%s: STA %pM in PS mode, but polling/in SP -> send frame\n", - tx->sdata->name, sta->sta.addr); - } -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ return TX_CONTINUE; } @@ -1963,7 +1954,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, (cpu_to_be16(ethertype) != sdata->control_port_protocol || !ether_addr_equal(sdata->vif.addr, skb->data + ETH_ALEN)))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - net_dbg_ratelimited("%s: dropped frame to %pM (unauthorized port)\n", + net_info_ratelimited("%s: dropped frame to %pM (unauthorized port)\n", dev->name, hdr.addr1); #endif -- cgit v1.2.3 From efc27f8ceebe5eb147fa31d6c995706d327ad855 Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Sun, 24 Jun 2012 13:03:07 +0000 Subject: net: Remove 'unlikely' qualifier in skb_steal_sock() With early demux enabled by default for TCP flows, there is high chance that skb->sk will be non-null. 'unlikely()' was removed from __inet_lookup_skb() but maybe it can be removed from skb_steal_sock() as well. Note: skb_steal_sock() is also called by __inet6_lookup_skb() and __udp4_lib_lookup_skb() but they are protected by their own 'unlikely' calls. Signed-off-by: Vijay Subramanian Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 87b424ae750a..21086036e348 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2154,7 +2154,7 @@ static inline void sk_change_net(struct sock *sk, struct net *net) static inline struct sock *skb_steal_sock(struct sk_buff *skb) { - if (unlikely(skb->sk)) { + if (skb->sk) { struct sock *sk = skb->sk; skb->destructor = NULL; -- cgit v1.2.3 From deaa58542b21d2b395db816952c202034319cbb4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 24 Jun 2012 20:22:49 +0000 Subject: net: struct sock cleanups Add missing kernel doc for sk_rx_dst Move sk_rx_dst to avoid two 32bit holes on 64bit arches Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 21086036e348..dcb54a0793ec 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -198,6 +198,7 @@ struct cg_proto; * @sk_lock: synchronizer * @sk_rcvbuf: size of receive buffer in bytes * @sk_wq: sock wait queue and async head + * @sk_rx_dst: receive input route used by early tcp demux * @sk_dst_cache: destination cache * @sk_dst_lock: destination cache lock * @sk_policy: flow policy @@ -317,9 +318,9 @@ struct sock { struct xfrm_policy *sk_policy[2]; #endif unsigned long sk_flags; + struct dst_entry *sk_rx_dst; struct dst_entry *sk_dst_cache; spinlock_t sk_dst_lock; - struct dst_entry *sk_rx_dst; atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; -- cgit v1.2.3 From c41254006377842013922fb1e407391f24c59522 Mon Sep 17 00:00:00 2001 From: Sjur Brændeland Date: Mon, 25 Jun 2012 07:49:41 +0000 Subject: caif-hsi: Add rtnl support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add RTNL support for managing the caif hsi interface. The HSI HW interface is no longer registering as a device, instead we use symbol_get to get hold of the HSI API. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- drivers/net/caif/caif_hsi.c | 226 +++++++++++++++++++++++++++----------------- include/net/caif/caif_hsi.h | 21 +++- 2 files changed, 157 insertions(+), 90 deletions(-) (limited to 'include/net') diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index d80759e58532..a14f85c0f0e8 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -79,7 +79,6 @@ MODULE_PARM_DESC(hsi_low_threshold, "HSI high threshold (FLOW ON)."); #define HIGH_WATER_MARK hsi_high_threshold static LIST_HEAD(cfhsi_list); -static spinlock_t cfhsi_list_lock; static void cfhsi_inactivity_tout(unsigned long arg) { @@ -1148,42 +1147,6 @@ static void cfhsi_setup(struct net_device *dev) cfhsi->ndev = dev; } -int cfhsi_probe(struct platform_device *pdev) -{ - struct cfhsi *cfhsi = NULL; - struct net_device *ndev; - - int res; - - ndev = alloc_netdev(sizeof(struct cfhsi), "cfhsi%d", cfhsi_setup); - if (!ndev) - return -ENODEV; - - cfhsi = netdev_priv(ndev); - cfhsi->ndev = ndev; - cfhsi->pdev = pdev; - - /* Assign the HSI device. */ - cfhsi->dev = pdev->dev.platform_data; - - /* Assign the driver to this HSI device. */ - cfhsi->dev->drv = &cfhsi->drv; - - /* Register network device. */ - res = register_netdev(ndev); - if (res) { - dev_err(&ndev->dev, "%s: Registration error: %d.\n", - __func__, res); - free_netdev(ndev); - } - /* Add CAIF HSI device to list. */ - spin_lock(&cfhsi_list_lock); - list_add_tail(&cfhsi->list, &cfhsi_list); - spin_unlock(&cfhsi_list_lock); - - return res; -} - static int cfhsi_open(struct net_device *ndev) { struct cfhsi *cfhsi = netdev_priv(ndev); @@ -1364,85 +1327,170 @@ static int cfhsi_close(struct net_device *ndev) return 0; } +static void cfhsi_uninit(struct net_device *dev) +{ + struct cfhsi *cfhsi = netdev_priv(dev); + ASSERT_RTNL(); + symbol_put(cfhsi_get_device); + list_del(&cfhsi->list); +} + static const struct net_device_ops cfhsi_ops = { + .ndo_uninit = cfhsi_uninit, .ndo_open = cfhsi_open, .ndo_stop = cfhsi_close, .ndo_start_xmit = cfhsi_xmit }; -int cfhsi_remove(struct platform_device *pdev) +static void cfhsi_netlink_parms(struct nlattr *data[], struct cfhsi *cfhsi) { - struct list_head *list_node; - struct list_head *n; - struct cfhsi *cfhsi = NULL; - struct cfhsi_dev *dev; + int i; - dev = (struct cfhsi_dev *)pdev->dev.platform_data; - spin_lock(&cfhsi_list_lock); - list_for_each_safe(list_node, n, &cfhsi_list) { - cfhsi = list_entry(list_node, struct cfhsi, list); - /* Find the corresponding device. */ - if (cfhsi->dev == dev) { - /* Remove from list. */ - list_del(list_node); - spin_unlock(&cfhsi_list_lock); - return 0; - } + if (!data) { + pr_debug("no params data found\n"); + return; } - spin_unlock(&cfhsi_list_lock); - return -ENODEV; + + i = __IFLA_CAIF_HSI_INACTIVITY_TOUT; + if (data[i]) + inactivity_timeout = nla_get_u32(data[i]); + + i = __IFLA_CAIF_HSI_AGGREGATION_TOUT; + if (data[i]) + aggregation_timeout = nla_get_u32(data[i]); + + i = __IFLA_CAIF_HSI_HEAD_ALIGN; + if (data[i]) + hsi_head_align = nla_get_u32(data[i]); + + i = __IFLA_CAIF_HSI_TAIL_ALIGN; + if (data[i]) + hsi_tail_align = nla_get_u32(data[i]); + + i = __IFLA_CAIF_HSI_QHIGH_WATERMARK; + if (data[i]) + hsi_high_threshold = nla_get_u32(data[i]); +} + +static int caif_hsi_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + cfhsi_netlink_parms(data, netdev_priv(dev)); + netdev_state_change(dev); + return 0; } -struct platform_driver cfhsi_plat_drv = { - .probe = cfhsi_probe, - .remove = cfhsi_remove, - .driver = { - .name = "cfhsi", - .owner = THIS_MODULE, - }, +static const struct nla_policy caif_hsi_policy[__IFLA_CAIF_HSI_MAX + 1] = { + [__IFLA_CAIF_HSI_INACTIVITY_TOUT] = { .type = NLA_U32, .len = 4 }, + [__IFLA_CAIF_HSI_AGGREGATION_TOUT] = { .type = NLA_U32, .len = 4 }, + [__IFLA_CAIF_HSI_HEAD_ALIGN] = { .type = NLA_U32, .len = 4 }, + [__IFLA_CAIF_HSI_TAIL_ALIGN] = { .type = NLA_U32, .len = 4 }, + [__IFLA_CAIF_HSI_QHIGH_WATERMARK] = { .type = NLA_U32, .len = 4 }, + [__IFLA_CAIF_HSI_QLOW_WATERMARK] = { .type = NLA_U32, .len = 4 }, }; -static void __exit cfhsi_exit_module(void) +static size_t caif_hsi_get_size(const struct net_device *dev) +{ + int i; + size_t s = 0; + for (i = __IFLA_CAIF_HSI_UNSPEC + 1; i < __IFLA_CAIF_HSI_MAX; i++) + s += nla_total_size(caif_hsi_policy[i].len); + return s; +} + +static int caif_hsi_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + if (nla_put_u32(skb, __IFLA_CAIF_HSI_INACTIVITY_TOUT, + inactivity_timeout) || + nla_put_u32(skb, __IFLA_CAIF_HSI_AGGREGATION_TOUT, + aggregation_timeout) || + nla_put_u32(skb, __IFLA_CAIF_HSI_HEAD_ALIGN, hsi_head_align) || + nla_put_u32(skb, __IFLA_CAIF_HSI_TAIL_ALIGN, hsi_tail_align) || + nla_put_u32(skb, __IFLA_CAIF_HSI_QHIGH_WATERMARK, + hsi_high_threshold) || + nla_put_u32(skb, __IFLA_CAIF_HSI_QLOW_WATERMARK, + hsi_low_threshold)) + return -EMSGSIZE; + + return 0; +} + +static int caif_hsi_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { - struct list_head *list_node; - struct list_head *n; struct cfhsi *cfhsi = NULL; + struct platform_device *(*get_dev)(void); - spin_lock(&cfhsi_list_lock); - list_for_each_safe(list_node, n, &cfhsi_list) { - cfhsi = list_entry(list_node, struct cfhsi, list); + ASSERT_RTNL(); + + cfhsi = netdev_priv(dev); + cfhsi_netlink_parms(data, cfhsi); + dev_net_set(cfhsi->ndev, src_net); + + get_dev = symbol_get(cfhsi_get_device); + if (!get_dev) { + pr_err("%s: failed to get the cfhsi device symbol\n", __func__); + return -ENODEV; + } + + /* Assign the HSI device. */ + cfhsi->pdev = (*get_dev)(); + if (!cfhsi->pdev) { + pr_err("%s: failed to get the cfhsi device\n", __func__); + goto err; + } - /* Remove from list. */ - list_del(list_node); - spin_unlock(&cfhsi_list_lock); + /* Assign the HSI device. */ + cfhsi->dev = cfhsi->pdev->dev.platform_data; + + /* Assign the driver to this HSI device. */ + cfhsi->dev->drv = &cfhsi->drv; - unregister_netdevice(cfhsi->ndev); + if (register_netdevice(dev)) { + pr_warn("%s: device rtml registration failed\n", __func__); + goto err; - spin_lock(&cfhsi_list_lock); } - spin_unlock(&cfhsi_list_lock); + /* Add CAIF HSI device to list. */ + list_add_tail(&cfhsi->list, &cfhsi_list); - /* Unregister platform driver. */ - platform_driver_unregister(&cfhsi_plat_drv); + return 0; +err: + symbol_put(cfhsi_get_device); + return -ENODEV; } -static int __init cfhsi_init_module(void) +static struct rtnl_link_ops caif_hsi_link_ops __read_mostly = { + .kind = "cfhsi", + .priv_size = sizeof(struct cfhsi), + .setup = cfhsi_setup, + .maxtype = __IFLA_CAIF_HSI_MAX, + .policy = caif_hsi_policy, + .newlink = caif_hsi_newlink, + .changelink = caif_hsi_changelink, + .get_size = caif_hsi_get_size, + .fill_info = caif_hsi_fill_info, +}; + +static void __exit cfhsi_exit_module(void) { - int result; + struct list_head *list_node; + struct list_head *n; + struct cfhsi *cfhsi; - /* Initialize spin lock. */ - spin_lock_init(&cfhsi_list_lock); + rtnl_link_unregister(&caif_hsi_link_ops); - /* Register platform driver. */ - result = platform_driver_register(&cfhsi_plat_drv); - if (result) { - printk(KERN_ERR "Could not register platform HSI driver: %d.\n", - result); - goto err_dev_register; + rtnl_lock(); + list_for_each_safe(list_node, n, &cfhsi_list) { + cfhsi = list_entry(list_node, struct cfhsi, list); + unregister_netdev(cfhsi->ndev); } + rtnl_unlock(); +} - err_dev_register: - return result; +static int __init cfhsi_init_module(void) +{ + return rtnl_link_register(&caif_hsi_link_ops); } module_init(cfhsi_init_module); diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h index 439dadc8102f..a77b2bd7719a 100644 --- a/include/net/caif/caif_hsi.h +++ b/include/net/caif/caif_hsi.h @@ -170,7 +170,26 @@ struct cfhsi { unsigned long bits; }; - extern struct platform_driver cfhsi_driver; +/** + * enum ifla_caif_hsi - CAIF HSI NetlinkRT parameters. + * @IFLA_CAIF_HSI_INACTIVITY_TOUT: Inactivity timeout before + * taking the HSI wakeline down, in milliseconds. + * When using RT Netlink to create, destroy or configure a CAIF HSI interface, + * enum ifla_caif_hsi is used to specify the configuration attributes. + */ +enum ifla_caif_hsi { + __IFLA_CAIF_HSI_UNSPEC, + __IFLA_CAIF_HSI_INACTIVITY_TOUT, + __IFLA_CAIF_HSI_AGGREGATION_TOUT, + __IFLA_CAIF_HSI_HEAD_ALIGN, + __IFLA_CAIF_HSI_TAIL_ALIGN, + __IFLA_CAIF_HSI_QHIGH_WATERMARK, + __IFLA_CAIF_HSI_QLOW_WATERMARK, + __IFLA_CAIF_HSI_MAX +}; + +extern struct platform_device *cfhsi_get_device(void); + #endif /* CAIF_HSI_H_ */ -- cgit v1.2.3 From 1c385f1fdf6f9c66d982802cd74349c040980b50 Mon Sep 17 00:00:00 2001 From: Sjur Brændeland Date: Mon, 25 Jun 2012 07:49:42 +0000 Subject: caif-hsi: Replace platform device with ops structure. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove use of struct platform_device, and replace it with struct cfhsi_ops. Updated variable names in the same spirit: cfhsi_get_dev to cfhsi_get_ops, cfhsi->dev to cfhsi->ops and, cfhsi->dev.drv to cfhsi->ops->cb_ops. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- drivers/net/caif/caif_hsi.c | 109 +++++++++++++++++++++----------------------- include/net/caif/caif_hsi.h | 38 +++++++-------- 2 files changed, 70 insertions(+), 77 deletions(-) (limited to 'include/net') diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index a14f85c0f0e8..0927c108bd14 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -184,7 +183,7 @@ static int cfhsi_flush_fifo(struct cfhsi *cfhsi) __func__); do { - ret = cfhsi->dev->cfhsi_fifo_occupancy(cfhsi->dev, + ret = cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, &fifo_occupancy); if (ret) { netdev_warn(cfhsi->ndev, @@ -197,8 +196,8 @@ static int cfhsi_flush_fifo(struct cfhsi *cfhsi) fifo_occupancy = min(sizeof(buffer), fifo_occupancy); set_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits); - ret = cfhsi->dev->cfhsi_rx(buffer, fifo_occupancy, - cfhsi->dev); + ret = cfhsi->ops->cfhsi_rx(buffer, fifo_occupancy, + cfhsi->ops); if (ret) { clear_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits); netdev_warn(cfhsi->ndev, @@ -371,7 +370,7 @@ static void cfhsi_start_tx(struct cfhsi *cfhsi) } /* Set up new transfer. */ - res = cfhsi->dev->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->dev); + res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); if (WARN_ON(res < 0)) netdev_err(cfhsi->ndev, "%s: TX error %d.\n", __func__, res); @@ -410,11 +409,11 @@ static void cfhsi_tx_done(struct cfhsi *cfhsi) return; } -static void cfhsi_tx_done_cb(struct cfhsi_drv *drv) +static void cfhsi_tx_done_cb(struct cfhsi_cb_ops *cb_ops) { struct cfhsi *cfhsi; - cfhsi = container_of(drv, struct cfhsi, drv); + cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); netdev_dbg(cfhsi->ndev, "%s.\n", __func__); @@ -476,8 +475,8 @@ static int cfhsi_rx_desc(struct cfhsi_desc *desc, struct cfhsi *cfhsi) skb->dev = cfhsi->ndev; /* - * We are called from a arch specific platform device. - * Unfortunately we don't know what context we're + * We are in a callback handler and + * unfortunately we don't know what context we're * running in. */ if (in_interrupt()) @@ -607,7 +606,7 @@ static int cfhsi_rx_pld(struct cfhsi_desc *desc, struct cfhsi *cfhsi) skb->dev = cfhsi->ndev; /* - * We're called from a platform device, + * We're called in callback from HSI * and don't know the context we're running in. */ if (in_interrupt()) @@ -711,8 +710,8 @@ static void cfhsi_rx_done(struct cfhsi *cfhsi) netdev_dbg(cfhsi->ndev, "%s: Start RX.\n", __func__); - res = cfhsi->dev->cfhsi_rx(rx_ptr, rx_len, - cfhsi->dev); + res = cfhsi->ops->cfhsi_rx(rx_ptr, rx_len, + cfhsi->ops); if (WARN_ON(res < 0)) { netdev_err(cfhsi->ndev, "%s: RX error %d.\n", __func__, res); @@ -765,11 +764,11 @@ static void cfhsi_rx_slowpath(unsigned long arg) cfhsi_rx_done(cfhsi); } -static void cfhsi_rx_done_cb(struct cfhsi_drv *drv) +static void cfhsi_rx_done_cb(struct cfhsi_cb_ops *cb_ops) { struct cfhsi *cfhsi; - cfhsi = container_of(drv, struct cfhsi, drv); + cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); netdev_dbg(cfhsi->ndev, "%s.\n", __func__); @@ -803,7 +802,7 @@ static void cfhsi_wake_up(struct work_struct *work) } /* Activate wake line. */ - cfhsi->dev->cfhsi_wake_up(cfhsi->dev); + cfhsi->ops->cfhsi_wake_up(cfhsi->ops); netdev_dbg(cfhsi->ndev, "%s: Start waiting.\n", __func__); @@ -819,7 +818,7 @@ static void cfhsi_wake_up(struct work_struct *work) __func__, ret); clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - cfhsi->dev->cfhsi_wake_down(cfhsi->dev); + cfhsi->ops->cfhsi_wake_down(cfhsi->ops); return; } else if (!ret) { bool ca_wake = false; @@ -830,14 +829,14 @@ static void cfhsi_wake_up(struct work_struct *work) __func__); /* Check FIFO to check if modem has sent something. */ - WARN_ON(cfhsi->dev->cfhsi_fifo_occupancy(cfhsi->dev, + WARN_ON(cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, &fifo_occupancy)); netdev_dbg(cfhsi->ndev, "%s: Bytes in FIFO: %u.\n", __func__, (unsigned) fifo_occupancy); /* Check if we misssed the interrupt. */ - WARN_ON(cfhsi->dev->cfhsi_get_peer_wake(cfhsi->dev, + WARN_ON(cfhsi->ops->cfhsi_get_peer_wake(cfhsi->ops, &ca_wake)); if (ca_wake) { @@ -852,7 +851,7 @@ static void cfhsi_wake_up(struct work_struct *work) } clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - cfhsi->dev->cfhsi_wake_down(cfhsi->dev); + cfhsi->ops->cfhsi_wake_down(cfhsi->ops); return; } wake_ack: @@ -865,7 +864,7 @@ wake_ack: /* Resume read operation. */ netdev_dbg(cfhsi->ndev, "%s: Start RX.\n", __func__); - res = cfhsi->dev->cfhsi_rx(cfhsi->rx_ptr, cfhsi->rx_len, cfhsi->dev); + res = cfhsi->ops->cfhsi_rx(cfhsi->rx_ptr, cfhsi->rx_len, cfhsi->ops); if (WARN_ON(res < 0)) netdev_err(cfhsi->ndev, "%s: RX err %d.\n", __func__, res); @@ -896,7 +895,7 @@ wake_ack: if (likely(len > 0)) { /* Set up new transfer. */ - res = cfhsi->dev->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->dev); + res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); if (WARN_ON(res < 0)) { netdev_err(cfhsi->ndev, "%s: TX error %d.\n", __func__, res); @@ -923,7 +922,7 @@ static void cfhsi_wake_down(struct work_struct *work) return; /* Deactivate wake line. */ - cfhsi->dev->cfhsi_wake_down(cfhsi->dev); + cfhsi->ops->cfhsi_wake_down(cfhsi->ops); /* Wait for acknowledge. */ ret = CFHSI_WAKE_TOUT; @@ -942,7 +941,7 @@ static void cfhsi_wake_down(struct work_struct *work) netdev_err(cfhsi->ndev, "%s: Timeout.\n", __func__); /* Check if we misssed the interrupt. */ - WARN_ON(cfhsi->dev->cfhsi_get_peer_wake(cfhsi->dev, + WARN_ON(cfhsi->ops->cfhsi_get_peer_wake(cfhsi->ops, &ca_wake)); if (!ca_wake) netdev_err(cfhsi->ndev, "%s: CA Wake missed !.\n", @@ -951,7 +950,7 @@ static void cfhsi_wake_down(struct work_struct *work) /* Check FIFO occupancy. */ while (retry) { - WARN_ON(cfhsi->dev->cfhsi_fifo_occupancy(cfhsi->dev, + WARN_ON(cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, &fifo_occupancy)); if (!fifo_occupancy) @@ -969,8 +968,7 @@ static void cfhsi_wake_down(struct work_struct *work) clear_bit(CFHSI_AWAKE, &cfhsi->bits); /* Cancel pending RX requests. */ - cfhsi->dev->cfhsi_rx_cancel(cfhsi->dev); - + cfhsi->ops->cfhsi_rx_cancel(cfhsi->ops); } static void cfhsi_out_of_sync(struct work_struct *work) @@ -984,11 +982,11 @@ static void cfhsi_out_of_sync(struct work_struct *work) rtnl_unlock(); } -static void cfhsi_wake_up_cb(struct cfhsi_drv *drv) +static void cfhsi_wake_up_cb(struct cfhsi_cb_ops *cb_ops) { struct cfhsi *cfhsi = NULL; - cfhsi = container_of(drv, struct cfhsi, drv); + cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); netdev_dbg(cfhsi->ndev, "%s.\n", __func__); @@ -1003,11 +1001,11 @@ static void cfhsi_wake_up_cb(struct cfhsi_drv *drv) queue_work(cfhsi->wq, &cfhsi->wake_up_work); } -static void cfhsi_wake_down_cb(struct cfhsi_drv *drv) +static void cfhsi_wake_down_cb(struct cfhsi_cb_ops *cb_ops) { struct cfhsi *cfhsi = NULL; - cfhsi = container_of(drv, struct cfhsi, drv); + cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); netdev_dbg(cfhsi->ndev, "%s.\n", __func__); @@ -1110,7 +1108,7 @@ static int cfhsi_xmit(struct sk_buff *skb, struct net_device *dev) WARN_ON(!len); /* Set up new transfer. */ - res = cfhsi->dev->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->dev); + res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); if (WARN_ON(res < 0)) { netdev_err(cfhsi->ndev, "%s: TX error %d.\n", __func__, res); @@ -1125,19 +1123,19 @@ static int cfhsi_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } -static const struct net_device_ops cfhsi_ops; +static const struct net_device_ops cfhsi_netdevops; static void cfhsi_setup(struct net_device *dev) { int i; struct cfhsi *cfhsi = netdev_priv(dev); dev->features = 0; - dev->netdev_ops = &cfhsi_ops; dev->type = ARPHRD_CAIF; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = CFHSI_MAX_CAIF_FRAME_SZ; dev->tx_queue_len = 0; dev->destructor = free_netdev; + dev->netdev_ops = &cfhsi_netdevops; for (i = 0; i < CFHSI_PRIO_LAST; ++i) skb_queue_head_init(&cfhsi->qhead[i]); cfhsi->cfdev.link_select = CAIF_LINK_HIGH_BANDW; @@ -1213,10 +1211,10 @@ static int cfhsi_open(struct net_device *ndev) spin_lock_init(&cfhsi->lock); /* Set up the driver. */ - cfhsi->drv.tx_done_cb = cfhsi_tx_done_cb; - cfhsi->drv.rx_done_cb = cfhsi_rx_done_cb; - cfhsi->drv.wake_up_cb = cfhsi_wake_up_cb; - cfhsi->drv.wake_down_cb = cfhsi_wake_down_cb; + cfhsi->cb_ops.tx_done_cb = cfhsi_tx_done_cb; + cfhsi->cb_ops.rx_done_cb = cfhsi_rx_done_cb; + cfhsi->cb_ops.wake_up_cb = cfhsi_wake_up_cb; + cfhsi->cb_ops.wake_down_cb = cfhsi_wake_down_cb; /* Initialize the work queues. */ INIT_WORK(&cfhsi->wake_up_work, cfhsi_wake_up); @@ -1230,7 +1228,7 @@ static int cfhsi_open(struct net_device *ndev) clear_bit(CFHSI_AWAKE, &cfhsi->bits); /* Create work thread. */ - cfhsi->wq = create_singlethread_workqueue(cfhsi->pdev->name); + cfhsi->wq = create_singlethread_workqueue(cfhsi->ndev->name); if (!cfhsi->wq) { netdev_err(cfhsi->ndev, "%s: Failed to create work queue.\n", __func__); @@ -1257,7 +1255,7 @@ static int cfhsi_open(struct net_device *ndev) cfhsi->aggregation_timer.function = cfhsi_aggregation_tout; /* Activate HSI interface. */ - res = cfhsi->dev->cfhsi_up(cfhsi->dev); + res = cfhsi->ops->cfhsi_up(cfhsi->ops); if (res) { netdev_err(cfhsi->ndev, "%s: can't activate HSI interface: %d.\n", @@ -1275,7 +1273,7 @@ static int cfhsi_open(struct net_device *ndev) return res; err_net_reg: - cfhsi->dev->cfhsi_down(cfhsi->dev); + cfhsi->ops->cfhsi_down(cfhsi->ops); err_activate: destroy_workqueue(cfhsi->wq); err_create_wq: @@ -1305,7 +1303,7 @@ static int cfhsi_close(struct net_device *ndev) del_timer_sync(&cfhsi->aggregation_timer); /* Cancel pending RX request (if any) */ - cfhsi->dev->cfhsi_rx_cancel(cfhsi->dev); + cfhsi->ops->cfhsi_rx_cancel(cfhsi->ops); /* Destroy workqueue */ destroy_workqueue(cfhsi->wq); @@ -1318,7 +1316,7 @@ static int cfhsi_close(struct net_device *ndev) cfhsi_abort_tx(cfhsi); /* Deactivate interface */ - cfhsi->dev->cfhsi_down(cfhsi->dev); + cfhsi->ops->cfhsi_down(cfhsi->ops); /* Free buffers. */ kfree(tx_buf); @@ -1335,7 +1333,7 @@ static void cfhsi_uninit(struct net_device *dev) list_del(&cfhsi->list); } -static const struct net_device_ops cfhsi_ops = { +static const struct net_device_ops cfhsi_netdevops = { .ndo_uninit = cfhsi_uninit, .ndo_open = cfhsi_open, .ndo_stop = cfhsi_close, @@ -1419,7 +1417,7 @@ static int caif_hsi_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct cfhsi *cfhsi = NULL; - struct platform_device *(*get_dev)(void); + struct cfhsi_ops *(*get_ops)(void); ASSERT_RTNL(); @@ -1427,36 +1425,31 @@ static int caif_hsi_newlink(struct net *src_net, struct net_device *dev, cfhsi_netlink_parms(data, cfhsi); dev_net_set(cfhsi->ndev, src_net); - get_dev = symbol_get(cfhsi_get_device); - if (!get_dev) { - pr_err("%s: failed to get the cfhsi device symbol\n", __func__); + get_ops = symbol_get(cfhsi_get_ops); + if (!get_ops) { + pr_err("%s: failed to get the cfhsi_ops\n", __func__); return -ENODEV; } /* Assign the HSI device. */ - cfhsi->pdev = (*get_dev)(); - if (!cfhsi->pdev) { - pr_err("%s: failed to get the cfhsi device\n", __func__); + cfhsi->ops = (*get_ops)(); + if (!cfhsi->ops) { + pr_err("%s: failed to get the cfhsi_ops\n", __func__); goto err; } - /* Assign the HSI device. */ - cfhsi->dev = cfhsi->pdev->dev.platform_data; - /* Assign the driver to this HSI device. */ - cfhsi->dev->drv = &cfhsi->drv; - + cfhsi->ops->cb_ops = &cfhsi->cb_ops; if (register_netdevice(dev)) { - pr_warn("%s: device rtml registration failed\n", __func__); + pr_warn("%s: caif_hsi device registration failed\n", __func__); goto err; - } /* Add CAIF HSI device to list. */ list_add_tail(&cfhsi->list, &cfhsi_list); return 0; err: - symbol_put(cfhsi_get_device); + symbol_put(cfhsi_get_ops); return -ENODEV; } diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h index a77b2bd7719a..6dc7dc2674b2 100644 --- a/include/net/caif/caif_hsi.h +++ b/include/net/caif/caif_hsi.h @@ -93,25 +93,25 @@ struct cfhsi_desc { #endif /* Structure implemented by the CAIF HSI driver. */ -struct cfhsi_drv { - void (*tx_done_cb) (struct cfhsi_drv *drv); - void (*rx_done_cb) (struct cfhsi_drv *drv); - void (*wake_up_cb) (struct cfhsi_drv *drv); - void (*wake_down_cb) (struct cfhsi_drv *drv); +struct cfhsi_cb_ops { + void (*tx_done_cb) (struct cfhsi_cb_ops *drv); + void (*rx_done_cb) (struct cfhsi_cb_ops *drv); + void (*wake_up_cb) (struct cfhsi_cb_ops *drv); + void (*wake_down_cb) (struct cfhsi_cb_ops *drv); }; /* Structure implemented by HSI device. */ -struct cfhsi_dev { - int (*cfhsi_up) (struct cfhsi_dev *dev); - int (*cfhsi_down) (struct cfhsi_dev *dev); - int (*cfhsi_tx) (u8 *ptr, int len, struct cfhsi_dev *dev); - int (*cfhsi_rx) (u8 *ptr, int len, struct cfhsi_dev *dev); - int (*cfhsi_wake_up) (struct cfhsi_dev *dev); - int (*cfhsi_wake_down) (struct cfhsi_dev *dev); - int (*cfhsi_get_peer_wake) (struct cfhsi_dev *dev, bool *status); - int (*cfhsi_fifo_occupancy)(struct cfhsi_dev *dev, size_t *occupancy); - int (*cfhsi_rx_cancel)(struct cfhsi_dev *dev); - struct cfhsi_drv *drv; +struct cfhsi_ops { + int (*cfhsi_up) (struct cfhsi_ops *dev); + int (*cfhsi_down) (struct cfhsi_ops *dev); + int (*cfhsi_tx) (u8 *ptr, int len, struct cfhsi_ops *dev); + int (*cfhsi_rx) (u8 *ptr, int len, struct cfhsi_ops *dev); + int (*cfhsi_wake_up) (struct cfhsi_ops *dev); + int (*cfhsi_wake_down) (struct cfhsi_ops *dev); + int (*cfhsi_get_peer_wake) (struct cfhsi_ops *dev, bool *status); + int (*cfhsi_fifo_occupancy) (struct cfhsi_ops *dev, size_t *occupancy); + int (*cfhsi_rx_cancel)(struct cfhsi_ops *dev); + struct cfhsi_cb_ops *cb_ops; }; /* Structure holds status of received CAIF frames processing */ @@ -138,8 +138,8 @@ struct cfhsi { struct net_device *ndev; struct platform_device *pdev; struct sk_buff_head qhead[CFHSI_PRIO_LAST]; - struct cfhsi_drv drv; - struct cfhsi_dev *dev; + struct cfhsi_cb_ops cb_ops; + struct cfhsi_ops *ops; int tx_state; struct cfhsi_rx_state rx_state; unsigned long inactivity_timeout; @@ -190,6 +190,6 @@ enum ifla_caif_hsi { __IFLA_CAIF_HSI_MAX }; -extern struct platform_device *cfhsi_get_device(void); +extern struct cfhsi_ops *cfhsi_get_ops(void); #endif /* CAIF_HSI_H_ */ -- cgit v1.2.3 From 91fa0cbc0c47930f771bf5425109956cc99c6b0b Mon Sep 17 00:00:00 2001 From: Sjur Brændeland Date: Mon, 25 Jun 2012 07:49:43 +0000 Subject: caif-hsi: Remove use of module parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove use of module parameters on caif hsi device, as rtnl configuration parameters are already supported. All caif hsi configuration data is put in cfhsi_config, and default values in hsi_default_config. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- drivers/net/caif/caif_hsi.c | 151 +++++++++++++++++++++----------------------- include/net/caif/caif_hsi.h | 14 ++-- 2 files changed, 82 insertions(+), 83 deletions(-) (limited to 'include/net') diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index 0927c108bd14..1c2bd01e1592 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -32,51 +32,39 @@ MODULE_DESCRIPTION("CAIF HSI driver"); #define PAD_POW2(x, pow) ((((x)&((pow)-1)) == 0) ? 0 :\ (((pow)-((x)&((pow)-1))))) -static int inactivity_timeout = 1000; -module_param(inactivity_timeout, int, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(inactivity_timeout, "Inactivity timeout on HSI, ms."); +static const struct cfhsi_config hsi_default_config = { -static int aggregation_timeout = 1; -module_param(aggregation_timeout, int, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(aggregation_timeout, "Aggregation timeout on HSI, ms."); + /* Inactivity timeout on HSI, ms */ + .inactivity_timeout = HZ, -/* - * HSI padding options. - * Warning: must be a base of 2 (& operation used) and can not be zero ! - */ -static int hsi_head_align = 4; -module_param(hsi_head_align, int, S_IRUGO); -MODULE_PARM_DESC(hsi_head_align, "HSI head alignment."); + /* Aggregation timeout (ms) of zero means no aggregation is done*/ + .aggregation_timeout = 1, -static int hsi_tail_align = 4; -module_param(hsi_tail_align, int, S_IRUGO); -MODULE_PARM_DESC(hsi_tail_align, "HSI tail alignment."); - -/* - * HSI link layer flowcontrol thresholds. - * Warning: A high threshold value migth increase throughput but it will at - * the same time prevent channel prioritization and increase the risk of - * flooding the modem. The high threshold should be above the low. - */ -static int hsi_high_threshold = 100; -module_param(hsi_high_threshold, int, S_IRUGO); -MODULE_PARM_DESC(hsi_high_threshold, "HSI high threshold (FLOW OFF)."); + /* + * HSI link layer flow-control thresholds. + * Threshold values for the HSI packet queue. Flow-control will be + * asserted when the number of packets exceeds q_high_mark. It will + * not be de-asserted before the number of packets drops below + * q_low_mark. + * Warning: A high threshold value might increase throughput but it + * will at the same time prevent channel prioritization and increase + * the risk of flooding the modem. The high threshold should be above + * the low. + */ + .q_high_mark = 100, + .q_low_mark = 50, -static int hsi_low_threshold = 50; -module_param(hsi_low_threshold, int, S_IRUGO); -MODULE_PARM_DESC(hsi_low_threshold, "HSI high threshold (FLOW ON)."); + /* + * HSI padding options. + * Warning: must be a base of 2 (& operation used) and can not be zero ! + */ + .head_align = 4, + .tail_align = 4, +}; #define ON 1 #define OFF 0 -/* - * Threshold values for the HSI packet queue. Flowcontrol will be asserted - * when the number of packets exceeds HIGH_WATER_MARK. It will not be - * de-asserted before the number of packets drops below LOW_WATER_MARK. - */ -#define LOW_WATER_MARK hsi_low_threshold -#define HIGH_WATER_MARK hsi_high_threshold - static LIST_HEAD(cfhsi_list); static void cfhsi_inactivity_tout(unsigned long arg) @@ -99,8 +87,8 @@ static void cfhsi_update_aggregation_stats(struct cfhsi *cfhsi, int hpad, tpad, len; info = (struct caif_payload_info *)&skb->cb; - hpad = 1 + PAD_POW2((info->hdr_len + 1), hsi_head_align); - tpad = PAD_POW2((skb->len + hpad), hsi_tail_align); + hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); + tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); len = skb->len + hpad + tpad; if (direction > 0) @@ -113,7 +101,7 @@ static bool cfhsi_can_send_aggregate(struct cfhsi *cfhsi) { int i; - if (cfhsi->aggregation_timeout == 0) + if (cfhsi->cfg.aggregation_timeout == 0) return true; for (i = 0; i < CFHSI_PRIO_BEBK; ++i) { @@ -169,7 +157,7 @@ static void cfhsi_abort_tx(struct cfhsi *cfhsi) cfhsi->tx_state = CFHSI_TX_STATE_IDLE; if (!test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->inactivity_timeout); + jiffies + cfhsi->cfg.inactivity_timeout); spin_unlock_bh(&cfhsi->lock); } @@ -250,8 +238,8 @@ static int cfhsi_tx_frm(struct cfhsi_desc *desc, struct cfhsi *cfhsi) /* Calculate needed head alignment and tail alignment. */ info = (struct caif_payload_info *)&skb->cb; - hpad = 1 + PAD_POW2((info->hdr_len + 1), hsi_head_align); - tpad = PAD_POW2((skb->len + hpad), hsi_tail_align); + hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); + tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); /* Check if frame still fits with added alignment. */ if ((skb->len + hpad + tpad) <= CFHSI_MAX_EMB_FRM_SZ) { @@ -292,8 +280,8 @@ static int cfhsi_tx_frm(struct cfhsi_desc *desc, struct cfhsi *cfhsi) /* Calculate needed head alignment and tail alignment. */ info = (struct caif_payload_info *)&skb->cb; - hpad = 1 + PAD_POW2((info->hdr_len + 1), hsi_head_align); - tpad = PAD_POW2((skb->len + hpad), hsi_tail_align); + hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); + tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); /* Fill in CAIF frame length in descriptor. */ desc->cffrm_len[nfrms] = hpad + skb->len + tpad; @@ -364,7 +352,7 @@ static void cfhsi_start_tx(struct cfhsi *cfhsi) cfhsi->tx_state = CFHSI_TX_STATE_IDLE; /* Start inactivity timer. */ mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->inactivity_timeout); + jiffies + cfhsi->cfg.inactivity_timeout); spin_unlock_bh(&cfhsi->lock); break; } @@ -390,7 +378,7 @@ static void cfhsi_tx_done(struct cfhsi *cfhsi) */ spin_lock_bh(&cfhsi->lock); if (cfhsi->flow_off_sent && - cfhsi_tx_queue_len(cfhsi) <= cfhsi->q_low_mark && + cfhsi_tx_queue_len(cfhsi) <= cfhsi->cfg.q_low_mark && cfhsi->cfdev.flowctrl) { cfhsi->flow_off_sent = 0; @@ -402,7 +390,7 @@ static void cfhsi_tx_done(struct cfhsi *cfhsi) cfhsi_start_tx(cfhsi); } else { mod_timer(&cfhsi->aggregation_timer, - jiffies + cfhsi->aggregation_timeout); + jiffies + cfhsi->cfg.aggregation_timeout); spin_unlock_bh(&cfhsi->lock); } @@ -645,7 +633,7 @@ static void cfhsi_rx_done(struct cfhsi *cfhsi) /* Update inactivity timer if pending. */ spin_lock_bh(&cfhsi->lock); mod_timer_pending(&cfhsi->inactivity_timer, - jiffies + cfhsi->inactivity_timeout); + jiffies + cfhsi->cfg.inactivity_timeout); spin_unlock_bh(&cfhsi->lock); if (cfhsi->rx_state.state == CFHSI_RX_STATE_DESC) { @@ -880,7 +868,7 @@ wake_ack: __func__); /* Start inactivity timer. */ mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->inactivity_timeout); + jiffies + cfhsi->cfg.inactivity_timeout); spin_unlock_bh(&cfhsi->lock); return; } @@ -1071,7 +1059,7 @@ static int cfhsi_xmit(struct sk_buff *skb, struct net_device *dev) /* Send flow off if number of packets is above high water mark. */ if (!cfhsi->flow_off_sent && - cfhsi_tx_queue_len(cfhsi) > cfhsi->q_high_mark && + cfhsi_tx_queue_len(cfhsi) > cfhsi->cfg.q_high_mark && cfhsi->cfdev.flowctrl) { cfhsi->flow_off_sent = 1; cfhsi->cfdev.flowctrl(cfhsi->ndev, OFF); @@ -1143,6 +1131,7 @@ static void cfhsi_setup(struct net_device *dev) cfhsi->cfdev.use_stx = false; cfhsi->cfdev.use_fcs = false; cfhsi->ndev = dev; + cfhsi->cfg = hsi_default_config; } static int cfhsi_open(struct net_device *ndev) @@ -1158,9 +1147,6 @@ static int cfhsi_open(struct net_device *ndev) /* Set flow info */ cfhsi->flow_off_sent = 0; - cfhsi->q_low_mark = LOW_WATER_MARK; - cfhsi->q_high_mark = HIGH_WATER_MARK; - /* * Allocate a TX buffer with the size of a HSI packet descriptors @@ -1188,20 +1174,8 @@ static int cfhsi_open(struct net_device *ndev) goto err_alloc_rx_flip; } - /* Pre-calculate inactivity timeout. */ - if (inactivity_timeout != -1) { - cfhsi->inactivity_timeout = - inactivity_timeout * HZ / 1000; - if (!cfhsi->inactivity_timeout) - cfhsi->inactivity_timeout = 1; - else if (cfhsi->inactivity_timeout > NEXT_TIMER_MAX_DELTA) - cfhsi->inactivity_timeout = NEXT_TIMER_MAX_DELTA; - } else { - cfhsi->inactivity_timeout = NEXT_TIMER_MAX_DELTA; - } - /* Initialize aggregation timeout */ - cfhsi->aggregation_timeout = aggregation_timeout; + cfhsi->cfg.aggregation_timeout = hsi_default_config.aggregation_timeout; /* Initialize recieve vaiables. */ cfhsi->rx_ptr = cfhsi->rx_buf; @@ -1350,24 +1324,39 @@ static void cfhsi_netlink_parms(struct nlattr *data[], struct cfhsi *cfhsi) } i = __IFLA_CAIF_HSI_INACTIVITY_TOUT; - if (data[i]) - inactivity_timeout = nla_get_u32(data[i]); + /* + * Inactivity timeout in millisecs. Lowest possible value is 1, + * and highest possible is NEXT_TIMER_MAX_DELTA. + */ + if (data[i]) { + u32 inactivity_timeout = nla_get_u32(data[i]); + /* Pre-calculate inactivity timeout. */ + cfhsi->cfg.inactivity_timeout = inactivity_timeout * HZ / 1000; + if (cfhsi->cfg.inactivity_timeout == 0) + cfhsi->cfg.inactivity_timeout = 1; + else if (cfhsi->cfg.inactivity_timeout > NEXT_TIMER_MAX_DELTA) + cfhsi->cfg.inactivity_timeout = NEXT_TIMER_MAX_DELTA; + } i = __IFLA_CAIF_HSI_AGGREGATION_TOUT; if (data[i]) - aggregation_timeout = nla_get_u32(data[i]); + cfhsi->cfg.aggregation_timeout = nla_get_u32(data[i]); i = __IFLA_CAIF_HSI_HEAD_ALIGN; if (data[i]) - hsi_head_align = nla_get_u32(data[i]); + cfhsi->cfg.head_align = nla_get_u32(data[i]); i = __IFLA_CAIF_HSI_TAIL_ALIGN; if (data[i]) - hsi_tail_align = nla_get_u32(data[i]); + cfhsi->cfg.tail_align = nla_get_u32(data[i]); i = __IFLA_CAIF_HSI_QHIGH_WATERMARK; if (data[i]) - hsi_high_threshold = nla_get_u32(data[i]); + cfhsi->cfg.q_high_mark = nla_get_u32(data[i]); + + i = __IFLA_CAIF_HSI_QLOW_WATERMARK; + if (data[i]) + cfhsi->cfg.q_low_mark = nla_get_u32(data[i]); } static int caif_hsi_changelink(struct net_device *dev, struct nlattr *tb[], @@ -1398,16 +1387,20 @@ static size_t caif_hsi_get_size(const struct net_device *dev) static int caif_hsi_fill_info(struct sk_buff *skb, const struct net_device *dev) { + struct cfhsi *cfhsi = netdev_priv(dev); + if (nla_put_u32(skb, __IFLA_CAIF_HSI_INACTIVITY_TOUT, - inactivity_timeout) || + cfhsi->cfg.inactivity_timeout) || nla_put_u32(skb, __IFLA_CAIF_HSI_AGGREGATION_TOUT, - aggregation_timeout) || - nla_put_u32(skb, __IFLA_CAIF_HSI_HEAD_ALIGN, hsi_head_align) || - nla_put_u32(skb, __IFLA_CAIF_HSI_TAIL_ALIGN, hsi_tail_align) || + cfhsi->cfg.aggregation_timeout) || + nla_put_u32(skb, __IFLA_CAIF_HSI_HEAD_ALIGN, + cfhsi->cfg.head_align) || + nla_put_u32(skb, __IFLA_CAIF_HSI_TAIL_ALIGN, + cfhsi->cfg.tail_align) || nla_put_u32(skb, __IFLA_CAIF_HSI_QHIGH_WATERMARK, - hsi_high_threshold) || + cfhsi->cfg.q_high_mark) || nla_put_u32(skb, __IFLA_CAIF_HSI_QLOW_WATERMARK, - hsi_low_threshold)) + cfhsi->cfg.q_low_mark)) return -EMSGSIZE; return 0; diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h index 6dc7dc2674b2..bcb9cc3ce98b 100644 --- a/include/net/caif/caif_hsi.h +++ b/include/net/caif/caif_hsi.h @@ -132,6 +132,15 @@ enum { CFHSI_PRIO_LAST, }; +struct cfhsi_config { + u32 inactivity_timeout; + u32 aggregation_timeout; + u32 head_align; + u32 tail_align; + u32 q_high_mark; + u32 q_low_mark; +}; + /* Structure implemented by CAIF HSI drivers. */ struct cfhsi { struct caif_dev_common cfdev; @@ -142,7 +151,7 @@ struct cfhsi { struct cfhsi_ops *ops; int tx_state; struct cfhsi_rx_state rx_state; - unsigned long inactivity_timeout; + struct cfhsi_config cfg; int rx_len; u8 *rx_ptr; u8 *tx_buf; @@ -150,8 +159,6 @@ struct cfhsi { u8 *rx_flip_buf; spinlock_t lock; int flow_off_sent; - u32 q_low_mark; - u32 q_high_mark; struct list_head list; struct work_struct wake_up_work; struct work_struct wake_down_work; @@ -164,7 +171,6 @@ struct cfhsi { struct timer_list rx_slowpath_timer; /* TX aggregation */ - unsigned long aggregation_timeout; int aggregation_len; struct timer_list aggregation_timer; -- cgit v1.2.3 From 88e920b4505105b710f8d4a535ec02c4078f8e2e Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 21 Jun 2012 11:09:54 -0700 Subject: nl80211: specify RSSI threshold in scheduled scan Support configuring an RSSI threshold in dBm (s32) when requesting scheduled scan, below which a BSS won't be reported by the cfg80211 driver. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 6 ++++++ include/net/cfg80211.h | 2 ++ net/wireless/nl80211.c | 9 ++++++++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index b7c3b737ddde..c0fc5d277338 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1542,6 +1542,9 @@ enum nl80211_attrs { #define NL80211_MIN_REMAIN_ON_CHANNEL_TIME 10 +/* default RSSI threshold for scan results if none specified. */ +#define NL80211_SCAN_RSSI_THOLD_OFF -300 + /** * enum nl80211_iftype - (virtual) interface types * @@ -1974,6 +1977,8 @@ enum nl80211_reg_rule_attr { * @__NL80211_SCHED_SCAN_MATCH_ATTR_INVALID: attribute number 0 is reserved * @NL80211_SCHED_SCAN_MATCH_ATTR_SSID: SSID to be used for matching, * only report BSS with matching SSID. + * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI: RSSI threshold (in dBm) for reporting a + * BSS in scan results. Filtering is turned off if not specified. * @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter * attribute number currently defined * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use @@ -1982,6 +1987,7 @@ enum nl80211_sched_scan_match_attr { __NL80211_SCHED_SCAN_MATCH_ATTR_INVALID, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, + NL80211_SCHED_SCAN_MATCH_ATTR_RSSI, /* keep last */ __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f0163a10b8ce..061c01957e54 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1027,6 +1027,7 @@ struct cfg80211_match_set { * @wiphy: the wiphy this was for * @dev: the interface * @channels: channels to scan + * @rssi_thold: don't report scan results below this threshold (in s32 dBm) */ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; @@ -1037,6 +1038,7 @@ struct cfg80211_sched_scan_request { size_t ie_len; struct cfg80211_match_set *match_sets; int n_match_sets; + s32 rssi_thold; /* internal */ struct wiphy *wiphy; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 888fadc4d63e..234ff3bbd104 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -340,6 +340,7 @@ static const struct nla_policy nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = { [NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_SSID_LEN }, + [NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 }, }; /* ifidx get helper */ @@ -4387,7 +4388,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCHED_SCAN_MATCH], tmp) { - struct nlattr *ssid; + struct nlattr *ssid, *rssi; nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, nla_data(attr), nla_len(attr), @@ -4403,6 +4404,12 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request->match_sets[i].ssid.ssid_len = nla_len(ssid); } + rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI]; + if (rssi) + request->rssi_thold = nla_get_u32(rssi); + else + request->rssi_thold = + NL80211_SCAN_RSSI_THOLD_OFF; i++; } } -- cgit v1.2.3 From 32bad7e30f113a8a5cebe4704bf6519ab4383e1b Mon Sep 17 00:00:00 2001 From: "alex.bluesman.smirnov@gmail.com" Date: Mon, 25 Jun 2012 23:24:48 +0000 Subject: mac802154: add wpan device-class support Every real 802.15.4 transceiver, which works with software MAC layer, can be classified as a wpan device in this stack. So the wpan device implementation provides missing link in datapath between the device drivers and the Linux network queue. According to the IEEE 802.15.4 standard each packet can be one of the following types: - beacon - MAC layer command - ACK - data This patch adds support for the data packet-type only, but this is enough to perform data transmission and receiving over radio. Signed-off-by: Alexander Smirnov Signed-off-by: David S. Miller --- include/linux/nl802154.h | 14 +- include/net/mac802154.h | 8 + net/mac802154/Makefile | 2 +- net/mac802154/ieee802154_dev.c | 4 + net/mac802154/mac802154.h | 4 + net/mac802154/mac_cmd.c | 4 + net/mac802154/rx.c | 1 + net/mac802154/wpan.c | 559 +++++++++++++++++++++++++++++++++++++++++ 8 files changed, 583 insertions(+), 13 deletions(-) create mode 100644 net/mac802154/wpan.c (limited to 'include/net') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 5a3db3aa5f17..fd4f2d1cdf6c 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -130,18 +130,8 @@ enum { enum { __IEEE802154_DEV_INVALID = -1, - /* TODO: - * Nowadays three device types supported by this stack at linux-zigbee - * project: WPAN = 0, MONITOR = 1 and SMAC = 2. - * - * Since this stack implementation exists many years, it's definitely - * bad idea to change the assigned values due to they are already used - * by third-party userspace software like: iz-tools, wireshark... - * - * Currently only monitor device is added and initialized by '1' for - * compatibility. - */ - IEEE802154_DEV_MONITOR = 1, + IEEE802154_DEV_WPAN, + IEEE802154_DEV_MONITOR, __IEEE802154_DEV_MAX, }; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index c9f8ab5cc687..d0d11df9cba1 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -21,6 +21,14 @@ #include +/* General MAC frame format: + * 2 bytes: Frame Control + * 1 byte: Sequence Number + * 20 bytes: Addressing fields + * 14 bytes: Auxiliary Security Header + */ +#define MAC802154_FRAME_HARD_HEADER_LEN (2 + 1 + 20 + 14) + /* The following flags are used to indicate changed address settings from * the stack to the hardware. */ diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile index ec1bd3fc1273..57cf5d1a2e4a 100644 --- a/net/mac802154/Makefile +++ b/net/mac802154/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_MAC802154) += mac802154.o -mac802154-objs := ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o monitor.o +mac802154-objs := ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o monitor.o wpan.o diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index e3edfb0661b0..e748aed290aa 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -140,6 +140,10 @@ mac802154_add_iface(struct wpan_phy *phy, const char *name, int type) dev = alloc_netdev(sizeof(struct mac802154_sub_if_data), name, mac802154_monitor_setup); break; + case IEEE802154_DEV_WPAN: + dev = alloc_netdev(sizeof(struct mac802154_sub_if_data), + name, mac802154_wpan_setup); + break; default: dev = NULL; err = -EINVAL; diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index 789d9c948aec..c0efcf19a171 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -93,6 +93,7 @@ struct mac802154_sub_if_data { #define MAC802154_CHAN_NONE (~(u8)0) /* No channel is assigned */ extern struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced; +extern struct ieee802154_mlme_ops mac802154_mlme_wpan; int mac802154_slave_open(struct net_device *dev); int mac802154_slave_close(struct net_device *dev); @@ -100,6 +101,9 @@ int mac802154_slave_close(struct net_device *dev); void mac802154_monitors_rx(struct mac802154_priv *priv, struct sk_buff *skb); void mac802154_monitor_setup(struct net_device *dev); +void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb); +void mac802154_wpan_setup(struct net_device *dev); + netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb, u8 page, u8 chan); diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 7a5d0e052cd7..db8341957bd2 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -43,3 +43,7 @@ struct wpan_phy *mac802154_get_phy(const struct net_device *dev) struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced = { .get_phy = mac802154_get_phy, }; + +struct ieee802154_mlme_ops mac802154_mlme_wpan = { + .get_phy = mac802154_get_phy, +}; diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 4a7d76d4f8bc..38548ec2098f 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -77,6 +77,7 @@ mac802154_subif_rx(struct ieee802154_dev *hw, struct sk_buff *skb, u8 lqi) } mac802154_monitors_rx(priv, skb); + mac802154_wpans_rx(priv, skb); out: dev_kfree_skb(skb); return; diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c new file mode 100644 index 000000000000..f30f6d4beea1 --- /dev/null +++ b/net/mac802154/wpan.c @@ -0,0 +1,559 @@ +/* + * Copyright 2007-2012 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Dmitry Eremin-Solenikov + * Sergey Lapin + * Maxim Gorbachyov + * Alexander Smirnov + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "mac802154.h" + +static inline int mac802154_fetch_skb_u8(struct sk_buff *skb, u8 *val) +{ + if (unlikely(!pskb_may_pull(skb, 1))) + return -EINVAL; + + *val = skb->data[0]; + skb_pull(skb, 1); + + return 0; +} + +static inline int mac802154_fetch_skb_u16(struct sk_buff *skb, u16 *val) +{ + if (unlikely(!pskb_may_pull(skb, 2))) + return -EINVAL; + + *val = skb->data[0] | (skb->data[1] << 8); + skb_pull(skb, 2); + + return 0; +} + +static inline void mac802154_haddr_copy_swap(u8 *dest, const u8 *src) +{ + int i; + for (i = 0; i < IEEE802154_ADDR_LEN; i++) + dest[IEEE802154_ADDR_LEN - i - 1] = src[i]; +} + +static int +mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + struct sockaddr_ieee802154 *sa = + (struct sockaddr_ieee802154 *)&ifr->ifr_addr; + int err = -ENOIOCTLCMD; + + spin_lock_bh(&priv->mib_lock); + + switch (cmd) { + case SIOCGIFADDR: + if (priv->pan_id == IEEE802154_PANID_BROADCAST || + priv->short_addr == IEEE802154_ADDR_BROADCAST) { + err = -EADDRNOTAVAIL; + break; + } + + sa->family = AF_IEEE802154; + sa->addr.addr_type = IEEE802154_ADDR_SHORT; + sa->addr.pan_id = priv->pan_id; + sa->addr.short_addr = priv->short_addr; + + err = 0; + break; + case SIOCSIFADDR: + dev_warn(&dev->dev, + "Using DEBUGing ioctl SIOCSIFADDR isn't recommened!\n"); + if (sa->family != AF_IEEE802154 || + sa->addr.addr_type != IEEE802154_ADDR_SHORT || + sa->addr.pan_id == IEEE802154_PANID_BROADCAST || + sa->addr.short_addr == IEEE802154_ADDR_BROADCAST || + sa->addr.short_addr == IEEE802154_ADDR_UNDEF) { + err = -EINVAL; + break; + } + + priv->pan_id = sa->addr.pan_id; + priv->short_addr = sa->addr.short_addr; + + err = 0; + break; + } + + spin_unlock_bh(&priv->mib_lock); + return err; +} + +static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + if (netif_running(dev)) + return -EBUSY; + + /* FIXME: validate addr */ + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + mac802154_dev_set_ieee_addr(dev); + return 0; +} + +static int mac802154_header_create(struct sk_buff *skb, + struct net_device *dev, + unsigned short type, + const void *_daddr, + const void *_saddr, + unsigned len) +{ + const struct ieee802154_addr *saddr = _saddr; + const struct ieee802154_addr *daddr = _daddr; + struct ieee802154_addr dev_addr; + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int pos = 2; + u8 *head; + u16 fc; + + if (!daddr) + return -EINVAL; + + head = kzalloc(MAC802154_FRAME_HARD_HEADER_LEN, GFP_KERNEL); + if (head == NULL) + return -ENOMEM; + + head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */ + fc = mac_cb_type(skb); + + if (!saddr) { + spin_lock_bh(&priv->mib_lock); + + if (priv->short_addr == IEEE802154_ADDR_BROADCAST || + priv->short_addr == IEEE802154_ADDR_UNDEF || + priv->pan_id == IEEE802154_PANID_BROADCAST) { + dev_addr.addr_type = IEEE802154_ADDR_LONG; + memcpy(dev_addr.hwaddr, dev->dev_addr, + IEEE802154_ADDR_LEN); + } else { + dev_addr.addr_type = IEEE802154_ADDR_SHORT; + dev_addr.short_addr = priv->short_addr; + } + + dev_addr.pan_id = priv->pan_id; + saddr = &dev_addr; + + spin_unlock_bh(&priv->mib_lock); + } + + if (daddr->addr_type != IEEE802154_ADDR_NONE) { + fc |= (daddr->addr_type << IEEE802154_FC_DAMODE_SHIFT); + + head[pos++] = daddr->pan_id & 0xff; + head[pos++] = daddr->pan_id >> 8; + + if (daddr->addr_type == IEEE802154_ADDR_SHORT) { + head[pos++] = daddr->short_addr & 0xff; + head[pos++] = daddr->short_addr >> 8; + } else { + mac802154_haddr_copy_swap(head + pos, daddr->hwaddr); + pos += IEEE802154_ADDR_LEN; + } + } + + if (saddr->addr_type != IEEE802154_ADDR_NONE) { + fc |= (saddr->addr_type << IEEE802154_FC_SAMODE_SHIFT); + + if ((saddr->pan_id == daddr->pan_id) && + (saddr->pan_id != IEEE802154_PANID_BROADCAST)) { + /* PANID compression/intra PAN */ + fc |= IEEE802154_FC_INTRA_PAN; + } else { + head[pos++] = saddr->pan_id & 0xff; + head[pos++] = saddr->pan_id >> 8; + } + + if (saddr->addr_type == IEEE802154_ADDR_SHORT) { + head[pos++] = saddr->short_addr & 0xff; + head[pos++] = saddr->short_addr >> 8; + } else { + mac802154_haddr_copy_swap(head + pos, saddr->hwaddr); + pos += IEEE802154_ADDR_LEN; + } + } + + head[0] = fc; + head[1] = fc >> 8; + + memcpy(skb_push(skb, pos), head, pos); + kfree(head); + + return pos; +} + +static int +mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr) +{ + const u8 *hdr = skb_mac_header(skb); + const u8 *tail = skb_tail_pointer(skb); + struct ieee802154_addr *addr = (struct ieee802154_addr *)haddr; + u16 fc; + int da_type; + + if (hdr + 3 > tail) + goto malformed; + + fc = hdr[0] | (hdr[1] << 8); + + hdr += 3; + + da_type = IEEE802154_FC_DAMODE(fc); + addr->addr_type = IEEE802154_FC_SAMODE(fc); + + switch (da_type) { + case IEEE802154_ADDR_NONE: + if (fc & IEEE802154_FC_INTRA_PAN) + goto malformed; + break; + case IEEE802154_ADDR_LONG: + if (fc & IEEE802154_FC_INTRA_PAN) { + if (hdr + 2 > tail) + goto malformed; + addr->pan_id = hdr[0] | (hdr[1] << 8); + hdr += 2; + } + + if (hdr + IEEE802154_ADDR_LEN > tail) + goto malformed; + + hdr += IEEE802154_ADDR_LEN; + break; + case IEEE802154_ADDR_SHORT: + if (fc & IEEE802154_FC_INTRA_PAN) { + if (hdr + 2 > tail) + goto malformed; + addr->pan_id = hdr[0] | (hdr[1] << 8); + hdr += 2; + } + + if (hdr + 2 > tail) + goto malformed; + + hdr += 2; + break; + default: + goto malformed; + + } + + switch (addr->addr_type) { + case IEEE802154_ADDR_NONE: + break; + case IEEE802154_ADDR_LONG: + if (!(fc & IEEE802154_FC_INTRA_PAN)) { + if (hdr + 2 > tail) + goto malformed; + addr->pan_id = hdr[0] | (hdr[1] << 8); + hdr += 2; + } + + if (hdr + IEEE802154_ADDR_LEN > tail) + goto malformed; + + mac802154_haddr_copy_swap(addr->hwaddr, hdr); + hdr += IEEE802154_ADDR_LEN; + break; + case IEEE802154_ADDR_SHORT: + if (!(fc & IEEE802154_FC_INTRA_PAN)) { + if (hdr + 2 > tail) + goto malformed; + addr->pan_id = hdr[0] | (hdr[1] << 8); + hdr += 2; + } + + if (hdr + 2 > tail) + goto malformed; + + addr->short_addr = hdr[0] | (hdr[1] << 8); + hdr += 2; + break; + default: + goto malformed; + } + + return sizeof(struct ieee802154_addr); + +malformed: + pr_debug("malformed packet\n"); + return 0; +} + +static netdev_tx_t +mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct mac802154_sub_if_data *priv; + u8 chan, page; + + priv = netdev_priv(dev); + + spin_lock_bh(&priv->mib_lock); + chan = priv->chan; + page = priv->page; + spin_unlock_bh(&priv->mib_lock); + + if (chan == MAC802154_CHAN_NONE || + page >= WPAN_NUM_PAGES || + chan >= WPAN_NUM_CHANNELS) + return NETDEV_TX_OK; + + skb->skb_iif = dev->ifindex; + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + return mac802154_tx(priv->hw, skb, page, chan); +} + +static struct header_ops mac802154_header_ops = { + .create = mac802154_header_create, + .parse = mac802154_header_parse, +}; + +static const struct net_device_ops mac802154_wpan_ops = { + .ndo_open = mac802154_slave_open, + .ndo_stop = mac802154_slave_close, + .ndo_start_xmit = mac802154_wpan_xmit, + .ndo_do_ioctl = mac802154_wpan_ioctl, + .ndo_set_mac_address = mac802154_wpan_mac_addr, +}; + +void mac802154_wpan_setup(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv; + + dev->addr_len = IEEE802154_ADDR_LEN; + memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); + + dev->hard_header_len = MAC802154_FRAME_HARD_HEADER_LEN; + dev->header_ops = &mac802154_header_ops; + dev->needed_tailroom = 2; /* FCS */ + dev->mtu = IEEE802154_MTU; + dev->tx_queue_len = 10; + dev->type = ARPHRD_IEEE802154; + dev->flags = IFF_NOARP | IFF_BROADCAST; + dev->watchdog_timeo = 0; + + dev->destructor = free_netdev; + dev->netdev_ops = &mac802154_wpan_ops; + dev->ml_priv = &mac802154_mlme_wpan; + + priv = netdev_priv(dev); + priv->type = IEEE802154_DEV_WPAN; + + priv->chan = MAC802154_CHAN_NONE; + priv->page = 0; + + spin_lock_init(&priv->mib_lock); + + get_random_bytes(&priv->bsn, 1); + get_random_bytes(&priv->dsn, 1); + + priv->pan_id = IEEE802154_PANID_BROADCAST; + priv->short_addr = IEEE802154_ADDR_BROADCAST; +} + +static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb) +{ + return netif_rx(skb); +} + +static int +mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) +{ + pr_debug("getting packet via slave interface %s\n", sdata->dev->name); + + spin_lock_bh(&sdata->mib_lock); + + switch (mac_cb(skb)->da.addr_type) { + case IEEE802154_ADDR_NONE: + if (mac_cb(skb)->sa.addr_type != IEEE802154_ADDR_NONE) + /* FIXME: check if we are PAN coordinator */ + skb->pkt_type = PACKET_OTHERHOST; + else + /* ACK comes with both addresses empty */ + skb->pkt_type = PACKET_HOST; + break; + case IEEE802154_ADDR_LONG: + if (mac_cb(skb)->da.pan_id != sdata->pan_id && + mac_cb(skb)->da.pan_id != IEEE802154_PANID_BROADCAST) + skb->pkt_type = PACKET_OTHERHOST; + else if (!memcmp(mac_cb(skb)->da.hwaddr, sdata->dev->dev_addr, + IEEE802154_ADDR_LEN)) + skb->pkt_type = PACKET_HOST; + else + skb->pkt_type = PACKET_OTHERHOST; + break; + case IEEE802154_ADDR_SHORT: + if (mac_cb(skb)->da.pan_id != sdata->pan_id && + mac_cb(skb)->da.pan_id != IEEE802154_PANID_BROADCAST) + skb->pkt_type = PACKET_OTHERHOST; + else if (mac_cb(skb)->da.short_addr == sdata->short_addr) + skb->pkt_type = PACKET_HOST; + else if (mac_cb(skb)->da.short_addr == + IEEE802154_ADDR_BROADCAST) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_OTHERHOST; + break; + default: + break; + } + + spin_unlock_bh(&sdata->mib_lock); + + skb->dev = sdata->dev; + + sdata->dev->stats.rx_packets++; + sdata->dev->stats.rx_bytes += skb->len; + + switch (mac_cb_type(skb)) { + case IEEE802154_FC_TYPE_DATA: + return mac802154_process_data(sdata->dev, skb); + default: + pr_warning("ieee802154: bad frame received (type = %d)\n", + mac_cb_type(skb)); + kfree_skb(skb); + return NET_RX_DROP; + } +} + +static int mac802154_parse_frame_start(struct sk_buff *skb) +{ + u8 *head = skb->data; + u16 fc; + + if (mac802154_fetch_skb_u16(skb, &fc) || + mac802154_fetch_skb_u8(skb, &(mac_cb(skb)->seq))) + goto err; + + pr_debug("fc: %04x dsn: %02x\n", fc, head[2]); + + mac_cb(skb)->flags = IEEE802154_FC_TYPE(fc); + mac_cb(skb)->sa.addr_type = IEEE802154_FC_SAMODE(fc); + mac_cb(skb)->da.addr_type = IEEE802154_FC_DAMODE(fc); + + if (fc & IEEE802154_FC_INTRA_PAN) + mac_cb(skb)->flags |= MAC_CB_FLAG_INTRAPAN; + + if (mac_cb(skb)->da.addr_type != IEEE802154_ADDR_NONE) { + if (mac802154_fetch_skb_u16(skb, &(mac_cb(skb)->da.pan_id))) + goto err; + + /* source PAN id compression */ + if (mac_cb_is_intrapan(skb)) + mac_cb(skb)->sa.pan_id = mac_cb(skb)->da.pan_id; + + pr_debug("dest PAN addr: %04x\n", mac_cb(skb)->da.pan_id); + + if (mac_cb(skb)->da.addr_type == IEEE802154_ADDR_SHORT) { + u16 *da = &(mac_cb(skb)->da.short_addr); + + if (mac802154_fetch_skb_u16(skb, da)) + goto err; + + pr_debug("destination address is short: %04x\n", + mac_cb(skb)->da.short_addr); + } else { + if (!pskb_may_pull(skb, IEEE802154_ADDR_LEN)) + goto err; + + mac802154_haddr_copy_swap(mac_cb(skb)->da.hwaddr, + skb->data); + skb_pull(skb, IEEE802154_ADDR_LEN); + + pr_debug("destination address is hardware\n"); + } + } + + if (mac_cb(skb)->sa.addr_type != IEEE802154_ADDR_NONE) { + /* non PAN-compression, fetch source address id */ + if (!(mac_cb_is_intrapan(skb))) { + u16 *sa_pan = &(mac_cb(skb)->sa.pan_id); + + if (mac802154_fetch_skb_u16(skb, sa_pan)) + goto err; + } + + pr_debug("source PAN addr: %04x\n", mac_cb(skb)->da.pan_id); + + if (mac_cb(skb)->sa.addr_type == IEEE802154_ADDR_SHORT) { + u16 *sa = &(mac_cb(skb)->sa.short_addr); + + if (mac802154_fetch_skb_u16(skb, sa)) + goto err; + + pr_debug("source address is short: %04x\n", + mac_cb(skb)->sa.short_addr); + } else { + if (!pskb_may_pull(skb, IEEE802154_ADDR_LEN)) + goto err; + + mac802154_haddr_copy_swap(mac_cb(skb)->sa.hwaddr, + skb->data); + skb_pull(skb, IEEE802154_ADDR_LEN); + + pr_debug("source address is hardware\n"); + } + } + + return 0; +err: + return -EINVAL; +} + +void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb) +{ + int ret; + struct sk_buff *sskb; + struct mac802154_sub_if_data *sdata; + + ret = mac802154_parse_frame_start(skb); + if (ret) { + pr_debug("got invalid frame\n"); + return; + } + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &priv->slaves, list) { + if (sdata->type != IEEE802154_DEV_WPAN) + continue; + + sskb = skb_clone(skb, GFP_ATOMIC); + if (sskb) + mac802154_subif_frame(sdata, sskb); + } + rcu_read_unlock(); +} -- cgit v1.2.3 From dfb89c56add259b72a9c68d6b2846c1cd8c4e4b6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 27 Jun 2012 09:23:48 +0200 Subject: cfg80211: don't allow WoWLAN support without CONFIG_PM When CONFIG_PM is disabled, no device can possibly support WoWLAN since it can't go to sleep to start with. Due to this, mac80211 had even rejected the hardware registration. By making all the code and data for WoWLAN depend on CONFIG_PM we can promote this runtime error to a compile-time error. Add #ifdef around all WoWLAN code to remove it in systems that don't need it as they never suspend. Cc: Kalle Valo Acked-by: Luciano Coelho Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 2 ++ drivers/net/wireless/ti/wlcore/main.c | 2 ++ include/net/cfg80211.h | 2 ++ net/mac80211/main.c | 7 +++---- net/wireless/core.c | 4 ++++ net/wireless/nl80211.c | 6 ++++++ 6 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index f27e9732951d..b8fce0d4d72e 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -3487,6 +3487,7 @@ int ath6kl_cfg80211_init(struct ath6kl *ar) wiphy->cipher_suites = cipher_suites; wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); +#ifdef CONFIG_PM wiphy->wowlan.flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT | WIPHY_WOWLAN_GTK_REKEY_FAILURE | @@ -3496,6 +3497,7 @@ int ath6kl_cfg80211_init(struct ath6kl *ar) wiphy->wowlan.n_patterns = WOW_MAX_FILTERS_PER_LIST; wiphy->wowlan.pattern_min_len = 1; wiphy->wowlan.pattern_max_len = WOW_PATTERN_SIZE; +#endif wiphy->max_sched_scan_ssids = MAX_PROBED_SSID_INDEX; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 1156e3f578c1..747a997bc608 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5321,6 +5321,7 @@ int __devinit wlcore_probe(struct wl1271 *wl, struct platform_device *pdev) goto out_free_hw; } +#ifdef CONFIG_PM ret = enable_irq_wake(wl->irq); if (!ret) { wl->irq_wake_enabled = true; @@ -5334,6 +5335,7 @@ int __devinit wlcore_probe(struct wl1271 *wl, struct platform_device *pdev) WL1271_RX_FILTER_MAX_PATTERN_SIZE; } } +#endif disable_irq(wl->irq); ret = wl12xx_get_hw_info(wl); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 061c01957e54..7d3cd3ce9a26 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2153,7 +2153,9 @@ struct wiphy { char fw_version[ETHTOOL_BUSINFO_LEN]; u32 hw_version; +#ifdef CONFIG_PM struct wiphy_wowlan_support wowlan; +#endif u16 max_remain_on_channel_duration; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0b040fb73673..aded0018f6f3 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -706,12 +706,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->hw.offchannel_tx_hw_queue >= local->hw.queues)) return -EINVAL; - if ((hw->wiphy->wowlan.flags || hw->wiphy->wowlan.n_patterns) #ifdef CONFIG_PM - && (!local->ops->suspend || !local->ops->resume) -#endif - ) + if ((hw->wiphy->wowlan.flags || hw->wiphy->wowlan.n_patterns) && + (!local->ops->suspend || !local->ops->resume)) return -EINVAL; +#endif if ((hw->flags & IEEE80211_HW_SCAN_WHILE_IDLE) && !local->ops->hw_scan) return -EINVAL; diff --git a/net/wireless/core.c b/net/wireless/core.c index 907f62c80e28..ddd32afa5f0a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -421,9 +421,11 @@ int wiphy_register(struct wiphy *wiphy) int i; u16 ifmodes = wiphy->interface_modes; +#ifdef CONFIG_PM if (WARN_ON((wiphy->wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && !(wiphy->wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY))) return -EINVAL; +#endif if (WARN_ON(wiphy->ap_sme_capa && !(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME))) @@ -500,12 +502,14 @@ int wiphy_register(struct wiphy *wiphy) return -EINVAL; } +#ifdef CONFIG_PM if (rdev->wiphy.wowlan.n_patterns) { if (WARN_ON(!rdev->wiphy.wowlan.pattern_min_len || rdev->wiphy.wowlan.pattern_min_len > rdev->wiphy.wowlan.pattern_max_len)) return -EINVAL; } +#endif /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 234ff3bbd104..067c9fe02a7f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1112,6 +1112,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_ifs); } +#ifdef CONFIG_PM if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) { struct nlattr *nl_wowlan; @@ -1152,6 +1153,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_wowlan); } +#endif if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, dev->wiphy.software_iftypes)) @@ -6276,6 +6278,7 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) return cfg80211_leave_mesh(rdev, dev); } +#ifdef CONFIG_PM static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6504,6 +6507,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) kfree(new_triggers.patterns); return err; } +#endif static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) { @@ -7158,6 +7162,7 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, +#ifdef CONFIG_PM { .cmd = NL80211_CMD_GET_WOWLAN, .doit = nl80211_get_wowlan, @@ -7174,6 +7179,7 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, +#endif { .cmd = NL80211_CMD_SET_REKEY_OFFLOAD, .doit = nl80211_set_rekey_data, -- cgit v1.2.3 From f1caad274515ffd9841ac57ce9a7b5fc35bbf689 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 21 Jun 2012 04:36:39 +0000 Subject: netfilter: nf_conntrack: prepare l4proto->init_net cleanup l4proto->init contain quite redundant code. We can simplify this by adding a new parameter l3proto. This patch prepares that code simplification. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_proto.c | 5 +++-- net/netfilter/nf_conntrack_proto_dccp.c | 2 +- net/netfilter/nf_conntrack_proto_generic.c | 2 +- net/netfilter/nf_conntrack_proto_gre.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 4 ++-- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- net/netfilter/nf_conntrack_proto_udp.c | 4 ++-- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- 11 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 81c52b5205f2..5dd60f2d02a1 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -97,7 +97,7 @@ struct nf_conntrack_l4proto { #endif int *net_id; /* Init l4proto pernet data */ - int (*init_net)(struct net *net); + int (*init_net)(struct net *net, u_int16_t proto); /* Protocol name */ const char *name; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 041923cb67ad..76f7a2f657fe 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -337,7 +337,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -static int icmp_init_net(struct net *net) +static int icmp_init_net(struct net *net, u_int16_t proto) { struct nf_icmp_net *in = icmp_pernet(net); struct nf_proto_net *pn = (struct nf_proto_net *)in; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 63ed0121836c..807ae09df0ca 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -333,7 +333,7 @@ static struct ctl_table icmpv6_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static int icmpv6_init_net(struct net *net) +static int icmpv6_init_net(struct net *net, u_int16_t proto) { struct nf_icmp_net *in = icmpv6_pernet(net); struct nf_proto_net *pn = (struct nf_proto_net *)in; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 9bd88aa3c74f..6f4b6f3deee5 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -461,7 +461,7 @@ int nf_conntrack_l4proto_register(struct net *net, int ret = 0; if (l4proto->init_net) { - ret = l4proto->init_net(net); + ret = l4proto->init_net(net, l4proto->l3proto); if (ret < 0) return ret; } @@ -515,7 +515,8 @@ int nf_conntrack_proto_init(struct net *net) { unsigned int i; int err; - err = nf_conntrack_l4proto_generic.init_net(net); + err = nf_conntrack_l4proto_generic.init_net(net, + nf_conntrack_l4proto_generic.l3proto); if (err < 0) return err; err = nf_ct_l4proto_register_sysctl(net, diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index c33f76af913f..52da8f0293b5 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -815,7 +815,7 @@ static struct ctl_table dccp_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static int dccp_init_net(struct net *net) +static int dccp_init_net(struct net *net, u_int16_t proto) { struct dccp_net *dn = dccp_pernet(net); struct nf_proto_net *pn = (struct nf_proto_net *)dn; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index bb0e74fe0fae..d1ed7b44e079 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -135,7 +135,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -static int generic_init_net(struct net *net) +static int generic_init_net(struct net *net, u_int16_t proto) { struct nf_generic_net *gn = generic_pernet(net); struct nf_proto_net *pn = (struct nf_proto_net *)gn; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 5cac41c2fa09..b09b7af7f6f8 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -348,7 +348,7 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { }; #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -static int gre_init_net(struct net *net) +static int gre_init_net(struct net *net, u_int16_t proto) { struct netns_proto_gre *net_gre = gre_pernet(net); int i; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 8fb0582ad397..1e7836cead74 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -767,7 +767,7 @@ static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) return 0; } -static int sctpv4_init_net(struct net *net) +static int sctpv4_init_net(struct net *net, u_int16_t proto) { int ret; struct sctp_net *sn = sctp_pernet(net); @@ -793,7 +793,7 @@ static int sctpv4_init_net(struct net *net) return ret; } -static int sctpv6_init_net(struct net *net) +static int sctpv6_init_net(struct net *net, u_int16_t proto) { struct sctp_net *sn = sctp_pernet(net); struct nf_proto_net *pn = (struct nf_proto_net *)sn; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 99caa1304477..6db9d3c44820 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1593,7 +1593,7 @@ static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) return 0; } -static int tcpv4_init_net(struct net *net) +static int tcpv4_init_net(struct net *net, u_int16_t proto) { int i; int ret = 0; @@ -1631,7 +1631,7 @@ static int tcpv4_init_net(struct net *net) return ret; } -static int tcpv6_init_net(struct net *net) +static int tcpv6_init_net(struct net *net, u_int16_t proto) { int i; struct nf_tcp_net *tn = tcp_pernet(net); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index a83cf93545cd..2b978e6fd1c2 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -283,7 +283,7 @@ static void udp_init_net_data(struct nf_udp_net *un) } } -static int udpv4_init_net(struct net *net) +static int udpv4_init_net(struct net *net, u_int16_t proto) { int ret; struct nf_udp_net *un = udp_pernet(net); @@ -307,7 +307,7 @@ static int udpv4_init_net(struct net *net) return ret; } -static int udpv6_init_net(struct net *net) +static int udpv6_init_net(struct net *net, u_int16_t proto) { struct nf_udp_net *un = udp_pernet(net); struct nf_proto_net *pn = (struct nf_proto_net *)un; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index b32e700f8dde..d33e51158039 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -234,7 +234,7 @@ static struct ctl_table udplite_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static int udplite_init_net(struct net *net) +static int udplite_init_net(struct net *net, u_int16_t proto) { int i; struct udplite_net *un = udplite_pernet(net); -- cgit v1.2.3 From f28997e27a03abc679f13824a0574b09112eea37 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 21 Jun 2012 04:36:40 +0000 Subject: netfilter: nf_conntrack: add nf_ct_kfree_compat_sysctl_table This patch is a cleanup. It adds nf_ct_kfree_compat_sysctl_table to release l4proto's compat sysctl table and set the compat sysctl table point to NULL. This new function will be used by follow-up patches. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 8 ++++++++ net/netfilter/nf_conntrack_proto.c | 3 +-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 5dd60f2d02a1..08bb571b7abd 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -124,6 +124,14 @@ extern int nf_conntrack_l4proto_register(struct net *net, extern void nf_conntrack_l4proto_unregister(struct net *net, struct nf_conntrack_l4proto *proto); +static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn) +{ +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + kfree(pn->ctl_compat_table); + pn->ctl_compat_table = NULL; +#endif +} + /* Generic netlink helpers */ extern int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 6f4b6f3deee5..9d6b6ab193a9 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -361,8 +361,7 @@ int nf_ct_l4proto_register_sysctl(struct net *net, if (err == 0) goto out; - kfree(pn->ctl_compat_table); - pn->ctl_compat_table = NULL; + nf_ct_kfree_compat_sysctl_table(pn); nf_ct_unregister_sysctl(&pn->ctl_table_header, &pn->ctl_table, &pn->users); -- cgit v1.2.3 From c074da2810c118b3812f32d6754bd9ead2f169e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Jun 2012 23:14:15 +0000 Subject: ipv4: tcp: dont cache unconfirmed intput dst DDOS synflood attacks hit badly IP route cache. On typical machines, this cache is allowed to hold up to 8 Millions dst entries, 256 bytes for each, for a total of 2GB of memory. rt_garbage_collect() triggers and tries to cleanup things. Eventually route cache is disabled but machine is under fire and might OOM and crash. This patch exploits the new TCP early demux, to set a nocache boolean in case incoming TCP frame is for a not yet ESTABLISHED or TIMEWAIT socket. This 'nocache' boolean is then used in case dst entry is not found in route cache, to create an unhashed dst entry (DST_NOCACHE) SYN-cookie-ACK sent use a similar mechanism (ipv4: tcp: dont cache output dst for syncookies), so after this patch, a machine is able to absorb a DDOS synflood attack without polluting its IP route cache. Signed-off-by: Eric Dumazet Cc: Hans Schillstrom Signed-off-by: David S. Miller --- include/net/protocol.h | 2 +- include/net/route.h | 8 ++++---- include/net/tcp.h | 2 +- net/ipv4/arp.c | 2 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_input.c | 5 +++-- net/ipv4/route.c | 8 +++++--- net/ipv4/tcp_ipv4.c | 4 +++- net/ipv4/xfrm4_input.c | 2 +- 9 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/protocol.h b/include/net/protocol.h index 967b926cbfb1..7cfc8f76914d 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -37,7 +37,7 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb); + int (*early_demux)(struct sk_buff *skb, bool *nocache); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); diff --git a/include/net/route.h b/include/net/route.h index 47eb25ac1f7f..6361f9335774 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -201,18 +201,18 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 } extern int ip_route_input_common(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin, bool noref); + u8 tos, struct net_device *devin, bool noref, bool nocache); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin) { - return ip_route_input_common(skb, dst, src, tos, devin, false); + return ip_route_input_common(skb, dst, src, tos, devin, false, false); } static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin) + u8 tos, struct net_device *devin, bool nocache) { - return ip_route_input_common(skb, dst, src, tos, devin, true); + return ip_route_input_common(skb, dst, src, tos, devin, true, nocache); } extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, diff --git a/include/net/tcp.h b/include/net/tcp.h index 6660ffc4963d..917ed2e55e8c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -325,7 +325,7 @@ extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); -extern int tcp_v4_early_demux(struct sk_buff *skb); +extern int tcp_v4_early_demux(struct sk_buff *skb, bool *nocache); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 2e560f0c757d..6a9795944369 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -828,7 +828,7 @@ static int arp_process(struct sk_buff *skb) } if (arp->ar_op == htons(ARPOP_REQUEST) && - ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { + ip_route_input_noref(skb, tip, sip, 0, dev, false) == 0) { rt = skb_rtable(skb); addr_type = rt->rt_type; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8d07c973409c..978d55f256ea 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -259,7 +259,7 @@ static void ip_expire(unsigned long arg) skb_dst_drop(head); iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, - iph->tos, head->dev); + iph->tos, head->dev, false); if (err) goto out_rcu_unlock; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 2a39204de5bc..7be54c8dcbe2 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -326,6 +326,7 @@ static int ip_rcv_finish(struct sk_buff *skb) */ if (skb_dst(skb) == NULL) { int err = -ENOENT; + bool nocache = false; if (sysctl_ip_early_demux) { const struct net_protocol *ipprot; @@ -334,13 +335,13 @@ static int ip_rcv_finish(struct sk_buff *skb) rcu_read_lock(); ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->early_demux) - err = ipprot->early_demux(skb); + err = ipprot->early_demux(skb, &nocache); rcu_read_unlock(); } if (err) { err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); + iph->tos, skb->dev, nocache); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb->dev), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 81533e3a23d1..fdc7900f9d7a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2214,7 +2214,7 @@ static int ip_mkroute_input(struct sk_buff *skb, */ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev) + u8 tos, struct net_device *dev, bool nocache) { struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -2353,6 +2353,8 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } + if (nocache) + rth->dst.flags |= DST_NOCACHE; hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); err = 0; @@ -2395,7 +2397,7 @@ martian_source_keep_err: } int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, bool noref) + u8 tos, struct net_device *dev, bool noref, bool nocache) { struct rtable *rth; unsigned int hash; @@ -2471,7 +2473,7 @@ skip_cache: rcu_read_unlock(); return -EINVAL; } - res = ip_route_input_slow(skb, daddr, saddr, tos, dev); + res = ip_route_input_slow(skb, daddr, saddr, tos, dev, nocache); rcu_read_unlock(); return res; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1781dc650b9d..33aabd4fc20f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1673,7 +1673,7 @@ csum_err: } EXPORT_SYMBOL(tcp_v4_do_rcv); -int tcp_v4_early_demux(struct sk_buff *skb) +int tcp_v4_early_demux(struct sk_buff *skb, bool *no_dst_cache) { struct net *net = dev_net(skb->dev); const struct iphdr *iph; @@ -1719,6 +1719,8 @@ int tcp_v4_early_demux(struct sk_buff *skb) } } } + } else { + *no_dst_cache = true; } out_err: diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 06814b6216dc..eee636b191b9 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -28,7 +28,7 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); if (ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) + iph->tos, skb->dev, false)) goto drop; } return dst_input(skb); -- cgit v1.2.3 From c10237e077cef50e925f052e49f3b4fead9d71f9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 27 Jun 2012 17:05:06 -0700 Subject: Revert "ipv4: tcp: dont cache unconfirmed intput dst" This reverts commit c074da2810c118b3812f32d6754bd9ead2f169e7. This change has several unwanted side effects: 1) Sockets will cache the DST_NOCACHE route in sk->sk_rx_dst and we'll thus never create a real cached route. 2) All TCP traffic will use DST_NOCACHE and never use the routing cache at all. Signed-off-by: David S. Miller --- include/net/protocol.h | 2 +- include/net/route.h | 8 ++++---- include/net/tcp.h | 2 +- net/ipv4/arp.c | 2 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_input.c | 5 ++--- net/ipv4/route.c | 8 +++----- net/ipv4/tcp_ipv4.c | 4 +--- net/ipv4/xfrm4_input.c | 2 +- 9 files changed, 15 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/protocol.h b/include/net/protocol.h index 7cfc8f76914d..967b926cbfb1 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -37,7 +37,7 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb, bool *nocache); + int (*early_demux)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); diff --git a/include/net/route.h b/include/net/route.h index 6361f9335774..47eb25ac1f7f 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -201,18 +201,18 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 } extern int ip_route_input_common(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin, bool noref, bool nocache); + u8 tos, struct net_device *devin, bool noref); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin) { - return ip_route_input_common(skb, dst, src, tos, devin, false, false); + return ip_route_input_common(skb, dst, src, tos, devin, false); } static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin, bool nocache) + u8 tos, struct net_device *devin) { - return ip_route_input_common(skb, dst, src, tos, devin, true, nocache); + return ip_route_input_common(skb, dst, src, tos, devin, true); } extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, diff --git a/include/net/tcp.h b/include/net/tcp.h index 917ed2e55e8c..6660ffc4963d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -325,7 +325,7 @@ extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); -extern int tcp_v4_early_demux(struct sk_buff *skb, bool *nocache); +extern int tcp_v4_early_demux(struct sk_buff *skb); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 6a9795944369..2e560f0c757d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -828,7 +828,7 @@ static int arp_process(struct sk_buff *skb) } if (arp->ar_op == htons(ARPOP_REQUEST) && - ip_route_input_noref(skb, tip, sip, 0, dev, false) == 0) { + ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { rt = skb_rtable(skb); addr_type = rt->rt_type; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 978d55f256ea..8d07c973409c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -259,7 +259,7 @@ static void ip_expire(unsigned long arg) skb_dst_drop(head); iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, - iph->tos, head->dev, false); + iph->tos, head->dev); if (err) goto out_rcu_unlock; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 7be54c8dcbe2..2a39204de5bc 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -326,7 +326,6 @@ static int ip_rcv_finish(struct sk_buff *skb) */ if (skb_dst(skb) == NULL) { int err = -ENOENT; - bool nocache = false; if (sysctl_ip_early_demux) { const struct net_protocol *ipprot; @@ -335,13 +334,13 @@ static int ip_rcv_finish(struct sk_buff *skb) rcu_read_lock(); ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->early_demux) - err = ipprot->early_demux(skb, &nocache); + err = ipprot->early_demux(skb); rcu_read_unlock(); } if (err) { err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev, nocache); + iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb->dev), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fdc7900f9d7a..81533e3a23d1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2214,7 +2214,7 @@ static int ip_mkroute_input(struct sk_buff *skb, */ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, bool nocache) + u8 tos, struct net_device *dev) { struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -2353,8 +2353,6 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - if (nocache) - rth->dst.flags |= DST_NOCACHE; hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); err = 0; @@ -2397,7 +2395,7 @@ martian_source_keep_err: } int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, bool noref, bool nocache) + u8 tos, struct net_device *dev, bool noref) { struct rtable *rth; unsigned int hash; @@ -2473,7 +2471,7 @@ skip_cache: rcu_read_unlock(); return -EINVAL; } - res = ip_route_input_slow(skb, daddr, saddr, tos, dev, nocache); + res = ip_route_input_slow(skb, daddr, saddr, tos, dev); rcu_read_unlock(); return res; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 33aabd4fc20f..1781dc650b9d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1673,7 +1673,7 @@ csum_err: } EXPORT_SYMBOL(tcp_v4_do_rcv); -int tcp_v4_early_demux(struct sk_buff *skb, bool *no_dst_cache) +int tcp_v4_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct iphdr *iph; @@ -1719,8 +1719,6 @@ int tcp_v4_early_demux(struct sk_buff *skb, bool *no_dst_cache) } } } - } else { - *no_dst_cache = true; } out_err: diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index eee636b191b9..06814b6216dc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -28,7 +28,7 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); if (ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev, false)) + iph->tos, skb->dev)) goto drop; } return dst_input(skb); -- cgit v1.2.3 From 1d1e34ddd48d27def2f324c1e3be16d460b16436 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 27 Jun 2012 21:57:03 -0700 Subject: xfrm_user: Propagate netlink error codes properly. Instead of using a fixed value of "-1" or "-EMSGSIZE", propagate what the nla_*() interfaces actually return. Signed-off-by: David S. Miller --- include/net/xfrm.h | 10 +- net/xfrm/xfrm_user.c | 394 ++++++++++++++++++++++++++------------------------- 2 files changed, 208 insertions(+), 196 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e0a55df5bde8..17acbc92476d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1682,13 +1682,11 @@ static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m) { - if ((m->m | m->v) && - nla_put(skb, XFRMA_MARK, sizeof(struct xfrm_mark), m)) - goto nla_put_failure; - return 0; + int ret = 0; -nla_put_failure: - return -1; + if (m->m | m->v) + ret = nla_put(skb, XFRMA_MARK, sizeof(struct xfrm_mark), m); + return ret; } #endif /* _NET_XFRM_H */ diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 44293b3fd6a1..540762726aaf 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -754,58 +754,67 @@ static int copy_to_user_state_extra(struct xfrm_state *x, struct xfrm_usersa_info *p, struct sk_buff *skb) { - copy_to_user_state(x, p); - - if (x->coaddr && - nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr)) - goto nla_put_failure; - - if (x->lastused && - nla_put_u64(skb, XFRMA_LASTUSED, x->lastused)) - goto nla_put_failure; - - if (x->aead && - nla_put(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead)) - goto nla_put_failure; - - if (x->aalg && - (copy_to_user_auth(x->aalg, skb) || - nla_put(skb, XFRMA_ALG_AUTH_TRUNC, - xfrm_alg_auth_len(x->aalg), x->aalg))) - goto nla_put_failure; - - if (x->ealg && - nla_put(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg)) - goto nla_put_failure; - - if (x->calg && - nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg)) - goto nla_put_failure; - - if (x->encap && - nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap)) - goto nla_put_failure; + int ret = 0; - if (x->tfcpad && - nla_put_u32(skb, XFRMA_TFCPAD, x->tfcpad)) - goto nla_put_failure; - - if (xfrm_mark_put(skb, &x->mark)) - goto nla_put_failure; - - if (x->replay_esn && - nla_put(skb, XFRMA_REPLAY_ESN_VAL, - xfrm_replay_state_esn_len(x->replay_esn), - x->replay_esn)) - goto nla_put_failure; - - if (x->security && copy_sec_ctx(x->security, skb)) - goto nla_put_failure; - - return 0; + copy_to_user_state(x, p); -nla_put_failure: - return -EMSGSIZE; + if (x->coaddr) { + ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + if (ret) + goto out; + } + if (x->lastused) { + ret = nla_put_u64(skb, XFRMA_LASTUSED, x->lastused); + if (ret) + goto out; + } + if (x->aead) { + ret = nla_put(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead); + if (ret) + goto out; + } + if (x->aalg) { + ret = copy_to_user_auth(x->aalg, skb); + if (!ret) + ret = nla_put(skb, XFRMA_ALG_AUTH_TRUNC, + xfrm_alg_auth_len(x->aalg), x->aalg); + if (ret) + goto out; + } + if (x->ealg) { + ret = nla_put(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg); + if (ret) + goto out; + } + if (x->calg) { + ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + if (ret) + goto out; + } + if (x->encap) { + ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + if (ret) + goto out; + } + if (x->tfcpad) { + ret = nla_put_u32(skb, XFRMA_TFCPAD, x->tfcpad); + if (ret) + goto out; + } + ret = xfrm_mark_put(skb, &x->mark); + if (ret) + goto out; + if (x->replay_esn) { + ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), + x->replay_esn); + if (ret) + goto out; + } + if (x->security) + ret = copy_sec_ctx(x->security, skb); +out: + return ret; } static int dump_one_state(struct xfrm_state *x, int count, void *ptr) @@ -825,15 +834,12 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) p = nlmsg_data(nlh); err = copy_to_user_state_extra(x, p, skb); - if (err) - goto nla_put_failure; - + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } nlmsg_end(skb, nlh); return 0; - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return err; } static int xfrm_dump_sa_done(struct netlink_callback *cb) @@ -904,6 +910,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, struct xfrmu_spdinfo spc; struct xfrmu_spdhinfo sph; struct nlmsghdr *nlh; + int err; u32 *f; nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); @@ -922,15 +929,15 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, sph.spdhcnt = si.spdhcnt; sph.spdhmcnt = si.spdhmcnt; - if (nla_put(skb, XFRMA_SPD_INFO, sizeof(spc), &spc) || - nla_put(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph)) - goto nla_put_failure; + err = nla_put(skb, XFRMA_SPD_INFO, sizeof(spc), &spc); + if (!err) + err = nla_put(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } return nlmsg_end(skb, nlh); - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -965,6 +972,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; struct nlmsghdr *nlh; + int err; u32 *f; nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); @@ -978,15 +986,15 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, sh.sadhmcnt = si.sadhmcnt; sh.sadhcnt = si.sadhcnt; - if (nla_put_u32(skb, XFRMA_SAD_CNT, si.sadcnt) || - nla_put(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh)) - goto nla_put_failure; + err = nla_put_u32(skb, XFRMA_SAD_CNT, si.sadcnt); + if (!err) + err = nla_put(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } return nlmsg_end(skb, nlh); - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1439,9 +1447,8 @@ static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buf static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) { - if (xp->security) { + if (xp->security) return copy_sec_ctx(xp->security, skb); - } return 0; } static inline size_t userpolicy_type_attrsize(void) @@ -1477,6 +1484,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct sk_buff *in_skb = sp->in_skb; struct sk_buff *skb = sp->out_skb; struct nlmsghdr *nlh; + int err; nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); @@ -1485,22 +1493,19 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr p = nlmsg_data(nlh); copy_to_user_policy(xp, p, dir); - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_sec_ctx(xp, skb)) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp->type, skb) < 0) - goto nlmsg_failure; - if (xfrm_mark_put(skb, &xp->mark)) - goto nla_put_failure; - + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_sec_ctx(xp, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } nlmsg_end(skb, nlh); return 0; - -nla_put_failure: -nlmsg_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_dump_policy_done(struct netlink_callback *cb) @@ -1688,6 +1693,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct { struct xfrm_aevent_id *id; struct nlmsghdr *nlh; + int err; nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); if (nlh == NULL) @@ -1703,35 +1709,39 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct id->flags = c->data.aevent; if (x->replay_esn) { - if (nla_put(skb, XFRMA_REPLAY_ESN_VAL, - xfrm_replay_state_esn_len(x->replay_esn), - x->replay_esn)) - goto nla_put_failure; + err = nla_put(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), + x->replay_esn); } else { - if (nla_put(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), - &x->replay)) - goto nla_put_failure; + err = nla_put(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), + &x->replay); } - if (nla_put(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft)) - goto nla_put_failure; - - if ((id->flags & XFRM_AE_RTHR) && - nla_put_u32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff)) - goto nla_put_failure; - - if ((id->flags & XFRM_AE_ETHR) && - nla_put_u32(skb, XFRMA_ETIMER_THRESH, - x->replay_maxage * 10 / HZ)) - goto nla_put_failure; + if (err) + goto out_cancel; + err = nla_put(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); + if (err) + goto out_cancel; - if (xfrm_mark_put(skb, &x->mark)) - goto nla_put_failure; + if (id->flags & XFRM_AE_RTHR) { + err = nla_put_u32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff); + if (err) + goto out_cancel; + } + if (id->flags & XFRM_AE_ETHR) { + err = nla_put_u32(skb, XFRMA_ETIMER_THRESH, + x->replay_maxage * 10 / HZ); + if (err) + goto out_cancel; + } + err = xfrm_mark_put(skb, &x->mark); + if (err) + goto out_cancel; return nlmsg_end(skb, nlh); -nla_put_failure: +out_cancel: nlmsg_cancel(skb, nlh); - return -EMSGSIZE; + return err; } static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2155,7 +2165,7 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, const struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; struct nlmsghdr *nlh; - int i; + int i, err; nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id), 0); if (nlh == NULL) @@ -2167,21 +2177,25 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); pol_id->dir = dir; - if (k != NULL && (copy_to_user_kmaddress(k, skb) < 0)) - goto nlmsg_failure; - - if (copy_to_user_policy_type(type, skb) < 0) - goto nlmsg_failure; - + if (k != NULL) { + err = copy_to_user_kmaddress(k, skb); + if (err) + goto out_cancel; + } + err = copy_to_user_policy_type(type, skb); + if (err) + goto out_cancel; for (i = 0, mp = m ; i < num_migrate; i++, mp++) { - if (copy_to_user_migrate(mp, skb) < 0) - goto nlmsg_failure; + err = copy_to_user_migrate(mp, skb); + if (err) + goto out_cancel; } return nlmsg_end(skb, nlh); -nlmsg_failure: + +out_cancel: nlmsg_cancel(skb, nlh); - return -EMSGSIZE; + return err; } static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, @@ -2354,6 +2368,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct { struct xfrm_user_expire *ue; struct nlmsghdr *nlh; + int err; nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); if (nlh == NULL) @@ -2363,13 +2378,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 1 : 0; - if (xfrm_mark_put(skb, &x->mark)) - goto nla_put_failure; + err = xfrm_mark_put(skb, &x->mark); + if (err) + return err; return nlmsg_end(skb, nlh); - -nla_put_failure: - return -EMSGSIZE; } static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) @@ -2470,7 +2483,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) struct nlmsghdr *nlh; struct sk_buff *skb; int len = xfrm_sa_len(x); - int headlen; + int headlen, err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELSA) { @@ -2485,8 +2498,9 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) return -ENOMEM; nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + err = -EMSGSIZE; if (nlh == NULL) - goto nla_put_failure; + goto out_free_skb; p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELSA) { @@ -2499,24 +2513,23 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) id->proto = x->id.proto; attr = nla_reserve(skb, XFRMA_SA, sizeof(*p)); + err = -EMSGSIZE; if (attr == NULL) - goto nla_put_failure; + goto out_free_skb; p = nla_data(attr); } - - if (copy_to_user_state_extra(x, p, skb)) - goto nla_put_failure; + err = copy_to_user_state_extra(x, p, skb); + if (err) + goto out_free_skb; nlmsg_end(skb, nlh); return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); -nla_put_failure: - /* Somebody screwed up with xfrm_sa_len! */ - WARN_ON(1); +out_free_skb: kfree_skb(skb); - return -1; + return err; } static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c) @@ -2557,9 +2570,10 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp, int dir) { + __u32 seq = xfrm_get_acqseq(); struct xfrm_user_acquire *ua; struct nlmsghdr *nlh; - __u32 seq = xfrm_get_acqseq(); + int err; nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_ACQUIRE, sizeof(*ua), 0); if (nlh == NULL) @@ -2575,21 +2589,19 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, ua->calgos = xt->calgos; ua->seq = x->km.seq = seq; - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_state_sec_ctx(x, skb)) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp->type, skb) < 0) - goto nlmsg_failure; - if (xfrm_mark_put(skb, &xp->mark)) - goto nla_put_failure; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_state_sec_ctx(x, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } return nlmsg_end(skb, nlh); - -nla_put_failure: -nlmsg_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, @@ -2681,8 +2693,9 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, int dir, const struct km_event *c) { struct xfrm_user_polexpire *upe; - struct nlmsghdr *nlh; int hard = c->data.hard; + struct nlmsghdr *nlh; + int err; nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); if (nlh == NULL) @@ -2690,22 +2703,20 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, upe = nlmsg_data(nlh); copy_to_user_policy(xp, &upe->pol, dir); - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_sec_ctx(xp, skb)) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp->type, skb) < 0) - goto nlmsg_failure; - if (xfrm_mark_put(skb, &xp->mark)) - goto nla_put_failure; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_sec_ctx(xp, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } upe->hard = !!hard; return nlmsg_end(skb, nlh); - -nla_put_failure: -nlmsg_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) @@ -2725,13 +2736,13 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { + int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); struct net *net = xp_net(xp); struct xfrm_userpolicy_info *p; struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); - int headlen; + int headlen, err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELPOLICY) { @@ -2747,8 +2758,9 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e return -ENOMEM; nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + err = -EMSGSIZE; if (nlh == NULL) - goto nlmsg_failure; + goto out_free_skb; p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELPOLICY) { @@ -2763,29 +2775,29 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e memcpy(&id->sel, &xp->selector, sizeof(id->sel)); attr = nla_reserve(skb, XFRMA_POLICY, sizeof(*p)); + err = -EMSGSIZE; if (attr == NULL) - goto nlmsg_failure; + goto out_free_skb; p = nla_data(attr); } copy_to_user_policy(xp, p, dir); - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp->type, skb) < 0) - goto nlmsg_failure; - - if (xfrm_mark_put(skb, &xp->mark)) - goto nla_put_failure; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) + goto out_free_skb; nlmsg_end(skb, nlh); return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); -nla_put_failure: -nlmsg_failure: +out_free_skb: kfree_skb(skb); - return -1; + return err; } static int xfrm_notify_policy_flush(const struct km_event *c) @@ -2793,24 +2805,27 @@ static int xfrm_notify_policy_flush(const struct km_event *c) struct net *net = c->net; struct nlmsghdr *nlh; struct sk_buff *skb; + int err; skb = nlmsg_new(userpolicy_type_attrsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + err = -EMSGSIZE; if (nlh == NULL) - goto nlmsg_failure; - if (copy_to_user_policy_type(c->data.type, skb) < 0) - goto nlmsg_failure; + goto out_free_skb; + err = copy_to_user_policy_type(c->data.type, skb); + if (err) + goto out_free_skb; nlmsg_end(skb, nlh); return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); -nlmsg_failure: +out_free_skb: kfree_skb(skb); - return -1; + return err; } static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) @@ -2853,15 +2868,14 @@ static int build_report(struct sk_buff *skb, u8 proto, ur->proto = proto; memcpy(&ur->sel, sel, sizeof(ur->sel)); - if (addr && - nla_put(skb, XFRMA_COADDR, sizeof(*addr), addr)) - goto nla_put_failure; - + if (addr) { + int err = nla_put(skb, XFRMA_COADDR, sizeof(*addr), addr); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } + } return nlmsg_end(skb, nlh); - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_send_report(struct net *net, u8 proto, -- cgit v1.2.3 From 160eb5a6b14ca2eab5c598bdbbb24c24624bad34 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 27 Jun 2012 22:01:22 -0700 Subject: ipv4: Kill early demux method return value. It's completely unnecessary. Signed-off-by: David S. Miller --- include/net/protocol.h | 2 +- include/net/tcp.h | 2 +- net/ipv4/ip_input.c | 42 +++++++++++++++++++----------------------- net/ipv4/tcp_ipv4.c | 19 ++++++------------- 4 files changed, 27 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/protocol.h b/include/net/protocol.h index 967b926cbfb1..057f2d315567 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -37,7 +37,7 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb); + void (*early_demux)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); diff --git a/include/net/tcp.h b/include/net/tcp.h index 6660ffc4963d..53fb7d814170 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -325,7 +325,7 @@ extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); -extern int tcp_v4_early_demux(struct sk_buff *skb); +extern void tcp_v4_early_demux(struct sk_buff *skb); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 2a39204de5bc..b27d4440f523 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -320,33 +320,29 @@ static int ip_rcv_finish(struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; + if (sysctl_ip_early_demux && !skb_dst(skb)) { + const struct net_protocol *ipprot; + int protocol = iph->protocol; + + rcu_read_lock(); + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot && ipprot->early_demux) + ipprot->early_demux(skb); + rcu_read_unlock(); + } + /* * Initialise the virtual path cache for the packet. It describes * how the packet travels inside Linux networking. */ - if (skb_dst(skb) == NULL) { - int err = -ENOENT; - - if (sysctl_ip_early_demux) { - const struct net_protocol *ipprot; - int protocol = iph->protocol; - - rcu_read_lock(); - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot && ipprot->early_demux) - err = ipprot->early_demux(skb); - rcu_read_unlock(); - } - - if (err) { - err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); - if (unlikely(err)) { - if (err == -EXDEV) - NET_INC_STATS_BH(dev_net(skb->dev), - LINUX_MIB_IPRPFILTER); - goto drop; - } + if (!skb_dst(skb)) { + int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev); + if (unlikely(err)) { + if (err == -EXDEV) + NET_INC_STATS_BH(dev_net(skb->dev), + LINUX_MIB_IPRPFILTER); + goto drop; } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1781dc650b9d..b4ae1c199f3e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1673,30 +1673,28 @@ csum_err: } EXPORT_SYMBOL(tcp_v4_do_rcv); -int tcp_v4_early_demux(struct sk_buff *skb) +void tcp_v4_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct iphdr *iph; const struct tcphdr *th; struct net_device *dev; struct sock *sk; - int err; - err = -ENOENT; if (skb->pkt_type != PACKET_HOST) - goto out_err; + return; if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) - goto out_err; + return; iph = ip_hdr(skb); th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); if (th->doff < sizeof(struct tcphdr) / 4) - goto out_err; + return; if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4)) - goto out_err; + return; dev = skb->dev; sk = __inet_lookup_established(net, &tcp_hashinfo, @@ -1713,16 +1711,11 @@ int tcp_v4_early_demux(struct sk_buff *skb) if (dst) { struct rtable *rt = (struct rtable *) dst; - if (rt->rt_iif == dev->ifindex) { + if (rt->rt_iif == dev->ifindex) skb_dst_set_noref(skb, dst); - err = 0; - } } } } - -out_err: - return err; } /* -- cgit v1.2.3 From fc8a7321d3d68af759a369a9ad3e2426688742d3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Jun 2012 10:33:25 +0200 Subject: mac80211: don't expose ieee80211_add_srates_ie() This and ieee80211_add_ext_srates_ie() aren't exported, so can't be used by drivers anyway, but there's also no reason that they should be so make them private to mac80211 and use sdata instead of vif arguments. Acked-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ------ net/mac80211/cfg.c | 12 ++++++------ net/mac80211/ieee80211_i.h | 4 ++++ net/mac80211/mesh_plink.c | 4 ++-- net/mac80211/tx.c | 4 ++-- net/mac80211/util.c | 10 ++++------ 6 files changed, 18 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 510d852d5222..5e67020b1702 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3826,12 +3826,6 @@ void ieee80211_enable_rssi_reports(struct ieee80211_vif *vif, void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif); -int ieee80211_add_srates_ie(struct ieee80211_vif *vif, - struct sk_buff *skb, bool need_basic); - -int ieee80211_add_ext_srates_ie(struct ieee80211_vif *vif, - struct sk_buff *skb, bool need_basic); - /** * ieee80211_ave_rssi - report the average rssi for the specified interface * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7722a7336a58..ebc353ef6902 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2665,8 +2665,8 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, tf->u.setup_req.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - ieee80211_add_srates_ie(&sdata->vif, skb, false); - ieee80211_add_ext_srates_ie(&sdata->vif, skb, false); + ieee80211_add_srates_ie(sdata, skb, false); + ieee80211_add_ext_srates_ie(sdata, skb, false); ieee80211_tdls_add_ext_capab(skb); break; case WLAN_TDLS_SETUP_RESPONSE: @@ -2679,8 +2679,8 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, tf->u.setup_resp.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - ieee80211_add_srates_ie(&sdata->vif, skb, false); - ieee80211_add_ext_srates_ie(&sdata->vif, skb, false); + ieee80211_add_srates_ie(sdata, skb, false); + ieee80211_add_ext_srates_ie(sdata, skb, false); ieee80211_tdls_add_ext_capab(skb); break; case WLAN_TDLS_SETUP_CONFIRM: @@ -2740,8 +2740,8 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, mgmt->u.action.u.tdls_discover_resp.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - ieee80211_add_srates_ie(&sdata->vif, skb, false); - ieee80211_add_ext_srates_ie(&sdata->vif, skb, false); + ieee80211_add_srates_ie(sdata, skb, false); + ieee80211_add_ext_srates_ie(sdata, skb, false); ieee80211_tdls_add_ext_capab(skb); break; default: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 341d77d472d2..6b7157d20507 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1480,6 +1480,10 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, struct ieee80211_channel *channel, enum nl80211_channel_type channel_type, u16 prot_mode); +int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, bool need_basic); +int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, bool need_basic); /* channel management */ enum ieee80211_chan_mode { diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index a1dbd1540276..425685914d7d 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -258,8 +258,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, 2); memcpy(pos + 2, &plid, 2); } - if (ieee80211_add_srates_ie(&sdata->vif, skb, true) || - ieee80211_add_ext_srates_ie(&sdata->vif, skb, true) || + if (ieee80211_add_srates_ie(sdata, skb, true) || + ieee80211_add_ext_srates_ie(sdata, skb, true) || mesh_add_rsn_ie(skb, sdata) || mesh_add_meshid_ie(skb, sdata) || mesh_add_meshconf_ie(skb, sdata)) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ec8f53467374..4e753032e48d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2420,9 +2420,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, *pos++ = WLAN_EID_SSID; *pos++ = 0x0; - if (ieee80211_add_srates_ie(&sdata->vif, skb, true) || + if (ieee80211_add_srates_ie(sdata, skb, true) || mesh_add_ds_params_ie(skb, sdata) || - ieee80211_add_ext_srates_ie(&sdata->vif, skb, true) || + ieee80211_add_ext_srates_ie(sdata, skb, true) || mesh_add_rsn_ie(skb, sdata) || mesh_add_ht_cap_ie(skb, sdata) || mesh_add_ht_oper_ie(skb, sdata) || diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 242ecde381f6..c4245695afc3 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1764,15 +1764,14 @@ ieee80211_ht_oper_to_channel_type(struct ieee80211_ht_operation *ht_oper) return channel_type; } -int ieee80211_add_srates_ie(struct ieee80211_vif *vif, +int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic) { - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; int rate; u8 i, rates, *pos; - u32 basic_rates = vif->bss_conf.basic_rates; + u32 basic_rates = sdata->vif.bss_conf.basic_rates; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; rates = sband->n_bitrates; @@ -1796,15 +1795,14 @@ int ieee80211_add_srates_ie(struct ieee80211_vif *vif, return 0; } -int ieee80211_add_ext_srates_ie(struct ieee80211_vif *vif, +int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic) { - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; int rate; u8 i, exrates, *pos; - u32 basic_rates = vif->bss_conf.basic_rates; + u32 basic_rates = sdata->vif.bss_conf.basic_rates; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; exrates = sband->n_bitrates; -- cgit v1.2.3 From 70e7341673a47fb1525cfc7d6651cc98b5348928 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Jun 2012 03:21:41 -0700 Subject: ipv4: Show that ip_send_reply() is purely unicast routine. Rename it to ip_send_unicast_reply() and add explicit 'saddr' argument. This removed one of the few users of rt->rt_spec_dst. Signed-off-by: David S. Miller --- include/net/ip.h | 5 +++-- net/ipv4/ip_output.c | 9 +++++---- net/ipv4/tcp_ipv4.c | 8 ++++---- 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 50841bd6f10e..ec5cfde85e9a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -158,8 +158,9 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } -void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, - const struct ip_reply_arg *arg, unsigned int len); +void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, + __be32 saddr, const struct ip_reply_arg *arg, + unsigned int len); struct ipv4_config { int log_martians; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0f3185a662c3..2630900e480a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1459,13 +1459,14 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, /* * Generic function to send a packet as reply to another packet. - * Used to send TCP resets so far. ICMP should use this function too. + * Used to send TCP resets so far. * * Should run single threaded per socket because it uses the sock * structure to pass arguments. */ -void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, - const struct ip_reply_arg *arg, unsigned int len) +void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, + __be32 saddr, const struct ip_reply_arg *arg, + unsigned int len) { struct inet_sock *inet = inet_sk(sk); struct ip_options_data replyopts; @@ -1491,7 +1492,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, RT_TOS(arg->tos), RT_SCOPE_UNIVERSE, sk->sk_protocol, ip_reply_arg_flowi_flags(arg), - daddr, rt->rt_spec_dst, + daddr, saddr, tcp_hdr(skb)->source, tcp_hdr(skb)->dest); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b4ae1c199f3e..64568fa21d05 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -698,8 +698,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); arg.tos = ip_hdr(skb)->tos; - ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, - &arg, arg.iov[0].iov_len); + ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); @@ -781,8 +781,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, if (oif) arg.bound_dev_if = oif; arg.tos = tos; - ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, - &arg, arg.iov[0].iov_len); + ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); } -- cgit v1.2.3 From 35ebf65e851c6d9731abc6362b189858eb59f4d3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Jun 2012 03:59:11 -0700 Subject: ipv4: Create and use fib_compute_spec_dst() helper. The specific destination is the host we direct unicast replies to. Usually this is the original packet source address, but if we are responding to a multicast or broadcast packet we have to use something different. Specifically we must use the source address we would use if we were to send a packet to the unicast source of the original packet. The routing cache precomputes this value, but we want to remove that precomputation because it creates a hard dependency on the expensive rpfilter source address validation which we'd like to make cheaper. There are only three places where this matters: 1) ICMP replies. 2) pktinfo CMSG 3) IP options Now there will be no real users of rt->rt_spec_dst and we can simply remove it altogether. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_frontend.c | 29 +++++++++++++++++++++++++++++ net/ipv4/icmp.c | 6 ++++-- net/ipv4/ip_options.c | 22 +++++++++++----------- net/ipv4/ip_sockglue.c | 7 ++++--- 5 files changed, 49 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 4b347c0ca094..1687b3de54ff 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -230,6 +230,7 @@ extern struct fib_table *fib_get_table(struct net *net, u32 id); /* Exported by fib_frontend.c */ extern const struct nla_policy rtm_ipv4_policy[]; extern void ip_fib_init(void); +extern __be32 fib_compute_spec_dst(struct sk_buff *skb); extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, __be32 *spec_dst, u32 *itag); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3854411fa37c..451939b60c54 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -180,6 +180,35 @@ unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, } EXPORT_SYMBOL(inet_dev_addr_type); +__be32 fib_compute_spec_dst(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct in_device *in_dev; + struct fib_result res; + struct flowi4 fl4; + struct net *net; + + if (skb->pkt_type != PACKET_BROADCAST && + skb->pkt_type != PACKET_MULTICAST) + return ip_hdr(skb)->daddr; + + in_dev = __in_dev_get_rcu(dev); + BUG_ON(!in_dev); + fl4.flowi4_oif = 0; + fl4.flowi4_iif = 0; + fl4.daddr = ip_hdr(skb)->saddr; + fl4.saddr = ip_hdr(skb)->daddr; + fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; + + net = dev_net(dev); + if (!fib_lookup(net, &fl4, &res)) + return FIB_RES_PREFSRC(net, res); + else + return inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); +} + /* Given (packet source, input interface) and optional (dst, oif, tos): * - (main) check, that source is valid i.e. not broadcast or our local * address. diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 49a74cc79dc8..4bce5a2830aa 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -95,6 +95,7 @@ #include #include #include +#include /* * Build xmit assembly blocks @@ -333,7 +334,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct flowi4 fl4; struct sock *sk; struct inet_sock *inet; - __be32 daddr; + __be32 daddr, saddr; if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; @@ -347,6 +348,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) inet->tos = ip_hdr(skb)->tos; daddr = ipc.addr = ip_hdr(skb)->saddr; + saddr = fib_compute_spec_dst(skb); ipc.opt = NULL; ipc.tx_flags = 0; if (icmp_param->replyopts.opt.opt.optlen) { @@ -356,7 +358,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) } memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; - fl4.saddr = rt->rt_spec_dst; + fl4.saddr = saddr; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 708b99494e23..766dfe56885a 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * Write options to IP header, record destination address to @@ -104,7 +105,7 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) sptr = skb_network_header(skb); dptr = dopt->__data; - daddr = skb_rtable(skb)->rt_spec_dst; + daddr = fib_compute_spec_dst(skb); if (sopt->rr) { optlen = sptr[sopt->rr+1]; @@ -250,15 +251,14 @@ void ip_options_fragment(struct sk_buff *skb) int ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb) { - int l; - unsigned char *iph; - unsigned char *optptr; - int optlen; + __be32 spec_dst = (__force __be32) 0; unsigned char *pp_ptr = NULL; - struct rtable *rt = NULL; + unsigned char *optptr; + unsigned char *iph; + int optlen, l; if (skb != NULL) { - rt = skb_rtable(skb); + spec_dst = fib_compute_spec_dst(skb); optptr = (unsigned char *)&(ip_hdr(skb)[1]); } else optptr = opt->__data; @@ -330,8 +330,8 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - if (rt) { - memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); + if (skb) { + memcpy(&optptr[optptr[2]-1], &spec_dst, 4); opt->is_changed = 1; } optptr[2] += 4; @@ -372,8 +372,8 @@ int ip_options_compile(struct net *net, goto error; } opt->ts = optptr - iph; - if (rt) { - memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); + if (skb) { + memcpy(&optptr[optptr[2]-1], &spec_dst, 4); timeptr = &optptr[optptr[2]+3]; } opt->ts_needaddr = 1; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 0d11f234d615..de29f46f68b0 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -40,6 +40,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include #endif +#include #include #include @@ -1019,8 +1020,8 @@ e_inval: * @sk: socket * @skb: buffer * - * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst - * in skb->cb[] before dst drop. + * To support IP_CMSG_PKTINFO option, we store rt_iif and specific + * destination in skb->cb[] before dst drop. * This way, receiver doesnt make cache line misses to read rtable. */ void ipv4_pktinfo_prepare(struct sk_buff *skb) @@ -1030,7 +1031,7 @@ void ipv4_pktinfo_prepare(struct sk_buff *skb) if (rt) { pktinfo->ipi_ifindex = rt->rt_iif; - pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst; + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; -- cgit v1.2.3 From 41347dcdd81988b8e60853257b2875285cc17a4e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Jun 2012 04:05:27 -0700 Subject: ipv4: Kill rt->rt_spec_dst, no longer used. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- include/net/route.h | 1 - net/ipv4/fib_frontend.c | 9 ++------- net/ipv4/route.c | 38 +++++++++----------------------------- net/ipv4/xfrm4_policy.c | 1 - 5 files changed, 12 insertions(+), 39 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 1687b3de54ff..9e6c26d4ba4c 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -233,7 +233,7 @@ extern void ip_fib_init(void); extern __be32 fib_compute_spec_dst(struct sk_buff *skb); extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, - __be32 *spec_dst, u32 *itag); + u32 *itag); extern void fib_select_default(struct fib_result *res); /* Exported by fib_semantics.c */ diff --git a/include/net/route.h b/include/net/route.h index 47eb25ac1f7f..211e2665139b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -65,7 +65,6 @@ struct rtable { __be32 rt_gateway; /* Miscellaneous cached information */ - __be32 rt_spec_dst; /* RFC1122 specific destination */ u32 rt_peer_genid; unsigned long _peer; /* long-living peer info */ struct fib_info *fi; /* for client ref to shared metrics */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 451939b60c54..63b11ca54d95 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -218,8 +218,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) * called with rcu_read_lock() */ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, - int oif, struct net_device *dev, __be32 *spec_dst, - u32 *itag) + int oif, struct net_device *dev, u32 *itag) { struct in_device *in_dev; struct flowi4 fl4; @@ -258,7 +257,6 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, if (res.type != RTN_LOCAL || !accept_local) goto e_inval; } - *spec_dst = FIB_RES_PREFSRC(net, res); fib_combine_itag(itag, &res); dev_match = false; @@ -287,17 +285,14 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, ret = 0; if (fib_lookup(net, &fl4, &res) == 0) { - if (res.type == RTN_UNICAST) { - *spec_dst = FIB_RES_PREFSRC(net, res); + if (res.type == RTN_UNICAST) ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; - } } return ret; last_resort: if (rpf) goto e_rpf; - *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); *itag = 0; return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 81533e3a23d1..83d56a016625 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -440,7 +440,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) r->rt_key_tos, -1, HHUptod, - r->rt_spec_dst, &len); + 0, &len); seq_printf(seq, "%*s\n", 127 - len, ""); } @@ -1978,7 +1978,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, { unsigned int hash; struct rtable *rth; - __be32 spec_dst; struct in_device *in_dev = __in_dev_get_rcu(dev); u32 itag = 0; int err; @@ -1999,10 +1998,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (ipv4_is_zeronet(saddr)) { if (!ipv4_is_local_multicast(daddr)) goto e_inval; - spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); } else { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, - &itag); + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &itag); if (err < 0) goto e_err; } @@ -2029,7 +2026,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_oif = 0; rth->rt_mark = skb->mark; rth->rt_gateway = daddr; - rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; rt_init_peer(rth, dev_net(dev)->ipv4.peers); rth->fi = NULL; @@ -2093,7 +2089,6 @@ static int __mkroute_input(struct sk_buff *skb, int err; struct in_device *out_dev; unsigned int flags = 0; - __be32 spec_dst; u32 itag; /* get a working reference to the output device */ @@ -2105,7 +2100,7 @@ static int __mkroute_input(struct sk_buff *skb, err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), - in_dev->dev, &spec_dst, &itag); + in_dev->dev, &itag); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -2157,7 +2152,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_oif = 0; rth->rt_mark = skb->mark; rth->rt_gateway = daddr; - rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; rt_init_peer(rth, &res->table->tb_peers); rth->fi = NULL; @@ -2223,7 +2217,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 itag = 0; struct rtable *rth; unsigned int hash; - __be32 spec_dst; int err = -EINVAL; struct net *net = dev_net(dev); @@ -2281,12 +2274,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, net->loopback_dev->ifindex, - dev, &spec_dst, &itag); + dev, &itag); if (err < 0) goto martian_source_keep_err; if (err) flags |= RTCF_DIRECTSRC; - spec_dst = daddr; goto local_input; } @@ -2302,11 +2294,8 @@ brd_input: if (skb->protocol != htons(ETH_P_IP)) goto e_inval; - if (ipv4_is_zeronet(saddr)) - spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); - else { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, - &itag); + if (!ipv4_is_zeronet(saddr)) { + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &itag); if (err < 0) goto martian_source_keep_err; if (err) @@ -2344,7 +2333,6 @@ local_input: rth->rt_oif = 0; rth->rt_mark = skb->mark; rth->rt_gateway = daddr; - rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; rt_init_peer(rth, net->ipv4.peers); rth->fi = NULL; @@ -2362,7 +2350,6 @@ local_input: no_route: RT_CACHE_STAT_INC(in_no_route); - spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); res.type = RTN_UNREACHABLE; if (err == -ESRCH) err = -ENETUNREACH; @@ -2545,7 +2532,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_oif = orig_oif; rth->rt_mark = fl4->flowi4_mark; rth->rt_gateway = fl4->daddr; - rth->rt_spec_dst= fl4->saddr; rth->rt_peer_genid = 0; rt_init_peer(rth, (res->table ? &res->table->tb_peers : @@ -2554,12 +2540,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, RT_CACHE_STAT_INC(out_slow_tot); - if (flags & RTCF_LOCAL) { + if (flags & RTCF_LOCAL) rth->dst.input = ip_local_deliver; - rth->rt_spec_dst = fl4->daddr; - } if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { - rth->rt_spec_dst = fl4->saddr; if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { rth->dst.output = ip_mc_output; @@ -2890,7 +2873,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_dst = ort->rt_dst; rt->rt_src = ort->rt_src; rt->rt_gateway = ort->rt_gateway; - rt->rt_spec_dst = ort->rt_spec_dst; rt_transfer_peer(rt, ort); rt->fi = ort->fi; if (rt->fi) @@ -2965,10 +2947,8 @@ static int rt_fill_info(struct net *net, nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) goto nla_put_failure; #endif - if (rt_is_input_route(rt)) { - if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_spec_dst)) - goto nla_put_failure; - } else if (rt->rt_src != rt->rt_key_src) { + if (!rt_is_input_route(rt) && + rt->rt_src != rt->rt_key_src) { if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) goto nla_put_failure; } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8855d8268552..9815ea0bca7f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -100,7 +100,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_src = rt->rt_src; xdst->u.rt.rt_dst = rt->rt_dst; xdst->u.rt.rt_gateway = rt->rt_gateway; - xdst->u.rt.rt_spec_dst = rt->rt_spec_dst; return 0; } -- cgit v1.2.3 From bf0c111ec80355ee9fe2e2bdb609a536b54768d8 Mon Sep 17 00:00:00 2001 From: Mahesh Palivela Date: Fri, 22 Jun 2012 07:27:46 +0000 Subject: cfg80211: allow advertising VHT capabilities Allow drivers to advertise their VHT capabilities and export them to userspace via nl80211. Signed-off-by: Mahesh Palivela Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 6 ++++++ include/net/cfg80211.h | 17 +++++++++++++++++ net/wireless/nl80211.c | 9 +++++++++ 3 files changed, 32 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index c0fc5d277338..23003272c70e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1813,6 +1813,9 @@ enum nl80211_mpath_info { * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n + * @NL80211_BAND_ATTR_VHT_MCS_SET: 32-byte attribute containing the MCS set as + * defined in 802.11ac + * @NL80211_BAND_ATTR_VHT_CAPA: VHT capabilities, as in the HT information IE * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined * @__NL80211_BAND_ATTR_AFTER_LAST: internal use */ @@ -1826,6 +1829,9 @@ enum nl80211_band_attr { NL80211_BAND_ATTR_HT_AMPDU_FACTOR, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, + NL80211_BAND_ATTR_VHT_MCS_SET, + NL80211_BAND_ATTR_VHT_CAPA, + /* keep last */ __NL80211_BAND_ATTR_AFTER_LAST, NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7d3cd3ce9a26..1fc89c4f930c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -210,6 +210,22 @@ struct ieee80211_sta_ht_cap { struct ieee80211_mcs_info mcs; }; +/** + * struct ieee80211_sta_vht_cap - STA's VHT capabilities + * + * This structure describes most essential parameters needed + * to describe 802.11ac VHT capabilities for an STA. + * + * @vht_supported: is VHT supported by the STA + * @cap: VHT capabilities map as described in 802.11ac spec + * @vht_mcs: Supported VHT MCS rates + */ +struct ieee80211_sta_vht_cap { + bool vht_supported; + u32 cap; /* use IEEE80211_VHT_CAP_ */ + struct ieee80211_vht_mcs_info vht_mcs; +}; + /** * struct ieee80211_supported_band - frequency band definition * @@ -233,6 +249,7 @@ struct ieee80211_supported_band { int n_channels; int n_bitrates; struct ieee80211_sta_ht_cap ht_cap; + struct ieee80211_sta_vht_cap vht_cap; }; /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 067c9fe02a7f..5c4a720f0442 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -921,6 +921,15 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, dev->wiphy.bands[band]->ht_cap.ampdu_density))) goto nla_put_failure; + /* add VHT info */ + if (dev->wiphy.bands[band]->vht_cap.vht_supported && + (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET, + sizeof(dev->wiphy.bands[band]->vht_cap.vht_mcs), + &dev->wiphy.bands[band]->vht_cap.vht_mcs) || + nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA, + dev->wiphy.bands[band]->vht_cap.cap))) + goto nla_put_failure; + /* add frequencies */ nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS); if (!nl_freqs) -- cgit v1.2.3 From 3840a06e6046aaee95f33a120499d2dc8c054b9d Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 28 Jun 2012 12:34:19 +0000 Subject: tcp: pass fl6 to inet6_csk_route_req() This commit changes inet_csk_route_req() so that it uses a pointer to a struct flowi6, rather than allocating its own on the stack. This brings its behavior in line with its IPv4 cousin, inet_csk_route_req(), and allows a follow-on patch to fix a dst leak. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 1 + net/ipv6/inet6_connection_sock.c | 26 +++++++++++++------------- net/ipv6/tcp_ipv6.c | 9 ++++++--- 3 files changed, 20 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 1866a676c810..df2a857e853d 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -26,6 +26,7 @@ extern int inet6_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax); extern struct dst_entry* inet6_csk_route_req(struct sock *sk, + struct flowi6 *fl6, const struct request_sock *req); extern struct request_sock *inet6_csk_search_req(const struct sock *sk, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index e23d35424ca9..bceb14450a1d 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -55,26 +55,26 @@ int inet6_csk_bind_conflict(const struct sock *sk, EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); struct dst_entry *inet6_csk_route_req(struct sock *sk, + struct flowi6 *fl6, const struct request_sock *req) { struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = treq->rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); - fl6.saddr = treq->loc_addr; - fl6.flowi6_oif = treq->iif; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet_rsk(req)->rmt_port; - fl6.fl6_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_proto = IPPROTO_TCP; + fl6->daddr = treq->rmt_addr; + final_p = fl6_update_dst(fl6, np->opt, &final); + fl6->saddr = treq->loc_addr; + fl6->flowi6_oif = treq->iif; + fl6->flowi6_mark = sk->sk_mark; + fl6->fl6_dport = inet_rsk(req)->rmt_port; + fl6->fl6_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, flowi6_to_flowi(fl6)); + + dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); if (IS_ERR(dst)) return NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fc0b96bf9051..4e5fa5f6ec68 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -477,7 +477,8 @@ out: } -static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, +static int tcp_v6_send_synack(struct sock *sk, + struct request_sock *req, struct request_values *rvp, u16 queue_mapping) { @@ -1058,6 +1059,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; + struct flowi6 fl6; bool want_cookie = false; if (skb->protocol == htons(ETH_P_IP)) @@ -1177,7 +1179,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet6_csk_route_req(sk, req)) != NULL && + (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL && (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6, &treq->rmt_addr)) { @@ -1247,6 +1249,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; #endif + struct flowi6 fl6; if (skb->protocol == htons(ETH_P_IP)) { /* @@ -1309,7 +1312,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto out_overflow; if (!dst) { - dst = inet6_csk_route_req(sk, req); + dst = inet6_csk_route_req(sk, &fl6, req); if (!dst) goto out; } -- cgit v1.2.3 From 58050fce3530939372e6c2f4b4beb76fcb4caa65 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 28 Jun 2012 03:57:45 +0000 Subject: net: Use NLMSG_DEFAULT_SIZE in combination with nlmsg_new() Using NLMSG_GOODSIZE results in multiple pages being used as nlmsg_new() will automatically add the size of the netlink header to the payload thus exceeding the page limit. NLMSG_DEFAULT_SIZE takes this into account. Signed-off-by: Thomas Graf Cc: Jiri Pirko Cc: Dmitry Eremin-Solenikov Cc: Sergey Lapin Cc: Johannes Berg Cc: Lauro Ramos Venancio Cc: Aloisio Almeida Jr Cc: Samuel Ortiz Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 8 ++++---- drivers/net/wireless/mac80211_hwsim.c | 2 +- include/net/genetlink.h | 2 ++ net/ieee802154/netlink.c | 4 ++-- net/ieee802154/nl-mac.c | 2 +- net/ieee802154/nl-phy.c | 2 +- net/l2tp/l2tp_netlink.c | 6 +++--- net/nfc/netlink.c | 18 +++++++++--------- net/wireless/nl80211.c | 26 +++++++++++++------------- 9 files changed, 36 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 5350eeaa22ce..89853c31e7f4 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1476,7 +1476,7 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) void *hdr; int err; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -1538,7 +1538,7 @@ static int team_nl_send_generic(struct genl_info *info, struct team *team, struct sk_buff *skb; int err; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; @@ -1648,7 +1648,7 @@ static int __send_and_alloc_skb(struct sk_buff **pskb, if (err) return err; } - *pskb = genlmsg_new(NLMSG_DEFAULT_SIZE - GENL_HDRLEN, GFP_KERNEL); + *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!*pskb) return -ENOMEM; return 0; @@ -2016,7 +2016,7 @@ static int team_nl_send_event_port_list_get(struct team *team) int err; struct net *net = dev_net(team->dev); - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index a0b7cfd34685..a9ba3f7ea62b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -571,7 +571,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, skb_dequeue(&data->pending); } - skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (skb == NULL) goto nla_put_failure; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ccb68880abf5..48905cd3884c 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -5,6 +5,8 @@ #include #include +#define GENLMSG_DEFAULT_SIZE (NLMSG_DEFAULT_SIZE - GENL_HDRLEN) + /** * struct genl_multicast_group - generic netlink multicast group * @name: name of the multicast group, names are per-family diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index c8097ae2482f..97351e1d07a4 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -44,7 +44,7 @@ struct genl_family nl802154_family = { struct sk_buff *ieee802154_nl_create(int flags, u8 req) { void *hdr; - struct sk_buff *msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + struct sk_buff *msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); unsigned long f; if (!msg) @@ -80,7 +80,7 @@ struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, int flags, u8 req) { void *hdr; - struct sk_buff *msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + struct sk_buff *msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return NULL; diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index ca92587720f4..1e9917124e75 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -530,7 +530,7 @@ static int ieee802154_list_iface(struct sk_buff *skb, if (!dev) return -ENODEV; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) goto out_dev; diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index eed291626da6..d54be34cca94 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -101,7 +101,7 @@ static int ieee802154_list_phy(struct sk_buff *skb, if (!phy) return -ENODEV; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) goto out_dev; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index ddc553e76671..d71cd9229a47 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -72,7 +72,7 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) void *hdr; int ret = -ENOBUFS; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto out; @@ -353,7 +353,7 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) goto out; } - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto out; @@ -699,7 +699,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) goto out; } - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto out; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 03c31db38f12..f4f07f9b61c0 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -167,7 +167,7 @@ int nfc_genl_targets_found(struct nfc_dev *dev) dev->genl_data.poll_req_pid = 0; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; @@ -195,7 +195,7 @@ int nfc_genl_target_lost(struct nfc_dev *dev, u32 target_idx) struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -226,7 +226,7 @@ int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol) struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -258,7 +258,7 @@ int nfc_genl_tm_deactivated(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -288,7 +288,7 @@ int nfc_genl_device_added(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -321,7 +321,7 @@ int nfc_genl_device_removed(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -434,7 +434,7 @@ int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx, pr_debug("DEP link is up\n"); - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; @@ -473,7 +473,7 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev) pr_debug("DEP link is down\n"); - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; @@ -514,7 +514,7 @@ static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info) if (!dev) return -ENODEV; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { rc = -ENOMEM; goto out_putdev; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7ae54b82291f..cbdc0fd67a14 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7210,7 +7210,7 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, { struct sk_buff *msg; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -7286,7 +7286,7 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, { struct sk_buff *msg; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -7502,7 +7502,7 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -7542,7 +7542,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -7580,7 +7580,7 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -7842,7 +7842,7 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, { struct sk_buff *msg; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -7863,7 +7863,7 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8026,7 +8026,7 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, struct nlattr *pinfoattr; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8069,7 +8069,7 @@ void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, struct nlattr *rekey_attr; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8113,7 +8113,7 @@ void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, struct nlattr *attr; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8157,7 +8157,7 @@ void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8192,7 +8192,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct nlattr *pinfoattr; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8236,7 +8236,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, void *hdr; int err; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; -- cgit v1.2.3 From 9e56e3800ea42e78b7c816bdd2d87d047be80541 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Jun 2012 18:54:02 -0700 Subject: ipv4: Adjust in_dev handling in fib_validate_source() Checking for in_dev being NULL is pointless. In fact, all of our callers have in_dev precomputed already, so just pass it in and remove the NULL checking. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- net/ipv4/fib_frontend.c | 27 ++++++++++----------------- net/ipv4/route.c | 10 ++++++---- 3 files changed, 17 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9e6c26d4ba4c..619f68a7185c 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -233,7 +233,7 @@ extern void ip_fib_init(void); extern __be32 fib_compute_spec_dst(struct sk_buff *skb); extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, - u32 *itag); + struct in_device *idev, u32 *itag); extern void fib_select_default(struct fib_result *res); /* Exported by fib_semantics.c */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1d13217e01ff..c84cff52021e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -226,15 +226,14 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) * called with rcu_read_lock() */ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, - int oif, struct net_device *dev, u32 *itag) + int oif, struct net_device *dev, struct in_device *idev, + u32 *itag) { - struct in_device *in_dev; - struct flowi4 fl4; + int ret, no_addr, rpf, accept_local; struct fib_result res; - int no_addr, rpf, accept_local; - bool dev_match; - int ret; + struct flowi4 fl4; struct net *net; + bool dev_match; fl4.flowi4_oif = 0; fl4.flowi4_iif = oif; @@ -244,19 +243,13 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, fl4.flowi4_scope = RT_SCOPE_UNIVERSE; no_addr = rpf = accept_local = 0; - in_dev = __in_dev_get_rcu(dev); - if (in_dev) { - no_addr = in_dev->ifa_list == NULL; - - /* Ignore rp_filter for packets protected by IPsec. */ - rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev); + no_addr = idev->ifa_list == NULL; - accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); - fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; - } + /* Ignore rp_filter for packets protected by IPsec. */ + rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); - if (in_dev == NULL) - goto e_inval; + accept_local = IN_DEV_ACCEPT_LOCAL(idev); + fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; net = dev_net(dev); if (fib_lookup(net, &fl4, &res)) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 83d56a016625..919d69e60bab 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1999,7 +1999,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!ipv4_is_local_multicast(daddr)) goto e_inval; } else { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &itag); + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, + in_dev, &itag); if (err < 0) goto e_err; } @@ -2100,7 +2101,7 @@ static int __mkroute_input(struct sk_buff *skb, err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), - in_dev->dev, &itag); + in_dev->dev, in_dev, &itag); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -2274,7 +2275,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, net->loopback_dev->ifindex, - dev, &itag); + dev, in_dev, &itag); if (err < 0) goto martian_source_keep_err; if (err) @@ -2295,7 +2296,8 @@ brd_input: goto e_inval; if (!ipv4_is_zeronet(saddr)) { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &itag); + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, + in_dev, &itag); if (err < 0) goto martian_source_keep_err; if (err) -- cgit v1.2.3 From d0087b29f77176480c27c203988b5704847d617c Mon Sep 17 00:00:00 2001 From: Ville Nuorvala Date: Thu, 28 Jun 2012 18:15:52 +0000 Subject: ipv6_tunnel: Allow receiving packets on the fallback tunnel if they pass sanity checks At Facebook, we do Layer-3 DSR via IP-in-IP tunneling. Our load balancers wrap an extra IP header on incoming packets so they can be routed to the backend. In the v4 tunnel driver, when these packets fall on the default tunl0 device, the behavior is to decapsulate them and drop them back on the stack. So our setup is that tunl0 has the VIP and eth0 has (obviously) the backend's real address. In IPv6 we do the same thing, but the v6 tunnel driver didn't have this same behavior - if you didn't have an explicit tunnel setup, it would drop the packet. This patch brings that v4 feature to the v6 driver. The same IPv6 address checks are performed as with any normal tunnel, but as the fallback tunnel endpoint addresses are unspecified, the checks must be performed on a per-packet basis, rather than at tunnel configuration time. [Patch description modified by phil@ipom.com] Signed-off-by: Ville Nuorvala Tested-by: Phil Dibowitz Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 2 ++ net/ipv6/ip6_tunnel.c | 65 ++++++++++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index fc73e667b50e..358fb86f57eb 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -9,6 +9,8 @@ #define IP6_TNL_F_CAP_XMIT 0x10000 /* capable of receiving packets */ #define IP6_TNL_F_CAP_RCV 0x20000 +/* determine capability on a per-packet basis */ +#define IP6_TNL_F_CAP_PER_PACKET 0x40000 /* IPv6 tunnel */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c9015fad8d65..04a3cba2c123 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -684,24 +684,50 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, IP6_ECN_set_ce(ipv6_hdr(skb)); } +static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) +{ + struct ip6_tnl_parm *p = &t->parms; + int ltype = ipv6_addr_type(laddr); + int rtype = ipv6_addr_type(raddr); + __u32 flags = 0; + + if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) { + flags = IP6_TNL_F_CAP_PER_PACKET; + } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && + rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && + !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && + (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { + if (ltype&IPV6_ADDR_UNICAST) + flags |= IP6_TNL_F_CAP_XMIT; + if (rtype&IPV6_ADDR_UNICAST) + flags |= IP6_TNL_F_CAP_RCV; + } + return flags; +} + /* called with rcu_read_lock() */ -static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) +static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) { struct ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = dev_net(t->dev); - if (p->flags & IP6_TNL_F_CAP_RCV) { + if ((p->flags & IP6_TNL_F_CAP_RCV) || + ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && + (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) { struct net_device *ldev = NULL; if (p->link) ldev = dev_get_by_index_rcu(net, p->link); - if ((ipv6_addr_is_multicast(&p->laddr) || - likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) && - likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0))) + if ((ipv6_addr_is_multicast(laddr) || + likely(ipv6_chk_addr(net, laddr, ldev, 0))) && + likely(!ipv6_chk_addr(net, raddr, NULL, 0))) ret = 1; - } return ret; } @@ -740,7 +766,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, goto discard; } - if (!ip6_tnl_rcv_ctl(t)) { + if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { t->dev->stats.rx_dropped++; rcu_read_unlock(); goto discard; @@ -1114,25 +1140,6 @@ tx_err: return NETDEV_TX_OK; } -static void ip6_tnl_set_cap(struct ip6_tnl *t) -{ - struct ip6_tnl_parm *p = &t->parms; - int ltype = ipv6_addr_type(&p->laddr); - int rtype = ipv6_addr_type(&p->raddr); - - p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV); - - if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && - rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && - !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && - (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { - if (ltype&IPV6_ADDR_UNICAST) - p->flags |= IP6_TNL_F_CAP_XMIT; - if (rtype&IPV6_ADDR_UNICAST) - p->flags |= IP6_TNL_F_CAP_RCV; - } -} - static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; @@ -1153,7 +1160,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; - ip6_tnl_set_cap(t); + p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); + p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV) dev->flags |= IFF_POINTOPOINT; @@ -1438,6 +1446,9 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) t->parms.proto = IPPROTO_IPV6; dev_hold(dev); + + ip6_tnl_link_config(t); + rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } -- cgit v1.2.3 From 7a9bc9b81a5bc6e44ebc80ef781332e4385083f2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 29 Jun 2012 01:32:45 -0700 Subject: ipv4: Elide fib_validate_source() completely when possible. If rpfilter is off (or the SKB has an IPSEC path) and there are not tclassid users, we don't have to do anything at all when fib_validate_source() is invoked besides setting the itag to zero. We monitor tclassid uses with a counter (modified only under RTNL and marked __read_mostly) and we protect the fib_validate_source() real work with a test against this counter and whether rpfilter is to be done. Having a way to know whether we need no tclassid processing or not also opens the door for future optimized rpfilter algorithms that do not perform full FIB lookups. Signed-off-by: David S. Miller --- include/net/fib_rules.h | 1 + include/net/ip_fib.h | 5 +++++ net/core/fib_rules.c | 4 ++++ net/ipv4/fib_frontend.c | 32 ++++++++++++++++++++++++-------- net/ipv4/fib_rules.c | 16 +++++++++++++++- net/ipv4/fib_semantics.c | 10 ++++++++++ 6 files changed, 59 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 075f1e3a0fed..e361f4882426 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -52,6 +52,7 @@ struct fib_rules_ops { struct sk_buff *, struct fib_rule_hdr *, struct nlattr **); + void (*delete)(struct fib_rule *); int (*compare)(struct fib_rule *, struct fib_rule_hdr *, struct nlattr **); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 619f68a7185c..3dc7c96bbeab 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -235,6 +235,11 @@ extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); extern void fib_select_default(struct fib_result *res); +#ifdef CONFIG_IP_ROUTE_CLASSID +extern int fib_num_tclassid_users; +#else +#define fib_num_tclassid_users 0 +#endif /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 72cceb79d0d4..ab7db83236c9 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -151,6 +151,8 @@ static void fib_rules_cleanup_ops(struct fib_rules_ops *ops) list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) { list_del_rcu(&rule->list); + if (ops->delete) + ops->delete(rule); fib_rule_put(rule); } } @@ -499,6 +501,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).pid); + if (ops->delete) + ops->delete(rule); fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c84cff52021e..ae528d1b293a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -217,6 +218,10 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) return inet_select_addr(dev, ip_hdr(skb)->saddr, scope); } +#ifdef CONFIG_IP_ROUTE_CLASSID +int fib_num_tclassid_users __read_mostly; +#endif + /* Given (packet source, input interface) and optional (dst, oif, tos): * - (main) check, that source is valid i.e. not broadcast or our local * address. @@ -225,11 +230,11 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) * - check, that packet arrived from expected physical interface. * called with rcu_read_lock() */ -int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, - int oif, struct net_device *dev, struct in_device *idev, - u32 *itag) +static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, + u8 tos, int oif, struct net_device *dev, + int rpf, struct in_device *idev, u32 *itag) { - int ret, no_addr, rpf, accept_local; + int ret, no_addr, accept_local; struct fib_result res; struct flowi4 fl4; struct net *net; @@ -242,12 +247,9 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; - no_addr = rpf = accept_local = 0; + no_addr = accept_local = 0; no_addr = idev->ifa_list == NULL; - /* Ignore rp_filter for packets protected by IPsec. */ - rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); - accept_local = IN_DEV_ACCEPT_LOCAL(idev); fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; @@ -303,6 +305,20 @@ e_rpf: return -EXDEV; } +/* Ignore rp_filter for packets protected by IPsec. */ +int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, + u8 tos, int oif, struct net_device *dev, + struct in_device *idev, u32 *itag) +{ + int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); + + if (!r && !fib_num_tclassid_users) { + *itag = 0; + return 0; + } + return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); +} + static inline __be32 sk_extract_addr(struct sockaddr *addr) { return ((struct sockaddr_in *) addr)->sin_addr.s_addr; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 2d043f71ef70..b23fd952c84f 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -169,8 +169,11 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->dst = nla_get_be32(tb[FRA_DST]); #ifdef CONFIG_IP_ROUTE_CLASSID - if (tb[FRA_FLOW]) + if (tb[FRA_FLOW]) { rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); + if (rule4->tclassid) + fib_num_tclassid_users++; + } #endif rule4->src_len = frh->src_len; @@ -184,6 +187,16 @@ errout: return err; } +static void fib4_rule_delete(struct fib_rule *rule) +{ +#ifdef CONFIG_IP_ROUTE_CLASSID + struct fib4_rule *rule4 = (struct fib4_rule *) rule; + + if (rule4->tclassid) + fib_num_tclassid_users--; +#endif +} + static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, struct nlattr **tb) { @@ -256,6 +269,7 @@ static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { .action = fib4_rule_action, .match = fib4_rule_match, .configure = fib4_rule_configure, + .delete = fib4_rule_delete, .compare = fib4_rule_compare, .fill = fib4_rule_fill, .default_pref = fib_default_rule_pref, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 415f8230fc88..c46c20b6b0b6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -163,6 +163,12 @@ void free_fib_info(struct fib_info *fi) return; } fib_info_cnt--; +#ifdef CONFIG_IP_ROUTE_CLASSID + change_nexthops(fi) { + if (nexthop_nh->nh_tclassid) + fib_num_tclassid_users--; + } endfor_nexthops(fi); +#endif call_rcu(&fi->rcu, free_fib_info_rcu); } @@ -421,6 +427,8 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; + if (nexthop_nh->nh_tclassid) + fib_num_tclassid_users++; #endif } @@ -815,6 +823,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) nh->nh_flags = cfg->fc_flags; #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid = cfg->fc_flow; + if (nh->nh_tclassid) + fib_num_tclassid_users++; #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight = 1; -- cgit v1.2.3 From f4489ebeffa436c8427a20e2f05004e783708cde Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 29 Jun 2012 12:46:58 +0200 Subject: cfg80211: add channel tracking for AP and mesh We need to know which channel is used by a running AP and mesh for channel context accounting and finding matching/active interface combination. STA/IBSS have current_bss already which allows us to check which channel a vif is tuned to. Non-fixed channel IBSS can be handled with additional changes. Monitor mode is going to be handled differently. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ net/wireless/ap.c | 4 +++- net/wireless/mesh.c | 18 ++++++++++++++---- net/wireless/mlme.c | 1 + net/wireless/nl80211.c | 1 + 5 files changed, 22 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1fc89c4f930c..c62bc7864adf 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2408,6 +2408,9 @@ struct wireless_dev { struct ieee80211_channel *preset_chan; enum nl80211_channel_type preset_chantype; + /* for AP and mesh channel tracking */ + struct ieee80211_channel *channel; + bool ps; int ps_timeout; diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 45199cca63d5..fcc60d8dbefa 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -24,8 +24,10 @@ static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, return -ENOENT; err = rdev->ops->stop_ap(&rdev->wiphy, dev); - if (!err) + if (!err) { wdev->beacon_interval = 0; + wdev->channel = NULL; + } return err; } diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 3b73b07486cf..bab381344723 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -159,6 +159,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!err) { memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); wdev->mesh_id_len = setup->mesh_id_len; + wdev->channel = setup->channel; } return err; @@ -184,6 +185,7 @@ int cfg80211_set_mesh_freq(struct cfg80211_registered_device *rdev, enum nl80211_channel_type channel_type) { struct ieee80211_channel *channel; + int err; channel = rdev_freq_to_chan(rdev, freq, channel_type); if (!channel || !cfg80211_can_beacon_sec_chan(&rdev->wiphy, @@ -205,9 +207,14 @@ int cfg80211_set_mesh_freq(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return -ENETDOWN; - return rdev->ops->libertas_set_mesh_channel(&rdev->wiphy, - wdev->netdev, - channel); + + err = rdev->ops->libertas_set_mesh_channel(&rdev->wiphy, + wdev->netdev, + channel); + if (!err) + wdev->channel = channel; + + return err; } if (wdev->mesh_id_len) @@ -249,8 +256,11 @@ static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, return -ENOTCONN; err = rdev->ops->leave_mesh(&rdev->wiphy, dev); - if (!err) + if (!err) { wdev->mesh_id_len = 0; + wdev->channel = NULL; + } + return err; } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index da4406f11929..a7882eb8c46e 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -947,6 +947,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, int freq, if (WARN_ON(!chan)) goto out; + wdev->channel = chan; nl80211_ch_switch_notify(rdev, dev, freq, type, GFP_KERNEL); out: wdev_unlock(wdev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 20d0fd6d1286..12096b4ebf62 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2488,6 +2488,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->preset_chan = params.channel; wdev->preset_chantype = params.channel_type; wdev->beacon_interval = params.beacon_interval; + wdev->channel = params.channel; } return err; } -- cgit v1.2.3 From c30a3d38689bc601e03d5f2ad3c37d8ea13e46ca Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 29 Jun 2012 12:46:59 +0200 Subject: cfg80211: track ibss fixed channel IBSS may hop between channels. It is necessary to account this special case when considering interface combinations. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/wireless/ibss.c | 1 + 2 files changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c62bc7864adf..e030c6af86dd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2411,6 +2411,8 @@ struct wireless_dev { /* for AP and mesh channel tracking */ struct ieee80211_channel *channel; + bool ibss_fixed; + bool ps; int ps_timeout; diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 89baa3328411..b90fd86b2d18 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -113,6 +113,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, kfree(wdev->connect_keys); wdev->connect_keys = connkeys; + wdev->ibss_fixed = params->channel_fixed; #ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.channel = params->channel; #endif -- cgit v1.2.3 From dbbae26afa81320b3315fb4ad755b20f1ff256b4 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 29 Jun 2012 12:47:01 +0200 Subject: cfg80211: track monitor interfaces count Implements .set_monitor_enabled(wiphy, enabled). Notifies driver upon change of interface layout. If only monitor interfaces become present it is called with 2nd argument being true. If non-monitor interface appears then 2nd argument is false. Driver is notified only upon change. This makes it more obvious about the fact that cfg80211 supports single monitor channel. Once we implement multi-channel we don't want to allow setting monitor channel while other interface types are running. Otherwise it would be ambiguous once we start considering num_different_channels. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ net/wireless/core.c | 24 ++++++++++++++++++++++++ net/wireless/core.h | 14 ++++++++++++++ net/wireless/util.c | 5 +++++ 4 files changed, 47 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e030c6af86dd..f0d213dd8fe7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1499,6 +1499,8 @@ struct cfg80211_gtk_rekey_data { * interfaces are active this callback should reject the configuration. * If no interfaces are active or the device is down, the channel should * be stored for when a monitor interface becomes active. + * @set_monitor_enabled: Notify driver that there are only monitor + * interfaces running. * @get_channel: Get the current operating channel, should return %NULL if * there's no single defined operating channel if for example the * device implements channel hopping for multi-channel virtual interfaces. @@ -1817,6 +1819,8 @@ struct cfg80211_ops { struct ethtool_stats *stats, u64 *data); void (*get_et_strings)(struct wiphy *wiphy, struct net_device *dev, u32 sset, u8 *data); + + void (*set_monitor_enabled)(struct wiphy *wiphy, bool enabled); }; /* diff --git a/net/wireless/core.c b/net/wireless/core.c index c65f59c952c9..8412da7d0f25 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -717,6 +717,24 @@ static struct device_type wiphy_type = { .name = "wlan", }; +void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, + enum nl80211_iftype iftype, int num) +{ + bool has_monitors_only_old = cfg80211_has_monitors_only(rdev); + bool has_monitors_only_new; + + ASSERT_RDEV_LOCK(rdev); + + rdev->num_running_ifaces += num; + if (iftype == NL80211_IFTYPE_MONITOR) + rdev->num_running_monitor_ifaces += num; + + has_monitors_only_new = cfg80211_has_monitors_only(rdev); + if (has_monitors_only_new != has_monitors_only_old) + rdev->ops->set_monitor_enabled(&rdev->wiphy, + has_monitors_only_new); +} + static int cfg80211_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ndev) @@ -820,6 +838,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_DOWN: dev_hold(dev); + cfg80211_lock_rdev(rdev); + cfg80211_update_iface_num(rdev, wdev->iftype, -1); + cfg80211_unlock_rdev(rdev); queue_work(cfg80211_wq, &wdev->cleanup_work); break; case NETDEV_UP: @@ -927,6 +948,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, ret = cfg80211_can_add_interface(rdev, wdev->iftype); if (ret) return notifier_from_errno(ret); + cfg80211_lock_rdev(rdev); + cfg80211_update_iface_num(rdev, wdev->iftype, 1); + cfg80211_unlock_rdev(rdev); break; } diff --git a/net/wireless/core.h b/net/wireless/core.h index 56f18c2eb919..99acd51343b1 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -56,6 +56,9 @@ struct cfg80211_registered_device { u32 ap_beacons_nlpid; + int num_running_ifaces; + int num_running_monitor_ifaces; + /* BSSes/scanning */ spinlock_t bss_lock; struct list_head bss_list; @@ -197,6 +200,14 @@ static inline void wdev_unlock(struct wireless_dev *wdev) #define ASSERT_RDEV_LOCK(rdev) lockdep_assert_held(&(rdev)->mtx) #define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx) +static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) +{ + ASSERT_RDEV_LOCK(rdev); + + return rdev->num_running_ifaces == rdev->num_running_monitor_ifaces && + rdev->num_running_ifaces > 0; +} + enum cfg80211_event_type { EVENT_CONNECT_RESULT, EVENT_ROAMED, @@ -444,6 +455,9 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, u32 beacon_int); +void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, + enum nl80211_iftype iftype, int num); + #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS #define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) #else diff --git a/net/wireless/util.c b/net/wireless/util.c index fc948d0a53f3..9b92ec57d07b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -871,6 +871,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, } } + if (!err && ntype != otype && netif_running(dev)) { + cfg80211_update_iface_num(rdev, ntype, 1); + cfg80211_update_iface_num(rdev, otype, -1); + } + return err; } -- cgit v1.2.3 From 2e165b818456ecc1024dd0387eeac64745526377 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 29 Jun 2012 12:47:06 +0200 Subject: cfg80211/mac80211: remove .get_channel We do not need it anymore since cfg80211 tracks monitor channel and monitor channel type. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ------ net/mac80211/cfg.c | 11 ----------- net/wireless/nl80211.c | 15 +++++---------- net/wireless/wext-compat.c | 9 ++------- 4 files changed, 7 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f0d213dd8fe7..fa269347355b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1501,9 +1501,6 @@ struct cfg80211_gtk_rekey_data { * be stored for when a monitor interface becomes active. * @set_monitor_enabled: Notify driver that there are only monitor * interfaces running. - * @get_channel: Get the current operating channel, should return %NULL if - * there's no single defined operating channel if for example the - * device implements channel hopping for multi-channel virtual interfaces. * * @scan: Request to do a scan. If returning zero, the scan request is given * the driver, and will be valid until passed to cfg80211_scan_done(). @@ -1810,9 +1807,6 @@ struct cfg80211_ops { struct net_device *dev, u16 noack_map); - struct ieee80211_channel *(*get_channel)(struct wiphy *wiphy, - enum nl80211_channel_type *type); - int (*get_et_sset_count)(struct wiphy *wiphy, struct net_device *dev, int sset); void (*get_et_stats)(struct wiphy *wiphy, struct net_device *dev, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ea4b1ea9105a..ccbe2413142a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2980,16 +2980,6 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, return 0; } -static struct ieee80211_channel * -ieee80211_wiphy_get_channel(struct wiphy *wiphy, - enum nl80211_channel_type *type) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - - *type = local->_oper_channel_type; - return local->oper_channel; -} - static void ieee80211_set_monitor_enabled(struct wiphy *wiphy, bool enabled) { struct ieee80211_local *local = wiphy_priv(wiphy); @@ -3073,7 +3063,6 @@ struct cfg80211_ops mac80211_config_ops = { .tdls_oper = ieee80211_tdls_oper, .tdls_mgmt = ieee80211_tdls_mgmt, .probe_client = ieee80211_probe_client, - .get_channel = ieee80211_wiphy_get_channel, .set_noack_map = ieee80211_set_noack_map, .set_monitor_enabled = ieee80211_set_monitor_enabled, #ifdef CONFIG_PM diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 12096b4ebf62..5d29ed1f7c62 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1689,16 +1689,11 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, (cfg80211_rdev_list_generation << 2))) goto nla_put_failure; - if (rdev->ops->get_channel) { - struct ieee80211_channel *chan; - enum nl80211_channel_type channel_type; - - chan = rdev->ops->get_channel(&rdev->wiphy, &channel_type); - if (chan && - (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, - chan->center_freq) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, - channel_type))) + if (rdev->monitor_channel) { + if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, + rdev->monitor_channel->center_freq) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, + rdev->monitor_channel_type)) goto nla_put_failure; } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index bc879833b21f..7df42f541873 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -827,8 +827,6 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - struct ieee80211_channel *chan; - enum nl80211_channel_type channel_type; switch (wdev->iftype) { case NL80211_IFTYPE_STATION: @@ -836,13 +834,10 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); case NL80211_IFTYPE_MONITOR: - if (!rdev->ops->get_channel) + if (!rdev->monitor_channel) return -EINVAL; - chan = rdev->ops->get_channel(wdev->wiphy, &channel_type); - if (!chan) - return -EINVAL; - freq->m = chan->center_freq; + freq->m = rdev->monitor_channel->center_freq; freq->e = 6; return 0; default: -- cgit v1.2.3 From 38b3fef1730319e2730af3fc9f73698e3a9aeb4a Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Fri, 15 Jun 2012 11:50:28 +0300 Subject: Bluetooth: Improve debugging messages for hci_conn Improve debugging of hci_conn objects by: adding print to hci_conn refcounting, adding object spcifier when missing, change conn to hcon since conn is heavily used for l2cap_conn objects and this is misleading. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 6 ++++++ net/bluetooth/hci_conn.c | 44 ++++++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 75766b7f0dc7..475b8c04ba52 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -587,12 +587,18 @@ void hci_conn_put_device(struct hci_conn *conn); static inline void hci_conn_hold(struct hci_conn *conn) { + BT_DBG("hcon %p refcnt %d -> %d", conn, atomic_read(&conn->refcnt), + atomic_read(&conn->refcnt) + 1); + atomic_inc(&conn->refcnt); cancel_delayed_work(&conn->disc_work); } static inline void hci_conn_put(struct hci_conn *conn) { + BT_DBG("hcon %p refcnt %d -> %d", conn, atomic_read(&conn->refcnt), + atomic_read(&conn->refcnt) - 1); + if (atomic_dec_and_test(&conn->refcnt)) { unsigned long timeo; if (conn->type == ACL_LINK || conn->type == LE_LINK) { diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2fcced377e50..9bbef6e95d2c 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -107,7 +107,7 @@ static void hci_acl_connect_cancel(struct hci_conn *conn) { struct hci_cp_create_conn_cancel cp; - BT_DBG("%p", conn); + BT_DBG("hcon %p", conn); if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2) return; @@ -120,7 +120,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason) { struct hci_cp_disconnect cp; - BT_DBG("%p", conn); + BT_DBG("hcon %p", conn); conn->state = BT_DISCONN; @@ -134,7 +134,7 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle) struct hci_dev *hdev = conn->hdev; struct hci_cp_add_sco cp; - BT_DBG("%p", conn); + BT_DBG("hcon %p", conn); conn->state = BT_CONNECT; conn->out = true; @@ -152,7 +152,7 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle) struct hci_dev *hdev = conn->hdev; struct hci_cp_setup_sync_conn cp; - BT_DBG("%p", conn); + BT_DBG("hcon %p", conn); conn->state = BT_CONNECT; conn->out = true; @@ -196,7 +196,7 @@ void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __u8 rand[8], struct hci_dev *hdev = conn->hdev; struct hci_cp_le_start_enc cp; - BT_DBG("%p", conn); + BT_DBG("hcon %p", conn); memset(&cp, 0, sizeof(cp)); @@ -213,11 +213,11 @@ void hci_sco_setup(struct hci_conn *conn, __u8 status) { struct hci_conn *sco = conn->link; - BT_DBG("%p", conn); - if (!sco) return; + BT_DBG("hcon %p", conn); + if (!status) { if (lmp_esco_capable(conn->hdev)) hci_setup_sync(sco, conn->handle); @@ -235,7 +235,7 @@ static void hci_conn_timeout(struct work_struct *work) disc_work.work); __u8 reason; - BT_DBG("conn %p state %s", conn, state_to_string(conn->state)); + BT_DBG("hcon %p state %s", conn, state_to_string(conn->state)); if (atomic_read(&conn->refcnt)) return; @@ -266,7 +266,7 @@ static void hci_conn_enter_sniff_mode(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - BT_DBG("conn %p mode %d", conn, conn->mode); + BT_DBG("hcon %p mode %d", conn, conn->mode); if (test_bit(HCI_RAW, &hdev->flags)) return; @@ -301,7 +301,7 @@ static void hci_conn_idle(unsigned long arg) { struct hci_conn *conn = (void *) arg; - BT_DBG("conn %p mode %d", conn, conn->mode); + BT_DBG("hcon %p mode %d", conn, conn->mode); hci_conn_enter_sniff_mode(conn); } @@ -382,7 +382,7 @@ int hci_conn_del(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - BT_DBG("%s conn %p handle %d", hdev->name, conn, conn->handle); + BT_DBG("%s hcon %p handle %d", hdev->name, conn, conn->handle); del_timer(&conn->idle_timer); @@ -557,7 +557,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, /* Check link security requirement */ int hci_conn_check_link_mode(struct hci_conn *conn) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); if (hci_conn_ssp_enabled(conn) && !(conn->link_mode & HCI_LM_ENCRYPT)) return 0; @@ -568,7 +568,7 @@ int hci_conn_check_link_mode(struct hci_conn *conn) /* Authenticate remote device */ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); if (conn->pending_sec_level > sec_level) sec_level = conn->pending_sec_level; @@ -602,7 +602,7 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) /* Encrypt the the link */ static void hci_conn_encrypt(struct hci_conn *conn) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); if (!test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) { struct hci_cp_set_conn_encrypt cp; @@ -616,7 +616,7 @@ static void hci_conn_encrypt(struct hci_conn *conn) /* Enable security */ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); /* For sdp we don't need the link key. */ if (sec_level == BT_SECURITY_SDP) @@ -669,7 +669,7 @@ EXPORT_SYMBOL(hci_conn_security); /* Check secure link requirement */ int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); if (sec_level != BT_SECURITY_HIGH) return 1; /* Accept if non-secure is required */ @@ -684,7 +684,7 @@ EXPORT_SYMBOL(hci_conn_check_secure); /* Change link key */ int hci_conn_change_link_key(struct hci_conn *conn) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { struct hci_cp_change_conn_link_key cp; @@ -699,7 +699,7 @@ int hci_conn_change_link_key(struct hci_conn *conn) /* Switch role */ int hci_conn_switch_role(struct hci_conn *conn, __u8 role) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); if (!role && conn->link_mode & HCI_LM_MASTER) return 1; @@ -720,7 +720,7 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active) { struct hci_dev *hdev = conn->hdev; - BT_DBG("conn %p mode %d", conn, conn->mode); + BT_DBG("hcon %p mode %d", conn, conn->mode); if (test_bit(HCI_RAW, &hdev->flags)) return; @@ -894,7 +894,7 @@ struct hci_chan *hci_chan_create(struct hci_conn *conn) struct hci_dev *hdev = conn->hdev; struct hci_chan *chan; - BT_DBG("%s conn %p", hdev->name, conn); + BT_DBG("%s hcon %p", hdev->name, conn); chan = kzalloc(sizeof(struct hci_chan), GFP_KERNEL); if (!chan) @@ -913,7 +913,7 @@ int hci_chan_del(struct hci_chan *chan) struct hci_conn *conn = chan->conn; struct hci_dev *hdev = conn->hdev; - BT_DBG("%s conn %p chan %p", hdev->name, conn, chan); + BT_DBG("%s hcon %p chan %p", hdev->name, conn, chan); list_del_rcu(&chan->list); @@ -929,7 +929,7 @@ void hci_chan_list_flush(struct hci_conn *conn) { struct hci_chan *chan, *n; - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); list_for_each_entry_safe(chan, n, &conn->chan_list, list) hci_chan_del(chan); -- cgit v1.2.3 From 4244854d22bf8f782698c5224b9191c8d2d42610 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Sat, 30 Jun 2012 03:04:26 +0000 Subject: sctp: be more restrictive in transport selection on bundled sacks It was noticed recently that when we send data on a transport, its possible that we might bundle a sack that arrived on a different transport. While this isn't a major problem, it does go against the SHOULD requirement in section 6.4 of RFC 2960: An endpoint SHOULD transmit reply chunks (e.g., SACK, HEARTBEAT ACK, etc.) to the same destination transport address from which it received the DATA or control chunk to which it is replying. This rule should also be followed if the endpoint is bundling DATA chunks together with the reply chunk. This patch seeks to correct that. It restricts the bundling of sack operations to only those transports which have moved the ctsn of the association forward since the last sack. By doing this we guarantee that we only bundle outbound saks on a transport that has received a chunk since the last sack. This brings us into stricter compliance with the RFC. Vlad had initially suggested that we strictly allow only sack bundling on the transport that last moved the ctsn forward. While this makes sense, I was concerned that doing so prevented us from bundling in the case where we had received chunks that moved the ctsn on multiple transports. In those cases, the RFC allows us to select any of the transports having received chunks to bundle the sack on. so I've modified the approach to allow for that, by adding a state variable to each transport that tracks weather it has moved the ctsn since the last sack. This I think keeps our behavior (and performance), close enough to our current profile that I think we can do this without a sysctl knob to enable/disable it. Signed-off-by: Neil Horman CC: Vlad Yaseivch CC: David S. Miller CC: linux-sctp@vger.kernel.org Reported-by: Michele Baldessari Reported-by: sorin serban Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++++ include/net/sctp/tsnmap.h | 3 ++- net/sctp/associola.c | 1 + net/sctp/output.c | 5 +++++ net/sctp/sm_make_chunk.c | 16 ++++++++++++++++ net/sctp/sm_sideeffect.c | 2 +- net/sctp/transport.c | 2 ++ net/sctp/tsnmap.c | 6 +++++- net/sctp/ulpevent.c | 3 ++- net/sctp/ulpqueue.c | 2 +- 10 files changed, 39 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e4652fe58958..fecdf31816f2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -912,6 +912,9 @@ struct sctp_transport { /* Is this structure kfree()able? */ malloced:1; + /* Has this transport moved the ctsn since we last sacked */ + __u32 sack_generation; + struct flowi fl; /* This is the peer's IP address and port. */ @@ -1584,6 +1587,7 @@ struct sctp_association { */ __u8 sack_needed; /* Do we need to sack the peer? */ __u32 sack_cnt; + __u32 sack_generation; /* These are capabilities which our peer advertised. */ __u8 ecn_capable:1, /* Can peer do ECN? */ diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index e7728bc14ccf..2c5d2b4d5d1e 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -117,7 +117,8 @@ void sctp_tsnmap_free(struct sctp_tsnmap *map); int sctp_tsnmap_check(const struct sctp_tsnmap *, __u32 tsn); /* Mark this TSN as seen. */ -int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn); +int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn, + struct sctp_transport *trans); /* Mark this TSN and all lower as seen. */ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5bc9ab161b37..b16517ee1aaf 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -271,6 +271,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a */ asoc->peer.sack_needed = 1; asoc->peer.sack_cnt = 0; + asoc->peer.sack_generation = 1; /* Assume that the peer will tell us if he recognizes ASCONF * as part of INIT exchange. diff --git a/net/sctp/output.c b/net/sctp/output.c index f1b7d4bb591e..6ae47acaaec6 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -248,6 +248,11 @@ static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt, /* If the SACK timer is running, we have a pending SACK */ if (timer_pending(timer)) { struct sctp_chunk *sack; + + if (pkt->transport->sack_generation != + pkt->transport->asoc->peer.sack_generation) + return retval; + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (sack) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index a85eeeb55dd0..b6de71efb140 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -734,8 +734,10 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) int len; __u32 ctsn; __u16 num_gabs, num_dup_tsns; + struct sctp_association *aptr = (struct sctp_association *)asoc; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; + struct sctp_transport *trans; memset(gabs, 0, sizeof(gabs)); ctsn = sctp_tsnmap_get_ctsn(map); @@ -805,6 +807,20 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns, sctp_tsnmap_get_dups(map)); + /* Once we have a sack generated, check to see what our sack + * generation is, if its 0, reset the transports to 0, and reset + * the association generation to 1 + * + * The idea is that zero is never used as a valid generation for the + * association so no transport will match after a wrap event like this, + * Until the next sack + */ + if (++aptr->peer.sack_generation == 0) { + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) + trans->sack_generation = 0; + aptr->peer.sack_generation = 1; + } nodata: return retval; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index c96d1a81cf42..8716da1a8592 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1268,7 +1268,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_REPORT_TSN: /* Record the arrival of a TSN. */ error = sctp_tsnmap_mark(&asoc->peer.tsn_map, - cmd->obj.u32); + cmd->obj.u32, NULL); break; case SCTP_CMD_REPORT_FWDTSN: diff --git a/net/sctp/transport.c b/net/sctp/transport.c index b026ba0c6992..1dcceb6e0ce6 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -68,6 +68,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); memset(&peer->saddr, 0, sizeof(union sctp_addr)); + peer->sack_generation = 0; + /* From 6.3.1 RTO Calculation: * * C1) Until an RTT measurement has been made for a packet sent to the diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index f1e40cebc981..b5fb7c409023 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -114,7 +114,8 @@ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn) /* Mark this TSN as seen. */ -int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) +int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn, + struct sctp_transport *trans) { u16 gap; @@ -133,6 +134,9 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) */ map->max_tsn_seen++; map->cumulative_tsn_ack_point++; + if (trans) + trans->sack_generation = + trans->asoc->peer.sack_generation; map->base_tsn++; } else { /* Either we already have a gap, or about to record a gap, so diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 8a84017834c2..33d894776192 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -715,7 +715,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, * can mark it as received so the tsn_map is updated correctly. */ if (sctp_tsnmap_mark(&asoc->peer.tsn_map, - ntohl(chunk->subh.data_hdr->tsn))) + ntohl(chunk->subh.data_hdr->tsn), + chunk->transport)) goto fail_mark; /* First calculate the padding, so we don't inadvertently diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index f2d1de7f2ffb..f5a6a4f4faf7 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -1051,7 +1051,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, if (chunk && (freed >= needed)) { __u32 tsn; tsn = ntohl(chunk->subh.data_hdr->tsn); - sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn); + sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport); sctp_ulpq_tail_data(ulpq, chunk, gfp); sctp_ulpq_partial_delivery(ulpq, chunk, gfp); -- cgit v1.2.3 From 3a0c52a6d82cc41da965284412608c74aece34e4 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Mon, 2 Jul 2012 09:32:32 +0300 Subject: cfg80211: add 802.11ad (60gHz band) support Add enumerations for both cfg80211 and nl80211. This expands wiphy.bands etc. arrays. Extend channel <-> frequency translation to cover 60g band and modify the rate check logic since there are no legacy mandatory rates (only MCS is used.) Signed-off-by: Vladimir Kondratiev Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlegacy/3945-rs.c | 2 +- include/linux/nl80211.h | 2 ++ include/net/cfg80211.h | 2 ++ net/mac80211/tx.c | 2 ++ net/wireless/core.c | 10 ++++++++-- net/wireless/util.c | 35 +++++++++++++++++++++++++-------- 6 files changed, 42 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/iwlegacy/3945-rs.c b/drivers/net/wireless/iwlegacy/3945-rs.c index 4b10157d8686..d4fd29ad90dc 100644 --- a/drivers/net/wireless/iwlegacy/3945-rs.c +++ b/drivers/net/wireless/iwlegacy/3945-rs.c @@ -946,7 +946,7 @@ il3945_rate_scale_init(struct ieee80211_hw *hw, s32 sta_id) case IEEE80211_BAND_5GHZ: rs_sta->expected_tpt = il3945_expected_tpt_a; break; - case IEEE80211_NUM_BANDS: + default: BUG(); break; } diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 23003272c70e..74cc55c1bf28 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -2545,10 +2545,12 @@ enum nl80211_tx_rate_attributes { * enum nl80211_band - Frequency band * @NL80211_BAND_2GHZ: 2.4 GHz ISM band * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) + * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 64.80 GHz) */ enum nl80211_band { NL80211_BAND_2GHZ, NL80211_BAND_5GHZ, + NL80211_BAND_60GHZ, }; /** diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fa269347355b..0b564e83a24b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -70,11 +70,13 @@ * * @IEEE80211_BAND_2GHZ: 2.4GHz ISM band * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7) + * @IEEE80211_BAND_60GHZ: around 60 GHz band (58.32 - 64.80 GHz) * @IEEE80211_NUM_BANDS: number of defined bands */ enum ieee80211_band { IEEE80211_BAND_2GHZ = NL80211_BAND_2GHZ, IEEE80211_BAND_5GHZ = NL80211_BAND_5GHZ, + IEEE80211_BAND_60GHZ = NL80211_BAND_60GHZ, /* keep last */ IEEE80211_NUM_BANDS diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4e753032e48d..4990f4fb5864 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -140,6 +140,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, if (r->flags & IEEE80211_RATE_MANDATORY_A) mrate = r->bitrate; break; + case IEEE80211_BAND_60GHZ: + /* TODO, for now fall through */ case IEEE80211_NUM_BANDS: WARN_ON(1); break; diff --git a/net/wireless/core.c b/net/wireless/core.c index ca2b95f24846..e13365f1fa63 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -468,8 +468,14 @@ int wiphy_register(struct wiphy *wiphy) continue; sband->band = band; - - if (WARN_ON(!sband->n_channels || !sband->n_bitrates)) + if (WARN_ON(!sband->n_channels)) + return -EINVAL; + /* + * on 60gHz band, there are no legacy rates, so + * n_bitrates is 0 + */ + if (WARN_ON(band != IEEE80211_BAND_60GHZ && + !sband->n_bitrates)) return -EINVAL; /* diff --git a/net/wireless/util.c b/net/wireless/util.c index a9260ac85cf1..0228c64e73d8 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -35,19 +35,29 @@ int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band) { /* see 802.11 17.3.8.3.2 and Annex J * there are overlapping channel numbers in 5GHz and 2GHz bands */ - if (band == IEEE80211_BAND_5GHZ) { - if (chan >= 182 && chan <= 196) - return 4000 + chan * 5; - else - return 5000 + chan * 5; - } else { /* IEEE80211_BAND_2GHZ */ + if (chan <= 0) + return 0; /* not supported */ + switch (band) { + case IEEE80211_BAND_2GHZ: if (chan == 14) return 2484; else if (chan < 14) return 2407 + chan * 5; + break; + case IEEE80211_BAND_5GHZ: + if (chan >= 182 && chan <= 196) + return 4000 + chan * 5; else - return 0; /* not supported */ + return 5000 + chan * 5; + break; + case IEEE80211_BAND_60GHZ: + if (chan < 5) + return 56160 + chan * 2160; + break; + default: + ; } + return 0; /* not supported */ } EXPORT_SYMBOL(ieee80211_channel_to_frequency); @@ -60,8 +70,12 @@ int ieee80211_frequency_to_channel(int freq) return (freq - 2407) / 5; else if (freq >= 4910 && freq <= 4980) return (freq - 4000) / 5; - else + else if (freq <= 45000) /* DMG band lower limit */ return (freq - 5000) / 5; + else if (freq >= 58320 && freq <= 64800) + return (freq - 56160) / 2160; + else + return 0; } EXPORT_SYMBOL(ieee80211_frequency_to_channel); @@ -137,6 +151,11 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband, } WARN_ON(want != 0 && want != 3 && want != 6); break; + case IEEE80211_BAND_60GHZ: + /* check for mandatory HT MCS 1..4 */ + WARN_ON(!sband->ht_cap.ht_supported); + WARN_ON((sband->ht_cap.mcs.rx_mask[0] & 0x1e) != 0x1e); + break; case IEEE80211_NUM_BANDS: WARN_ON(1); break; -- cgit v1.2.3 From cb831b537d50d21f6afb5dffbde4cf6523627461 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 2 Jul 2012 15:40:18 +0200 Subject: mac80211: remove tx_frags driver callback The implementation of tx_frags is buggy due to not handling queue stop, and there's no driver implementing it so remove it. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 --------------- net/mac80211/driver-ops.h | 8 -------- net/mac80211/main.c | 2 +- net/mac80211/tx.c | 7 ++----- 4 files changed, 3 insertions(+), 29 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b5da094468f1..dc2a97af95e7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1896,19 +1896,6 @@ enum ieee80211_rate_control_changed { * The low-level driver should send the frame out based on * configuration in the TX control data. This handler should, * preferably, never fail and stop queues appropriately. - * This must be implemented if @tx_frags is not. - * Must be atomic. - * - * @tx_frags: Called to transmit multiple fragments of a single MSDU. - * This handler must consume all fragments, sending out some of - * them only is useless and it can't ask for some of them to be - * queued again. If the frame is not fragmented the queue has a - * single SKB only. To avoid issues with the networking stack - * when TX status is reported the frames should be removed from - * the skb queue. - * If this is used, the tx_info @vif and @sta pointers will be - * invalid -- you must not use them in that case. - * This must be implemented if @tx isn't. * Must be atomic. * * @start: Called before the first netdevice attached to the hardware @@ -2260,8 +2247,6 @@ enum ieee80211_rate_control_changed { */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); - void (*tx_frags)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, struct sk_buff_head *skbs); int (*start)(struct ieee80211_hw *hw); void (*stop)(struct ieee80211_hw *hw); #ifdef CONFIG_PM diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 44e8c1242781..5042151a3325 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -27,14 +27,6 @@ static inline void drv_tx(struct ieee80211_local *local, struct sk_buff *skb) local->ops->tx(&local->hw, skb); } -static inline void drv_tx_frags(struct ieee80211_local *local, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - struct sk_buff_head *skbs) -{ - local->ops->tx_frags(&local->hw, vif, sta, skbs); -} - static inline void drv_get_et_strings(struct ieee80211_sub_if_data *sdata, u32 sset, u8 *data) { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ab32c59be894..c794101f8987 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -587,7 +587,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN); - BUG_ON(!ops->tx && !ops->tx_frags); + BUG_ON(!ops->tx); BUG_ON(!ops->start); BUG_ON(!ops->stop); BUG_ON(!ops->config); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4990f4fb5864..364a1e7b4afa 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1295,11 +1295,8 @@ static bool __ieee80211_tx(struct ieee80211_local *local, break; } - if (local->ops->tx_frags) - drv_tx_frags(local, vif, pubsta, skbs); - else - result = ieee80211_tx_frags(local, vif, pubsta, skbs, - txpending); + result = ieee80211_tx_frags(local, vif, pubsta, skbs, + txpending); ieee80211_tpt_led_trig_tx(local, fc, led_len); ieee80211_led_tx(local, 1); -- cgit v1.2.3 From e3e1a0bcb3f192fe2f95f86a74bd4e7967341e74 Mon Sep 17 00:00:00 2001 From: Thomas Huehn Date: Mon, 2 Jul 2012 19:46:16 +0200 Subject: mac80211: reduce IEEE80211_TX_MAX_RATES IEEE80211_TX_MAX_RATES can be reduced from 5 to 4 as there is no current hardware supporting a rate chain with 5 multi rate stages (mrr), so 4 mrr stages are sufficient. The memory that is freed within the ieee80211_tx_info struct will be used in the upcoming Transmission Power Control (TPC) implementation. Suggested-by: Felix Fietkau Signed-off-by: Thomas Huehn [reword commit message] Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/carl9170/tx.c | 6 +++--- drivers/net/wireless/p54/txrx.c | 6 +++--- include/net/mac80211.h | 8 ++++---- net/mac80211/tx.c | 3 +-- 4 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/carl9170/tx.c b/drivers/net/wireless/ath/carl9170/tx.c index aed305177af6..ede0b572cebc 100644 --- a/drivers/net/wireless/ath/carl9170/tx.c +++ b/drivers/net/wireless/ath/carl9170/tx.c @@ -277,11 +277,11 @@ static void carl9170_tx_release(struct kref *ref) return; BUILD_BUG_ON( - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len) != 23); + offsetof(struct ieee80211_tx_info, status.ack_signal) != 20); - memset(&txinfo->status.ampdu_ack_len, 0, + memset(&txinfo->status.ack_signal, 0, sizeof(struct ieee80211_tx_info) - - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); + offsetof(struct ieee80211_tx_info, status.ack_signal)); if (atomic_read(&ar->tx_total_queued)) ar->tx_schedule = true; diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index 82a1cac920bd..f38786e02623 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -422,11 +422,11 @@ static void p54_rx_frame_sent(struct p54_common *priv, struct sk_buff *skb) * Clear manually, ieee80211_tx_info_clear_status would * clear the counts too and we need them. */ - memset(&info->status.ampdu_ack_len, 0, + memset(&info->status.ack_signal, 0, sizeof(struct ieee80211_tx_info) - - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); + offsetof(struct ieee80211_tx_info, status.ack_signal)); BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, - status.ampdu_ack_len) != 23); + status.ack_signal) != 20); if (entry_hdr->flags & cpu_to_le16(P54_HDR_FLAG_DATA_ALIGN)) pad = entry_data->align[0]; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dc2a97af95e7..3f1b58cf9c8c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -475,7 +475,7 @@ enum mac80211_rate_control_flags { #define IEEE80211_TX_INFO_RATE_DRIVER_DATA_SIZE 24 /* maximum number of rate stages */ -#define IEEE80211_TX_MAX_RATES 5 +#define IEEE80211_TX_MAX_RATES 4 /** * struct ieee80211_tx_rate - rate selection/status @@ -563,11 +563,11 @@ struct ieee80211_tx_info { } control; struct { struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES]; - u8 ampdu_ack_len; int ack_signal; + u8 ampdu_ack_len; u8 ampdu_len; u8 antenna; - /* 14 bytes free */ + /* 21 bytes free */ } status; struct { struct ieee80211_tx_rate driver_rates[ @@ -634,7 +634,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) info->status.rates[i].count = 0; BUILD_BUG_ON( - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len) != 23); + offsetof(struct ieee80211_tx_info, status.ack_signal) != 20); memset(&info->status.ampdu_ack_len, 0, sizeof(struct ieee80211_tx_info) - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 364a1e7b4afa..c9d2175d15c1 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -959,8 +959,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) info->control.rates[1].idx = -1; info->control.rates[2].idx = -1; info->control.rates[3].idx = -1; - info->control.rates[4].idx = -1; - BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5); + BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 4); info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; } else { hdr->frame_control &= ~morefrags; -- cgit v1.2.3 From a1845fc7c552977e23fe552ad3f5c6c279e3d550 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 27 Jun 2012 13:18:36 +0200 Subject: mac80211: add TX prepare API Some drivers require setup before being able to send management frames in managed mode, in particular in multi-channel cases. Introduce API to allow the drivers to do such setup while being able to sleep waiting for the setup to finish in the device. This isn't possible inside the TX call since that can't sleep. A future patch may also restructure the TX retry to wait for the driver to report the frame status, as suggested by Arik in http://mid.gmane.org/CA+XVXffKSEL6ZQPQ98x-zO-NL2=TNF1uN==mprRyUmAaRn254g@mail.gmail.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 +++++++++++++++ net/mac80211/driver-ops.h | 14 ++++++++++++++ net/mac80211/mlme.c | 8 ++++++++ net/mac80211/trace.h | 7 +++++++ 4 files changed, 44 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3f1b58cf9c8c..e3fa90ce9ecb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2244,6 +2244,18 @@ enum ieee80211_rate_control_changed { * @get_rssi: Get current signal strength in dBm, the function is optional * and can sleep. * + * @mgd_prepare_tx: Prepare for transmitting a management frame for association + * before associated. In multi-channel scenarios, a virtual interface is + * bound to a channel before it is associated, but as it isn't associated + * yet it need not necessarily be given airtime, in particular since any + * transmission to a P2P GO needs to be synchronized against the GO's + * powersave state. mac80211 will call this function before transmitting a + * management frame prior to having successfully associated to allow the + * driver to give it channel time for the transmission, to get a response + * and to be able to synchronize with the GO. + * The callback will be called before each transmission and upon return + * mac80211 will transmit the frame right away. + * The callback is optional and can (should!) sleep. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -2383,6 +2395,9 @@ struct ieee80211_ops { u32 sset, u8 *data); int (*get_rssi)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, s8 *rssi_dbm); + + void (*mgd_prepare_tx)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); }; /** diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 5042151a3325..df9203199102 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -852,4 +852,18 @@ static inline int drv_get_rssi(struct ieee80211_local *local, return ret; } + +static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + + check_sdata_in_driver(sdata); + WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); + + trace_drv_mgd_prepare_tx(local, sdata); + if (local->ops->mgd_prepare_tx) + local->ops->mgd_prepare_tx(&local->hw, &sdata->vif); + trace_drv_return_void(local); +} #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e9c0d1b68fc8..d563f7c55531 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -541,6 +541,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) memcpy(pos, assoc_data->ie + offset, noffset - offset); } + drv_mgd_prepare_tx(local, sdata); + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); } @@ -580,6 +582,9 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED)) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + + drv_mgd_prepare_tx(local, sdata); + ieee80211_tx_skb(sdata, skb); } } @@ -1756,6 +1761,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, if (!elems.challenge) return; auth_data->expected_transaction = 4; + drv_mgd_prepare_tx(sdata->local, sdata); ieee80211_send_auth(sdata, 3, auth_data->algorithm, elems.challenge - 2, elems.challenge_len + 2, auth_data->bss->bssid, auth_data->bss->bssid, @@ -2641,6 +2647,8 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) return -ETIMEDOUT; } + drv_mgd_prepare_tx(local, sdata); + if (auth_data->bss->proberesp_ies) { sdata_info(sdata, "send auth to %pM (try %d/%d)\n", auth_data->bss->bssid, auth_data->tries, diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 2e60f4acd027..e1e9d10ec2e7 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1244,6 +1244,13 @@ TRACE_EVENT(drv_get_rssi, ) ); +DEFINE_EVENT(local_sdata_evt, drv_mgd_prepare_tx, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + + TP_ARGS(local, sdata) +); + /* * Tracing for API calls that drivers call. */ -- cgit v1.2.3 From 08911475d1d0921401e37d83292b217e1411d10b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 29 Jun 2012 05:23:24 +0000 Subject: netfilter: nf_conntrack: generalize nf_ct_l4proto_net This patch generalizes nf_ct_l4proto_net by splitting it into chunks and moving the corresponding protocol part to where it really belongs to. To clarify, note that we follow two different approaches to support per-net depending if it's built-in or run-time loadable protocol tracker. Signed-off-by: Pablo Neira Ayuso Acked-by: Gao feng --- include/net/netfilter/nf_conntrack_l4proto.h | 3 +++ net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 6 ++++++ net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 6 ++++++ net/netfilter/nf_conntrack_proto.c | 22 ++++++---------------- net/netfilter/nf_conntrack_proto_generic.c | 6 ++++++ net/netfilter/nf_conntrack_proto_tcp.c | 7 +++++++ net/netfilter/nf_conntrack_proto_udp.c | 7 +++++++ 7 files changed, 41 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 08bb571b7abd..c3be4aef6bf7 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -99,6 +99,9 @@ struct nf_conntrack_l4proto { /* Init l4proto pernet data */ int (*init_net)(struct net *net, u_int16_t proto); + /* Return the per-net protocol part. */ + struct nf_proto_net *(*get_net_proto)(struct net *net); + /* Protocol name */ const char *name; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 9c2095c5571f..5241d997ab75 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -388,6 +388,11 @@ static int icmp_init_net(struct net *net, u_int16_t proto) return ret; } +static struct nf_proto_net *icmp_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = { .l3proto = PF_INET, @@ -418,4 +423,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = icmp_init_net, + .get_net_proto = icmp_get_net_proto, }; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 9fc5cf5f3e8b..2d54b2061d68 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -358,6 +358,11 @@ static int icmpv6_init_net(struct net *net, u_int16_t proto) return icmpv6_kmemdup_sysctl_table(pn, in); } +static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.icmpv6.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = { .l3proto = PF_INET6, @@ -386,4 +391,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = icmpv6_init_net, + .get_net_proto = icmpv6_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 21b850c4b3ab..0dc63854390f 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -303,22 +303,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) { - switch (l4proto->l4proto) { - case IPPROTO_TCP: - return (struct nf_proto_net *)&net->ct.nf_ct_proto.tcp; - case IPPROTO_UDP: - return (struct nf_proto_net *)&net->ct.nf_ct_proto.udp; - case IPPROTO_ICMP: - return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmp; - case IPPROTO_ICMPV6: - return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmpv6; - case 255: /* l4proto_generic */ - return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; - default: - if (l4proto->net_id) - return net_generic(net, *l4proto->net_id); - else - return NULL; + if (l4proto->get_net_proto) { + /* statically built-in protocols use static per-net */ + return l4proto->get_net_proto(net); + } else if (l4proto->net_id) { + /* ... and loadable protocols use dynamic per-net */ + return net_generic(net, *l4proto->net_id); } return NULL; } diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 7c11c5444194..d25f29377648 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -186,6 +186,11 @@ static int generic_init_net(struct net *net, u_int16_t proto) return ret; } +static struct nf_proto_net *generic_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.generic.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, @@ -207,4 +212,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = generic_init_net, + .get_net_proto = generic_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 44f0da830156..07e56ea2e9bf 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1623,6 +1623,11 @@ static int tcp_init_net(struct net *net, u_int16_t proto) return ret; } +static struct nf_proto_net *tcp_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.tcp.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = { .l3proto = PF_INET, @@ -1656,6 +1661,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = tcp_init_net, + .get_net_proto = tcp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); @@ -1692,5 +1698,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = tcp_init_net, + .get_net_proto = tcp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index e7e0434c3056..59623cc56e8d 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -297,6 +297,11 @@ static int udp_init_net(struct net *net, u_int16_t proto) return ret; } +static struct nf_proto_net *udp_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.udp.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = { .l3proto = PF_INET, @@ -325,6 +330,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = udp_init_net, + .get_net_proto = udp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); @@ -356,5 +362,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = udp_init_net, + .get_net_proto = udp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); -- cgit v1.2.3 From a263b3093641fb1ec377582c90986a7fd0625184 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jul 2012 02:02:15 -0700 Subject: ipv4: Make neigh lookups directly in output packet path. Do not use the dst cached neigh, we'll be getting rid of that. Signed-off-by: David S. Miller --- include/net/arp.h | 28 +++++++++++++++++++--------- include/net/neighbour.h | 11 +++++++++-- net/core/neighbour.c | 12 +++++++----- net/ipv4/ip_output.c | 12 ++++++++---- net/ipv4/route.c | 6 +----- 5 files changed, 44 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/arp.h b/include/net/arp.h index 4a1f3fb562eb..4617d9841132 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -15,24 +15,34 @@ static inline u32 arp_hashfn(u32 key, const struct net_device *dev, u32 hash_rnd return val * hash_rnd; } -static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key) +static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { - struct neigh_hash_table *nht; + struct neigh_hash_table *nht = rcu_dereference_bh(arp_tbl.nht); struct neighbour *n; u32 hash_val; - rcu_read_lock_bh(); - nht = rcu_dereference_bh(arp_tbl.nht); + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + key = 0; + hash_val = arp_hashfn(key, dev, nht->hash_rnd[0]) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; n = rcu_dereference_bh(n->next)) { - if (n->dev == dev && *(u32 *)n->primary_key == key) { - if (!atomic_inc_not_zero(&n->refcnt)) - n = NULL; - break; - } + if (n->dev == dev && *(u32 *)n->primary_key == key) + return n; } + + return NULL; +} + +static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv4_neigh_lookup_noref(dev, key); + if (n && !atomic_inc_not_zero(&n->refcnt)) + n = NULL; rcu_read_unlock_bh(); return n; diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 6cdfeedb650b..e1d18bdeebb8 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -202,9 +202,16 @@ extern struct neighbour * neigh_lookup(struct neigh_table *tbl, extern struct neighbour * neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey); -extern struct neighbour * neigh_create(struct neigh_table *tbl, +extern struct neighbour * __neigh_create(struct neigh_table *tbl, + const void *pkey, + struct net_device *dev, + bool want_ref); +static inline struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, - struct net_device *dev); + struct net_device *dev) +{ + return __neigh_create(tbl, pkey, dev, true); +} extern void neigh_destroy(struct neighbour *neigh); extern int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); extern int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d81d026138f0..a793af9af150 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -474,8 +474,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, } EXPORT_SYMBOL(neigh_lookup_nodev); -struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, - struct net_device *dev) +struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, + struct net_device *dev, bool want_ref) { u32 hash_val; int key_len = tbl->key_len; @@ -535,14 +535,16 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, n1 = rcu_dereference_protected(n1->next, lockdep_is_held(&tbl->lock))) { if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { - neigh_hold(n1); + if (want_ref) + neigh_hold(n1); rc = n1; goto out_tbl_unlock; } } n->dead = 0; - neigh_hold(n); + if (want_ref) + neigh_hold(n); rcu_assign_pointer(n->next, rcu_dereference_protected(nht->hash_buckets[hash_val], lockdep_is_held(&tbl->lock))); @@ -558,7 +560,7 @@ out_neigh_release: neigh_release(n); goto out; } -EXPORT_SYMBOL(neigh_create); +EXPORT_SYMBOL(__neigh_create); static u32 pneigh_hash(const void *pkey, int key_len) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 2630900e480a..6e9a266a0535 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -170,6 +170,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; + u32 nexthop; if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); @@ -191,15 +192,18 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } - rcu_read_lock(); - neigh = dst_get_neighbour_noref(dst); + rcu_read_lock_bh(); + nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr; + neigh = __ipv4_neigh_lookup_noref(dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (neigh) { int res = neigh_output(neigh, skb); - rcu_read_unlock(); + rcu_read_unlock_bh(); return res; } - rcu_read_unlock(); + rcu_read_unlock_bh(); net_dbg_ratelimited("%s: No header cache and no neighbour!\n", __func__); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6a5afc715558..2f40363e2851 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1098,17 +1098,13 @@ static int slow_chain_length(const struct rtable *head) static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - static const __be32 inaddr_any = 0; struct net_device *dev = dst->dev; const __be32 *pkey = daddr; const struct rtable *rt; struct neighbour *n; rt = (const struct rtable *) dst; - - if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) - pkey = &inaddr_any; - else if (rt->rt_gateway) + if (rt->rt_gateway) pkey = (const __be32 *) &rt->rt_gateway; n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey); -- cgit v1.2.3 From 5110effee8fde2edfacac9cd12a9960ab2dc39ea Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jul 2012 02:21:03 -0700 Subject: net: Do delayed neigh confirmation. When a dst_confirm() happens, mark the confirmation as pending in the dst. Then on the next packet out, when we have the neigh in-hand, do the update. This removes the dependency in dst_confirm() of dst's having an attached neigh. While we're here, remove the explicit 'dst' NULL check, all except 2 or 3 call sites ensure it's not NULL. So just fix those cases up. Signed-off-by: David S. Miller --- include/net/dst.h | 29 +++++++++++++++++++++-------- include/net/neighbour.h | 15 --------------- net/core/dst.c | 3 ++- net/ipv4/ip_output.c | 2 +- net/ipv4/tcp_input.c | 19 +++++++++++++------ net/ipv6/ip6_output.c | 2 +- 6 files changed, 38 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index f0bf3b8d5911..84e7a3ff968d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -51,7 +51,7 @@ struct dst_entry { int (*input)(struct sk_buff *); int (*output)(struct sk_buff *); - int flags; + unsigned short flags; #define DST_HOST 0x0001 #define DST_NOXFRM 0x0002 #define DST_NOPOLICY 0x0004 @@ -62,6 +62,8 @@ struct dst_entry { #define DST_FAKE_RTABLE 0x0080 #define DST_XFRM_TUNNEL 0x0100 + unsigned short pending_confirm; + short error; short obsolete; unsigned short header_len; /* more space at head required */ @@ -371,7 +373,8 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) extern int dst_discard(struct sk_buff *skb); extern void *dst_alloc(struct dst_ops *ops, struct net_device *dev, - int initial_ref, int initial_obsolete, int flags); + int initial_ref, int initial_obsolete, + unsigned short flags); extern void __dst_free(struct dst_entry *dst); extern struct dst_entry *dst_destroy(struct dst_entry *dst); @@ -395,14 +398,24 @@ static inline void dst_rcu_free(struct rcu_head *head) static inline void dst_confirm(struct dst_entry *dst) { - if (dst) { - struct neighbour *n; + dst->pending_confirm = 1; +} - rcu_read_lock(); - n = dst_get_neighbour_noref(dst); - neigh_confirm(n); - rcu_read_unlock(); +static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, + struct sk_buff *skb) +{ + struct hh_cache *hh; + + if (unlikely(dst->pending_confirm)) { + n->confirmed = jiffies; + dst->pending_confirm = 0; } + + hh = &n->hh; + if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) + return neigh_hh_output(hh, skb); + else + return n->output(n, skb); } static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index e1d18bdeebb8..344d8988842a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -309,12 +309,6 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh) #define neigh_hold(n) atomic_inc(&(n)->refcnt) -static inline void neigh_confirm(struct neighbour *neigh) -{ - if (neigh) - neigh->confirmed = jiffies; -} - static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { unsigned long now = jiffies; @@ -358,15 +352,6 @@ static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) return dev_queue_xmit(skb); } -static inline int neigh_output(struct neighbour *n, struct sk_buff *skb) -{ - struct hh_cache *hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) - return neigh_hh_output(hh, skb); - else - return n->output(n, skb); -} - static inline struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat) { diff --git a/net/core/dst.c b/net/core/dst.c index 43d94cedbf7c..a6e19a23a745 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -152,7 +152,7 @@ EXPORT_SYMBOL(dst_discard); const u32 dst_default_metrics[RTAX_MAX]; void *dst_alloc(struct dst_ops *ops, struct net_device *dev, - int initial_ref, int initial_obsolete, int flags) + int initial_ref, int initial_obsolete, unsigned short flags) { struct dst_entry *dst; @@ -188,6 +188,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->__use = 0; dst->lastuse = jiffies; dst->flags = flags; + dst->pending_confirm = 0; dst->next = NULL; if (!(flags & DST_NOCOUNT)) dst_entries_add(ops, 1); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6e9a266a0535..cc52679790b2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -198,7 +198,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (neigh) { - int res = neigh_output(neigh, skb); + int res = dst_neigh_output(dst, neigh, skb); rcu_read_unlock_bh(); return res; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8416f8a68e65..ca0d0e7c9778 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -740,13 +740,13 @@ void tcp_update_metrics(struct sock *sk) if (sysctl_tcp_nometrics_save) return; - dst_confirm(dst); - if (dst && (dst->flags & DST_HOST)) { const struct inet_connection_sock *icsk = inet_csk(sk); int m; unsigned long rtt; + dst_confirm(dst); + if (icsk->icsk_backoff || !tp->srtt) { /* This session failed to estimate rtt. Why? * Probably, no packets returned in time. @@ -3869,9 +3869,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_cong_avoid(sk, ack, prior_in_flight); } - if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) - dst_confirm(__sk_dst_get(sk)); - + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { + struct dst_entry *dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); + } return 1; no_queue: @@ -6140,9 +6142,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, case TCP_FIN_WAIT1: if (tp->snd_una == tp->write_seq) { + struct dst_entry *dst; + tcp_set_state(sk, TCP_FIN_WAIT2); sk->sk_shutdown |= SEND_SHUTDOWN; - dst_confirm(__sk_dst_get(sk)); + + dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); if (!sock_flag(sk, SOCK_DEAD)) /* Wake up lingering close() */ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a233a7ccbc3a..c94e4aabe11b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -125,7 +125,7 @@ static int ip6_finish_output2(struct sk_buff *skb) rcu_read_lock(); neigh = dst_get_neighbour_noref(dst); if (neigh) { - int res = neigh_output(neigh, skb); + int res = dst_neigh_output(dst, neigh, skb); rcu_read_unlock(); return res; -- cgit v1.2.3 From f894cbf847c9bea1955095bf37aca6c050553167 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jul 2012 21:52:24 -0700 Subject: net: Add optional SKB arg to dst_ops->neigh_lookup(). Causes the handler to use the daddr in the ipv4/ipv6 header when the route gateway is unspecified (local subnet). Signed-off-by: David S. Miller --- include/net/dst.h | 8 +++++++- include/net/dst_ops.h | 4 +++- net/bridge/br_netfilter.c | 4 +++- net/decnet/dn_route.c | 8 ++++++-- net/ipv4/route.c | 14 ++++++++++---- net/ipv6/route.c | 14 ++++++++++---- net/xfrm/xfrm_policy.c | 6 ++++-- 7 files changed, 43 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 84e7a3ff968d..295a70547e7d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -420,7 +420,13 @@ static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - return dst->ops->neigh_lookup(dst, daddr); + return dst->ops->neigh_lookup(dst, NULL, daddr); +} + +static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, + struct sk_buff *skb) +{ + return dst->ops->neigh_lookup(dst, skb, NULL); } static inline void dst_link_failure(struct sk_buff *skb) diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 3682a0a076c1..4badc86e45d1 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -26,7 +26,9 @@ struct dst_ops { void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, u32 mtu); int (*local_out)(struct sk_buff *skb); - struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, const void *daddr); + struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr); struct kmem_cache *kmem_cachep; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 20fa719889ee..4378775432b6 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -120,7 +120,9 @@ static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) return NULL; } -static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { return NULL; } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 2493ed5bfecd..60e4c6e1bac0 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -117,7 +117,9 @@ static void dn_dst_destroy(struct dst_entry *); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); -static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr); +static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr); static int dn_route_input(struct sk_buff *); static void dn_run_flush(unsigned long dummy); @@ -828,7 +830,9 @@ static unsigned int dn_dst_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bae36386e722..7453dfcdb439 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -188,7 +188,9 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) return p; } -static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr); +static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr); static struct dst_ops ipv4_dst_ops = { .family = AF_INET, @@ -1088,7 +1090,9 @@ static int slow_chain_length(const struct rtable *head) return length >> FRACT_BITS; } -static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { struct net_device *dev = dst->dev; const __be32 *pkey = daddr; @@ -1098,6 +1102,8 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const vo rt = (const struct rtable *) dst; if (rt->rt_gateway) pkey = (const __be32 *) &rt->rt_gateway; + else if (skb) + pkey = &ip_hdr(skb)->daddr; n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey); if (n) @@ -1107,7 +1113,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const vo static int rt_bind_neighbour(struct rtable *rt) { - struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); + struct neighbour *n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway); if (IS_ERR(n)) return PTR_ERR(n); dst_set_neighbour(&rt->dst, n); @@ -1388,7 +1394,7 @@ static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) rt->rt_gateway = peer->redirect_learned.a4; - n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); + n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway); if (IS_ERR(n)) { rt->rt_gateway = orig_gw; return; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c518e4ec0cea..4b581c675bb2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -120,21 +120,27 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) return p; } -static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr) +static inline const void *choose_neigh_daddr(struct rt6_info *rt, + struct sk_buff *skb, + const void *daddr) { struct in6_addr *p = &rt->rt6i_gateway; if (!ipv6_addr_any(p)) return (const void *) p; + else if (skb) + return &ipv6_hdr(skb)->daddr; return daddr; } -static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { struct rt6_info *rt = (struct rt6_info *) dst; struct neighbour *n; - daddr = choose_neigh_daddr(rt, daddr); + daddr = choose_neigh_daddr(rt, skb, daddr); n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); if (n) return n; @@ -1162,7 +1168,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (neigh) neigh_hold(neigh); else { - neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr); + neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr); if (IS_ERR(neigh)) { in6_dev_put(idev); dst_free(&rt->dst); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ccfbd328a69d..a28a3f972d5b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2404,9 +2404,11 @@ static unsigned int xfrm_mtu(const struct dst_entry *dst) return mtu ? : dst_mtu(dst->path); } -static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { - return dst_neigh_lookup(dst->path, daddr); + return dst->path->ops->neigh_lookup(dst, skb, daddr); } int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) -- cgit v1.2.3 From fccd7d5c77ff61d5283e7ce8242791d5f00dcc17 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jul 2012 22:22:18 -0700 Subject: decnet: Use neighbours privately in dn_route struct. This allows an easy conversion away from dst_get_neighbour*(). Signed-off-by: David S. Miller --- include/net/dn_route.h | 2 ++ net/decnet/dn_neigh.c | 2 +- net/decnet/dn_route.c | 36 +++++++++++++++++++++++++++++------- 3 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/dn_route.h b/include/net/dn_route.h index c507e05d172f..4f7d6a182381 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -67,6 +67,8 @@ extern void dn_rt_cache_flush(int delay); struct dn_route { struct dst_entry dst; + struct neighbour *n; + struct flowidn fld; __le16 rt_saddr; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 8e9a35b17df4..3aede1b459fd 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -202,7 +202,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; - struct neighbour *neigh = dst_get_neighbour_noref(dst); + struct neighbour *neigh = rt->n; struct net_device *dev = neigh->dev; char mac_addr[ETH_ALEN]; unsigned int seq; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 60e4c6e1bac0..6e74b3f110bc 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -114,6 +114,7 @@ static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); static unsigned int dn_dst_mtu(const struct dst_entry *dst); static void dn_dst_destroy(struct dst_entry *); +static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); @@ -140,6 +141,7 @@ static struct dst_ops dn_dst_ops = { .mtu = dn_dst_mtu, .cow_metrics = dst_cow_metrics_generic, .destroy = dn_dst_destroy, + .ifdown = dn_dst_ifdown, .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, @@ -148,9 +150,27 @@ static struct dst_ops dn_dst_ops = { static void dn_dst_destroy(struct dst_entry *dst) { + struct dn_route *rt = (struct dn_route *) dst; + + if (rt->n) + neigh_release(rt->n); dst_destroy_metrics_generic(dst); } +static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) +{ + if (how) { + struct dn_route *rt = (struct dn_route *) dst; + struct neighbour *n = rt->n; + + if (n && n->dev == dev) { + n->dev = dev_net(dev)->loopback_dev; + dev_hold(n->dev); + dev_put(dev); + } + } +} + static __inline__ unsigned int dn_hash(__le16 src, __le16 dst) { __u16 tmp = (__u16 __force)(src ^ dst); @@ -246,7 +266,8 @@ static int dn_dst_gc(struct dst_ops *ops) */ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) { - struct neighbour *n = dst_get_neighbour_noref(dst); + struct dn_route *rt = (struct dn_route *) dst; + struct neighbour *n = rt->n; u32 min_mtu = 230; struct dn_dev *dn; @@ -715,7 +736,8 @@ out: static int dn_to_neigh_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *n = dst_get_neighbour_noref(dst); + struct dn_route *rt = (struct dn_route *) dst; + struct neighbour *n = rt->n; return n->output(n, skb); } @@ -729,7 +751,7 @@ static int dn_output(struct sk_buff *skb) int err = -EINVAL; - if (dst_get_neighbour_noref(dst) == NULL) + if (rt->n == NULL) goto error; skb->dev = dev; @@ -852,11 +874,11 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) } rt->rt_type = res->type; - if (dev != NULL && dst_get_neighbour_noref(&rt->dst) == NULL) { + if (dev != NULL && rt->n == NULL) { n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); if (IS_ERR(n)) return PTR_ERR(n); - dst_set_neighbour(&rt->dst, n); + rt->n = n; } if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) @@ -1163,7 +1185,7 @@ make_route: rt->rt_dst_map = fld.daddr; rt->rt_src_map = fld.saddr; - dst_set_neighbour(&rt->dst, neigh); + rt->n = neigh; neigh = NULL; rt->dst.lastuse = jiffies; @@ -1433,7 +1455,7 @@ make_route: rt->fld.flowidn_iif = in_dev->ifindex; rt->fld.flowidn_mark = fld.flowidn_mark; - dst_set_neighbour(&rt->dst, neigh); + rt->n = neigh; rt->dst.lastuse = jiffies; rt->dst.output = dn_rt_bug; switch (res.type) { -- cgit v1.2.3 From 1d248b1cf4e09dbec8cef5f7fbeda5874248bd09 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 3 Jul 2012 01:01:51 -0700 Subject: net: Pass neighbours and dest address into NETEVENT_REDIRECT events. Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c | 23 +++++++++------------- include/net/netevent.h | 4 ++++ net/ipv6/route.c | 7 ++++++- 3 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c index 55cf72af69ce..633c6029e53c 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c @@ -62,7 +62,8 @@ static const unsigned int MAX_ATIDS = 64 * 1024; static const unsigned int ATID_BASE = 0x10000; static void cxgb_neigh_update(struct neighbour *neigh); -static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new); +static void cxgb_redirect(struct dst_entry *old, struct neighbour *old_neigh, + struct dst_entry *new, struct neighbour *new_neigh); static inline int offload_activated(struct t3cdev *tdev) { @@ -968,8 +969,9 @@ static int nb_callback(struct notifier_block *self, unsigned long event, } case (NETEVENT_REDIRECT):{ struct netevent_redirect *nr = ctx; - cxgb_redirect(nr->old, nr->new); - cxgb_neigh_update(dst_get_neighbour_noref(nr->new)); + cxgb_redirect(nr->old, nr->old_neigh, + nr->new, nr->new_neigh); + cxgb_neigh_update(nr->new_neigh); break; } default: @@ -1107,10 +1109,10 @@ static void set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e) tdev->send(tdev, skb); } -static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new) +static void cxgb_redirect(struct dst_entry *old, struct neighbour *old_neigh, + struct dst_entry *new, struct neighbour *new_neigh) { struct net_device *olddev, *newdev; - struct neighbour *n; struct tid_info *ti; struct t3cdev *tdev; u32 tid; @@ -1118,15 +1120,8 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new) struct l2t_entry *e; struct t3c_tid_entry *te; - n = dst_get_neighbour_noref(old); - if (!n) - return; - olddev = n->dev; - - n = dst_get_neighbour_noref(new); - if (!n) - return; - newdev = n->dev; + olddev = old_neigh->dev; + newdev = new_neigh->dev; if (!is_offloading(olddev)) return; diff --git a/include/net/netevent.h b/include/net/netevent.h index 086f8a5b59dc..3ce4988c9c08 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -12,10 +12,14 @@ */ struct dst_entry; +struct neighbour; struct netevent_redirect { struct dst_entry *old; + struct neighbour *old_neigh; struct dst_entry *new; + struct neighbour *new_neigh; + const void *daddr; }; enum netevent_notif_type { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4b581c675bb2..34b29881e22d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1687,6 +1687,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, struct rt6_info *rt, *nrt = NULL; struct netevent_redirect netevent; struct net *net = dev_net(neigh->dev); + struct neighbour *old_neigh; rt = ip6_route_redirect(dest, src, saddr, neigh->dev); @@ -1714,7 +1715,8 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == dst_get_neighbour_noref_raw(&rt->dst)) + old_neigh = dst_get_neighbour_noref_raw(&rt->dst); + if (neigh == old_neigh) goto out; nrt = ip6_rt_copy(rt, dest); @@ -1732,7 +1734,10 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, goto out; netevent.old = &rt->dst; + netevent.old_neigh = old_neigh; netevent.new = &nrt->dst; + netevent.new_neigh = neigh; + netevent.daddr = dest; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags & RTF_CACHE) { -- cgit v1.2.3 From 97cac0821af4474ec4ba3a9e7a36b98ed9b6db88 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jul 2012 22:43:47 -0700 Subject: ipv6: Store route neighbour in rt6_info struct. This makes for a simplified conversion away from dst_get_neighbour*(). All code outside of ipv6 will use neigh lookups via dst_neigh_lookup*(). Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ net/ipv6/ip6_output.c | 8 ++++++-- net/ipv6/route.c | 42 ++++++++++++++++++++++++++---------------- net/ipv6/xfrm6_policy.c | 1 + 4 files changed, 35 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a192f7807659..0fedbd8d747a 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -86,6 +86,8 @@ struct fib6_table; struct rt6_info { struct dst_entry dst; + struct neighbour *n; + /* * Tail elements of dst_entry (__refcnt etc.) * and these elements (rarely used in hot path) are in diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c94e4aabe11b..6d9c0abc8c20 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -88,6 +88,7 @@ static int ip6_finish_output2(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct neighbour *neigh; + struct rt6_info *rt; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -123,7 +124,8 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_lock(); - neigh = dst_get_neighbour_noref(dst); + rt = (struct rt6_info *) dst; + neigh = rt->n; if (neigh) { int res = dst_neigh_output(dst, neigh, skb); @@ -944,6 +946,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct net *net = sock_net(sk); #ifdef CONFIG_IPV6_OPTIMISTIC_DAD struct neighbour *n; + struct rt6_info *rt; #endif int err; @@ -972,7 +975,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry of the nexthop router */ rcu_read_lock(); - n = dst_get_neighbour_noref(*dst); + rt = (struct rt6_info *) dst; + n = rt->n; if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 34b29881e22d..ceff71d24f8e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -155,7 +155,7 @@ static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev) if (IS_ERR(n)) return PTR_ERR(n); } - dst_set_neighbour(&rt->dst, n); + rt->n = n; return 0; } @@ -285,6 +285,9 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; + if (rt->n) + neigh_release(rt->n); + if (!(rt->dst.flags & DST_HOST)) dst_destroy_metrics_generic(dst); @@ -335,12 +338,19 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct net_device *loopback_dev = dev_net(dev)->loopback_dev; - if (dev != loopback_dev && idev && idev->dev == dev) { - struct inet6_dev *loopback_idev = - in6_dev_get(loopback_dev); - if (loopback_idev) { - rt->rt6i_idev = loopback_idev; - in6_dev_put(idev); + if (dev != loopback_dev) { + if (idev && idev->dev == dev) { + struct inet6_dev *loopback_idev = + in6_dev_get(loopback_dev); + if (loopback_idev) { + rt->rt6i_idev = loopback_idev; + in6_dev_put(idev); + } + } + if (rt->n && rt->n->dev == dev) { + rt->n->dev = loopback_dev; + dev_hold(loopback_dev); + dev_put(dev); } } } @@ -430,7 +440,7 @@ static void rt6_probe(struct rt6_info *rt) * to no more than one per minute. */ rcu_read_lock(); - neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL; + neigh = rt ? rt->n : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) goto out; read_lock_bh(&neigh->lock); @@ -477,7 +487,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt) int m; rcu_read_lock(); - neigh = dst_get_neighbour_noref(&rt->dst); + neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -824,7 +834,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, if (rt) { rt->rt6i_flags |= RTF_CACHE; - dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst))); + rt->n = neigh_clone(ort->n); } return rt; } @@ -858,7 +868,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -1178,7 +1188,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; - dst_set_neighbour(&rt->dst, neigh); + rt->n = neigh; atomic_set(&rt->dst.__refcnt, 1); rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; @@ -1715,7 +1725,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - old_neigh = dst_get_neighbour_noref_raw(&rt->dst); + old_neigh = rt->n; if (neigh == old_neigh) goto out; @@ -1728,7 +1738,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, nrt->rt6i_flags &= ~RTF_GATEWAY; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); + nrt->n = neigh_clone(neigh); if (ip6_ins_rt(nrt)) goto out; @@ -2442,7 +2452,7 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; rcu_read_lock(); - n = dst_get_neighbour_noref(&rt->dst); + n = rt->n; if (n) { if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { rcu_read_unlock(); @@ -2666,7 +2676,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) seq_puts(m, "00000000000000000000000000000000 00 "); #endif rcu_read_lock(); - n = dst_get_neighbour_noref(&rt->dst); + n = rt->n; if (n) { seq_printf(m, "%pi6", n->primary_key); } else { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d7494845efbf..bb02038b822b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -103,6 +103,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ + xdst->u.rt6.n = neigh_clone(rt->n); xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->u.rt6.rt6i_metric = rt->rt6i_metric; -- cgit v1.2.3 From 36bdbcae2fa2a6dfa99344d4190fcea0aa7b7c25 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jul 2012 22:58:02 -0700 Subject: net: Kill dst->_neighbour, accessors, and final uses. No longer used. Signed-off-by: David S. Miller --- include/net/dst.h | 17 +---------------- net/core/dst.c | 18 ------------------ 2 files changed, 1 insertion(+), 34 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 295a70547e7d..b2634e446613 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -42,7 +42,7 @@ struct dst_entry { struct dst_entry *from; }; struct dst_entry *path; - struct neighbour __rcu *_neighbour; + void *__pad0; #ifdef CONFIG_XFRM struct xfrm_state *xfrm; #else @@ -96,21 +96,6 @@ struct dst_entry { }; }; -static inline struct neighbour *dst_get_neighbour_noref(struct dst_entry *dst) -{ - return rcu_dereference(dst->_neighbour); -} - -static inline struct neighbour *dst_get_neighbour_noref_raw(struct dst_entry *dst) -{ - return rcu_dereference_raw(dst->_neighbour); -} - -static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh) -{ - rcu_assign_pointer(dst->_neighbour, neigh); -} - extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); extern const u32 dst_default_metrics[RTAX_MAX]; diff --git a/net/core/dst.c b/net/core/dst.c index a6e19a23a745..07bacff84aa4 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -171,7 +171,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - RCU_INIT_POINTER(dst->_neighbour, NULL); #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif @@ -225,19 +224,12 @@ EXPORT_SYMBOL(__dst_free); struct dst_entry *dst_destroy(struct dst_entry * dst) { struct dst_entry *child; - struct neighbour *neigh; smp_rmb(); again: - neigh = rcu_dereference_protected(dst->_neighbour, 1); child = dst->child; - if (neigh) { - RCU_INIT_POINTER(dst->_neighbour, NULL); - neigh_release(neigh); - } - if (!(dst->flags & DST_NOCOUNT)) dst_entries_add(dst->ops, -1); @@ -361,19 +353,9 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { - struct neighbour *neigh; - dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - rcu_read_lock(); - neigh = dst_get_neighbour_noref(dst); - if (neigh && neigh->dev == dev) { - neigh->dev = dst->dev; - dev_hold(dst->dev); - dev_put(dev); - } - rcu_read_unlock(); } } -- cgit v1.2.3 From 8eb41c8dfb9e2396d2452ada9023a83d610b9051 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Thu, 5 Jul 2012 14:25:49 +0300 Subject: {nl,cfg}80211: support high bitrates Until now, a u16 value was used to represent bitrate value. With VHT bitrates this becomes too small. Introduce a new 32-bit bitrate attribute. nl80211 will report both the new and the old attribute, unless the bitrate doesn't fit into the old u16 attribute in which case only the new one will be reported. User space tools encouraged to prefer the 32-bit attribute, if available (since it won't be available on older kernels.) Signed-off-by: Vladimir Kondratiev [reword commit message and comments a bit] Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 9 +++++++++ include/net/cfg80211.h | 2 +- net/wireless/nl80211.c | 9 +++++++-- net/wireless/util.c | 2 +- 4 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 74cc55c1bf28..db961a59247f 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1638,12 +1638,20 @@ struct nl80211_sta_flag_update { * * These attribute types are used with %NL80211_STA_INFO_TXRATE * when getting information about the bitrate of a station. + * There are 2 attributes for bitrate, a legacy one that represents + * a 16-bit value, and new one that represents a 32-bit value. + * If the rate value fits into 16 bit, both attributes are reported + * with the same value. If the rate is too high to fit into 16 bits + * (>6.5535Gbps) only 32-bit attribute is included. + * User space tools encouraged to use the 32-bit attribute and fall + * back to the 16-bit one for compatibility with older kernels. * * @__NL80211_RATE_INFO_INVALID: attribute number 0 is reserved * @NL80211_RATE_INFO_BITRATE: total bitrate (u16, 100kbit/s) * @NL80211_RATE_INFO_MCS: mcs index for 802.11n (u8) * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 Mhz dualchannel bitrate * @NL80211_RATE_INFO_SHORT_GI: 400ns guard interval + * @NL80211_RATE_INFO_BITRATE32: total bitrate (u32, 100kbit/s) * @NL80211_RATE_INFO_MAX: highest rate_info number currently defined * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ @@ -1653,6 +1661,7 @@ enum nl80211_rate_info { NL80211_RATE_INFO_MCS, NL80211_RATE_INFO_40_MHZ_WIDTH, NL80211_RATE_INFO_SHORT_GI, + NL80211_RATE_INFO_BITRATE32, /* keep last */ __NL80211_RATE_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0b564e83a24b..8837efc368f9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3487,7 +3487,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, int freq, * * return 0 if MCS index >= 32 */ -u16 cfg80211_calculate_bitrate(struct rate_info *rate); +u32 cfg80211_calculate_bitrate(struct rate_info *rate); /* Logging, debugging and troubleshooting/diagnostic helpers. */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 77102e66f1ea..2a5cdb60bc6e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2618,7 +2618,8 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, int attr) { struct nlattr *rate; - u16 bitrate; + u32 bitrate; + u16 bitrate_compat; rate = nla_nest_start(msg, attr); if (!rate) @@ -2626,8 +2627,12 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */ bitrate = cfg80211_calculate_bitrate(info); + /* report 16-bit bitrate only if we can */ + bitrate_compat = bitrate < (1UL << 16) ? bitrate : 0; if ((bitrate > 0 && - nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate)) || + nla_put_u32(msg, NL80211_RATE_INFO_BITRATE32, bitrate)) || + (bitrate_compat > 0 && + nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat)) || ((info->flags & RATE_INFO_FLAGS_MCS) && nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs)) || ((info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) && diff --git a/net/wireless/util.c b/net/wireless/util.c index 0228c64e73d8..6e52726f7fe3 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -900,7 +900,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return err; } -u16 cfg80211_calculate_bitrate(struct rate_info *rate) +u32 cfg80211_calculate_bitrate(struct rate_info *rate) { int modulation, streams, bitrate; -- cgit v1.2.3 From 95ddc1fc4519ed48ddc7d622bd5c84dff3eebb0a Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Thu, 5 Jul 2012 14:25:50 +0300 Subject: cfg80211: bitrate calculation for 60g 60g band uses different from .11n MCS scheme, so bitrate should be calculated differently Signed-off-by: Vladimir Kondratiev Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/wireless/util.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8837efc368f9..51f67a9003a9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -580,11 +580,13 @@ enum station_info_flags { * @RATE_INFO_FLAGS_MCS: @tx_bitrate_mcs filled * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 Mhz width transmission * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval + * @RATE_INFO_FLAGS_60G: 60gHz MCS */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = 1<<0, RATE_INFO_FLAGS_40_MHZ_WIDTH = 1<<1, RATE_INFO_FLAGS_SHORT_GI = 1<<2, + RATE_INFO_FLAGS_60G = 1<<3, }; /** diff --git a/net/wireless/util.c b/net/wireless/util.c index 6e52726f7fe3..e31f1dba79ec 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -900,12 +900,61 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return err; } +static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate) +{ + static const u32 __mcs2bitrate[] = { + /* control PHY */ + [0] = 275, + /* SC PHY */ + [1] = 3850, + [2] = 7700, + [3] = 9625, + [4] = 11550, + [5] = 12512, /* 1251.25 mbps */ + [6] = 15400, + [7] = 19250, + [8] = 23100, + [9] = 25025, + [10] = 30800, + [11] = 38500, + [12] = 46200, + /* OFDM PHY */ + [13] = 6930, + [14] = 8662, /* 866.25 mbps */ + [15] = 13860, + [16] = 17325, + [17] = 20790, + [18] = 27720, + [19] = 34650, + [20] = 41580, + [21] = 45045, + [22] = 51975, + [23] = 62370, + [24] = 67568, /* 6756.75 mbps */ + /* LP-SC PHY */ + [25] = 6260, + [26] = 8340, + [27] = 11120, + [28] = 12510, + [29] = 16680, + [30] = 22240, + [31] = 25030, + }; + + if (WARN_ON_ONCE(rate->mcs >= ARRAY_SIZE(__mcs2bitrate))) + return 0; + + return __mcs2bitrate[rate->mcs]; +} + u32 cfg80211_calculate_bitrate(struct rate_info *rate) { int modulation, streams, bitrate; if (!(rate->flags & RATE_INFO_FLAGS_MCS)) return rate->legacy; + if (rate->flags & RATE_INFO_FLAGS_60G) + return cfg80211_calculate_bitrate_60g(rate); /* the formula below does only work for MCS values smaller than 32 */ if (WARN_ON_ONCE(rate->mcs >= 32)) -- cgit v1.2.3 From f4530fa574df4d833506c53697ed1daa0d390bf4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 5 Jul 2012 22:13:13 -0700 Subject: ipv4: Avoid overhead when no custom FIB rules are installed. If the user hasn't actually installed any custom rules, or fiddled with the default ones, don't go through the whole FIB rules layer. It's just pure overhead. Instead do what we do with CONFIG_IP_MULTIPLE_TABLES disabled, check the individual tables by hand, one by one. Also, move fib_num_tclassid_users into the ipv4 network namespace. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 36 ++++++++++++++++++++++++++++++++---- include/net/netns/ipv4.h | 8 ++++++++ net/ipv4/fib_frontend.c | 27 ++++++++++++++++++++++----- net/ipv4/fib_rules.c | 12 ++++++++---- net/ipv4/fib_semantics.c | 6 +++--- 5 files changed, 73 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 3dc7c96bbeab..539c6721f810 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -220,11 +220,33 @@ extern void __net_exit fib4_rules_exit(struct net *net); extern u32 fib_rules_tclass(const struct fib_result *res); #endif -extern int fib_lookup(struct net *n, struct flowi4 *flp, struct fib_result *res); - extern struct fib_table *fib_new_table(struct net *net, u32 id); extern struct fib_table *fib_get_table(struct net *net, u32 id); +extern int __fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res); + +static inline int fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res) +{ + if (!net->ipv4.fib_has_custom_rules) { + if (net->ipv4.fib_local && + !fib_table_lookup(net->ipv4.fib_local, flp, res, + FIB_LOOKUP_NOREF)) + return 0; + if (net->ipv4.fib_main && + !fib_table_lookup(net->ipv4.fib_main, flp, res, + FIB_LOOKUP_NOREF)) + return 0; + if (net->ipv4.fib_default && + !fib_table_lookup(net->ipv4.fib_default, flp, res, + FIB_LOOKUP_NOREF)) + return 0; + return -ENETUNREACH; + } + return __fib_lookup(net, flp, res); +} + #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ @@ -236,9 +258,15 @@ extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, struct in_device *idev, u32 *itag); extern void fib_select_default(struct fib_result *res); #ifdef CONFIG_IP_ROUTE_CLASSID -extern int fib_num_tclassid_users; +static inline int fib_num_tclassid_users(struct net *net) +{ + return net->ipv4.fib_num_tclassid_users; +} #else -#define fib_num_tclassid_users 0 +static inline int fib_num_tclassid_users(struct net *net) +{ + return 0; +} #endif /* Exported by fib_semantics.c */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 227f0cd9d3f6..599e48fa97cb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -11,6 +11,7 @@ struct ctl_table_header; struct ipv4_devconf; struct fib_rules_ops; struct hlist_head; +struct fib_table; struct sock; struct netns_ipv4 { @@ -24,6 +25,13 @@ struct netns_ipv4 { struct ipv4_devconf *devconf_dflt; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; + bool fib_has_custom_rules; + struct fib_table *fib_local; + struct fib_table *fib_main; + struct fib_table *fib_default; +#endif +#ifdef CONFIG_IP_ROUTE_CLASSID + int fib_num_tclassid_users; #endif struct hlist_head *fib_table_hash; struct sock *fibnl; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3e11ea225dad..81f85716a894 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -86,6 +86,24 @@ struct fib_table *fib_new_table(struct net *net, u32 id) tb = fib_trie_table(id); if (!tb) return NULL; + + switch (id) { + case RT_TABLE_LOCAL: + net->ipv4.fib_local = tb; + break; + + case RT_TABLE_MAIN: + net->ipv4.fib_main = tb; + break; + + case RT_TABLE_DEFAULT: + net->ipv4.fib_default = tb; + break; + + default: + break; + } + h = id & (FIB_TABLE_HASHSZ - 1); hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); return tb; @@ -218,10 +236,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) return inet_select_addr(dev, ip_hdr(skb)->saddr, scope); } -#ifdef CONFIG_IP_ROUTE_CLASSID -int fib_num_tclassid_users __read_mostly; -#endif - /* Given (packet source, input interface) and optional (dst, oif, tos): * - (main) check, that source is valid i.e. not broadcast or our local * address. @@ -312,7 +326,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); - if (!r && !fib_num_tclassid_users) { + if (!r && !fib_num_tclassid_users(dev_net(dev))) { *itag = 0; return 0; } @@ -1134,6 +1148,9 @@ static int __net_init fib_net_init(struct net *net) { int error; +#ifdef CONFIG_IP_ROUTE_CLASSID + net->ipv4.fib_num_tclassid_users = 0; +#endif error = ip_fib_net_init(net); if (error < 0) goto out; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index b23fd952c84f..c06da93b0b70 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -54,7 +54,7 @@ u32 fib_rules_tclass(const struct fib_result *res) } #endif -int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) +int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { struct fib_lookup_arg arg = { .result = res, @@ -67,7 +67,7 @@ int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) return err; } -EXPORT_SYMBOL_GPL(fib_lookup); +EXPORT_SYMBOL_GPL(__fib_lookup); static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) @@ -172,7 +172,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (tb[FRA_FLOW]) { rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); if (rule4->tclassid) - fib_num_tclassid_users++; + net->ipv4.fib_num_tclassid_users++; } #endif @@ -182,6 +182,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->dstmask = inet_make_mask(rule4->dst_len); rule4->tos = frh->tos; + net->ipv4.fib_has_custom_rules = true; err = 0; errout: return err; @@ -189,12 +190,14 @@ errout: static void fib4_rule_delete(struct fib_rule *rule) { + struct net *net = rule->fr_net; #ifdef CONFIG_IP_ROUTE_CLASSID struct fib4_rule *rule4 = (struct fib4_rule *) rule; if (rule4->tclassid) - fib_num_tclassid_users--; + net->ipv4.fib_num_tclassid_users--; #endif + net->ipv4.fib_has_custom_rules = true; } static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, @@ -309,6 +312,7 @@ int __net_init fib4_rules_init(struct net *net) if (err < 0) goto fail; net->ipv4.rules_ops = ops; + net->ipv4.fib_has_custom_rules = false; return 0; fail: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c46c20b6b0b6..ae301c897a19 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -166,7 +166,7 @@ void free_fib_info(struct fib_info *fi) #ifdef CONFIG_IP_ROUTE_CLASSID change_nexthops(fi) { if (nexthop_nh->nh_tclassid) - fib_num_tclassid_users--; + fi->fib_net->ipv4.fib_num_tclassid_users--; } endfor_nexthops(fi); #endif call_rcu(&fi->rcu, free_fib_info_rcu); @@ -428,7 +428,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, nla = nla_find(attrs, attrlen, RTA_FLOW); nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; if (nexthop_nh->nh_tclassid) - fib_num_tclassid_users++; + fi->fib_net->ipv4.fib_num_tclassid_users++; #endif } @@ -824,7 +824,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid = cfg->fc_flow; if (nh->nh_tclassid) - fib_num_tclassid_users++; + fi->fib_net->ipv4.fib_num_tclassid_users++; #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight = 1; -- cgit v1.2.3 From 6bd0405bb4196b44f1acb7a58f11382cdaf6f7f0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Jul 2012 15:42:10 +0200 Subject: netfilter: nf_ct_ecache: fix crash with multiple containers, one shutting down Hans reports that he's still hitting: BUG: unable to handle kernel NULL pointer dereference at 000000000000027c IP: [] netlink_has_listeners+0xb/0x60 PGD 0 Oops: 0000 [#3] PREEMPT SMP CPU 0 It happens when adding a number of containers with do: nfct_query(h, NFCT_Q_CREATE, ct); and most likely one namespace shuts down. this problem was supposed to be fixed by: 70e9942 netfilter: nf_conntrack: make event callback registration per-netns Still, it was missing one rcu_access_pointer to check if the callback is set or not. Reported-by: Hans Schillstrom Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index a88fb6939387..e1ce1048fe5f 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -78,7 +78,7 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *e; - if (net->ct.nf_conntrack_event_cb == NULL) + if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) return; e = nf_ct_ecache_find(ct); -- cgit v1.2.3 From f72b85b8eb6657fae95ac8f5cb20954b4d87a520 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Jul 2012 19:49:54 +0200 Subject: mac80211: remove ieee80211_key_removed This API call was intended to be used by drivers if they want to optimize key handling by removing one key when another is added. Remove it since no driver is using it. If needed, it can always be added back. Signed-off-by: Johannes Berg --- Documentation/DocBook/80211.tmpl | 1 - include/net/mac80211.h | 16 ---------------- net/mac80211/key.c | 20 -------------------- 3 files changed, 37 deletions(-) (limited to 'include/net') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index f3e214f9e256..42e7f030cb16 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -404,7 +404,6 @@ !Finclude/net/mac80211.h ieee80211_get_tkip_p1k !Finclude/net/mac80211.h ieee80211_get_tkip_p1k_iv !Finclude/net/mac80211.h ieee80211_get_tkip_p2k -!Finclude/net/mac80211.h ieee80211_key_removed diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e3fa90ce9ecb..c5dbb46debb0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3591,22 +3591,6 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); void ieee80211_request_smps(struct ieee80211_vif *vif, enum ieee80211_smps_mode smps_mode); -/** - * ieee80211_key_removed - disable hw acceleration for key - * @key_conf: The key hw acceleration should be disabled for - * - * This allows drivers to indicate that the given key has been - * removed from hardware acceleration, due to a new key that - * was added. Don't use this if the key can continue to be used - * for TX, if the key restriction is on RX only it is permitted - * to keep the key for TX only and not call this function. - * - * Due to locking constraints, it may only be called during - * @set_key. This function must be allowed to sleep, and the - * key it tries to disable may still be used until it returns. - */ -void ieee80211_key_removed(struct ieee80211_key_conf *key_conf); - /** * ieee80211_ready_on_channel - notification of remain-on-channel start * @hw: pointer as obtained from ieee80211_alloc_hw() diff --git a/net/mac80211/key.c b/net/mac80211/key.c index b3b7e526e245..7ae678ba5d67 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -194,26 +194,6 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; } -void ieee80211_key_removed(struct ieee80211_key_conf *key_conf) -{ - struct ieee80211_key *key; - - key = container_of(key_conf, struct ieee80211_key, conf); - - might_sleep(); - assert_key_lock(key->local); - - key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - - /* - * Flush TX path to avoid attempts to use this key - * after this function returns. Until then, drivers - * must be prepared to handle the key. - */ - synchronize_rcu(); -} -EXPORT_SYMBOL_GPL(ieee80211_key_removed); - static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, bool uni, bool multi) { -- cgit v1.2.3 From 89a54e48b9cbb44aed1bf6cd712e087b96b6ae65 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jun 2012 14:33:17 +0200 Subject: nl80211: prepare for non-netdev wireless devs In order to support a P2P device abstraction and Bluetooth high-speed AMPs, we need to have a way to identify virtual interfaces that don't have a netdev associated. Do this by adding a NL80211_ATTR_WDEV attribute to identify a wdev which may or may not also be a netdev. To simplify things, use a 64-bit value with the high 32 bits being the wiphy index for this new wdev identifier in the nl80211 API. Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 5 ++ include/net/cfg80211.h | 22 +++++--- net/wireless/core.c | 13 ++--- net/wireless/core.h | 6 +-- net/wireless/nl80211.c | 135 ++++++++++++++++++++++++++++++++++++++---------- net/wireless/sme.c | 4 +- net/wireless/util.c | 6 +-- 7 files changed, 144 insertions(+), 47 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index db961a59247f..e791487ead37 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -771,6 +771,9 @@ enum nl80211_commands { * @NL80211_ATTR_IFNAME: network interface name * @NL80211_ATTR_IFTYPE: type of virtual interface, see &enum nl80211_iftype * + * @NL80211_ATTR_WDEV: wireless device identifier, used for pseudo-devices + * that don't have a netdev (u64) + * * @NL80211_ATTR_MAC: MAC address (various uses) * * @NL80211_ATTR_KEY_DATA: (temporal) key data; for TKIP this consists of @@ -1493,6 +1496,8 @@ enum nl80211_attrs { NL80211_ATTR_BG_SCAN_PERIOD, + NL80211_ATTR_WDEV, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 51f67a9003a9..a14e6a406681 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2341,17 +2341,25 @@ struct cfg80211_internal_bss; struct cfg80211_cached_keys; /** - * struct wireless_dev - wireless per-netdev state + * struct wireless_dev - wireless device state * - * This structure must be allocated by the driver/stack - * that uses the ieee80211_ptr field in struct net_device - * (this is intentional so it can be allocated along with - * the netdev.) + * For netdevs, this structure must be allocated by the driver + * that uses the ieee80211_ptr field in struct net_device (this + * is intentional so it can be allocated along with the netdev.) + * It need not be registered then as netdev registration will + * be intercepted by cfg80211 to see the new wireless device. + * + * For non-netdev uses, it must also be allocated by the driver + * in response to the cfg80211 callbacks that require it, as + * there's no netdev registration in that case it may not be + * allocated outside of callback operations that return it. * * @wiphy: pointer to hardware description * @iftype: interface type * @list: (private) Used to collect the interfaces - * @netdev: (private) Used to reference back to the netdev + * @netdev: (private) Used to reference back to the netdev, may be %NULL + * @identifier: (private) Identifier used in nl80211 to identify this + * wireless device if it has no netdev * @current_bss: (private) Used by the internal configuration code * @channel: (private) Used by the internal configuration code to track * the user-set AP, monitor and WDS channel @@ -2383,6 +2391,8 @@ struct wireless_dev { struct list_head list; struct net_device *netdev; + u32 identifier; + struct list_head mgmt_registrations; spinlock_t mgmt_registrations_lock; diff --git a/net/wireless/core.c b/net/wireless/core.c index e42a97b5b971..b110a8a242db 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -176,7 +176,7 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, if (!(rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK)) return -EOPNOTSUPP; - list_for_each_entry(wdev, &rdev->netdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; err = dev_change_net_namespace(wdev->netdev, net, "wlan%d"); if (err) @@ -188,7 +188,7 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, /* failed -- clean up to old netns */ net = wiphy_net(&rdev->wiphy); - list_for_each_entry_continue_reverse(wdev, &rdev->netdev_list, + list_for_each_entry_continue_reverse(wdev, &rdev->wdev_list, list) { wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; err = dev_change_net_namespace(wdev->netdev, net, @@ -226,7 +226,7 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) rtnl_lock(); mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->netdev_list, list) + list_for_each_entry(wdev, &rdev->wdev_list, list) dev_close(wdev->netdev); mutex_unlock(&rdev->devlist_mtx); @@ -304,7 +304,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) mutex_init(&rdev->mtx); mutex_init(&rdev->devlist_mtx); mutex_init(&rdev->sched_scan_mtx); - INIT_LIST_HEAD(&rdev->netdev_list); + INIT_LIST_HEAD(&rdev->wdev_list); spin_lock_init(&rdev->bss_lock); INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); @@ -622,7 +622,7 @@ void wiphy_unregister(struct wiphy *wiphy) __count == 0; })); mutex_lock(&rdev->devlist_mtx); - BUG_ON(!list_empty(&rdev->netdev_list)); + BUG_ON(!list_empty(&rdev->wdev_list)); mutex_unlock(&rdev->devlist_mtx); /* @@ -821,7 +821,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, spin_lock_init(&wdev->mgmt_registrations_lock); mutex_lock(&rdev->devlist_mtx); - list_add_rcu(&wdev->list, &rdev->netdev_list); + wdev->identifier = ++rdev->wdev_id; + list_add_rcu(&wdev->list, &rdev->wdev_list); rdev->devlist_generation++; /* can only change netns with wiphy */ dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/net/wireless/core.h b/net/wireless/core.h index 377dc394f48c..6b0170a5f05f 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -47,11 +47,11 @@ struct cfg80211_registered_device { /* wiphy index, internal only */ int wiphy_idx; - /* associate netdev list */ + /* associated wireless interfaces */ struct mutex devlist_mtx; /* protected by devlist_mtx or RCU */ - struct list_head netdev_list; - int devlist_generation; + struct list_head wdev_list; + int devlist_generation, wdev_id; int opencount; /* also protected by devlist_mtx */ wait_queue_head_t dev_wait; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2a5cdb60bc6e..35a9b15289f1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -46,28 +46,60 @@ static struct genl_family nl80211_fam = { .post_doit = nl80211_post_doit, }; -/* internal helper: get rdev and dev */ -static int get_rdev_dev_by_ifindex(struct net *netns, struct nlattr **attrs, - struct cfg80211_registered_device **rdev, - struct net_device **dev) +/* returns ERR_PTR values */ +static struct wireless_dev * +__cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) { - int ifindex; + struct cfg80211_registered_device *rdev; + struct wireless_dev *result = NULL; + bool have_ifidx = attrs[NL80211_ATTR_IFINDEX]; + bool have_wdev_id = attrs[NL80211_ATTR_WDEV]; + u64 wdev_id; + int wiphy_idx = -1; + int ifidx = -1; - if (!attrs[NL80211_ATTR_IFINDEX]) - return -EINVAL; + assert_cfg80211_lock(); - ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); - *dev = dev_get_by_index(netns, ifindex); - if (!*dev) - return -ENODEV; + if (!have_ifidx && !have_wdev_id) + return ERR_PTR(-EINVAL); - *rdev = cfg80211_get_dev_from_ifindex(netns, ifindex); - if (IS_ERR(*rdev)) { - dev_put(*dev); - return PTR_ERR(*rdev); + if (have_ifidx) + ifidx = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); + if (have_wdev_id) { + wdev_id = nla_get_u64(attrs[NL80211_ATTR_WDEV]); + wiphy_idx = wdev_id >> 32; } - return 0; + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + struct wireless_dev *wdev; + + if (wiphy_net(&rdev->wiphy) != netns) + continue; + + if (have_wdev_id && rdev->wiphy_idx != wiphy_idx) + continue; + + mutex_lock(&rdev->devlist_mtx); + list_for_each_entry(wdev, &rdev->wdev_list, list) { + if (have_ifidx && wdev->netdev && + wdev->netdev->ifindex == ifidx) { + result = wdev; + break; + } + if (have_wdev_id && wdev->identifier == (u32)wdev_id) { + result = wdev; + break; + } + } + mutex_unlock(&rdev->devlist_mtx); + + if (result) + break; + } + + if (result) + return result; + return ERR_PTR(-ENODEV); } static struct cfg80211_registered_device * @@ -79,13 +111,40 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) assert_cfg80211_lock(); if (!attrs[NL80211_ATTR_WIPHY] && - !attrs[NL80211_ATTR_IFINDEX]) + !attrs[NL80211_ATTR_IFINDEX] && + !attrs[NL80211_ATTR_WDEV]) return ERR_PTR(-EINVAL); if (attrs[NL80211_ATTR_WIPHY]) rdev = cfg80211_rdev_by_wiphy_idx( nla_get_u32(attrs[NL80211_ATTR_WIPHY])); + if (attrs[NL80211_ATTR_WDEV]) { + u64 wdev_id = nla_get_u64(attrs[NL80211_ATTR_WDEV]); + struct wireless_dev *wdev; + bool found = false; + + tmp = cfg80211_rdev_by_wiphy_idx(wdev_id >> 32); + if (tmp) { + /* make sure wdev exists */ + mutex_lock(&tmp->devlist_mtx); + list_for_each_entry(wdev, &tmp->wdev_list, list) { + if (wdev->identifier != (u32)wdev_id) + continue; + found = true; + break; + } + mutex_unlock(&tmp->devlist_mtx); + + if (!found) + tmp = NULL; + + if (rdev && tmp != rdev) + return ERR_PTR(-EINVAL); + rdev = tmp; + } + } + if (attrs[NL80211_ATTR_IFINDEX]) { int ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); netdev = dev_get_by_index(netns, ifindex); @@ -294,6 +353,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_NOACK_MAP] = { .type = NLA_U16 }, [NL80211_ATTR_INACTIVITY_TIMEOUT] = { .type = NLA_U16 }, [NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 }, + [NL80211_ATTR_WDEV] = { .type = NLA_U64 }, }; /* policy for the key attributes */ @@ -1674,6 +1734,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct net_device *dev) { void *hdr; + u64 wdev_id = (u64)dev->ieee80211_ptr->identifier | + ((u64)rdev->wiphy_idx << 32); hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE); if (!hdr) @@ -1684,6 +1746,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_put_string(msg, NL80211_ATTR_IFNAME, dev->name) || nla_put_u32(msg, NL80211_ATTR_IFTYPE, dev->ieee80211_ptr->iftype) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id) || nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ (cfg80211_rdev_list_generation << 2))) @@ -1724,7 +1787,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx = 0; mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->netdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { if (if_idx < if_start) { if_idx++; continue; @@ -2350,7 +2413,7 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->netdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) continue; @@ -6660,8 +6723,8 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; struct net_device *dev; - int err; bool rtnl = ops->internal_flags & NL80211_FLAG_NEED_RTNL; if (rtnl) @@ -6676,21 +6739,39 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, } info->user_ptr[0] = rdev; } else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) { - err = get_rdev_dev_by_ifindex(genl_info_net(info), info->attrs, - &rdev, &dev); - if (err) { + mutex_lock(&cfg80211_mutex); + wdev = __cfg80211_wdev_from_attrs(genl_info_net(info), + info->attrs); + if (IS_ERR(wdev)) { + mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); - return err; + return PTR_ERR(wdev); } + + if (!wdev->netdev) { + mutex_unlock(&cfg80211_mutex); + if (rtnl) + rtnl_unlock(); + return -EINVAL; + } + + dev = wdev->netdev; + rdev = wiphy_to_dev(wdev->wiphy); + if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && !netif_running(dev)) { - cfg80211_unlock_rdev(rdev); - dev_put(dev); + mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -ENETDOWN; } + + dev_hold(dev); + cfg80211_lock_rdev(rdev); + + mutex_unlock(&cfg80211_mutex); + info->user_ptr[0] = rdev; info->user_ptr[1] = dev; } @@ -8483,7 +8564,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb, rcu_read_lock(); list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { - list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) + list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) cfg80211_mlme_unregister_socket(wdev, notify->pid); if (rdev->ap_beacons_nlpid == notify->pid) rdev->ap_beacons_nlpid = 0; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f7e937ff8978..dec97981e689 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -51,7 +51,7 @@ static bool cfg80211_is_all_idle(void) */ list_for_each_entry(rdev, &cfg80211_rdev_list, list) { cfg80211_lock_rdev(rdev); - list_for_each_entry(wdev, &rdev->netdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) is_all_idle = false; @@ -221,7 +221,7 @@ void cfg80211_conn_work(struct work_struct *work) cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->netdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev_lock(wdev); if (!netif_running(wdev->netdev)) { wdev_unlock(wdev); diff --git a/net/wireless/util.c b/net/wireless/util.c index e31f1dba79ec..f7a0647bde9a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -793,7 +793,7 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev) mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->netdev_list, list) + list_for_each_entry(wdev, &rdev->wdev_list, list) cfg80211_process_wdev_events(wdev); mutex_unlock(&rdev->devlist_mtx); @@ -994,7 +994,7 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->netdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { if (!wdev->beacon_interval) continue; if (wdev->beacon_interval != beacon_int) { @@ -1050,7 +1050,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, break; } - list_for_each_entry(wdev_iter, &rdev->netdev_list, list) { + list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { if (wdev_iter == wdev) continue; if (!netif_running(wdev_iter->netdev)) -- cgit v1.2.3 From 71bbc9943883cffaf5d7a7728a4e4c50b3ac44d3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jun 2012 15:30:18 +0200 Subject: cfg80211: use wdev in mgmt-tx/ROC APIs The management frame and remain-on-channel APIs will be needed in the P2P device abstraction, so move them over to the new wdev-based APIs. Userspace can still use both the interface index and wdev identifier for them so it's backward compatible, but for the P2P Device wdev it will be able to use the wdev identifier only. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 22 +++--- drivers/net/wireless/ath/ath6kl/core.h | 5 ++ drivers/net/wireless/ath/ath6kl/wmi.c | 10 +-- include/net/cfg80211.h | 27 ++++--- net/mac80211/cfg.c | 21 +++-- net/mac80211/ieee80211_i.h | 6 ++ net/mac80211/offchannel.c | 6 +- net/mac80211/rx.c | 2 +- net/mac80211/status.c | 9 ++- net/wireless/core.h | 2 +- net/wireless/mlme.c | 34 ++++----- net/wireless/nl80211.c | 119 +++++++++++++++++------------ net/wireless/nl80211.h | 10 +-- 13 files changed, 152 insertions(+), 121 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index aca1d2689e90..5f0c66bb6bdf 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -2975,14 +2975,14 @@ static int ath6kl_change_station(struct wiphy *wiphy, struct net_device *dev, } static int ath6kl_remain_on_channel(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { - struct ath6kl *ar = ath6kl_priv(dev); - struct ath6kl_vif *vif = netdev_priv(dev); + struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); + struct ath6kl *ar = ath6kl_priv(vif->ndev); u32 id; /* TODO: if already pending or ongoing remain-on-channel, @@ -2999,11 +2999,11 @@ static int ath6kl_remain_on_channel(struct wiphy *wiphy, } static int ath6kl_cancel_remain_on_channel(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u64 cookie) { - struct ath6kl *ar = ath6kl_priv(dev); - struct ath6kl_vif *vif = netdev_priv(dev); + struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); + struct ath6kl *ar = ath6kl_priv(vif->ndev); if (cookie != vif->last_roc_id) return -ENOENT; @@ -3134,15 +3134,15 @@ static bool ath6kl_is_p2p_go_ssid(const u8 *buf, size_t len) return false; } -static int ath6kl_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, +static int ath6kl_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, bool no_cck, bool dont_wait_for_ack, u64 *cookie) { - struct ath6kl *ar = ath6kl_priv(dev); - struct ath6kl_vif *vif = netdev_priv(dev); + struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); + struct ath6kl *ar = ath6kl_priv(vif->ndev); u32 id; const struct ieee80211_mgmt *mgmt; bool more_data, queued; @@ -3187,10 +3187,10 @@ static int ath6kl_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, } static void ath6kl_mgmt_frame_register(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u16 frame_type, bool reg) { - struct ath6kl_vif *vif = netdev_priv(dev); + struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: frame_type=0x%x reg=%d\n", __func__, frame_type, reg); diff --git a/drivers/net/wireless/ath/ath6kl/core.h b/drivers/net/wireless/ath/ath6kl/core.h index d38a31de344c..cec49a31029a 100644 --- a/drivers/net/wireless/ath/ath6kl/core.h +++ b/drivers/net/wireless/ath/ath6kl/core.h @@ -589,6 +589,11 @@ struct ath6kl_vif { struct list_head mc_filter; }; +static inline struct ath6kl_vif *ath6kl_vif_from_wdev(struct wireless_dev *wdev) +{ + return container_of(wdev, struct ath6kl_vif, wdev); +} + #define WOW_LIST_ID 0 #define WOW_HOST_REQ_DELAY 500 /* ms */ diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index a6caa673e8ad..c30ab4b11d61 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -474,7 +474,7 @@ static int ath6kl_wmi_remain_on_chnl_event_rx(struct wmi *wmi, u8 *datap, return -EINVAL; } id = vif->last_roc_id; - cfg80211_ready_on_channel(vif->ndev, id, chan, NL80211_CHAN_NO_HT, + cfg80211_ready_on_channel(&vif->wdev, id, chan, NL80211_CHAN_NO_HT, dur, GFP_ATOMIC); return 0; @@ -513,7 +513,7 @@ static int ath6kl_wmi_cancel_remain_on_chnl_event_rx(struct wmi *wmi, else id = vif->last_roc_id; /* timeout on uncanceled r-o-c */ vif->last_cancel_roc_id = 0; - cfg80211_remain_on_channel_expired(vif->ndev, id, chan, + cfg80211_remain_on_channel_expired(&vif->wdev, id, chan, NL80211_CHAN_NO_HT, GFP_ATOMIC); return 0; @@ -533,7 +533,7 @@ static int ath6kl_wmi_tx_status_event_rx(struct wmi *wmi, u8 *datap, int len, ath6kl_dbg(ATH6KL_DBG_WMI, "tx_status: id=%x ack_status=%u\n", id, ev->ack_status); if (wmi->last_mgmt_tx_frame) { - cfg80211_mgmt_tx_status(vif->ndev, id, + cfg80211_mgmt_tx_status(&vif->wdev, id, wmi->last_mgmt_tx_frame, wmi->last_mgmt_tx_frame_len, !!ev->ack_status, GFP_ATOMIC); @@ -568,7 +568,7 @@ static int ath6kl_wmi_rx_probe_req_event_rx(struct wmi *wmi, u8 *datap, int len, dlen, freq, vif->probe_req_report); if (vif->probe_req_report || vif->nw_type == AP_NETWORK) - cfg80211_rx_mgmt(vif->ndev, freq, 0, + cfg80211_rx_mgmt(&vif->wdev, freq, 0, ev->data, dlen, GFP_ATOMIC); return 0; @@ -608,7 +608,7 @@ static int ath6kl_wmi_rx_action_event_rx(struct wmi *wmi, u8 *datap, int len, return -EINVAL; } ath6kl_dbg(ATH6KL_DBG_WMI, "rx_action: len=%u freq=%u\n", dlen, freq); - cfg80211_rx_mgmt(vif->ndev, freq, 0, + cfg80211_rx_mgmt(&vif->wdev, freq, 0, ev->data, dlen, GFP_ATOMIC); return 0; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a14e6a406681..7eaaee7bb07d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1753,23 +1753,23 @@ struct cfg80211_ops { int (*flush_pmksa)(struct wiphy *wiphy, struct net_device *netdev); int (*remain_on_channel)(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie); int (*cancel_remain_on_channel)(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u64 cookie); - int (*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev, + int (*mgmt_tx)(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, bool no_cck, bool dont_wait_for_ack, u64 *cookie); int (*mgmt_tx_cancel_wait)(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u64 cookie); int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, @@ -1780,7 +1780,7 @@ struct cfg80211_ops { s32 rssi_thold, u32 rssi_hyst); void (*mgmt_frame_register)(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u16 frame_type, bool reg); int (*set_antenna)(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant); @@ -3279,7 +3279,7 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, /** * cfg80211_ready_on_channel - notification of remain_on_channel start - * @dev: network device + * @wdev: wireless device * @cookie: the request cookie * @chan: The current channel (from remain_on_channel request) * @channel_type: Channel type @@ -3287,21 +3287,20 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, * channel * @gfp: allocation flags */ -void cfg80211_ready_on_channel(struct net_device *dev, u64 cookie, +void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp); /** * cfg80211_remain_on_channel_expired - remain_on_channel duration expired - * @dev: network device + * @wdev: wireless device * @cookie: the request cookie * @chan: The current channel (from remain_on_channel request) * @channel_type: Channel type * @gfp: allocation flags */ -void cfg80211_remain_on_channel_expired(struct net_device *dev, - u64 cookie, +void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, gfp_t gfp); @@ -3329,7 +3328,7 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp); /** * cfg80211_rx_mgmt - notification of received, unprocessed management frame - * @dev: network device + * @wdev: wireless device receiving the frame * @freq: Frequency on which the frame was received in MHz * @sig_dbm: signal strength in mBm, or 0 if unknown * @buf: Management frame (header + body) @@ -3344,12 +3343,12 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp); * This function is called whenever an Action frame is received for a station * mode interface, but is not processed in kernel. */ -bool cfg80211_rx_mgmt(struct net_device *dev, int freq, int sig_dbm, +bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp); /** * cfg80211_mgmt_tx_status - notification of TX status for management frame - * @dev: network device + * @wdev: wireless device receiving the frame * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() * @buf: Management frame (header + body) * @len: length of the frame data @@ -3360,7 +3359,7 @@ bool cfg80211_rx_mgmt(struct net_device *dev, int freq, int sig_dbm, * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the * transmission attempt. */ -void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, +void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6a171e299b57..7d9abea37b17 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2299,13 +2299,13 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, } static int ieee80211_remain_on_channel(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; int ret; @@ -2392,23 +2392,23 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, } static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u64 cookie) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; return ieee80211_cancel_roc(local, cookie, false); } -static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, bool no_cck, bool dont_wait_for_ack, u64 *cookie) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct sta_info *sta; @@ -2513,21 +2513,20 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u64 cookie) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; + struct ieee80211_local *local = wiphy_priv(wiphy); return ieee80211_cancel_roc(local, cookie, true); } static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u16 frame_type, bool reg) { struct ieee80211_local *local = wiphy_priv(wiphy); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); switch (frame_type) { case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e0423f8c0ce1..8f8535ee5995 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1091,6 +1091,12 @@ IEEE80211_DEV_TO_SUB_IF(struct net_device *dev) return netdev_priv(dev); } +static inline struct ieee80211_sub_if_data * +IEEE80211_WDEV_TO_SUB_IF(struct wireless_dev *wdev) +{ + return container_of(wdev, struct ieee80211_sub_if_data, wdev); +} + /* this struct represents 802.11n's RA/TID combination */ struct ieee80211_ra_tid { u8 ra[ETH_ALEN]; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index b0fb6a2b89ad..8c047fc8b325 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -191,7 +191,7 @@ void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) roc->frame = NULL; } } else { - cfg80211_ready_on_channel(roc->sdata->dev, (unsigned long)roc, + cfg80211_ready_on_channel(&roc->sdata->wdev, (unsigned long)roc, roc->chan, roc->chan_type, roc->req_duration, GFP_KERNEL); } @@ -299,7 +299,7 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) /* was never transmitted */ if (roc->frame) { - cfg80211_mgmt_tx_status(roc->sdata->dev, + cfg80211_mgmt_tx_status(&roc->sdata->wdev, (unsigned long)roc->frame, roc->frame->data, roc->frame->len, false, GFP_KERNEL); @@ -307,7 +307,7 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) } if (!roc->mgmt_tx_cookie) - cfg80211_remain_on_channel_expired(roc->sdata->dev, + cfg80211_remain_on_channel_expired(&roc->sdata->wdev, (unsigned long)roc, roc->chan, roc->chan_type, GFP_KERNEL); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ab5185054e6c..f8cf9e7477a3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2404,7 +2404,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) if (rx->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) sig = status->signal; - if (cfg80211_rx_mgmt(rx->sdata->dev, status->freq, sig, + if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig, rx->skb->data, rx->skb->len, GFP_ATOMIC)) { if (rx->sta) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 2ed2f27fe8a7..8cd72914cdaf 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -519,14 +519,19 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) u64 cookie = (unsigned long)skb; acked = info->flags & IEEE80211_TX_STAT_ACK; + /* + * TODO: When we have non-netdev frame TX, + * we cannot use skb->dev->ieee80211_ptr + */ + if (ieee80211_is_nullfunc(hdr->frame_control) || ieee80211_is_qos_nullfunc(hdr->frame_control)) cfg80211_probe_status(skb->dev, hdr->addr1, cookie, acked, GFP_ATOMIC); else cfg80211_mgmt_tx_status( - skb->dev, cookie, skb->data, skb->len, - acked, GFP_ATOMIC); + skb->dev->ieee80211_ptr, cookie, skb->data, + skb->len, acked, GFP_ATOMIC); } if (unlikely(info->ack_frame_id)) { diff --git a/net/wireless/core.h b/net/wireless/core.h index 6b0170a5f05f..eae5a25a1691 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -372,7 +372,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, - struct net_device *dev, + struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index d4fece3bb18a..abe9f82d5a82 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -567,29 +567,28 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, } } -void cfg80211_ready_on_channel(struct net_device *dev, u64 cookie, +void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_remain_on_channel(rdev, dev, cookie, chan, channel_type, + nl80211_send_remain_on_channel(rdev, wdev, cookie, chan, channel_type, duration, gfp); } EXPORT_SYMBOL(cfg80211_ready_on_channel); -void cfg80211_remain_on_channel_expired(struct net_device *dev, - u64 cookie, +void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_remain_on_channel_cancel(rdev, dev, cookie, chan, + nl80211_send_remain_on_channel_cancel(rdev, wdev, cookie, chan, channel_type, gfp); } EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); @@ -678,8 +677,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, list_add(&nreg->list, &wdev->mgmt_registrations); if (rdev->ops->mgmt_frame_register) - rdev->ops->mgmt_frame_register(wiphy, wdev->netdev, - frame_type, true); + rdev->ops->mgmt_frame_register(wiphy, wdev, frame_type, true); out: spin_unlock_bh(&wdev->mgmt_registrations_lock); @@ -702,7 +700,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) if (rdev->ops->mgmt_frame_register) { u16 frame_type = le16_to_cpu(reg->frame_type); - rdev->ops->mgmt_frame_register(wiphy, wdev->netdev, + rdev->ops->mgmt_frame_register(wiphy, wdev, frame_type, false); } @@ -731,14 +729,14 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) } int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, - struct net_device *dev, + struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, bool no_cck, bool dont_wait_for_ack, u64 *cookie) { - struct wireless_dev *wdev = dev->ieee80211_ptr; + struct net_device *dev = wdev->netdev; const struct ieee80211_mgmt *mgmt; u16 stype; @@ -825,16 +823,15 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return -EINVAL; /* Transmit the Action frame as requested by user space */ - return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, offchan, + return rdev->ops->mgmt_tx(&rdev->wiphy, wdev, chan, offchan, channel_type, channel_type_valid, wait, buf, len, no_cck, dont_wait_for_ack, cookie); } -bool cfg80211_rx_mgmt(struct net_device *dev, int freq, int sig_mbm, +bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, const u8 *buf, size_t len, gfp_t gfp) { - struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct cfg80211_mgmt_registration *reg; @@ -871,7 +868,7 @@ bool cfg80211_rx_mgmt(struct net_device *dev, int freq, int sig_mbm, /* found match! */ /* Indicate the received Action frame to user space */ - if (nl80211_send_mgmt(rdev, dev, reg->nlpid, + if (nl80211_send_mgmt(rdev, wdev, reg->nlpid, freq, sig_mbm, buf, len, gfp)) continue; @@ -886,15 +883,14 @@ bool cfg80211_rx_mgmt(struct net_device *dev, int freq, int sig_mbm, } EXPORT_SYMBOL(cfg80211_rx_mgmt); -void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, +void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp) { - struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); /* Indicate TX status of the Action frame to user space */ - nl80211_send_mgmt_tx_status(rdev, dev, cookie, buf, len, ack, gfp); + nl80211_send_mgmt_tx_status(rdev, wdev, cookie, buf, len, ack, gfp); } EXPORT_SYMBOL(cfg80211_mgmt_tx_status); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5800c49d6942..0dc3356eea40 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1728,6 +1728,11 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) return result; } +static inline u64 wdev_id(struct wireless_dev *wdev) +{ + return (u64)wdev->identifier | + ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32); +} static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -1735,8 +1740,6 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, { struct net_device *dev = wdev->netdev; void *hdr; - u64 wdev_id = (u64)wdev->identifier | - ((u64)rdev->wiphy_idx << 32); hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE); if (!hdr) @@ -1750,7 +1753,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFTYPE, wdev->iftype) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ (cfg80211_rdev_list_generation << 2))) @@ -5752,7 +5755,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; struct ieee80211_channel *chan; struct sk_buff *msg; void *hdr; @@ -5800,7 +5803,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, goto free_msg; } - err = rdev->ops->remain_on_channel(&rdev->wiphy, dev, chan, + err = rdev->ops->remain_on_channel(&rdev->wiphy, wdev, chan, channel_type, duration, &cookie); if (err) @@ -5824,7 +5827,7 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; u64 cookie; if (!info->attrs[NL80211_ATTR_COOKIE]) @@ -5835,7 +5838,7 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); - return rdev->ops->cancel_remain_on_channel(&rdev->wiphy, dev, cookie); + return rdev->ops->cancel_remain_on_channel(&rdev->wiphy, wdev, cookie); } static u32 rateset_to_mask(struct ieee80211_supported_band *sband, @@ -5984,7 +5987,7 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION; if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) @@ -5993,21 +5996,24 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_FRAME_TYPE]) frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]); - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_GO: + break; + default: return -EOPNOTSUPP; + } /* not much point in registering if we can't reply */ if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; - return cfg80211_mlme_register_mgmt(dev->ieee80211_ptr, info->snd_pid, - frame_type, + return cfg80211_mlme_register_mgmt(wdev, info->snd_pid, frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); } @@ -6015,7 +6021,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; struct ieee80211_channel *chan; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; bool channel_type_valid = false; @@ -6036,14 +6042,18 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_GO: + break; + default: return -EOPNOTSUPP; + } if (info->attrs[NL80211_ATTR_DURATION]) { if (!(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX)) @@ -6092,7 +6102,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) } } - err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, offchan, channel_type, + err = cfg80211_mlme_mgmt_tx(rdev, wdev, chan, offchan, channel_type, channel_type_valid, wait, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), @@ -6120,7 +6130,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; u64 cookie; if (!info->attrs[NL80211_ATTR_COOKIE]) @@ -6129,17 +6139,21 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in if (!rdev->ops->mgmt_tx_cancel_wait) return -EOPNOTSUPP; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_P2P_GO: + break; + default: return -EOPNOTSUPP; + } cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); - return rdev->ops->mgmt_tx_cancel_wait(&rdev->wiphy, dev, cookie); + return rdev->ops->mgmt_tx_cancel_wait(&rdev->wiphy, wdev, cookie); } static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info) @@ -7172,7 +7186,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_remain_on_channel, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -7180,7 +7194,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_cancel_remain_on_channel, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -7196,7 +7210,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_register_mgmt, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, }, { @@ -7204,7 +7218,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_tx_mgmt, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -7212,7 +7226,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_tx_mgmt_cancel_wait, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -8040,7 +8054,7 @@ nla_put_failure: static void nl80211_send_remain_on_chan_event( int cmd, struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, + struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp) @@ -8059,7 +8073,9 @@ static void nl80211_send_remain_on_chan_event( } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) || + nla_put_u32(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, chan->center_freq) || nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, channel_type) || nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie)) @@ -8081,23 +8097,24 @@ static void nl80211_send_remain_on_chan_event( } void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, + struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp) { nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, - rdev, netdev, cookie, chan, + rdev, wdev, cookie, chan, channel_type, duration, gfp); } void nl80211_send_remain_on_channel_cancel( - struct cfg80211_registered_device *rdev, struct net_device *netdev, + struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, gfp_t gfp) { nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, - rdev, netdev, cookie, chan, + rdev, wdev, cookie, chan, channel_type, 0, gfp); } @@ -8211,10 +8228,11 @@ bool nl80211_unexpected_4addr_frame(struct net_device *dev, } int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 nlpid, + struct wireless_dev *wdev, u32 nlpid, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp) { + struct net_device *netdev = wdev->netdev; struct sk_buff *msg; void *hdr; @@ -8229,7 +8247,8 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + netdev->ifindex)) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) || (sig_dbm && nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) || @@ -8247,10 +8266,11 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, } void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, + struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp) { + struct net_device *netdev = wdev->netdev; struct sk_buff *msg; void *hdr; @@ -8265,7 +8285,8 @@ void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + netdev->ifindex)) || nla_put(msg, NL80211_ATTR_FRAME, len, buf) || nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie) || (ack && nla_put_flag(msg, NL80211_ATTR_ACK))) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 01a1122c3b33..0469303b5c3c 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -74,13 +74,13 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, gfp_t gfp); void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - u64 cookie, + struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp); void nl80211_send_remain_on_channel_cancel( - struct cfg80211_registered_device *rdev, struct net_device *netdev, + struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, gfp_t gfp); @@ -92,11 +92,11 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, gfp_t gfp); int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 nlpid, + struct wireless_dev *wdev, u32 nlpid, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, + struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp); -- cgit v1.2.3 From a9a741a7e2e6337ae5c030e78827c233c08902a7 Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Mon, 30 Apr 2012 18:21:51 +0200 Subject: NFC: Prepare asynchronous error management for driver and shdlc Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 2 ++ net/nfc/hci/core.c | 8 ++++++++ net/nfc/hci/shdlc.c | 19 +++++++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index e30e6a869714..d25dd9b99877 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -112,6 +112,8 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev); void nfc_hci_set_clientdata(struct nfc_hci_dev *hdev, void *clientdata); void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev); +void nfc_hci_driver_failure(struct nfc_hci_dev *hdev, int err); + /* Host IDs */ #define NFC_HCI_HOST_CONTROLLER_ID 0x00 #define NFC_HCI_TERMINAL_HOST_ID 0x01 diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index a8b0b71e8f86..1dc6485343b9 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -717,6 +717,14 @@ void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev) } EXPORT_SYMBOL(nfc_hci_get_clientdata); +void nfc_hci_driver_failure(struct nfc_hci_dev *hdev, int err) +{ + /* TODO: lower layer has permanent failure. + * complete potential HCI command or send an empty tag discovered event + */ +} +EXPORT_SYMBOL(nfc_hci_driver_failure); + void nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb) { struct hcp_packet *packet; diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c index 6b836e6242b7..d7c74d152a72 100644 --- a/net/nfc/hci/shdlc.c +++ b/net/nfc/hci/shdlc.c @@ -523,10 +523,6 @@ static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc) r = shdlc->ops->xmit(shdlc, skb); if (r < 0) { - /* - * TODO: Cannot send, shdlc machine is dead, we - * must propagate the information up to HCI. - */ shdlc->hard_fault = r; break; } @@ -590,6 +586,11 @@ static void nfc_shdlc_sm_work(struct work_struct *work) skb_queue_purge(&shdlc->ack_pending_q); break; case SHDLC_CONNECTING: + if (shdlc->hard_fault) { + nfc_shdlc_connect_complete(shdlc, shdlc->hard_fault); + break; + } + if (shdlc->connect_tries++ < 5) r = nfc_shdlc_connect_initiate(shdlc); else @@ -610,6 +611,11 @@ static void nfc_shdlc_sm_work(struct work_struct *work) } nfc_shdlc_handle_rcv_queue(shdlc); + + if (shdlc->hard_fault) { + nfc_shdlc_connect_complete(shdlc, shdlc->hard_fault); + break; + } break; case SHDLC_CONNECTED: nfc_shdlc_handle_rcv_queue(shdlc); @@ -637,10 +643,7 @@ static void nfc_shdlc_sm_work(struct work_struct *work) } if (shdlc->hard_fault) { - /* - * TODO: Handle hard_fault that occured during - * this invocation of the shdlc worker - */ + nfc_hci_driver_failure(shdlc->hdev, shdlc->hard_fault); } break; default: -- cgit v1.2.3 From 456411ca812860d7ba06d3e4013ce1d8b9dbc7cd Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Mon, 11 Jun 2012 13:49:51 +0200 Subject: NFC: Driver failure API This API should be used by drivers, HCI, SHDLC or NCI stacks to report an unrecoverable error. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 2 ++ net/nfc/core.c | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 180964b954ab..6431f5e39022 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -204,4 +204,6 @@ int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, int nfc_tm_deactivated(struct nfc_dev *dev); int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb); +void nfc_driver_failure(struct nfc_dev *dev, int err); + #endif /* __NET_NFC_H */ diff --git a/net/nfc/core.c b/net/nfc/core.c index 4177bb5104b9..32f28326b623 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -651,6 +651,16 @@ int nfc_target_lost(struct nfc_dev *dev, u32 target_idx) } EXPORT_SYMBOL(nfc_target_lost); +void nfc_driver_failure(struct nfc_dev *dev, int err) +{ + /* + * TODO: if polling is active, send empty target_found + * or else do whatever makes sense to let user space + * know this device needs to be closed and reinitialized. + */ +} +EXPORT_SYMBOL(nfc_driver_failure); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); -- cgit v1.2.3 From a10d595b1074d04446f77161eea165e5809e163c Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Tue, 5 Jun 2012 14:42:11 +0200 Subject: NFC: Allow HCI driver to pre-open pipes to some gates Some NFC chips will statically create and open pipes for both standard and proprietary gates. The driver can now pass this information to HCI such that HCI will not attempt to create and open them, but will instead directly use the passed pipe ids. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- drivers/nfc/pn544_hci.c | 31 ++++++++++++++++++------------- include/net/nfc/hci.h | 17 ++++++++++++++--- net/nfc/hci/command.c | 8 ++++++-- net/nfc/hci/core.c | 23 +++++++++-------------- net/nfc/hci/hci.h | 5 ----- 5 files changed, 47 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/pn544_hci.c b/drivers/nfc/pn544_hci.c index 69df6fecb847..c67b55e224e0 100644 --- a/drivers/nfc/pn544_hci.c +++ b/drivers/nfc/pn544_hci.c @@ -108,16 +108,22 @@ enum pn544_state { #define PN544_NFC_WI_MGMT_GATE 0xA1 -static u8 pn544_custom_gates[] = { - PN544_SYS_MGMT_GATE, - PN544_SWP_MGMT_GATE, - PN544_POLLING_LOOP_MGMT_GATE, - PN544_NFC_WI_MGMT_GATE, - PN544_RF_READER_F_GATE, - PN544_RF_READER_JEWEL_GATE, - PN544_RF_READER_ISO15693_GATE, - PN544_RF_READER_NFCIP1_INITIATOR_GATE, - PN544_RF_READER_NFCIP1_TARGET_GATE +static struct nfc_hci_gate pn544_gates[] = { + {NFC_HCI_ADMIN_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_LOOPBACK_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_ID_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_LINK_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_RF_READER_B_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_RF_READER_A_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_SYS_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_SWP_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_POLLING_LOOP_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_NFC_WI_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_RF_READER_F_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_RF_READER_JEWEL_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_RF_READER_ISO15693_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_RF_READER_NFCIP1_INITIATOR_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_RF_READER_NFCIP1_TARGET_GATE, NFC_HCI_INVALID_PIPE} }; /* Largest headroom needed for outgoing custom commands */ @@ -849,10 +855,9 @@ static int __devinit pn544_hci_probe(struct i2c_client *client, goto err_rti; } - init_data.gate_count = ARRAY_SIZE(pn544_custom_gates); + init_data.gate_count = ARRAY_SIZE(pn544_gates); - memcpy(init_data.gates, pn544_custom_gates, - ARRAY_SIZE(pn544_custom_gates)); + memcpy(init_data.gates, pn544_gates, sizeof(pn544_gates)); /* * TODO: Session id must include the driver name + some bus addr diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index d25dd9b99877..f5169b04f082 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -44,10 +44,20 @@ struct nfc_hci_ops { struct nfc_target *target); }; -#define NFC_HCI_MAX_CUSTOM_GATES 15 +/* Pipes */ +#define NFC_HCI_INVALID_PIPE 0x80 +#define NFC_HCI_LINK_MGMT_PIPE 0x00 +#define NFC_HCI_ADMIN_PIPE 0x01 + +struct nfc_hci_gate { + u8 gate; + u8 pipe; +}; + +#define NFC_HCI_MAX_CUSTOM_GATES 50 struct nfc_hci_init_data { u8 gate_count; - u8 gates[NFC_HCI_MAX_CUSTOM_GATES]; + struct nfc_hci_gate gates[NFC_HCI_MAX_CUSTOM_GATES]; char session_id[9]; }; @@ -182,7 +192,8 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, void nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb); /* connecting to gates and sending hci instructions */ -int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate); +int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate, + u8 pipe); int nfc_hci_disconnect_gate(struct nfc_hci_dev *hdev, u8 gate); int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev); int nfc_hci_get_param(struct nfc_hci_dev *hdev, u8 gate, u8 idx, diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 12cd6f3f77ec..46362ef979db 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -299,9 +299,9 @@ int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev) } EXPORT_SYMBOL(nfc_hci_disconnect_all_gates); -int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate) +int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate, + u8 pipe) { - u8 pipe = NFC_HCI_INVALID_PIPE; bool pipe_created = false; int r; @@ -310,6 +310,9 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate) if (hdev->gate2pipe[dest_gate] != NFC_HCI_INVALID_PIPE) return -EADDRINUSE; + if (pipe != NFC_HCI_INVALID_PIPE) + goto pipe_is_open; + switch (dest_gate) { case NFC_HCI_LINK_MGMT_GATE: pipe = NFC_HCI_LINK_MGMT_PIPE; @@ -335,6 +338,7 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate) return r; } +pipe_is_open: hdev->gate2pipe[dest_gate] = pipe; return 0; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index e6b2df3981b6..4ccc518f56eb 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -315,15 +315,15 @@ static void nfc_hci_cmd_timeout(unsigned long data) } static int hci_dev_connect_gates(struct nfc_hci_dev *hdev, u8 gate_count, - u8 gates[]) + struct nfc_hci_gate *gates) { int r; - u8 *p = gates; while (gate_count--) { - r = nfc_hci_connect_gate(hdev, NFC_HCI_HOST_CONTROLLER_ID, *p); + r = nfc_hci_connect_gate(hdev, NFC_HCI_HOST_CONTROLLER_ID, + gates->gate, gates->pipe); if (r < 0) return r; - p++; + gates++; } return 0; @@ -333,14 +333,13 @@ static int hci_dev_session_init(struct nfc_hci_dev *hdev) { struct sk_buff *skb = NULL; int r; - u8 hci_gates[] = { /* NFC_HCI_ADMIN_GATE MUST be first */ - NFC_HCI_ADMIN_GATE, NFC_HCI_LOOPBACK_GATE, - NFC_HCI_ID_MGMT_GATE, NFC_HCI_LINK_MGMT_GATE, - NFC_HCI_RF_READER_B_GATE, NFC_HCI_RF_READER_A_GATE - }; + + if (hdev->init_data.gates[0].gate != NFC_HCI_ADMIN_GATE) + return -EPROTO; r = nfc_hci_connect_gate(hdev, NFC_HCI_HOST_CONTROLLER_ID, - NFC_HCI_ADMIN_GATE); + hdev->init_data.gates[0].gate, + hdev->init_data.gates[0].pipe); if (r < 0) goto exit; @@ -368,10 +367,6 @@ static int hci_dev_session_init(struct nfc_hci_dev *hdev) if (r < 0) goto exit; - r = hci_dev_connect_gates(hdev, sizeof(hci_gates), hci_gates); - if (r < 0) - goto disconnect_all; - r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count, hdev->init_data.gates); if (r < 0) diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h index d3cde075ba60..fa9a21e92239 100644 --- a/net/nfc/hci/hci.h +++ b/net/nfc/hci/hci.h @@ -132,9 +132,4 @@ void nfc_hci_hcp_message_rx(struct nfc_hci_dev *hdev, u8 pipe, u8 type, #define NFC_HCI_ANY_E_REG_ACCESS_DENIED 0x0a #define NFC_HCI_ANY_E_PIPE_ACCESS_DENIED 0x0b -/* Pipes */ -#define NFC_HCI_INVALID_PIPE 0x80 -#define NFC_HCI_LINK_MGMT_PIPE 0x00 -#define NFC_HCI_ADMIN_PIPE 0x01 - #endif /* __LOCAL_HCI_H */ -- cgit v1.2.3 From e044a651b9b7b1b33d8b7fdb2bb27e443f392083 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 18:05:28 -0700 Subject: ipv4: Fix crashes in fib_rules_tclass(). All paths assume, when CONFIG_IP_MULTIPLE_TABLES is enabled, that any successful call to fib_lookup() will initialize the fib_result->r value to something. We violated that expectation in the new fib_lookup() fast path. Reported-by: Or Gerlitz Tested-by: Eric Dumazet Tested-by: Greg Rose Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 539c6721f810..000c4674e18e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -230,6 +230,7 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { if (!net->ipv4.fib_has_custom_rules) { + res->r = NULL; if (net->ipv4.fib_local && !fib_table_lookup(net->ipv4.fib_local, flp, res, FIB_LOOKUP_NOREF)) -- cgit v1.2.3 From 4aabd8ef8c43677cfee3e1e36c5a79edddb41942 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 9 Jul 2012 16:07:30 -0700 Subject: tcp: Move dynamnic metrics handling into seperate file. Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++ net/ipv4/Makefile | 2 +- net/ipv4/tcp_input.c | 188 +----------------------------------------------- net/ipv4/tcp_metrics.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 199 insertions(+), 187 deletions(-) create mode 100644 net/ipv4/tcp_metrics.c (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 53fb7d814170..98ca797001a2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -388,6 +388,8 @@ extern void tcp_enter_frto(struct sock *sk); extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); +extern void tcp_init_metrics(struct sock *sk); +extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); extern void tcp_init_sock(struct sock *sk); extern unsigned int tcp_poll(struct file * file, struct socket *sock, @@ -556,6 +558,8 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return (tp->srtt >> 3) + tp->rttvar; } +extern void tcp_set_rto(struct sock *sk); + static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { tp->pred_flags = htonl((tp->tcp_header_len << 26) | diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ff75d3bbcd6a..5a23e8b37106 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -7,7 +7,7 @@ obj-y := route.o inetpeer.o protocol.o \ ip_output.o ip_sockglue.o inet_hashtables.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ - tcp_minisocks.o tcp_cong.o \ + tcp_minisocks.o tcp_cong.o tcp_metrics.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ca0d0e7c9778..055ac49b8b40 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -93,7 +93,6 @@ int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_frto_response __read_mostly; -int sysctl_tcp_nometrics_save __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; @@ -701,7 +700,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ -static inline void tcp_set_rto(struct sock *sk) +void tcp_set_rto(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); /* Old crap is replaced with new one. 8) @@ -728,109 +727,6 @@ static inline void tcp_set_rto(struct sock *sk) tcp_bound_rto(sk); } -/* Save metrics learned by this TCP session. - This function is called only, when TCP finishes successfully - i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE. - */ -void tcp_update_metrics(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); - - if (sysctl_tcp_nometrics_save) - return; - - if (dst && (dst->flags & DST_HOST)) { - const struct inet_connection_sock *icsk = inet_csk(sk); - int m; - unsigned long rtt; - - dst_confirm(dst); - - if (icsk->icsk_backoff || !tp->srtt) { - /* This session failed to estimate rtt. Why? - * Probably, no packets returned in time. - * Reset our results. - */ - if (!(dst_metric_locked(dst, RTAX_RTT))) - dst_metric_set(dst, RTAX_RTT, 0); - return; - } - - rtt = dst_metric_rtt(dst, RTAX_RTT); - m = rtt - tp->srtt; - - /* If newly calculated rtt larger than stored one, - * store new one. Otherwise, use EWMA. Remember, - * rtt overestimation is always better than underestimation. - */ - if (!(dst_metric_locked(dst, RTAX_RTT))) { - if (m <= 0) - set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt); - else - set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3)); - } - - if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { - unsigned long var; - if (m < 0) - m = -m; - - /* Scale deviation to rttvar fixed point */ - m >>= 1; - if (m < tp->mdev) - m = tp->mdev; - - var = dst_metric_rtt(dst, RTAX_RTTVAR); - if (m >= var) - var = m; - else - var -= (var - m) >> 2; - - set_dst_metric_rtt(dst, RTAX_RTTVAR, var); - } - - if (tcp_in_initial_slowstart(tp)) { - /* Slow start still did not finish. */ - if (dst_metric(dst, RTAX_SSTHRESH) && - !dst_metric_locked(dst, RTAX_SSTHRESH) && - (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) - dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1); - if (!dst_metric_locked(dst, RTAX_CWND) && - tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) - dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd); - } else if (tp->snd_cwnd > tp->snd_ssthresh && - icsk->icsk_ca_state == TCP_CA_Open) { - /* Cong. avoidance phase, cwnd is reliable. */ - if (!dst_metric_locked(dst, RTAX_SSTHRESH)) - dst_metric_set(dst, RTAX_SSTHRESH, - max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); - if (!dst_metric_locked(dst, RTAX_CWND)) - dst_metric_set(dst, RTAX_CWND, - (dst_metric(dst, RTAX_CWND) + - tp->snd_cwnd) >> 1); - } else { - /* Else slow start did not finish, cwnd is non-sense, - ssthresh may be also invalid. - */ - if (!dst_metric_locked(dst, RTAX_CWND)) - dst_metric_set(dst, RTAX_CWND, - (dst_metric(dst, RTAX_CWND) + - tp->snd_ssthresh) >> 1); - if (dst_metric(dst, RTAX_SSTHRESH) && - !dst_metric_locked(dst, RTAX_SSTHRESH) && - tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) - dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh); - } - - if (!dst_metric_locked(dst, RTAX_REORDERING)) { - if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && - tp->reordering != sysctl_tcp_reordering) - dst_metric_set(dst, RTAX_REORDERING, tp->reordering); - } - } -} - __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); @@ -867,7 +763,7 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) * Packet counting of FACK is based on in-order assumptions, therefore TCP * disables it when reordering is detected */ -static void tcp_disable_fack(struct tcp_sock *tp) +void tcp_disable_fack(struct tcp_sock *tp) { /* RFC3517 uses different metric in lost marker => reset on change */ if (tcp_is_fack(tp)) @@ -881,86 +777,6 @@ static void tcp_dsack_seen(struct tcp_sock *tp) tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; } -/* Initialize metrics on socket. */ - -static void tcp_init_metrics(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); - - if (dst == NULL) - goto reset; - - dst_confirm(dst); - - if (dst_metric_locked(dst, RTAX_CWND)) - tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND); - if (dst_metric(dst, RTAX_SSTHRESH)) { - tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); - if (tp->snd_ssthresh > tp->snd_cwnd_clamp) - tp->snd_ssthresh = tp->snd_cwnd_clamp; - } else { - /* ssthresh may have been reduced unnecessarily during. - * 3WHS. Restore it back to its initial default. - */ - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - } - if (dst_metric(dst, RTAX_REORDERING) && - tp->reordering != dst_metric(dst, RTAX_REORDERING)) { - tcp_disable_fack(tp); - tcp_disable_early_retrans(tp); - tp->reordering = dst_metric(dst, RTAX_REORDERING); - } - - if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) - goto reset; - - /* Initial rtt is determined from SYN,SYN-ACK. - * The segment is small and rtt may appear much - * less than real one. Use per-dst memory - * to make it more realistic. - * - * A bit of theory. RTT is time passed after "normal" sized packet - * is sent until it is ACKed. In normal circumstances sending small - * packets force peer to delay ACKs and calculation is correct too. - * The algorithm is adaptive and, provided we follow specs, it - * NEVER underestimate RTT. BUT! If peer tries to make some clever - * tricks sort of "quick acks" for time long enough to decrease RTT - * to low value, and then abruptly stops to do it and starts to delay - * ACKs, wait for troubles. - */ - if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) { - tp->srtt = dst_metric_rtt(dst, RTAX_RTT); - tp->rtt_seq = tp->snd_nxt; - } - if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) { - tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR); - tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); - } - tcp_set_rto(sk); -reset: - if (tp->srtt == 0) { - /* RFC6298: 5.7 We've failed to get a valid RTT sample from - * 3WHS. This is most likely due to retransmission, - * including spurious one. Reset the RTO back to 3secs - * from the more aggressive 1sec to avoid more spurious - * retransmission. - */ - tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; - } - /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been - * retransmitted. In light of RFC6298 more aggressive 1sec - * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK - * retransmission has occurred. - */ - if (tp->total_retrans > 1) - tp->snd_cwnd = 1; - else - tp->snd_cwnd = tcp_init_cwnd(tp, dst); - tp->snd_cwnd_stamp = tcp_time_stamp; -} - static void tcp_update_reordering(struct sock *sk, const int metric, const int ts) { diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c new file mode 100644 index 000000000000..2793ecf928d3 --- /dev/null +++ b/net/ipv4/tcp_metrics.c @@ -0,0 +1,192 @@ +#include +#include + +#include +#include +#include +#include + +int sysctl_tcp_nometrics_save __read_mostly; + +/* Save metrics learned by this TCP session. This function is called + * only, when TCP finishes successfully i.e. when it enters TIME-WAIT + * or goes from LAST-ACK to CLOSE. + */ +void tcp_update_metrics(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); + + if (sysctl_tcp_nometrics_save) + return; + + if (dst && (dst->flags & DST_HOST)) { + const struct inet_connection_sock *icsk = inet_csk(sk); + int m; + unsigned long rtt; + + dst_confirm(dst); + + if (icsk->icsk_backoff || !tp->srtt) { + /* This session failed to estimate rtt. Why? + * Probably, no packets returned in time. + * Reset our results. + */ + if (!(dst_metric_locked(dst, RTAX_RTT))) + dst_metric_set(dst, RTAX_RTT, 0); + return; + } + + rtt = dst_metric_rtt(dst, RTAX_RTT); + m = rtt - tp->srtt; + + /* If newly calculated rtt larger than stored one, + * store new one. Otherwise, use EWMA. Remember, + * rtt overestimation is always better than underestimation. + */ + if (!(dst_metric_locked(dst, RTAX_RTT))) { + if (m <= 0) + set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt); + else + set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3)); + } + + if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { + unsigned long var; + if (m < 0) + m = -m; + + /* Scale deviation to rttvar fixed point */ + m >>= 1; + if (m < tp->mdev) + m = tp->mdev; + + var = dst_metric_rtt(dst, RTAX_RTTVAR); + if (m >= var) + var = m; + else + var -= (var - m) >> 2; + + set_dst_metric_rtt(dst, RTAX_RTTVAR, var); + } + + if (tcp_in_initial_slowstart(tp)) { + /* Slow start still did not finish. */ + if (dst_metric(dst, RTAX_SSTHRESH) && + !dst_metric_locked(dst, RTAX_SSTHRESH) && + (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) + dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1); + if (!dst_metric_locked(dst, RTAX_CWND) && + tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) + dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd); + } else if (tp->snd_cwnd > tp->snd_ssthresh && + icsk->icsk_ca_state == TCP_CA_Open) { + /* Cong. avoidance phase, cwnd is reliable. */ + if (!dst_metric_locked(dst, RTAX_SSTHRESH)) + dst_metric_set(dst, RTAX_SSTHRESH, + max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); + if (!dst_metric_locked(dst, RTAX_CWND)) + dst_metric_set(dst, RTAX_CWND, + (dst_metric(dst, RTAX_CWND) + + tp->snd_cwnd) >> 1); + } else { + /* Else slow start did not finish, cwnd is non-sense, + ssthresh may be also invalid. + */ + if (!dst_metric_locked(dst, RTAX_CWND)) + dst_metric_set(dst, RTAX_CWND, + (dst_metric(dst, RTAX_CWND) + + tp->snd_ssthresh) >> 1); + if (dst_metric(dst, RTAX_SSTHRESH) && + !dst_metric_locked(dst, RTAX_SSTHRESH) && + tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) + dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh); + } + + if (!dst_metric_locked(dst, RTAX_REORDERING)) { + if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && + tp->reordering != sysctl_tcp_reordering) + dst_metric_set(dst, RTAX_REORDERING, tp->reordering); + } + } +} + +/* Initialize metrics on socket. */ + +void tcp_init_metrics(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); + + if (dst == NULL) + goto reset; + + dst_confirm(dst); + + if (dst_metric_locked(dst, RTAX_CWND)) + tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND); + if (dst_metric(dst, RTAX_SSTHRESH)) { + tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); + if (tp->snd_ssthresh > tp->snd_cwnd_clamp) + tp->snd_ssthresh = tp->snd_cwnd_clamp; + } else { + /* ssthresh may have been reduced unnecessarily during. + * 3WHS. Restore it back to its initial default. + */ + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + } + if (dst_metric(dst, RTAX_REORDERING) && + tp->reordering != dst_metric(dst, RTAX_REORDERING)) { + tcp_disable_fack(tp); + tcp_disable_early_retrans(tp); + tp->reordering = dst_metric(dst, RTAX_REORDERING); + } + + if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) + goto reset; + + /* Initial rtt is determined from SYN,SYN-ACK. + * The segment is small and rtt may appear much + * less than real one. Use per-dst memory + * to make it more realistic. + * + * A bit of theory. RTT is time passed after "normal" sized packet + * is sent until it is ACKed. In normal circumstances sending small + * packets force peer to delay ACKs and calculation is correct too. + * The algorithm is adaptive and, provided we follow specs, it + * NEVER underestimate RTT. BUT! If peer tries to make some clever + * tricks sort of "quick acks" for time long enough to decrease RTT + * to low value, and then abruptly stops to do it and starts to delay + * ACKs, wait for troubles. + */ + if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) { + tp->srtt = dst_metric_rtt(dst, RTAX_RTT); + tp->rtt_seq = tp->snd_nxt; + } + if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) { + tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR); + tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); + } + tcp_set_rto(sk); +reset: + if (tp->srtt == 0) { + /* RFC6298: 5.7 We've failed to get a valid RTT sample from + * 3WHS. This is most likely due to retransmission, + * including spurious one. Reset the RTO back to 3secs + * from the more aggressive 1sec to avoid more spurious + * retransmission. + */ + tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; + } + /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been + * retransmitted. In light of RFC6298 more aggressive 1sec + * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK + * retransmission has occurred. + */ + if (tp->total_retrans > 1) + tp->snd_cwnd = 1; + else + tp->snd_cwnd = tcp_init_cwnd(tp, dst); + tp->snd_cwnd_stamp = tcp_time_stamp; +} -- cgit v1.2.3 From ab92bb2f679d66c7e12a6b1c0cdd76fe308f6546 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 9 Jul 2012 16:19:30 -0700 Subject: tcp: Abstract back handling peer aliveness test into helper function. Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_metrics.c | 10 ++++++++++ net/ipv6/tcp_ipv6.c | 2 +- 4 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 98ca797001a2..5478356ea8c5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -389,6 +389,7 @@ extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); extern void tcp_init_metrics(struct sock *sk); +extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); extern void tcp_init_sock(struct sock *sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 64568fa21d05..e9312a8f33a1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1405,7 +1405,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && (!peer || !peer->tcp_ts_stamp) && - (!dst || !dst_metric(dst, RTAX_RTT))) { + !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 2793ecf928d3..9afe703c85cc 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1,7 +1,9 @@ +#include #include #include #include +#include #include #include #include @@ -190,3 +192,11 @@ reset: tp->snd_cwnd = tcp_init_cwnd(tp, dst); tp->snd_cwnd_stamp = tcp_time_stamp; } + +bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) +{ + if (!dst) + return false; + return dst_metric(dst, RTAX_RTT) ? true : false; +} +EXPORT_SYMBOL_GPL(tcp_peer_is_proven); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6cc67ed6c2e6..75d179555c28 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1177,7 +1177,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && (!peer || !peer->tcp_ts_stamp) && - (!dst || !dst_metric(dst, RTAX_RTT))) { + !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. -- cgit v1.2.3 From 51c5d0c4b169bf762f09e0d5b283a7f0b2a45739 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 00:49:14 -0700 Subject: tcp: Maintain dynamic metrics in local cache. Maintain a local hash table of TCP dynamic metrics blobs. Computed TCP metrics are no longer maintained in the route metrics. The table uses RCU and an extremely simple hash so that it has low latency and low overhead. A simple hash is legitimate because we only make metrics blobs for fully established connections. Some tweaking of the default hash table sizes, metric timeouts, and the hash chain length limit certainly could use some tweaking. But the basic design seems sound. With help from Eric Dumazet and Joe Perches. Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 3 + include/net/tcp.h | 1 + net/ipv4/tcp.c | 2 + net/ipv4/tcp_metrics.c | 555 +++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 468 insertions(+), 93 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 599e48fa97cb..2e089a99d603 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -7,6 +7,7 @@ #include +struct tcpm_hash_bucket; struct ctl_table_header; struct ipv4_devconf; struct fib_rules_ops; @@ -39,6 +40,8 @@ struct netns_ipv4 { struct sock **icmp_sk; struct sock *tcp_sock; struct inet_peer_base *peers; + struct tcpm_hash_bucket *tcp_metrics_hash; + unsigned int tcp_metrics_hash_mask; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5478356ea8c5..0900d63d1627 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -389,6 +389,7 @@ extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); extern void tcp_init_metrics(struct sock *sk); +extern void tcp_metrics_init(void); extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3ba605f60e4e..29aa0c800cd0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3563,6 +3563,8 @@ void __init tcp_init(void) pr_info("Hash tables configured (established %u bind %u)\n", tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); + tcp_metrics_init(); + tcp_register_congestion_control(&tcp_reno); memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 9afe703c85cc..56223bab251b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1,134 +1,431 @@ +#include +#include +#include +#include #include #include +#include +#include #include #include +#include #include +#include #include +#include #include #include int sysctl_tcp_nometrics_save __read_mostly; +enum tcp_metric_index { + TCP_METRIC_RTT, + TCP_METRIC_RTTVAR, + TCP_METRIC_SSTHRESH, + TCP_METRIC_CWND, + TCP_METRIC_REORDERING, + + /* Always last. */ + TCP_METRIC_MAX, +}; + +struct tcp_metrics_block { + struct tcp_metrics_block __rcu *tcpm_next; + struct inetpeer_addr tcpm_addr; + unsigned long tcpm_stamp; + u32 tcpm_lock; + u32 tcpm_vals[TCP_METRIC_MAX]; +}; + +static bool tcp_metric_locked(struct tcp_metrics_block *tm, + enum tcp_metric_index idx) +{ + return tm->tcpm_lock & (1 << idx); +} + +static u32 tcp_metric_get(struct tcp_metrics_block *tm, + enum tcp_metric_index idx) +{ + return tm->tcpm_vals[idx]; +} + +static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm, + enum tcp_metric_index idx) +{ + return msecs_to_jiffies(tm->tcpm_vals[idx]); +} + +static void tcp_metric_set(struct tcp_metrics_block *tm, + enum tcp_metric_index idx, + u32 val) +{ + tm->tcpm_vals[idx] = val; +} + +static void tcp_metric_set_msecs(struct tcp_metrics_block *tm, + enum tcp_metric_index idx, + u32 val) +{ + tm->tcpm_vals[idx] = jiffies_to_msecs(val); +} + +static bool addr_same(const struct inetpeer_addr *a, + const struct inetpeer_addr *b) +{ + const struct in6_addr *a6, *b6; + + if (a->family != b->family) + return false; + if (a->family == AF_INET) + return a->addr.a4 == b->addr.a4; + + a6 = (const struct in6_addr *) &a->addr.a6[0]; + b6 = (const struct in6_addr *) &b->addr.a6[0]; + + return ipv6_addr_equal(a6, b6); +} + +struct tcpm_hash_bucket { + struct tcp_metrics_block __rcu *chain; +}; + +static DEFINE_SPINLOCK(tcp_metrics_lock); + +static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) +{ + u32 val; + + val = 0; + if (dst_metric_locked(dst, RTAX_RTT)) + val |= 1 << TCP_METRIC_RTT; + if (dst_metric_locked(dst, RTAX_RTTVAR)) + val |= 1 << TCP_METRIC_RTTVAR; + if (dst_metric_locked(dst, RTAX_SSTHRESH)) + val |= 1 << TCP_METRIC_SSTHRESH; + if (dst_metric_locked(dst, RTAX_CWND)) + val |= 1 << TCP_METRIC_CWND; + if (dst_metric_locked(dst, RTAX_REORDERING)) + val |= 1 << TCP_METRIC_REORDERING; + tm->tcpm_lock = val; + + tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT); + tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR); + tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); + tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); + tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); +} + +static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, + struct inetpeer_addr *addr, + unsigned int hash, + bool reclaim) +{ + struct tcp_metrics_block *tm; + struct net *net; + + spin_lock_bh(&tcp_metrics_lock); + net = dev_net(dst->dev); + if (unlikely(reclaim)) { + struct tcp_metrics_block *oldest; + + oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); + for (tm = rcu_dereference(oldest->tcpm_next); tm; + tm = rcu_dereference(tm->tcpm_next)) { + if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) + oldest = tm; + } + tm = oldest; + } else { + tm = kmalloc(sizeof(*tm), GFP_ATOMIC); + if (!tm) + goto out_unlock; + } + tm->tcpm_addr = *addr; + tm->tcpm_stamp = jiffies; + + tcpm_suck_dst(tm, dst); + + if (likely(!reclaim)) { + tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain; + rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm); + } + +out_unlock: + spin_unlock_bh(&tcp_metrics_lock); + return tm; +} + +#define TCP_METRICS_TIMEOUT (60 * 60 * HZ) + +static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) +{ + if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) + tcpm_suck_dst(tm, dst); +} + +#define TCP_METRICS_RECLAIM_DEPTH 5 +#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL + +static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, int depth) +{ + if (tm) + return tm; + if (depth > TCP_METRICS_RECLAIM_DEPTH) + return TCP_METRICS_RECLAIM_PTR; + return NULL; +} + +static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr, + struct net *net, unsigned int hash) +{ + struct tcp_metrics_block *tm; + int depth = 0; + + for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + tm = rcu_dereference(tm->tcpm_next)) { + if (addr_same(&tm->tcpm_addr, addr)) + break; + depth++; + } + return tcp_get_encode(tm, depth); +} + +static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, + struct dst_entry *dst) +{ + struct tcp_metrics_block *tm; + struct inetpeer_addr addr; + unsigned int hash; + struct net *net; + + addr.family = req->rsk_ops->family; + switch (addr.family) { + case AF_INET: + addr.addr.a4 = inet_rsk(req)->rmt_addr; + hash = (__force unsigned int) addr.addr.a4; + break; + case AF_INET6: + *(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr; + hash = ((__force unsigned int) addr.addr.a6[0] ^ + (__force unsigned int) addr.addr.a6[1] ^ + (__force unsigned int) addr.addr.a6[2] ^ + (__force unsigned int) addr.addr.a6[3]); + break; + default: + return NULL; + } + + hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); + + net = dev_net(dst->dev); + hash &= net->ipv4.tcp_metrics_hash_mask; + + for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + tm = rcu_dereference(tm->tcpm_next)) { + if (addr_same(&tm->tcpm_addr, &addr)) + break; + } + tcpm_check_stamp(tm, dst); + return tm; +} + +static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, + struct dst_entry *dst, + bool create) +{ + struct tcp_metrics_block *tm; + struct inetpeer_addr addr; + unsigned int hash; + struct net *net; + bool reclaim; + + addr.family = sk->sk_family; + switch (addr.family) { + case AF_INET: + addr.addr.a4 = inet_sk(sk)->inet_daddr; + hash = (__force unsigned int) addr.addr.a4; + break; + case AF_INET6: + *(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr; + hash = ((__force unsigned int) addr.addr.a6[0] ^ + (__force unsigned int) addr.addr.a6[1] ^ + (__force unsigned int) addr.addr.a6[2] ^ + (__force unsigned int) addr.addr.a6[3]); + break; + default: + return NULL; + } + + hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); + + net = dev_net(dst->dev); + hash &= net->ipv4.tcp_metrics_hash_mask; + + tm = __tcp_get_metrics(&addr, net, hash); + reclaim = false; + if (tm == TCP_METRICS_RECLAIM_PTR) { + reclaim = true; + tm = NULL; + } + if (!tm && create) + tm = tcpm_new(dst, &addr, hash, reclaim); + else + tcpm_check_stamp(tm, dst); + + return tm; +} + /* Save metrics learned by this TCP session. This function is called * only, when TCP finishes successfully i.e. when it enters TIME-WAIT * or goes from LAST-ACK to CLOSE. */ void tcp_update_metrics(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); struct dst_entry *dst = __sk_dst_get(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_metrics_block *tm; + unsigned long rtt; + u32 val; + int m; - if (sysctl_tcp_nometrics_save) + if (sysctl_tcp_nometrics_save || !dst) return; - if (dst && (dst->flags & DST_HOST)) { - const struct inet_connection_sock *icsk = inet_csk(sk); - int m; - unsigned long rtt; - + if (dst->flags & DST_HOST) dst_confirm(dst); - if (icsk->icsk_backoff || !tp->srtt) { - /* This session failed to estimate rtt. Why? - * Probably, no packets returned in time. - * Reset our results. - */ - if (!(dst_metric_locked(dst, RTAX_RTT))) - dst_metric_set(dst, RTAX_RTT, 0); - return; - } + rcu_read_lock(); + if (icsk->icsk_backoff || !tp->srtt) { + /* This session failed to estimate rtt. Why? + * Probably, no packets returned in time. Reset our + * results. + */ + tm = tcp_get_metrics(sk, dst, false); + if (tm && !tcp_metric_locked(tm, TCP_METRIC_RTT)) + tcp_metric_set(tm, TCP_METRIC_RTT, 0); + goto out_unlock; + } else + tm = tcp_get_metrics(sk, dst, true); - rtt = dst_metric_rtt(dst, RTAX_RTT); - m = rtt - tp->srtt; + if (!tm) + goto out_unlock; - /* If newly calculated rtt larger than stored one, - * store new one. Otherwise, use EWMA. Remember, - * rtt overestimation is always better than underestimation. - */ - if (!(dst_metric_locked(dst, RTAX_RTT))) { - if (m <= 0) - set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt); - else - set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3)); - } + rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); + m = rtt - tp->srtt; - if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { - unsigned long var; - if (m < 0) - m = -m; + /* If newly calculated rtt larger than stored one, store new + * one. Otherwise, use EWMA. Remember, rtt overestimation is + * always better than underestimation. + */ + if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) { + if (m <= 0) + rtt = tp->srtt; + else + rtt -= (m >> 3); + tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt); + } - /* Scale deviation to rttvar fixed point */ - m >>= 1; - if (m < tp->mdev) - m = tp->mdev; + if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) { + unsigned long var; - var = dst_metric_rtt(dst, RTAX_RTTVAR); - if (m >= var) - var = m; - else - var -= (var - m) >> 2; + if (m < 0) + m = -m; - set_dst_metric_rtt(dst, RTAX_RTTVAR, var); - } + /* Scale deviation to rttvar fixed point */ + m >>= 1; + if (m < tp->mdev) + m = tp->mdev; - if (tcp_in_initial_slowstart(tp)) { - /* Slow start still did not finish. */ - if (dst_metric(dst, RTAX_SSTHRESH) && - !dst_metric_locked(dst, RTAX_SSTHRESH) && - (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) - dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1); - if (!dst_metric_locked(dst, RTAX_CWND) && - tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) - dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd); - } else if (tp->snd_cwnd > tp->snd_ssthresh && - icsk->icsk_ca_state == TCP_CA_Open) { - /* Cong. avoidance phase, cwnd is reliable. */ - if (!dst_metric_locked(dst, RTAX_SSTHRESH)) - dst_metric_set(dst, RTAX_SSTHRESH, - max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); - if (!dst_metric_locked(dst, RTAX_CWND)) - dst_metric_set(dst, RTAX_CWND, - (dst_metric(dst, RTAX_CWND) + - tp->snd_cwnd) >> 1); - } else { - /* Else slow start did not finish, cwnd is non-sense, - ssthresh may be also invalid. - */ - if (!dst_metric_locked(dst, RTAX_CWND)) - dst_metric_set(dst, RTAX_CWND, - (dst_metric(dst, RTAX_CWND) + - tp->snd_ssthresh) >> 1); - if (dst_metric(dst, RTAX_SSTHRESH) && - !dst_metric_locked(dst, RTAX_SSTHRESH) && - tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) - dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh); - } + var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); + if (m >= var) + var = m; + else + var -= (var - m) >> 2; - if (!dst_metric_locked(dst, RTAX_REORDERING)) { - if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && + tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var); + } + + if (tcp_in_initial_slowstart(tp)) { + /* Slow start still did not finish. */ + if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + if (val && (tp->snd_cwnd >> 1) > val) + tcp_metric_set(tm, TCP_METRIC_SSTHRESH, + tp->snd_cwnd >> 1); + } + if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { + val = tcp_metric_get(tm, TCP_METRIC_CWND); + if (tp->snd_cwnd > val) + tcp_metric_set(tm, TCP_METRIC_CWND, + tp->snd_cwnd); + } + } else if (tp->snd_cwnd > tp->snd_ssthresh && + icsk->icsk_ca_state == TCP_CA_Open) { + /* Cong. avoidance phase, cwnd is reliable. */ + if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) + tcp_metric_set(tm, TCP_METRIC_SSTHRESH, + max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); + if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { + val = tcp_metric_get(tm, TCP_METRIC_CWND); + tcp_metric_set(tm, RTAX_CWND, (val + tp->snd_cwnd) >> 1); + } + } else { + /* Else slow start did not finish, cwnd is non-sense, + * ssthresh may be also invalid. + */ + if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { + val = tcp_metric_get(tm, TCP_METRIC_CWND); + tcp_metric_set(tm, TCP_METRIC_CWND, + (val + tp->snd_ssthresh) >> 1); + } + if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + if (val && tp->snd_ssthresh > val) + tcp_metric_set(tm, TCP_METRIC_SSTHRESH, + tp->snd_ssthresh); + } + if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { + val = tcp_metric_get(tm, TCP_METRIC_REORDERING); + if (val < tp->reordering && tp->reordering != sysctl_tcp_reordering) - dst_metric_set(dst, RTAX_REORDERING, tp->reordering); + tcp_metric_set(tm, TCP_METRIC_REORDERING, + tp->reordering); } } + tm->tcpm_stamp = jiffies; +out_unlock: + rcu_read_unlock(); } /* Initialize metrics on socket. */ void tcp_init_metrics(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_metrics_block *tm; + u32 val; if (dst == NULL) goto reset; dst_confirm(dst); - if (dst_metric_locked(dst, RTAX_CWND)) - tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND); - if (dst_metric(dst, RTAX_SSTHRESH)) { - tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); + rcu_read_lock(); + tm = tcp_get_metrics(sk, dst, true); + if (!tm) { + rcu_read_unlock(); + goto reset; + } + + if (tcp_metric_locked(tm, TCP_METRIC_CWND)) + tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND); + + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + if (val) { + tp->snd_ssthresh = val; if (tp->snd_ssthresh > tp->snd_cwnd_clamp) tp->snd_ssthresh = tp->snd_cwnd_clamp; } else { @@ -137,16 +434,18 @@ void tcp_init_metrics(struct sock *sk) */ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; } - if (dst_metric(dst, RTAX_REORDERING) && - tp->reordering != dst_metric(dst, RTAX_REORDERING)) { + val = tcp_metric_get(tm, TCP_METRIC_REORDERING); + if (val && tp->reordering != val) { tcp_disable_fack(tp); tcp_disable_early_retrans(tp); - tp->reordering = dst_metric(dst, RTAX_REORDERING); + tp->reordering = val; } - if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) + val = tcp_metric_get(tm, TCP_METRIC_RTT); + if (val == 0 || tp->srtt == 0) { + rcu_read_unlock(); goto reset; - + } /* Initial rtt is determined from SYN,SYN-ACK. * The segment is small and rtt may appear much * less than real one. Use per-dst memory @@ -161,14 +460,18 @@ void tcp_init_metrics(struct sock *sk) * to low value, and then abruptly stops to do it and starts to delay * ACKs, wait for troubles. */ - if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) { - tp->srtt = dst_metric_rtt(dst, RTAX_RTT); + val = msecs_to_jiffies(val); + if (val > tp->srtt) { + tp->srtt = val; tp->rtt_seq = tp->snd_nxt; } - if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) { - tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR); + val = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); + if (val > tp->mdev) { + tp->mdev = val; tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); } + rcu_read_unlock(); + tcp_set_rto(sk); reset: if (tp->srtt == 0) { @@ -195,8 +498,74 @@ reset: bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) { + struct tcp_metrics_block *tm; + bool ret; + if (!dst) return false; - return dst_metric(dst, RTAX_RTT) ? true : false; + + rcu_read_lock(); + tm = __tcp_get_metrics_req(req, dst); + if (tm && tcp_metric_get(tm, TCP_METRIC_RTT)) + ret = true; + else + ret = false; + rcu_read_unlock(); + + return ret; } EXPORT_SYMBOL_GPL(tcp_peer_is_proven); + +static unsigned long tcpmhash_entries; +static int __init set_tcpmhash_entries(char *str) +{ + ssize_t ret; + + if (!str) + return 0; + + ret = kstrtoul(str, 0, &tcpmhash_entries); + if (ret) + return 0; + + return 1; +} +__setup("tcpmhash_entries=", set_tcpmhash_entries); + +static int __net_init tcp_net_metrics_init(struct net *net) +{ + int slots, size; + + slots = tcpmhash_entries; + if (!slots) { + if (totalram_pages >= 128 * 1024) + slots = 16 * 1024; + else + slots = 8 * 1024; + } + + size = slots * sizeof(struct tcpm_hash_bucket); + + net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL); + if (!net->ipv4.tcp_metrics_hash) + return -ENOMEM; + + net->ipv4.tcp_metrics_hash_mask = (slots - 1); + + return 0; +} + +static void __net_exit tcp_net_metrics_exit(struct net *net) +{ + kfree(net->ipv4.tcp_metrics_hash); +} + +static __net_initdata struct pernet_operations tcp_net_metrics_ops = { + .init = tcp_net_metrics_init, + .exit = tcp_net_metrics_exit, +}; + +void __init tcp_metrics_init(void) +{ + register_pernet_subsys(&tcp_net_metrics_ops); +} -- cgit v1.2.3 From 94334d5ed4b64ebcd2c4b421e133b921f8ccf75d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 00:53:48 -0700 Subject: net: Kill set_dst_metric_rtt(). No longer used. Signed-off-by: David S. Miller --- include/net/dst.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index b2634e446613..51610468c63d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -209,12 +209,6 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr return msecs_to_jiffies(dst_metric(dst, metric)); } -static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric, - unsigned long rtt) -{ - dst_metric_set(dst, metric, jiffies_to_msecs(rtt)); -} - static inline u32 dst_allfrag(const struct dst_entry *dst) { -- cgit v1.2.3 From 81166dd6fa8eb780b2132d32fbc77eb6ac04e44e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 03:14:24 -0700 Subject: tcp: Move timestamps from inetpeer to metrics cache. With help from Lin Ming. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 4 +- include/net/tcp.h | 5 +- net/ipv4/inetpeer.c | 1 - net/ipv4/route.c | 8 +-- net/ipv4/tcp_ipv4.c | 30 ++--------- net/ipv4/tcp_metrics.c | 136 +++++++++++++++++++++++++++++++++++++++++++++-- net/ipv4/tcp_minisocks.c | 46 ---------------- net/ipv6/route.c | 13 +---- net/ipv6/tcp_ipv6.c | 33 ++---------- 9 files changed, 149 insertions(+), 127 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index c27c8f10ebdc..1119f6f6cdb4 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -46,15 +46,13 @@ struct inet_peer { }; /* * Once inet_peer is queued for deletion (refcnt == -1), following fields - * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp + * are not available: rid, ip_id_count * We can share memory with rcu_head to help keep inet_peer small. */ union { struct { atomic_t rid; /* Frag reception counter */ atomic_t ip_id_count; /* IP ID for the next packet */ - __u32 tcp_ts; - __u32 tcp_ts_stamp; }; struct rcu_head rcu; struct inet_peer *gc_next; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0900d63d1627..3618fefae049 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -390,7 +390,10 @@ extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); extern void tcp_init_metrics(struct sock *sk); extern void tcp_metrics_init(void); -extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); +extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check); +extern bool tcp_remember_stamp(struct sock *sk); +extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); +extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); extern void tcp_init_sock(struct sock *sk); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index da90a8cab614..f457bcb41350 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -508,7 +508,6 @@ relookup: (daddr->family == AF_INET) ? secure_ip_id(daddr->addr.a4) : secure_ipv6_id(daddr->addr.a6)); - p->tcp_ts_stamp = 0; p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; p->rate_last = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d02c91177d32..78d81543766d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2846,7 +2846,7 @@ static int rt_fill_info(struct net *net, struct rtmsg *r; struct nlmsghdr *nlh; unsigned long expires = 0; - u32 id = 0, ts = 0, tsage = 0, error; + u32 id = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); if (nlh == NULL) @@ -2903,10 +2903,6 @@ static int rt_fill_info(struct net *net, const struct inet_peer *peer = rt_peer_ptr(rt); inet_peer_refcheck(peer); id = atomic_read(&peer->ip_id_count) & 0xffff; - if (peer->tcp_ts_stamp) { - ts = peer->tcp_ts; - tsage = get_seconds() - peer->tcp_ts_stamp; - } expires = ACCESS_ONCE(peer->pmtu_expires); if (expires) { if (time_before(jiffies, expires)) @@ -2942,7 +2938,7 @@ static int rt_fill_info(struct net *net, goto nla_put_failure; } - if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, + if (rtnl_put_cacheinfo(skb, &rt->dst, id, 0, 0, expires, error) < 0) goto nla_put_failure; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e9312a8f33a1..d406bf7f37d9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -209,22 +209,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (tcp_death_row.sysctl_tw_recycle && - !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) { - struct inet_peer *peer = rt_get_peer(rt, fl4->daddr); - /* - * VJ's idea. We save last timestamp seen from - * the destination in peer table, when entering state - * TIME-WAIT * and initialize rx_opt.ts_recent from it, - * when trying new connection. - */ - if (peer) { - inet_peer_refcheck(peer); - if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { - tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; - tp->rx_opt.ts_recent = peer->tcp_ts; - } - } - } + !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) + tcp_fetch_timewait_stamp(sk, &rt->dst); inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; @@ -1375,7 +1361,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = cookie_v4_init_sequence(sk, skb, &req->mss); req->cookie_ts = tmp_opt.tstamp_ok; } else if (!isn) { - struct inet_peer *peer = NULL; struct flowi4 fl4; /* VJ's idea. We save last timestamp seen @@ -1390,12 +1375,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && - fl4.daddr == saddr && - (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { - inet_peer_refcheck(peer); - if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && - (s32)(peer->tcp_ts - req->ts_recent) > - TCP_PAWS_WINDOW) { + fl4.daddr == saddr) { + if (!tcp_peer_is_proven(req, dst, true)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } @@ -1404,8 +1385,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) else if (!sysctl_tcp_syncookies && (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && - (!peer || !peer->tcp_ts_stamp) && - !tcp_peer_is_proven(req, dst)) { + !tcp_peer_is_proven(req, dst, false)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 56223bab251b..1fd83d3118fe 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -34,6 +34,8 @@ struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; struct inetpeer_addr tcpm_addr; unsigned long tcpm_stamp; + u32 tcpm_ts; + u32 tcpm_ts_stamp; u32 tcpm_lock; u32 tcpm_vals[TCP_METRIC_MAX]; }; @@ -114,6 +116,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); + tm->tcpm_ts = 0; + tm->tcpm_ts_stamp = 0; } static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, @@ -230,6 +234,45 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return tm; } +static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) +{ + struct inet6_timewait_sock *tw6; + struct tcp_metrics_block *tm; + struct inetpeer_addr addr; + unsigned int hash; + struct net *net; + + addr.family = tw->tw_family; + switch (addr.family) { + case AF_INET: + addr.addr.a4 = tw->tw_daddr; + hash = (__force unsigned int) addr.addr.a4; + break; + case AF_INET6: + tw6 = inet6_twsk((struct sock *)tw); + *(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr; + hash = ((__force unsigned int) addr.addr.a6[0] ^ + (__force unsigned int) addr.addr.a6[1] ^ + (__force unsigned int) addr.addr.a6[2] ^ + (__force unsigned int) addr.addr.a6[3]); + break; + default: + return NULL; + } + + hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); + + net = twsk_net(tw); + hash &= net->ipv4.tcp_metrics_hash_mask; + + for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + tm = rcu_dereference(tm->tcpm_next)) { + if (addr_same(&tm->tcpm_addr, &addr)) + break; + } + return tm; +} + static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, struct dst_entry *dst, bool create) @@ -496,7 +539,7 @@ reset: tp->snd_cwnd_stamp = tcp_time_stamp; } -bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) +bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check) { struct tcp_metrics_block *tm; bool ret; @@ -506,16 +549,99 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) rcu_read_lock(); tm = __tcp_get_metrics_req(req, dst); - if (tm && tcp_metric_get(tm, TCP_METRIC_RTT)) - ret = true; - else - ret = false; + if (paws_check) { + if (tm && + (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL && + (s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW) + ret = false; + else + ret = true; + } else { + if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp) + ret = true; + else + ret = false; + } rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(tcp_peer_is_proven); +void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst) +{ + struct tcp_metrics_block *tm; + + rcu_read_lock(); + tm = tcp_get_metrics(sk, dst, true); + if (tm) { + struct tcp_sock *tp = tcp_sk(sk); + + if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) { + tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp; + tp->rx_opt.ts_recent = tm->tcpm_ts; + } + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp); + +/* VJ's idea. Save last timestamp seen from this destination and hold + * it at least for normal timewait interval to use for duplicate + * segment detection in subsequent connections, before they enter + * synchronized state. + */ +bool tcp_remember_stamp(struct sock *sk) +{ + struct dst_entry *dst = __sk_dst_get(sk); + bool ret = false; + + if (dst) { + struct tcp_metrics_block *tm; + + rcu_read_lock(); + tm = tcp_get_metrics(sk, dst, true); + if (tm) { + struct tcp_sock *tp = tcp_sk(sk); + + if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 || + ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL && + tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { + tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; + tm->tcpm_ts = tp->rx_opt.ts_recent; + } + ret = true; + } + rcu_read_unlock(); + } + return ret; +} + +bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) +{ + struct tcp_metrics_block *tm; + bool ret = false; + + rcu_read_lock(); + tm = __tcp_get_metrics_tw(tw); + if (tw) { + const struct tcp_timewait_sock *tcptw; + struct sock *sk = (struct sock *) tw; + + tcptw = tcp_twsk(sk); + if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 || + ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL && + tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { + tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; + tm->tcpm_ts = tcptw->tw_ts_recent; + } + ret = true; + } + rcu_read_unlock(); + + return ret; +} + static unsigned long tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 72b7c63b1a39..a51aa534dab1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -49,52 +49,6 @@ struct inet_timewait_death_row tcp_death_row = { }; EXPORT_SYMBOL_GPL(tcp_death_row); -/* VJ's idea. Save last timestamp seen from this destination - * and hold it at least for normal timewait interval to use for duplicate - * segment detection in subsequent connections, before they enter synchronized - * state. - */ - -static bool tcp_remember_stamp(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct inet_peer *peer; - - peer = icsk->icsk_af_ops->get_peer(sk); - if (peer) { - if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || - ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && - peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { - peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; - peer->tcp_ts = tp->rx_opt.ts_recent; - } - return true; - } - - return false; -} - -static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) -{ - const struct tcp_timewait_sock *tcptw; - struct sock *sk = (struct sock *) tw; - struct inet_peer *peer; - - tcptw = tcp_twsk(sk); - peer = tcptw->tw_peer; - if (peer) { - if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || - ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && - peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { - peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; - peer->tcp_ts = tcptw->tw_ts_recent; - } - return true; - } - return false; -} - static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6cc6c881f54f..0c0684753781 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2348,13 +2348,11 @@ static int rt6_fill_node(struct net *net, int iif, int type, u32 pid, u32 seq, int prefix, int nowait, unsigned int flags) { - const struct inet_peer *peer; struct rtmsg *rtm; struct nlmsghdr *nlh; long expires; u32 table; struct neighbour *n; - u32 ts, tsage; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2473,16 +2471,7 @@ static int rt6_fill_node(struct net *net, else expires = INT_MAX; - peer = NULL; - if (rt6_has_peer(rt)) - peer = rt6_peer_ptr(rt); - ts = tsage = 0; - if (peer && peer->tcp_ts_stamp) { - ts = peer->tcp_ts; - tsage = get_seconds() - peer->tcp_ts_stamp; - } - - if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage, + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires, rt->dst.error) < 0) goto nla_put_failure; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 75d179555c28..9e96b5f21d2a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -277,22 +277,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, rt = (struct rt6_info *) dst; if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) { - struct inet_peer *peer = rt6_get_peer(rt); - /* - * VJ's idea. We save last timestamp seen from - * the destination in peer table, when entering state - * TIME-WAIT * and initialize rx_opt.ts_recent from it, - * when trying new connection. - */ - if (peer) { - inet_peer_refcheck(peer); - if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { - tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; - tp->rx_opt.ts_recent = peer->tcp_ts; - } - } - } + ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) + tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -1134,8 +1120,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq->iif = inet6_iif(skb); if (!isn) { - struct inet_peer *peer = NULL; - if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { @@ -1160,14 +1144,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL && - (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && - ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6, - &treq->rmt_addr)) { - inet_peer_refcheck(peer); - if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && - (s32)(peer->tcp_ts - req->ts_recent) > - TCP_PAWS_WINDOW) { + (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) { + if (!tcp_peer_is_proven(req, dst, true)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } @@ -1176,8 +1154,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) else if (!sysctl_tcp_syncookies && (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && - (!peer || !peer->tcp_ts_stamp) && - !tcp_peer_is_proven(req, dst)) { + !tcp_peer_is_proven(req, dst, false)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. -- cgit v1.2.3 From 16d1839907e695387654901995f9286b65fbbc6a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 03:32:59 -0700 Subject: inet: Remove ->get_peer() method. No longer used. Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 1 - net/ipv4/tcp_ipv4.c | 16 ---------------- net/ipv6/tcp_ipv6.c | 16 ---------------- 3 files changed, 33 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index af3c743a40e4..291e7cee14e7 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -43,7 +43,6 @@ struct inet_connection_sock_af_ops { struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); - struct inet_peer *(*get_peer)(struct sock *sk); u16 net_header_len; u16 net_frag_header_len; u16 sockaddr_len; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d406bf7f37d9..ddefd39ac0cf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1847,21 +1847,6 @@ do_time_wait: goto discard_it; } -struct inet_peer *tcp_v4_get_peer(struct sock *sk) -{ - struct rtable *rt = (struct rtable *) __sk_dst_get(sk); - struct inet_sock *inet = inet_sk(sk); - - /* If we don't have a valid cached route, or we're doing IP - * options which make the IPv4 header destination address - * different from our peer's, do not bother with this. - */ - if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr) - return NULL; - return rt_get_peer_create(rt, inet->inet_daddr); -} -EXPORT_SYMBOL(tcp_v4_get_peer); - static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp_timewait_sock), .twsk_unique = tcp_twsk_unique, @@ -1874,7 +1859,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, - .get_peer = tcp_v4_get_peer, .net_header_len = sizeof(struct iphdr), .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9e96b5f21d2a..61175cb2478f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1689,20 +1689,6 @@ do_time_wait: goto discard_it; } -static struct inet_peer *tcp_v6_get_peer(struct sock *sk) -{ - struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - - /* If we don't have a valid cached route, or we're doing IP - * options which make the IPv6 header destination address - * different from our peer's, do not bother with this. - */ - if (!rt || !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) - return NULL; - return rt6_get_peer_create(rt); -} - static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, @@ -1715,7 +1701,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { .rebuild_header = inet6_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .get_peer = tcp_v6_get_peer, .net_header_len = sizeof(struct ipv6hdr), .net_frag_header_len = sizeof(struct frag_hdr), .setsockopt = ipv6_setsockopt, @@ -1747,7 +1732,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .get_peer = tcp_v4_get_peer, .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, -- cgit v1.2.3 From 3e12939a2a67fbb4cbd962c3b9bc398c73319766 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 04:01:57 -0700 Subject: inet: Kill FLOWI_FLAG_PRECOW_METRICS. No longer needed. TCP writes metrics, but now in it's own special cache that does not dirty the route metrics. Therefore there is no longer any reason to pre-cow metrics in this way. Signed-off-by: David S. Miller --- include/net/flow.h | 5 ++--- include/net/inet_sock.h | 2 -- include/net/route.h | 2 -- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/route.c | 11 ++--------- net/ipv6/route.c | 2 +- 6 files changed, 6 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index bd524f598561..ce9cb7656b47 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -20,9 +20,8 @@ struct flowi_common { __u8 flowic_proto; __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 -#define FLOWI_FLAG_PRECOW_METRICS 0x02 -#define FLOWI_FLAG_CAN_SLEEP 0x04 -#define FLOWI_FLAG_RT_NOCACHE 0x08 +#define FLOWI_FLAG_CAN_SLEEP 0x02 +#define FLOWI_FLAG_RT_NOCACHE 0x04 __u32 flowic_secid; }; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index ae17e1352d7e..924d7b98ab60 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -245,8 +245,6 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl) flags |= FLOWI_FLAG_ANYSRC; - if (sk->sk_protocol == IPPROTO_TCP) - flags |= FLOWI_FLAG_PRECOW_METRICS; return flags; } diff --git a/include/net/route.h b/include/net/route.h index 211e2665139b..635d7a99d199 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -278,8 +278,6 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - if (protocol == IPPROTO_TCP) - flow_flags |= FLOWI_FLAG_PRECOW_METRICS; if (can_sleep) flow_flags |= FLOWI_FLAG_CAN_SLEEP; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 034ddbe42adf..76825be3b643 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -375,7 +375,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options_rcu *opt = inet_rsk(req)->opt; struct net *net = sock_net(sk); - int flags = inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS; + int flags = inet_sk_flowi_flags(sk); if (nocache) flags |= FLOWI_FLAG_RT_NOCACHE; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e376354dcb65..d4834e2914a0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1658,7 +1658,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, struct rtable *rt; flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, - protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS, + protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1836,18 +1836,11 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, { struct inet_peer_base *base; struct inet_peer *peer; - int create = 0; - - /* If a peer entry exists for this destination, we must hook - * it up in order to get at cached metrics. - */ - if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) - create = 1; base = inetpeer_base_ptr(rt->_peer); BUG_ON(!base); - peer = inet_getpeer_v4(base, rt->rt_dst, create); + peer = inet_getpeer_v4(base, rt->rt_dst, 0); if (peer) { __rt_set_peer(rt, peer); rt->rt_peer_genid = rt_peer_genid(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0c0684753781..b7eb51e1a0e1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1093,7 +1093,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS; + fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; -- cgit v1.2.3 From 5943634fc5592037db0693b261f7f4bea6bb9457 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 06:58:42 -0700 Subject: ipv4: Maintain redirect and PMTU info in struct rtable again. Maintaining this in the inetpeer entries was not the right way to do this at all. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 4 -- include/net/route.h | 2 +- net/ipv4/inetpeer.c | 3 - net/ipv4/route.c | 185 ++++++++++-------------------------------------- net/ipv4/xfrm4_policy.c | 1 + 5 files changed, 41 insertions(+), 154 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 1119f6f6cdb4..53f464d7cddc 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -36,10 +36,6 @@ struct inet_peer { u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ unsigned long rate_last; - unsigned long pmtu_expires; - u32 pmtu_orig; - u32 pmtu_learned; - struct inetpeer_addr_base redirect_learned; union { struct list_head gc_list; struct rcu_head gc_rcu; diff --git a/include/net/route.h b/include/net/route.h index 635d7a99d199..c27449466d18 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -65,7 +65,7 @@ struct rtable { __be32 rt_gateway; /* Miscellaneous cached information */ - u32 rt_peer_genid; + u32 rt_pmtu; unsigned long _peer; /* long-living peer info */ struct fib_info *fi; /* for client ref to shared metrics */ }; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f457bcb41350..e1e0a4e8fd34 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -511,9 +511,6 @@ relookup: p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; p->rate_last = 0; - p->pmtu_expires = 0; - p->pmtu_orig = 0; - memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 67b08745daf9..677d65253e4c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -669,7 +669,7 @@ static inline int rt_fast_clean(struct rtable *rth) static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - (rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires); + rth->dst.expires; } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -1242,13 +1242,6 @@ skip_hashing: return rt; } -static atomic_t __rt_peer_genid = ATOMIC_INIT(0); - -static u32 rt_peer_genid(void) -{ - return atomic_read(&__rt_peer_genid); -} - void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) { struct inet_peer_base *base; @@ -1262,8 +1255,6 @@ void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) if (peer) { if (!rt_set_peer(rt, peer)) inet_putpeer(peer); - else - rt->rt_peer_genid = rt_peer_genid(); } } @@ -1323,30 +1314,6 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) -{ - struct rtable *rt = (struct rtable *) dst; - __be32 orig_gw = rt->rt_gateway; - struct neighbour *n; - - dst_confirm(&rt->dst); - - rt->rt_gateway = peer->redirect_learned.a4; - - n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway); - if (!n) { - rt->rt_gateway = orig_gw; - return; - } - if (!(n->nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - } else { - rt->rt_flags |= RTCF_REDIRECTED; - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); - } - neigh_release(n); -} - /* called in rcu_read_lock() section */ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) @@ -1355,7 +1322,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, struct in_device *in_dev = __in_dev_get_rcu(dev); __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; - struct inet_peer *peer; struct net *net; if (!in_dev) @@ -1388,6 +1354,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rthp = &rt_hash_table[hash].chain; while ((rt = rcu_dereference(*rthp)) != NULL) { + struct neighbour *n; + rthp = &rt->dst.rt_next; if (rt->rt_key_dst != daddr || @@ -1401,13 +1369,16 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->rt_gateway != old_gw) continue; - peer = rt_get_peer_create(rt, rt->rt_dst); - if (peer) { - if (peer->redirect_learned.a4 != new_gw) { - peer->redirect_learned.a4 = new_gw; - atomic_inc(&__rt_peer_genid); + n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); + if (n) { + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + } else { + rt->rt_gateway = new_gw; + rt->rt_flags |= RTCF_REDIRECTED; + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } - check_peer_redir(&rt->dst, peer); + neigh_release(n); } } } @@ -1425,23 +1396,6 @@ reject_redirect: ; } -static bool peer_pmtu_expired(struct inet_peer *peer) -{ - unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); - - return orig && - time_after_eq(jiffies, orig) && - cmpxchg(&peer->pmtu_expires, orig, 0) == orig; -} - -static bool peer_pmtu_cleaned(struct inet_peer *peer) -{ - unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); - - return orig && - cmpxchg(&peer->pmtu_expires, orig, 0) == orig; -} - static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; @@ -1451,16 +1405,13 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) if (dst->obsolete > 0) { ip_rt_put(rt); ret = NULL; - } else if (rt->rt_flags & RTCF_REDIRECTED) { + } else if ((rt->rt_flags & RTCF_REDIRECTED) || + rt->dst.expires) { unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, rt->rt_oif, rt_genid(dev_net(dst->dev))); rt_del(hash, rt); ret = NULL; - } else if (rt_has_peer(rt)) { - struct inet_peer *peer = rt_peer_ptr(rt); - if (peer_pmtu_expired(peer)) - dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); } } return ret; @@ -1604,50 +1555,17 @@ out: kfree_skb(skb); return 0; } -static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) -{ - unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); - - if (!expires) - return; - if (time_before(jiffies, expires)) { - u32 orig_dst_mtu = dst_mtu(dst); - if (peer->pmtu_learned < orig_dst_mtu) { - if (!peer->pmtu_orig) - peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU); - dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned); - } - } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires) - dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); -} - static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { struct rtable *rt = (struct rtable *) dst; - struct inet_peer *peer; dst_confirm(dst); - peer = rt_get_peer_create(rt, rt->rt_dst); - if (peer) { - unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); - - if (mtu < ip_rt_min_pmtu) - mtu = ip_rt_min_pmtu; - if (!pmtu_expires || mtu < peer->pmtu_learned) { - - pmtu_expires = jiffies + ip_rt_mtu_expires; - if (!pmtu_expires) - pmtu_expires = 1UL; - - peer->pmtu_learned = mtu; - peer->pmtu_expires = pmtu_expires; + if (mtu < ip_rt_min_pmtu) + mtu = ip_rt_min_pmtu; - atomic_inc(&__rt_peer_genid); - rt->rt_peer_genid = rt_peer_genid(); - } - check_peer_pmtu(dst, peer); - } + rt->rt_pmtu = mtu; + dst_set_expires(&rt->dst, ip_rt_mtu_expires); } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, @@ -1679,30 +1597,12 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); -static void ipv4_validate_peer(struct rtable *rt) -{ - if (rt->rt_peer_genid != rt_peer_genid()) { - struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst); - - if (peer) { - check_peer_pmtu(&rt->dst, peer); - - if (peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) - check_peer_redir(&rt->dst, peer); - } - - rt->rt_peer_genid = rt_peer_genid(); - } -} - static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { struct rtable *rt = (struct rtable *) dst; if (rt_is_expired(rt)) return NULL; - ipv4_validate_peer(rt); return dst; } @@ -1728,11 +1628,8 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt && rt_has_peer(rt)) { - struct inet_peer *peer = rt_peer_ptr(rt); - if (peer_pmtu_cleaned(peer)) - dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig); - } + if (rt) + dst_set_expires(&rt->dst, 0); } static int ip_rt_bug(struct sk_buff *skb) @@ -1812,7 +1709,13 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) static unsigned int ipv4_mtu(const struct dst_entry *dst) { const struct rtable *rt = (const struct rtable *) dst; - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + unsigned int mtu = rt->rt_pmtu; + + if (mtu && time_after_eq(jiffies, rt->dst.expires)) + mtu = 0; + + if (!mtu) + mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu && rt_is_output_route(rt)) return mtu; @@ -1843,19 +1746,10 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, peer = inet_getpeer_v4(base, rt->rt_dst, 0); if (peer) { __rt_set_peer(rt, peer); - rt->rt_peer_genid = rt_peer_genid(); if (inet_metrics_new(peer)) memcpy(peer->metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX); dst_init_metrics(&rt->dst, peer->metrics, false); - - check_peer_pmtu(&rt->dst, peer); - - if (peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) { - rt->rt_gateway = peer->redirect_learned.a4; - rt->rt_flags |= RTCF_REDIRECTED; - } } else { if (fi->fib_metrics != (u32 *) dst_default_metrics) { rt->fi = fi; @@ -1955,8 +1849,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_pmtu = 0; rth->rt_gateway = daddr; - rth->rt_peer_genid = 0; rt_init_peer(rth, dev_net(dev)->ipv4.peers); rth->fi = NULL; if (our) { @@ -2081,8 +1975,8 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_pmtu = 0; rth->rt_gateway = daddr; - rth->rt_peer_genid = 0; rt_init_peer(rth, &res->table->tb_peers); rth->fi = NULL; @@ -2260,8 +2154,8 @@ local_input: rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_pmtu = 0; rth->rt_gateway = daddr; - rth->rt_peer_genid = 0; rt_init_peer(rth, net->ipv4.peers); rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { @@ -2337,7 +2231,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { - ipv4_validate_peer(rth); if (noref) { dst_use_noref(&rth->dst, jiffies); skb_dst_set_noref(skb, &rth->dst); @@ -2459,8 +2352,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_oif = orig_oif; rth->rt_mark = fl4->flowi4_mark; + rth->rt_pmtu = 0; rth->rt_gateway = fl4->daddr; - rth->rt_peer_genid = 0; rt_init_peer(rth, (res->table ? &res->table->tb_peers : dev_net(dev_out)->ipv4.peers)); @@ -2717,7 +2610,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { - ipv4_validate_peer(rth); dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); @@ -2794,6 +2686,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; rt->rt_mark = ort->rt_mark; + rt->rt_pmtu = ort->rt_pmtu; rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; @@ -2896,13 +2789,13 @@ static int rt_fill_info(struct net *net, const struct inet_peer *peer = rt_peer_ptr(rt); inet_peer_refcheck(peer); id = atomic_read(&peer->ip_id_count) & 0xffff; - expires = ACCESS_ONCE(peer->pmtu_expires); - if (expires) { - if (time_before(jiffies, expires)) - expires -= jiffies; - else - expires = 0; - } + } + expires = rt->dst.expires; + if (expires) { + if (time_before(jiffies, expires)) + expires -= jiffies; + else + expires = 0; } if (rt_is_input_route(rt)) { diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 9815ea0bca7f..951bcf35b21c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -100,6 +100,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_src = rt->rt_src; xdst->u.rt.rt_dst = rt->rt_dst; xdst->u.rt.rt_gateway = rt->rt_gateway; + xdst->u.rt.rt_pmtu = rt->rt_pmtu; return 0; } -- cgit v1.2.3 From f185071ddf799e194ba015d040d3d49cdbfa7e48 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 07:26:01 -0700 Subject: ipv4: Remove inetpeer from routes. No longer used. Signed-off-by: David S. Miller --- include/net/route.h | 57 ---------------------------------------------- net/ipv4/route.c | 60 +++++-------------------------------------------- net/ipv4/xfrm4_policy.c | 7 ------ 3 files changed, 6 insertions(+), 118 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index c27449466d18..52362368af09 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -40,7 +40,6 @@ #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) struct fib_nh; -struct inet_peer; struct fib_info; struct rtable { struct dst_entry dst; @@ -66,44 +65,9 @@ struct rtable { /* Miscellaneous cached information */ u32 rt_pmtu; - unsigned long _peer; /* long-living peer info */ struct fib_info *fi; /* for client ref to shared metrics */ }; -static inline struct inet_peer *rt_peer_ptr(struct rtable *rt) -{ - return inetpeer_ptr(rt->_peer); -} - -static inline bool rt_has_peer(struct rtable *rt) -{ - return inetpeer_ptr_is_peer(rt->_peer); -} - -static inline void __rt_set_peer(struct rtable *rt, struct inet_peer *peer) -{ - __inetpeer_ptr_set_peer(&rt->_peer, peer); -} - -static inline bool rt_set_peer(struct rtable *rt, struct inet_peer *peer) -{ - return inetpeer_ptr_set_peer(&rt->_peer, peer); -} - -static inline void rt_init_peer(struct rtable *rt, struct inet_peer_base *base) -{ - inetpeer_init_ptr(&rt->_peer, base); -} - -static inline void rt_transfer_peer(struct rtable *rt, struct rtable *ort) -{ - rt->_peer = ort->_peer; - if (rt_has_peer(ort)) { - struct inet_peer *peer = rt_peer_ptr(ort); - atomic_inc(&peer->refcnt); - } -} - static inline bool rt_is_input_route(const struct rtable *rt) { return rt->rt_route_iif != 0; @@ -326,27 +290,6 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable return rt; } -extern void rt_bind_peer(struct rtable *rt, __be32 daddr, int create); - -static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, int create) -{ - if (rt_has_peer(rt)) - return rt_peer_ptr(rt); - - rt_bind_peer(rt, daddr, create); - return (rt_has_peer(rt) ? rt_peer_ptr(rt) : NULL); -} - -static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr) -{ - return __rt_get_peer(rt, daddr, 0); -} - -static inline struct inet_peer *rt_get_peer_create(struct rtable *rt, __be32 daddr) -{ - return __rt_get_peer(rt, daddr, 1); -} - static inline int inet_iif(const struct sk_buff *skb) { return skb_rtable(skb)->rt_iif; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9cc00f8a6ee5..95bfa1ba5b28 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -889,7 +889,6 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); - inetpeer_invalidate_family(AF_INET); } /* @@ -1216,22 +1215,6 @@ skip_hashing: return rt; } -void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) -{ - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_peer); - if (!base) - return; - - peer = inet_getpeer_v4(base, daddr, create); - if (peer) { - if (!rt_set_peer(rt, peer)) - inet_putpeer(peer); - } -} - /* * Peer allocation may fail only in serious out-of-memory conditions. However * we still can generate some output. @@ -1588,10 +1571,6 @@ static void ipv4_dst_destroy(struct dst_entry *dst) fib_info_put(rt->fi); rt->fi = NULL; } - if (rt_has_peer(rt)) { - struct inet_peer *peer = rt_peer_ptr(rt); - inet_putpeer(peer); - } } @@ -1711,26 +1690,11 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, struct fib_info *fi) { - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_peer); - BUG_ON(!base); - - peer = inet_getpeer_v4(base, rt->rt_dst, 0); - if (peer) { - __rt_set_peer(rt, peer); - if (inet_metrics_new(peer)) - memcpy(peer->metrics, fi->fib_metrics, - sizeof(u32) * RTAX_MAX); - dst_init_metrics(&rt->dst, peer->metrics, false); - } else { - if (fi->fib_metrics != (u32 *) dst_default_metrics) { - rt->fi = fi; - atomic_inc(&fi->fib_clntref); - } - dst_init_metrics(&rt->dst, fi->fib_metrics, true); + if (fi->fib_metrics != (u32 *) dst_default_metrics) { + rt->fi = fi; + atomic_inc(&fi->fib_clntref); } + dst_init_metrics(&rt->dst, fi->fib_metrics, true); } static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, @@ -1820,7 +1784,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_mark = skb->mark; rth->rt_pmtu = 0; rth->rt_gateway = daddr; - rt_init_peer(rth, dev_net(dev)->ipv4.peers); rth->fi = NULL; if (our) { rth->dst.input= ip_local_deliver; @@ -1946,7 +1909,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_mark = skb->mark; rth->rt_pmtu = 0; rth->rt_gateway = daddr; - rt_init_peer(rth, &res->table->tb_peers); rth->fi = NULL; rth->dst.input = ip_forward; @@ -2125,7 +2087,6 @@ local_input: rth->rt_mark = skb->mark; rth->rt_pmtu = 0; rth->rt_gateway = daddr; - rt_init_peer(rth, net->ipv4.peers); rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -2323,9 +2284,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_mark = fl4->flowi4_mark; rth->rt_pmtu = 0; rth->rt_gateway = fl4->daddr; - rt_init_peer(rth, (res->table ? - &res->table->tb_peers : - dev_net(dev_out)->ipv4.peers)); rth->fi = NULL; RT_CACHE_STAT_INC(out_slow_tot); @@ -2662,7 +2620,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_dst = ort->rt_dst; rt->rt_src = ort->rt_src; rt->rt_gateway = ort->rt_gateway; - rt_transfer_peer(rt, ort); rt->fi = ort->fi; if (rt->fi) atomic_inc(&rt->fi->fib_clntref); @@ -2700,7 +2657,7 @@ static int rt_fill_info(struct net *net, struct rtmsg *r; struct nlmsghdr *nlh; unsigned long expires = 0; - u32 id = 0, error; + u32 error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); if (nlh == NULL) @@ -2753,11 +2710,6 @@ static int rt_fill_info(struct net *net, goto nla_put_failure; error = rt->dst.error; - if (rt_has_peer(rt)) { - const struct inet_peer *peer = rt_peer_ptr(rt); - inet_peer_refcheck(peer); - id = atomic_read(&peer->ip_id_count) & 0xffff; - } expires = rt->dst.expires; if (expires) { if (time_before(jiffies, expires)) @@ -2792,7 +2744,7 @@ static int rt_fill_info(struct net *net, goto nla_put_failure; } - if (rtnl_put_cacheinfo(skb, &rt->dst, id, expires, error) < 0) + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) goto nla_put_failure; return nlmsg_end(skb, nlh); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 951bcf35b21c..87d3fcc302d4 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -90,8 +90,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.dst.dev = dev; dev_hold(dev); - rt_transfer_peer(&xdst->u.rt, rt); - /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | @@ -210,11 +208,6 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) dst_destroy_metrics_generic(dst); - if (rt_has_peer(&xdst->u.rt)) { - struct inet_peer *peer = rt_peer_ptr(&xdst->u.rt); - inet_putpeer(peer); - } - xfrm_dst_destroy(xdst); } -- cgit v1.2.3 From 1a203cb33a7dc791b6c0aedf701e70ac00c50cdb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Jul 2012 19:05:57 +0000 Subject: ipv6: optimize ipv6 addresses compares On 64 bit arches having efficient unaligned accesses (eg x86_64) we can use long words to reduce number of instructions for free. Joe Perches suggested to change ipv6_masked_addr_cmp() to return a bool instead of 'int', to make sure ipv6_masked_addr_cmp() cannot be used in a sorting function. Signed-off-by: Eric Dumazet Cc: Joe Perches Signed-off-by: David S. Miller --- include/net/ipv6.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index aecf88436abf..d4261d4d6c47 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -298,14 +298,23 @@ static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr return memcmp(a1, a2, sizeof(struct in6_addr)); } -static inline int +static inline bool ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, const struct in6_addr *a2) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ul1 = (const unsigned long *)a1; + const unsigned long *ulm = (const unsigned long *)m; + const unsigned long *ul2 = (const unsigned long *)a2; + + return !!(((ul1[0] ^ ul2[0]) & ulm[0]) | + ((ul1[1] ^ ul2[1]) & ulm[1])); +#else return !!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3])); +#endif } static inline void ipv6_addr_prefix(struct in6_addr *pfx, @@ -335,10 +344,17 @@ static inline void ipv6_addr_set(struct in6_addr *addr, static inline bool ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ul1 = (const unsigned long *)a1; + const unsigned long *ul2 = (const unsigned long *)a2; + + return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL; +#else return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; +#endif } static inline bool __ipv6_prefix_equal(const __be32 *a1, const __be32 *a2, @@ -391,8 +407,14 @@ bool ip6_frag_match(struct inet_frag_queue *q, void *a); static inline bool ipv6_addr_any(const struct in6_addr *a) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ul = (const unsigned long *)a; + + return (ul[0] | ul[1]) == 0UL; +#else return (a->s6_addr32[0] | a->s6_addr32[1] | a->s6_addr32[2] | a->s6_addr32[3]) == 0; +#endif } static inline bool ipv6_addr_loopback(const struct in6_addr *a) -- cgit v1.2.3 From 4b10b274e22ca6df1cda2fccf3870b8586feec15 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 11 Jul 2012 14:43:34 +0300 Subject: Bluetooth: debug: Print l2cap_chan refcount Improve debug output. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index d80e3f0691b4..e5164ff55f27 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -672,11 +672,15 @@ enum { static inline void l2cap_chan_hold(struct l2cap_chan *c) { + BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt)); + atomic_inc(&c->refcnt); } static inline void l2cap_chan_put(struct l2cap_chan *c) { + BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt)); + if (atomic_dec_and_test(&c->refcnt)) kfree(c); } -- cgit v1.2.3 From 46d3ceabd8d98ed0ad10f20c595ca784e34786c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jul 2012 05:50:31 +0000 Subject: tcp: TCP Small Queues This introduce TSQ (TCP Small Queues) TSQ goal is to reduce number of TCP packets in xmit queues (qdisc & device queues), to reduce RTT and cwnd bias, part of the bufferbloat problem. sk->sk_wmem_alloc not allowed to grow above a given limit, allowing no more than ~128KB [1] per tcp socket in qdisc/dev layers at a given time. TSO packets are sized/capped to half the limit, so that we have two TSO packets in flight, allowing better bandwidth use. As a side effect, setting the limit to 40000 automatically reduces the standard gso max limit (65536) to 40000/2 : It can help to reduce latencies of high prio packets, having smaller TSO packets. This means we divert sock_wfree() to a tcp_wfree() handler, to queue/send following frames when skb_orphan() [2] is called for the already queued skbs. Results on my dev machines (tg3/ixgbe nics) are really impressive, using standard pfifo_fast, and with or without TSO/GSO. Without reduction of nominal bandwidth, we have reduction of buffering per bulk sender : < 1ms on Gbit (instead of 50ms with TSO) < 8ms on 100Mbit (instead of 132 ms) I no longer have 4 MBytes backlogged in qdisc by a single netperf session, and both side socket autotuning no longer use 4 Mbytes. As skb destructor cannot restart xmit itself ( as qdisc lock might be taken at this point ), we delegate the work to a tasklet. We use one tasklest per cpu for performance reasons. If tasklet finds a socket owned by the user, it sets TSQ_OWNED flag. This flag is tested in a new protocol method called from release_sock(), to eventually send new segments. [1] New /proc/sys/net/ipv4/tcp_limit_output_bytes tunable [2] skb_orphan() is usually called at TX completion time, but some drivers call it in their start_xmit() handler. These drivers should at least use BQL, or else a single TCP session can still fill the whole NIC TX ring, since TSQ will have no effect. Signed-off-by: Eric Dumazet Cc: Dave Taht Cc: Tom Herbert Cc: Matt Mathis Cc: Yuchung Cheng Cc: Nandita Dukkipati Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 14 +++ include/linux/tcp.h | 9 ++ include/net/sock.h | 2 + include/net/tcp.h | 4 + net/core/sock.c | 4 + net/ipv4/sysctl_net_ipv4.c | 7 ++ net/ipv4/tcp.c | 6 ++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 1 + net/ipv4/tcp_output.c | 154 ++++++++++++++++++++++++++++++++- net/ipv6/tcp_ipv6.c | 1 + 11 files changed, 202 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 47b6c79e9b05..e20c17a7d34e 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -551,6 +551,20 @@ tcp_thin_dupack - BOOLEAN Documentation/networking/tcp-thin.txt Default: 0 +tcp_limit_output_bytes - INTEGER + Controls TCP Small Queue limit per tcp socket. + TCP bulk sender tends to increase packets in flight until it + gets losses notifications. With SNDBUF autotuning, this can + result in a large amount of packets queued in qdisc/device + on the local machine, hurting latency of other flows, for + typical pfifo_fast qdiscs. + tcp_limit_output_bytes limits the number of bytes on qdisc + or device to reduce artificial RTT/cwnd and reduce bufferbloat. + Note: For GSO/TSO enabled flows, we try to have at least two + packets in flight. Reducing tcp_limit_output_bytes might also + reduce the size of individual GSO packet (64KB being the max) + Default: 131072 + UDP variables: udp_mem - vector of 3 INTEGERs: min, pressure, max diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2de9cf46f9fc..1888169e07c7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -339,6 +339,9 @@ struct tcp_sock { u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ + struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ + unsigned long tsq_flags; + /* Data for direct copy to user */ struct { struct sk_buff_head prequeue; @@ -494,6 +497,12 @@ struct tcp_sock { struct tcp_cookie_values *cookie_values; }; +enum tsq_flags { + TSQ_THROTTLED, + TSQ_QUEUED, + TSQ_OWNED, /* tcp_tasklet_func() found socket was locked */ +}; + static inline struct tcp_sock *tcp_sk(const struct sock *sk) { return (struct tcp_sock *)sk; diff --git a/include/net/sock.h b/include/net/sock.h index dcb54a0793ec..88de092df50f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -858,6 +858,8 @@ struct proto { int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); + void (*release_cb)(struct sock *sk); + /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); diff --git a/include/net/tcp.h b/include/net/tcp.h index 3618fefae049..439984b9af49 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -253,6 +253,7 @@ extern int sysctl_tcp_cookie_size; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; +extern int sysctl_tcp_limit_output_bytes; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; @@ -321,6 +322,8 @@ extern struct proto tcp_prot; extern void tcp_init_mem(struct net *net); +extern void tcp_tasklet_init(void); + extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); @@ -334,6 +337,7 @@ extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); extern int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +extern void tcp_release_cb(struct sock *sk); extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len); diff --git a/net/core/sock.c b/net/core/sock.c index 929bdcc2383b..24039ac12426 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2159,6 +2159,10 @@ void release_sock(struct sock *sk) spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_backlog.tail) __release_sock(sk); + + if (sk->sk_prot->release_cb) + sk->sk_prot->release_cb(sk); + sk->sk_lock.owned = 0; if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 12aa0c5867c4..70730f7aeafe 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -598,6 +598,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_limit_output_bytes", + .data = &sysctl_tcp_limit_output_bytes, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_NET_DMA { .procname = "tcp_dma_copybreak", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d902da96d154..4252cd8f39fd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -376,6 +376,7 @@ void tcp_init_sock(struct sock *sk) skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); + INIT_LIST_HEAD(&tp->tsq_node); icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; @@ -796,6 +797,10 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, inet_csk(sk)->icsk_ext_hdr_len - tp->tcp_header_len); + /* TSQ : try to have two TSO segments in flight */ + xmit_size_goal = min_t(u32, xmit_size_goal, + sysctl_tcp_limit_output_bytes >> 1); + xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); /* We try hard to avoid divides here */ @@ -3574,4 +3579,5 @@ void __init tcp_init(void) tcp_secret_primary = &tcp_secret_one; tcp_secret_retiring = &tcp_secret_two; tcp_secret_secondary = &tcp_secret_two; + tcp_tasklet_init(); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ddefd39ac0cf..01545a3fc0f2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2588,6 +2588,7 @@ struct proto tcp_prot = { .sendmsg = tcp_sendmsg, .sendpage = tcp_sendpage, .backlog_rcv = tcp_v4_do_rcv, + .release_cb = tcp_release_cb, .hash = inet_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 65608863fdee..c66f2ede160e 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -424,6 +424,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, treq->snt_isn + 1 + tcp_s_data_size(oldtp); tcp_prequeue_init(newtp); + INIT_LIST_HEAD(&newtp->tsq_node); tcp_init_wl(newtp, treq->rcv_isn); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c465d3e51e28..03854abfd9d8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -50,6 +50,9 @@ int sysctl_tcp_retrans_collapse __read_mostly = 1; */ int sysctl_tcp_workaround_signed_windows __read_mostly = 0; +/* Default TSQ limit of two TSO segments */ +int sysctl_tcp_limit_output_bytes __read_mostly = 131072; + /* This limits the percentage of the congestion window which we * will allow a single TSO frame to consume. Building TSO frames * which are too large can cause TCP streams to be bursty. @@ -65,6 +68,8 @@ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); +static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, + int push_one, gfp_t gfp); /* Account for new data that has been sent to the network. */ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) @@ -783,6 +788,140 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb return size; } + +/* TCP SMALL QUEUES (TSQ) + * + * TSQ goal is to keep small amount of skbs per tcp flow in tx queues (qdisc+dev) + * to reduce RTT and bufferbloat. + * We do this using a special skb destructor (tcp_wfree). + * + * Its important tcp_wfree() can be replaced by sock_wfree() in the event skb + * needs to be reallocated in a driver. + * The invariant being skb->truesize substracted from sk->sk_wmem_alloc + * + * Since transmit from skb destructor is forbidden, we use a tasklet + * to process all sockets that eventually need to send more skbs. + * We use one tasklet per cpu, with its own queue of sockets. + */ +struct tsq_tasklet { + struct tasklet_struct tasklet; + struct list_head head; /* queue of tcp sockets */ +}; +static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet); + +/* + * One tasklest per cpu tries to send more skbs. + * We run in tasklet context but need to disable irqs when + * transfering tsq->head because tcp_wfree() might + * interrupt us (non NAPI drivers) + */ +static void tcp_tasklet_func(unsigned long data) +{ + struct tsq_tasklet *tsq = (struct tsq_tasklet *)data; + LIST_HEAD(list); + unsigned long flags; + struct list_head *q, *n; + struct tcp_sock *tp; + struct sock *sk; + + local_irq_save(flags); + list_splice_init(&tsq->head, &list); + local_irq_restore(flags); + + list_for_each_safe(q, n, &list) { + tp = list_entry(q, struct tcp_sock, tsq_node); + list_del(&tp->tsq_node); + + sk = (struct sock *)tp; + bh_lock_sock(sk); + + if (!sock_owned_by_user(sk)) { + if ((1 << sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | + TCPF_CLOSING | TCPF_CLOSE_WAIT)) + tcp_write_xmit(sk, + tcp_current_mss(sk), + 0, 0, + GFP_ATOMIC); + } else { + /* defer the work to tcp_release_cb() */ + set_bit(TSQ_OWNED, &tp->tsq_flags); + } + bh_unlock_sock(sk); + + clear_bit(TSQ_QUEUED, &tp->tsq_flags); + sk_free(sk); + } +} + +/** + * tcp_release_cb - tcp release_sock() callback + * @sk: socket + * + * called from release_sock() to perform protocol dependent + * actions before socket release. + */ +void tcp_release_cb(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (test_and_clear_bit(TSQ_OWNED, &tp->tsq_flags)) { + if ((1 << sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | + TCPF_CLOSING | TCPF_CLOSE_WAIT)) + tcp_write_xmit(sk, + tcp_current_mss(sk), + 0, 0, + GFP_ATOMIC); + } +} +EXPORT_SYMBOL(tcp_release_cb); + +void __init tcp_tasklet_init(void) +{ + int i; + + for_each_possible_cpu(i) { + struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i); + + INIT_LIST_HEAD(&tsq->head); + tasklet_init(&tsq->tasklet, + tcp_tasklet_func, + (unsigned long)tsq); + } +} + +/* + * Write buffer destructor automatically called from kfree_skb. + * We cant xmit new skbs from this context, as we might already + * hold qdisc lock. + */ +void tcp_wfree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct tcp_sock *tp = tcp_sk(sk); + + if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && + !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { + unsigned long flags; + struct tsq_tasklet *tsq; + + /* Keep a ref on socket. + * This last ref will be released in tcp_tasklet_func() + */ + atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc); + + /* queue this socket to tasklet queue */ + local_irq_save(flags); + tsq = &__get_cpu_var(tsq_tasklet); + list_add(&tp->tsq_node, &tsq->head); + tasklet_schedule(&tsq->tasklet); + local_irq_restore(flags); + } else { + sock_wfree(skb); + } +} + /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial * transmission and possible later retransmissions. @@ -844,7 +983,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); - skb_set_owner_w(skb, sk); + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ? + tcp_wfree : sock_wfree; + atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ th = tcp_hdr(skb); @@ -1780,6 +1924,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, while ((skb = tcp_send_head(sk))) { unsigned int limit; + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); @@ -1800,6 +1945,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, break; } + /* TSQ : sk_wmem_alloc accounts skb truesize, + * including skb overhead. But thats OK. + */ + if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) { + set_bit(TSQ_THROTTLED, &tp->tsq_flags); + break; + } limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 61175cb2478f..70458a9cd837 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1970,6 +1970,7 @@ struct proto tcpv6_prot = { .sendmsg = tcp_sendmsg, .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, + .release_cb = tcp_release_cb, .hash = tcp_v6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, -- cgit v1.2.3 From 94206125c4aac32e43c25bfe1b827e7ab993b7dc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 20:38:08 -0700 Subject: ipv4: Rearrange arguments to ip_rt_redirect() Pass in the SKB rather than just the IP addresses, so that policy and other aspects can reside in ip_rt_redirect() rather then icmp_redirect(). Signed-off-by: David S. Miller --- include/net/route.h | 3 +-- net/ipv4/icmp.c | 36 ++++++------------------------------ net/ipv4/route.c | 23 +++++++++++++++++++---- 3 files changed, 26 insertions(+), 36 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 52362368af09..b1402787aecb 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -108,8 +108,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; extern int ip_rt_init(void); -extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw, - __be32 src, struct net_device *dev); +extern void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw); extern void rt_cache_flush(struct net *net, int how); extern void rt_cache_flush_batch(struct net *net); extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 588514627aa7..70a793559bb5 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -755,40 +755,16 @@ out_err: static void icmp_redirect(struct sk_buff *skb) { - const struct iphdr *iph; - - if (skb->len < sizeof(struct iphdr)) - goto out_err; + if (skb->len < sizeof(struct iphdr)) { + ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); + return; + } - /* - * Get the copied header of the packet that caused the redirect - */ if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - - iph = (const struct iphdr *)skb->data; - - switch (icmp_hdr(skb)->code & 7) { - case ICMP_REDIR_NET: - case ICMP_REDIR_NETTOS: - /* - * As per RFC recommendations now handle it as a host redirect. - */ - case ICMP_REDIR_HOST: - case ICMP_REDIR_HOSTTOS: - ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr, - icmp_hdr(skb)->un.gateway, - iph->saddr, skb->dev); - break; - } + return; + ip_rt_redirect(skb, icmp_hdr(skb)->un.gateway); icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway); - -out: - return; -out_err: - ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); - goto out; } /* diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a4de87f44c30..f8921b448c39 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1292,18 +1292,33 @@ static void ip_do_redirect(struct rtable *rt, __be32 old_gw, __be32 new_gw) } /* called in rcu_read_lock() section */ -void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, - __be32 saddr, struct net_device *dev) +void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) { - int s, i; + const struct iphdr *iph = (const struct iphdr *) skb->data; + __be32 old_gw = ip_hdr(skb)->saddr; + __be32 daddr = iph->daddr; + __be32 saddr = iph->saddr; + struct net_device *dev = skb->dev; struct in_device *in_dev = __in_dev_get_rcu(dev); - __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; + __be32 skeys[2] = { saddr, 0 }; struct net *net; + int s, i; if (!in_dev) return; + switch (icmp_hdr(skb)->code & 7) { + case ICMP_REDIR_NET: + case ICMP_REDIR_NETTOS: + case ICMP_REDIR_HOST: + case ICMP_REDIR_HOSTTOS: + break; + + default: + return; + } + net = dev_net(dev); if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || -- cgit v1.2.3 From e47a185b31dd2acd424fac7dc0efb96fc5b31a33 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 20:55:47 -0700 Subject: ipv4: Generalize ip_do_redirect() and hook into new dst_ops->redirect. All of the redirect acceptance policy is now contained within. Signed-off-by: David S. Miller --- include/net/dst_ops.h | 1 + net/ipv4/route.c | 94 +++++++++++++++++++++++++++++---------------------- 2 files changed, 55 insertions(+), 40 deletions(-) (limited to 'include/net') diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 4badc86e45d1..085931fa7ce0 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -25,6 +25,7 @@ struct dst_ops { struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, u32 mtu); + void (*redirect)(struct dst_entry *dst, struct sk_buff *skb); int (*local_out)(struct sk_buff *skb); struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, struct sk_buff *skb, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f8921b448c39..f3d25656ddb0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -149,6 +149,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); +static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb); static int rt_garbage_collect(struct dst_ops *ops); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -179,6 +180,7 @@ static struct dst_ops ipv4_dst_ops = { .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, + .redirect = ip_do_redirect, .local_out = __ip_local_out, .neigh_lookup = ipv4_neigh_lookup, }; @@ -1271,42 +1273,18 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void ip_do_redirect(struct rtable *rt, __be32 old_gw, __be32 new_gw) -{ - struct neighbour *n; - - if (rt->rt_gateway != old_gw) - return; - - n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); - if (n) { - if (!(n->nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - } else { - rt->rt_gateway = new_gw; - rt->rt_flags |= RTCF_REDIRECTED; - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); - } - neigh_release(n); - } -} - -/* called in rcu_read_lock() section */ -void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) +static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb) { const struct iphdr *iph = (const struct iphdr *) skb->data; + __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; + struct net_device *dev = skb->dev; __be32 daddr = iph->daddr; __be32 saddr = iph->saddr; - struct net_device *dev = skb->dev; - struct in_device *in_dev = __in_dev_get_rcu(dev); - int ikeys[2] = { dev->ifindex, 0 }; - __be32 skeys[2] = { saddr, 0 }; + struct in_device *in_dev; + struct neighbour *n; + struct rtable *rt; struct net *net; - int s, i; - - if (!in_dev) - return; switch (icmp_hdr(skb)->code & 7) { case ICMP_REDIR_NET: @@ -1319,6 +1297,14 @@ void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) return; } + rt = (struct rtable *) dst; + if (rt->rt_gateway != old_gw) + return; + + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + return; + net = dev_net(dev); if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || @@ -1335,6 +1321,43 @@ void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) goto reject_redirect; } + n = ipv4_neigh_lookup(dst, NULL, &new_gw); + if (n) { + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + } else { + rt->rt_gateway = new_gw; + rt->rt_flags |= RTCF_REDIRECTED; + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + } + neigh_release(n); + } + return; + +reject_redirect: +#ifdef CONFIG_IP_ROUTE_VERBOSE + if (IN_DEV_LOG_MARTIANS(in_dev)) + net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" + " Advised path = %pI4 -> %pI4\n", + &old_gw, dev->name, &new_gw, + &saddr, &daddr); +#endif + ; +} + +/* called in rcu_read_lock() section */ +void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) +{ + const struct iphdr *iph = (const struct iphdr *) skb->data; + __be32 daddr = iph->daddr; + __be32 saddr = iph->saddr; + struct net_device *dev = skb->dev; + int ikeys[2] = { dev->ifindex, 0 }; + __be32 skeys[2] = { saddr, 0 }; + struct net *net; + int s, i; + + net = dev_net(dev); for (s = 0; s < 2; s++) { for (i = 0; i < 2; i++) { unsigned int hash; @@ -1358,21 +1381,12 @@ void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) rt->dst.dev != dev) continue; - ip_do_redirect(rt, old_gw, new_gw); + ip_do_redirect(&rt->dst, skb); } } } return; -reject_redirect: -#ifdef CONFIG_IP_ROUTE_VERBOSE - if (IN_DEV_LOG_MARTIANS(in_dev)) - net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" - " Advised path = %pI4 -> %pI4\n", - &old_gw, dev->name, &new_gw, - &saddr, &daddr); -#endif - ; } static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) -- cgit v1.2.3 From b42597e2f36e2043756aa7462faddf630962f061 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 21:25:45 -0700 Subject: ipv4: Add ipv4_redirect() and ipv4_sk_redirect() helper functions. Signed-off-by: David S. Miller --- include/net/route.h | 3 +++ net/ipv4/route.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index b1402787aecb..6ab93eeb8660 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -180,6 +180,9 @@ static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 s extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u32 mark, u8 protocol, int flow_flags); extern void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu); +extern void ipv4_redirect(struct sk_buff *skb, struct net *net, + int oif, u32 mark, u8 protocol, int flow_flags); +extern void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk); extern void ip_rt_send_redirect(struct sk_buff *skb); extern unsigned int inet_addr_type(struct net *net, __be32 addr); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f3d25656ddb0..aabece6b729a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1590,6 +1590,34 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); +void ipv4_redirect(struct sk_buff *skb, struct net *net, + int oif, u32 mark, u8 protocol, int flow_flags) +{ + const struct iphdr *iph = (const struct iphdr *)skb->data; + struct flowi4 fl4; + struct rtable *rt; + + flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, + protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); + rt = __ip_route_output_key(net, &fl4); + if (!IS_ERR(rt)) { + ip_do_redirect(&rt->dst, skb); + ip_rt_put(rt); + } +} +EXPORT_SYMBOL_GPL(ipv4_redirect); + +void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + + return ipv4_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, + sk->sk_mark, + inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + inet_sk_flowi_flags(sk)); +} +EXPORT_SYMBOL_GPL(ipv4_sk_redirect); + static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { struct rtable *rt = (struct rtable *) dst; -- cgit v1.2.3 From 1f42539d257af671d56d4bdbcf13aef31abff6ef Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 21:30:08 -0700 Subject: ipv4: Kill ip_rt_redirect(). No longer needed, as the protocol handlers now all properly propagate the redirect back into the routing code. Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/icmp.c | 1 - net/ipv4/route.c | 44 -------------------------------------------- 3 files changed, 46 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 6ab93eeb8660..ace3cb442519 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -108,7 +108,6 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; extern int ip_rt_init(void); -extern void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw); extern void rt_cache_flush(struct net *net, int how); extern void rt_cache_flush_batch(struct net *net); extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 70a793559bb5..d01aeb4d492e 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -763,7 +763,6 @@ static void icmp_redirect(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct iphdr))) return; - ip_rt_redirect(skb, icmp_hdr(skb)->un.gateway); icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index aabece6b729a..e98207dcd088 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1345,50 +1345,6 @@ reject_redirect: ; } -/* called in rcu_read_lock() section */ -void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) -{ - const struct iphdr *iph = (const struct iphdr *) skb->data; - __be32 daddr = iph->daddr; - __be32 saddr = iph->saddr; - struct net_device *dev = skb->dev; - int ikeys[2] = { dev->ifindex, 0 }; - __be32 skeys[2] = { saddr, 0 }; - struct net *net; - int s, i; - - net = dev_net(dev); - for (s = 0; s < 2; s++) { - for (i = 0; i < 2; i++) { - unsigned int hash; - struct rtable __rcu **rthp; - struct rtable *rt; - - hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net)); - - rthp = &rt_hash_table[hash].chain; - - while ((rt = rcu_dereference(*rthp)) != NULL) { - rthp = &rt->dst.rt_next; - - if (rt->rt_key_dst != daddr || - rt->rt_key_src != skeys[s] || - rt->rt_oif != ikeys[i] || - rt_is_input_route(rt) || - rt_is_expired(rt) || - !net_eq(dev_net(rt->dst.dev), net) || - rt->dst.error || - rt->dst.dev != dev) - continue; - - ip_do_redirect(&rt->dst, skb); - } - } - } - return; - -} - static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; -- cgit v1.2.3 From 30f2a5f379d0b4b4e733df138a49e054ebf75ff8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 23:26:46 -0700 Subject: ipv6: Export ndisc option parsing from ndisc.c This is going to be used internally by the rt6 redirect code. Signed-off-by: David S. Miller --- include/net/ndisc.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/ndisc.c | 47 ++--------------------------------------------- 2 files changed, 52 insertions(+), 45 deletions(-) (limited to 'include/net') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index c02b6ad3f6c5..96a3b5c03e37 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -47,6 +47,8 @@ enum { #include #include #include +#include +#include #include @@ -80,6 +82,54 @@ struct nd_opt_hdr { __u8 nd_opt_len; } __packed; +/* ND options */ +struct ndisc_options { + struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX]; +#ifdef CONFIG_IPV6_ROUTE_INFO + struct nd_opt_hdr *nd_opts_ri; + struct nd_opt_hdr *nd_opts_ri_end; +#endif + struct nd_opt_hdr *nd_useropts; + struct nd_opt_hdr *nd_useropts_end; +}; + +#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] +#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR] +#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO] +#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] +#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] +#define nd_opts_mtu nd_opt_array[ND_OPT_MTU] + +#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) + +extern struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, + struct ndisc_options *ndopts); + +/* + * Return the padding between the option length and the start of the + * link addr. Currently only IP-over-InfiniBand needs this, although + * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may + * also need a pad of 2. + */ +static int ndisc_addr_option_pad(unsigned short type) +{ + switch (type) { + case ARPHRD_INFINIBAND: return 2; + default: return 0; + } +} + +static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, + struct net_device *dev) +{ + u8 *lladdr = (u8 *)(p + 1); + int lladdrlen = p->nd_opt_len << 3; + int prepad = ndisc_addr_option_pad(dev->type); + if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad)) + return NULL; + return lladdr + prepad; +} + static inline u32 ndisc_hashfn(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { const u32 *p32 = pkey; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0fddd571400d..a3189baa9f4f 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -143,40 +143,8 @@ struct neigh_table nd_tbl = { .gc_thresh3 = 1024, }; -/* ND options */ -struct ndisc_options { - struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX]; -#ifdef CONFIG_IPV6_ROUTE_INFO - struct nd_opt_hdr *nd_opts_ri; - struct nd_opt_hdr *nd_opts_ri_end; -#endif - struct nd_opt_hdr *nd_useropts; - struct nd_opt_hdr *nd_useropts_end; -}; - -#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] -#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR] -#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO] -#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] -#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] -#define nd_opts_mtu nd_opt_array[ND_OPT_MTU] - #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) -/* - * Return the padding between the option length and the start of the - * link addr. Currently only IP-over-InfiniBand needs this, although - * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may - * also need a pad of 2. - */ -static int ndisc_addr_option_pad(unsigned short type) -{ - switch (type) { - case ARPHRD_INFINIBAND: return 2; - default: return 0; - } -} - static inline int ndisc_opt_addr_space(struct net_device *dev) { return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type)); @@ -233,8 +201,8 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, return cur <= end && ndisc_is_useropt(cur) ? cur : NULL; } -static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, - struct ndisc_options *ndopts) +struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, + struct ndisc_options *ndopts) { struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt; @@ -297,17 +265,6 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, return ndopts; } -static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, - struct net_device *dev) -{ - u8 *lladdr = (u8 *)(p + 1); - int lladdrlen = p->nd_opt_len << 3; - int prepad = ndisc_addr_option_pad(dev->type); - if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad)) - return NULL; - return lladdr + prepad; -} - int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir) { switch (dev->type) { -- cgit v1.2.3 From e8599ff4b1d6b0d61e1074ae4ba9fca8dd0c41d0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 23:43:53 -0700 Subject: ipv6: Move bulk of redirect handling into rt6_redirect(). This sets things up so that we can have the protocol error handlers call down into the ipv6 route code for redirects just as ipv4 already does. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 7 +---- net/ipv6/ndisc.c | 72 +---------------------------------------------- net/ipv6/route.c | 75 +++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 72 insertions(+), 82 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 58cb3fc34879..5cedbd7688c6 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -133,12 +133,7 @@ extern int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); -extern void rt6_redirect(const struct in6_addr *dest, - const struct in6_addr *src, - const struct in6_addr *saddr, - struct neighbour *neigh, - u8 *lladdr, - int on_link); +extern void rt6_redirect(struct sk_buff *skb); extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a3189baa9f4f..b8d53e186a78 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -143,8 +143,6 @@ struct neigh_table nd_tbl = { .gc_thresh3 = 1024, }; -#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) - static inline int ndisc_opt_addr_space(struct net_device *dev) { return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type)); @@ -1336,16 +1334,6 @@ out: static void ndisc_redirect_rcv(struct sk_buff *skb) { - struct inet6_dev *in6_dev; - struct icmp6hdr *icmph; - const struct in6_addr *dest; - const struct in6_addr *target; /* new first hop to destination */ - struct neighbour *neigh; - int on_link = 0; - struct ndisc_options ndopts; - int optlen; - u8 *lladdr = NULL; - #ifdef CONFIG_IPV6_NDISC_NODETYPE switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: @@ -1362,65 +1350,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } - optlen = skb->tail - skb->transport_header; - optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); - - if (optlen < 0) { - ND_PRINTK(2, warn, "Redirect: packet too short\n"); - return; - } - - icmph = icmp6_hdr(skb); - target = (const struct in6_addr *) (icmph + 1); - dest = target + 1; - - if (ipv6_addr_is_multicast(dest)) { - ND_PRINTK(2, warn, - "Redirect: destination address is multicast\n"); - return; - } - - if (ipv6_addr_equal(dest, target)) { - on_link = 1; - } else if (ipv6_addr_type(target) != - (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK(2, warn, - "Redirect: target address is not link-local unicast\n"); - return; - } - - in6_dev = __in6_dev_get(skb->dev); - if (!in6_dev) - return; - if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) - return; - - /* RFC2461 8.1: - * The IP source address of the Redirect MUST be the same as the current - * first-hop router for the specified ICMP Destination Address. - */ - - if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { - ND_PRINTK(2, warn, "Redirect: invalid ND options\n"); - return; - } - if (ndopts.nd_opts_tgt_lladdr) { - lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, - skb->dev); - if (!lladdr) { - ND_PRINTK(2, warn, - "Redirect: invalid link-layer address length\n"); - return; - } - } - - neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); - if (neigh) { - rt6_redirect(dest, &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr, neigh, lladdr, - on_link); - neigh_release(neigh); - } + rt6_redirect(skb); } void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 563f12c1c99c..73cf3f78aaa8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1690,14 +1690,78 @@ static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, flags, __ip6_route_redirect); } -void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, - const struct in6_addr *saddr, - struct neighbour *neigh, u8 *lladdr, int on_link) +void rt6_redirect(struct sk_buff *skb) { - struct rt6_info *rt, *nrt = NULL; + struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; - struct net *net = dev_net(neigh->dev); + struct rt6_info *rt, *nrt = NULL; + const struct in6_addr *target; struct neighbour *old_neigh; + const struct in6_addr *dest; + const struct in6_addr *src; + const struct in6_addr *saddr; + struct ndisc_options ndopts; + struct inet6_dev *in6_dev; + struct neighbour *neigh; + struct icmp6hdr *icmph; + int on_link, optlen; + u8 *lladdr = NULL; + + optlen = skb->tail - skb->transport_header; + optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + + if (optlen < 0) { + net_dbg_ratelimited("rt6_redirect: packet too short\n"); + return; + } + + icmph = icmp6_hdr(skb); + target = (const struct in6_addr *) (icmph + 1); + dest = target + 1; + + if (ipv6_addr_is_multicast(dest)) { + net_dbg_ratelimited("rt6_redirect: destination address is multicast\n"); + return; + } + + if (ipv6_addr_equal(dest, target)) { + on_link = 1; + } else if (ipv6_addr_type(target) != + (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { + net_dbg_ratelimited("rt6_redirect: target address is not link-local unicast\n"); + return; + } + + in6_dev = __in6_dev_get(skb->dev); + if (!in6_dev) + return; + if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) + return; + + /* RFC2461 8.1: + * The IP source address of the Redirect MUST be the same as the current + * first-hop router for the specified ICMP Destination Address. + */ + + if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { + net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); + return; + } + if (ndopts.nd_opts_tgt_lladdr) { + lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, + skb->dev); + if (!lladdr) { + net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n"); + return; + } + } + + neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); + if (!neigh) + return; + + src = &ipv6_hdr(skb)->daddr; + saddr = &ipv6_hdr(skb)->saddr; rt = ip6_route_redirect(dest, src, saddr, neigh->dev); @@ -1756,6 +1820,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, } out: + neigh_release(neigh); dst_release(&rt->dst); } -- cgit v1.2.3 From 3a5ad2ee5e2c5030d8a303d06f9148a2f893a369 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 00:08:07 -0700 Subject: ipv6: Add ip6_redirect() and ip6_sk_redirect() helper functions. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 ++ net/ipv6/route.c | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5cedbd7688c6..693940565d8a 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -139,6 +139,8 @@ extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark); extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); +extern void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); +extern void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); struct netlink_callback; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 545b1526c143..f52cf83bb1e0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1114,6 +1114,33 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) +{ + const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; + struct dst_entry *dst; + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; + fl6.flowi6_mark = mark; + fl6.flowi6_flags = 0; + fl6.daddr = iph->daddr; + fl6.saddr = iph->saddr; + fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + + dst = ip6_route_output(net, NULL, &fl6); + if (!dst->error) + rt6_do_redirect(dst, skb); + dst_release(dst); +} +EXPORT_SYMBOL_GPL(ip6_redirect); + +void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) +{ + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); +} +EXPORT_SYMBOL_GPL(ip6_sk_redirect); + static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; -- cgit v1.2.3 From ec18d9a2691d69cd14b48f9b919fddcef28b7f5c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 00:25:15 -0700 Subject: ipv6: Add redirect support to all protocol icmp error handlers. Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 ++ net/dccp/ipv6.c | 7 +++++++ net/ipv6/ah6.c | 10 ++++++---- net/ipv6/esp6.c | 11 +++++++---- net/ipv6/ip6_tunnel.c | 7 +++++++ net/ipv6/ipcomp6.c | 11 +++++++---- net/ipv6/raw.c | 2 ++ net/ipv6/sit.c | 8 ++++++++ net/ipv6/tcp_ipv6.c | 7 +++++++ net/ipv6/udp.c | 2 ++ net/ipv6/xfrm6_policy.c | 9 +++++++++ net/sctp/input.c | 4 ++-- net/sctp/ipv6.c | 3 +++ 13 files changed, 69 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a2ef81466b00..1f2735dba753 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -162,6 +162,8 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *, void sctp_err_finish(struct sock *, struct sctp_association *); void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, struct sctp_transport *t, __u32 pmtu); +void sctp_icmp_redirect(struct sock *, struct sctp_transport *, + struct sk_buff *); void sctp_icmp_proto_unreachable(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 02162cfa5048..b4d7d28ce6d2 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -130,6 +130,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); + if (type == NDISC_REDIRECT) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst && dst->ops->redirect) + dst->ops->redirect(dst, skb); + } + if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 49d4d26bda88..7e6139508ee7 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -613,16 +613,18 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct xfrm_state *x; if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG) + type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) return; x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", - ntohl(ah->spi), &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 89a615ba84f8..6dc7fd353ef5 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -434,16 +434,19 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct xfrm_state *x; if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG) + type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) return; x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; - pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n", - ntohl(esph->spi), &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6af3fcfdcbbd..0b5b60ec6f4a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -550,6 +550,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_FRAG_NEEDED; break; + case NDISC_REDIRECT: + rel_type = ICMP_REDIRECT; + rel_code = ICMP_REDIR_HOST; default: return 0; } @@ -608,6 +611,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); } + if (rel_type == ICMP_REDIRECT) { + if (skb_dst(skb2)->ops->redirect) + skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2); + } icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 92832385a8ef..7af5aee75d98 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -64,7 +64,9 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG) + if (type != ICMPV6_DEST_UNREACH && + type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) return; spi = htonl(ntohs(ipcomph->cpi)); @@ -73,9 +75,10 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n", - spi, &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index b5c1dcb27737..ef0579d5bca6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -332,6 +332,8 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, ip6_sk_update_pmtu(skb, sk, info); harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); } + if (type == NDISC_REDIRECT) + ip6_sk_redirect(skb, sk); if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 49aea94c9be3..fbf1622fdeef 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -539,6 +539,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (code != ICMP_EXC_TTL) return 0; break; + case ICMP_REDIRECT: + break; } err = -ENOENT; @@ -557,6 +559,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info) err = 0; goto out; } + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + IPPROTO_IPV6, 0); + err = 0; + goto out; + } if (t->parms.iph.daddr == 0) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 70458a9cd837..7249e4bb9b8a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -363,6 +363,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); + if (type == NDISC_REDIRECT) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst && dst->ops->redirect) + dst->ops->redirect(dst,skb); + } + if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1ecd10249488..99d0077b56b8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -483,6 +483,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == ICMPV6_PKT_TOOBIG) ip6_sk_update_pmtu(skb, sk, info); + if (type == NDISC_REDIRECT) + ip6_sk_redirect(skb, sk); np = inet6_sk(sk); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index bb02038b822b..f5a9cb8257b9 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -215,6 +215,14 @@ static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) path->ops->update_pmtu(path, mtu); } +static void xfrm6_redirect(struct dst_entry *dst, struct sk_buff *skb) +{ + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + struct dst_entry *path = xdst->route; + + path->ops->redirect(path, skb); +} + static void xfrm6_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -261,6 +269,7 @@ static struct dst_ops xfrm6_dst_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, + .redirect = xfrm6_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, diff --git a/net/sctp/input.c b/net/sctp/input.c index 9fb4247f9a99..5943b7d77ddb 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -423,8 +423,8 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } -static void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, - struct sk_buff *skb) +void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, + struct sk_buff *skb) { struct dst_entry *dst; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 91f479121c55..ed7139ea7978 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -185,6 +185,9 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out_unlock; } break; + case NDISC_REDIRECT: + sctp_icmp_redirect(sk, transport, skb); + break; default: break; } -- cgit v1.2.3 From b94f1c0904da9b8bf031667afc48080ba7c3e8c9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 00:33:37 -0700 Subject: ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect(). And delete rt6_redirect(), since it is no longer used. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 - include/net/ipv6.h | 2 + net/ipv6/icmp.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/route.c | 107 ------------------------------------------------ 5 files changed, 4 insertions(+), 111 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 693940565d8a..b6b6f7d6f3c0 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -133,8 +133,6 @@ extern int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); -extern void rt6_redirect(struct sk_buff *skb); - extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark); extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d4261d4d6c47..f695f39e8926 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -251,6 +251,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl) atomic_dec(&fl->users); } +extern void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info); + extern int ip6_ra_control(struct sock *sk, int sel); extern int ipv6_parse_hopopts(struct sk_buff *skb); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a113f7d7e938..24d69dbca4d6 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -598,7 +598,7 @@ out: icmpv6_xmit_unlock(sk); } -static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) +void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { const struct inet6_protocol *ipprot; int inner_offset; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b8d53e186a78..ff36194a71aa 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1350,7 +1350,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } - rt6_redirect(skb); + icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f52cf83bb1e0..7296af144d6c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1633,92 +1633,6 @@ static int ip6_route_del(struct fib6_config *cfg) return err; } -/* - * Handle redirects - */ -struct ip6rd_flowi { - struct flowi6 fl6; - struct in6_addr gateway; -}; - -static struct rt6_info *__ip6_route_redirect(struct net *net, - struct fib6_table *table, - struct flowi6 *fl6, - int flags) -{ - struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *rt; - struct fib6_node *fn; - - /* - * Get the "current" route for this destination and - * check if the redirect has come from approriate router. - * - * RFC 2461 specifies that redirects should only be - * accepted if they come from the nexthop to the target. - * Due to the way the routes are chosen, this notion - * is a bit fuzzy and one might need to check all possible - * routes. - */ - - read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); -restart: - for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { - /* - * Current route is on-link; redirect is always invalid. - * - * Seems, previous statement is not true. It could - * be node, which looks for us as on-link (f.e. proxy ndisc) - * But then router serving it might decide, that we should - * know truth 8)8) --ANK (980726). - */ - if (rt6_check_expired(rt)) - continue; - if (!(rt->rt6i_flags & RTF_GATEWAY)) - continue; - if (fl6->flowi6_oif != rt->dst.dev->ifindex) - continue; - if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) - continue; - break; - } - - if (!rt) - rt = net->ipv6.ip6_null_entry; - BACKTRACK(net, &fl6->saddr); -out: - dst_hold(&rt->dst); - - read_unlock_bh(&table->tb6_lock); - - return rt; -}; - -static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, - const struct in6_addr *src, - const struct in6_addr *gateway, - struct net_device *dev) -{ - int flags = RT6_LOOKUP_F_HAS_SADDR; - struct net *net = dev_net(dev); - struct ip6rd_flowi rdfl = { - .fl6 = { - .flowi6_oif = dev->ifindex, - .daddr = *dest, - .saddr = *src, - }, - }; - - rdfl.gateway = *gateway; - - if (rt6_need_strict(dest)) - flags |= RT6_LOOKUP_F_IFACE; - - return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, - flags, __ip6_route_redirect); -} - static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); @@ -1848,27 +1762,6 @@ out: neigh_release(neigh); } -void rt6_redirect(struct sk_buff *skb) -{ - const struct in6_addr *target; - const struct in6_addr *dest; - const struct in6_addr *src; - const struct in6_addr *saddr; - struct icmp6hdr *icmph; - struct rt6_info *rt; - - icmph = icmp6_hdr(skb); - target = (const struct in6_addr *) (icmph + 1); - dest = target + 1; - - src = &ipv6_hdr(skb)->daddr; - saddr = &ipv6_hdr(skb)->saddr; - - rt = ip6_route_redirect(dest, src, saddr, skb->dev); - rt6_do_redirect(&rt->dst, skb); - dst_release(&rt->dst); -} - /* * Misc support functions */ -- cgit v1.2.3 From 84efbb84cf76238faf26facf481c8675859bfaeb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 16 Jun 2012 00:00:26 +0200 Subject: cfg80211: use wireless_dev for interface management In order to be able to create P2P Device wdevs, move the virtual interface management over to wireless_dev structures. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 30 +++++++++++++++--------------- drivers/net/wireless/ath/ath6kl/cfg80211.h | 6 +++--- drivers/net/wireless/ath/ath6kl/core.c | 8 ++++---- drivers/net/wireless/mwifiex/cfg80211.c | 24 ++++++++++++------------ drivers/net/wireless/mwifiex/main.c | 4 ++-- drivers/net/wireless/mwifiex/main.h | 10 ++++++---- include/net/cfg80211.h | 19 ++++++++++--------- net/mac80211/cfg.c | 20 ++++++++++---------- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/iface.c | 6 +++--- net/wireless/nl80211.c | 26 +++++++++++++++++--------- 11 files changed, 83 insertions(+), 72 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 5f0c66bb6bdf..88bed02f7521 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1487,14 +1487,14 @@ static int ath6kl_cfg80211_set_power_mgmt(struct wiphy *wiphy, return 0; } -static struct net_device *ath6kl_cfg80211_add_iface(struct wiphy *wiphy, - char *name, - enum nl80211_iftype type, - u32 *flags, - struct vif_params *params) +static struct wireless_dev *ath6kl_cfg80211_add_iface(struct wiphy *wiphy, + char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params) { struct ath6kl *ar = wiphy_priv(wiphy); - struct net_device *ndev; + struct wireless_dev *wdev; u8 if_idx, nw_type; if (ar->num_vif == ar->vif_max) { @@ -1507,20 +1507,20 @@ static struct net_device *ath6kl_cfg80211_add_iface(struct wiphy *wiphy, return ERR_PTR(-EINVAL); } - ndev = ath6kl_interface_add(ar, name, type, if_idx, nw_type); - if (!ndev) + wdev = ath6kl_interface_add(ar, name, type, if_idx, nw_type); + if (!wdev) return ERR_PTR(-ENOMEM); ar->num_vif++; - return ndev; + return wdev; } static int ath6kl_cfg80211_del_iface(struct wiphy *wiphy, - struct net_device *ndev) + struct wireless_dev *wdev) { struct ath6kl *ar = wiphy_priv(wiphy); - struct ath6kl_vif *vif = netdev_priv(ndev); + struct ath6kl_vif *vif = netdev_priv(wdev->netdev); spin_lock_bh(&ar->list_lock); list_del(&vif->list); @@ -3477,9 +3477,9 @@ void ath6kl_cfg80211_vif_cleanup(struct ath6kl_vif *vif) ar->num_vif--; } -struct net_device *ath6kl_interface_add(struct ath6kl *ar, char *name, - enum nl80211_iftype type, u8 fw_vif_idx, - u8 nw_type) +struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, char *name, + enum nl80211_iftype type, + u8 fw_vif_idx, u8 nw_type) { struct net_device *ndev; struct ath6kl_vif *vif; @@ -3533,7 +3533,7 @@ struct net_device *ath6kl_interface_add(struct ath6kl *ar, char *name, list_add_tail(&vif->list, &ar->vif_list); spin_unlock_bh(&ar->list_lock); - return ndev; + return &vif->wdev; err: aggr_module_destroy(vif->aggr_cntxt); diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.h b/drivers/net/wireless/ath/ath6kl/cfg80211.h index b992046a1b0e..56b1ebe79812 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.h +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.h @@ -25,9 +25,9 @@ enum ath6kl_cfg_suspend_mode { ATH6KL_CFG_SUSPEND_SCHED_SCAN, }; -struct net_device *ath6kl_interface_add(struct ath6kl *ar, char *name, - enum nl80211_iftype type, - u8 fw_vif_idx, u8 nw_type); +struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, char *name, + enum nl80211_iftype type, + u8 fw_vif_idx, u8 nw_type); void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq, enum wmi_phy_mode mode); void ath6kl_cfg80211_scan_complete_event(struct ath6kl_vif *vif, bool aborted); diff --git a/drivers/net/wireless/ath/ath6kl/core.c b/drivers/net/wireless/ath/ath6kl/core.c index fdb3b1decc76..82c4dd2a960e 100644 --- a/drivers/net/wireless/ath/ath6kl/core.c +++ b/drivers/net/wireless/ath/ath6kl/core.c @@ -56,7 +56,7 @@ EXPORT_SYMBOL(ath6kl_core_rx_complete); int ath6kl_core_init(struct ath6kl *ar, enum ath6kl_htc_type htc_type) { struct ath6kl_bmi_target_info targ_info; - struct net_device *ndev; + struct wireless_dev *wdev; int ret = 0, i; switch (htc_type) { @@ -187,12 +187,12 @@ int ath6kl_core_init(struct ath6kl *ar, enum ath6kl_htc_type htc_type) rtnl_lock(); /* Add an initial station interface */ - ndev = ath6kl_interface_add(ar, "wlan%d", NL80211_IFTYPE_STATION, 0, + wdev = ath6kl_interface_add(ar, "wlan%d", NL80211_IFTYPE_STATION, 0, INFRA_NETWORK); rtnl_unlock(); - if (!ndev) { + if (!wdev) { ath6kl_err("Failed to instantiate a network device\n"); ret = -ENOMEM; wiphy_unregister(ar->wiphy); @@ -200,7 +200,7 @@ int ath6kl_core_init(struct ath6kl *ar, enum ath6kl_htc_type htc_type) } ath6kl_dbg(ATH6KL_DBG_TRC, "%s: name=%s dev=0x%p, ar=0x%p\n", - __func__, ndev->name, ndev, ar); + __func__, wdev->netdev->name, wdev->netdev, ar); return ret; diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 9c2e08e4b093..1e8024ea6910 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -1512,11 +1512,11 @@ mwifiex_setup_ht_caps(struct ieee80211_sta_ht_cap *ht_info, /* * create a new virtual interface with the given name */ -struct net_device *mwifiex_add_virtual_intf(struct wiphy *wiphy, - char *name, - enum nl80211_iftype type, - u32 *flags, - struct vif_params *params) +struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, + char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params) { struct mwifiex_adapter *adapter = mwifiex_cfg80211_get_adapter(wiphy); struct mwifiex_private *priv; @@ -1634,7 +1634,7 @@ struct net_device *mwifiex_add_virtual_intf(struct wiphy *wiphy, #ifdef CONFIG_DEBUG_FS mwifiex_dev_debugfs_init(priv); #endif - return dev; + return wdev; error: if (dev && (dev->reg_state == NETREG_UNREGISTERED)) free_netdev(dev); @@ -1647,9 +1647,9 @@ EXPORT_SYMBOL_GPL(mwifiex_add_virtual_intf); /* * del_virtual_intf: remove the virtual interface determined by dev */ -int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct net_device *dev) +int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) { - struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); + struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); #ifdef CONFIG_DEBUG_FS mwifiex_dev_debugfs_remove(priv); @@ -1661,11 +1661,11 @@ int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct net_device *dev) if (netif_carrier_ok(priv->netdev)) netif_carrier_off(priv->netdev); - if (dev->reg_state == NETREG_REGISTERED) - unregister_netdevice(dev); + if (wdev->netdev->reg_state == NETREG_REGISTERED) + unregister_netdevice(wdev->netdev); - if (dev->reg_state == NETREG_UNREGISTERED) - free_netdev(dev); + if (wdev->netdev->reg_state == NETREG_UNREGISTERED) + free_netdev(wdev->netdev); /* Clear the priv in adapter */ priv->netdev = NULL; diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c index f0219efc8953..46803621d015 100644 --- a/drivers/net/wireless/mwifiex/main.c +++ b/drivers/net/wireless/mwifiex/main.c @@ -377,7 +377,7 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) goto done; err_add_intf: - mwifiex_del_virtual_intf(adapter->wiphy, priv->netdev); + mwifiex_del_virtual_intf(adapter->wiphy, priv->wdev); rtnl_unlock(); err_init_fw: pr_debug("info: %s: unregister device\n", __func__); @@ -844,7 +844,7 @@ int mwifiex_remove_card(struct mwifiex_adapter *adapter, struct semaphore *sem) rtnl_lock(); if (priv->wdev && priv->netdev) - mwifiex_del_virtual_intf(adapter->wiphy, priv->netdev); + mwifiex_del_virtual_intf(adapter->wiphy, priv->wdev); rtnl_unlock(); } diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h index 0b3b5aa9830d..434a90e46934 100644 --- a/drivers/net/wireless/mwifiex/main.h +++ b/drivers/net/wireless/mwifiex/main.h @@ -1005,10 +1005,12 @@ int mwifiex_update_bss_desc_with_ie(struct mwifiex_adapter *adapter, int mwifiex_check_network_compatibility(struct mwifiex_private *priv, struct mwifiex_bssdescriptor *bss_desc); -struct net_device *mwifiex_add_virtual_intf(struct wiphy *wiphy, - char *name, enum nl80211_iftype type, - u32 *flags, struct vif_params *params); -int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct net_device *dev); +struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, + char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params); +int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev); void mwifiex_set_sys_config_invalid_data(struct mwifiex_uap_bss_param *config); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7eaaee7bb07d..aaaa3a255ed5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1435,10 +1435,10 @@ struct cfg80211_gtk_rekey_data { * * @add_virtual_intf: create a new virtual interface with the given name, * must set the struct wireless_dev's iftype. Beware: You must create - * the new netdev in the wiphy's network namespace! Returns the netdev, - * or an ERR_PTR. + * the new netdev in the wiphy's network namespace! Returns the struct + * wireless_dev, or an ERR_PTR. * - * @del_virtual_intf: remove the virtual interface determined by ifindex. + * @del_virtual_intf: remove the virtual interface * * @change_virtual_intf: change type/configuration of virtual interface, * keep the struct wireless_dev's iftype updated. @@ -1617,12 +1617,13 @@ struct cfg80211_ops { int (*resume)(struct wiphy *wiphy); void (*set_wakeup)(struct wiphy *wiphy, bool enabled); - struct net_device * (*add_virtual_intf)(struct wiphy *wiphy, - char *name, - enum nl80211_iftype type, - u32 *flags, - struct vif_params *params); - int (*del_virtual_intf)(struct wiphy *wiphy, struct net_device *dev); + struct wireless_dev * (*add_virtual_intf)(struct wiphy *wiphy, + char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params); + int (*del_virtual_intf)(struct wiphy *wiphy, + struct wireless_dev *wdev); int (*change_virtual_intf)(struct wiphy *wiphy, struct net_device *dev, enum nl80211_iftype type, u32 *flags, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7d9abea37b17..a752c7341d62 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -20,31 +20,31 @@ #include "rate.h" #include "mesh.h" -static struct net_device *ieee80211_add_iface(struct wiphy *wiphy, char *name, - enum nl80211_iftype type, - u32 *flags, - struct vif_params *params) +static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); - struct net_device *dev; + struct wireless_dev *wdev; struct ieee80211_sub_if_data *sdata; int err; - err = ieee80211_if_add(local, name, &dev, type, params); + err = ieee80211_if_add(local, name, &wdev, type, params); if (err) return ERR_PTR(err); if (type == NL80211_IFTYPE_MONITOR && flags) { - sdata = IEEE80211_DEV_TO_SUB_IF(dev); + sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); sdata->u.mntr_flags = *flags; } - return dev; + return wdev; } -static int ieee80211_del_iface(struct wiphy *wiphy, struct net_device *dev) +static int ieee80211_del_iface(struct wiphy *wiphy, struct wireless_dev *wdev) { - ieee80211_if_remove(IEEE80211_DEV_TO_SUB_IF(dev)); + ieee80211_if_remove(IEEE80211_WDEV_TO_SUB_IF(wdev)); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8f8535ee5995..c3241c3ec6d1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1284,7 +1284,7 @@ void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); int ieee80211_iface_init(void); void ieee80211_iface_exit(void); int ieee80211_if_add(struct ieee80211_local *local, const char *name, - struct net_device **new_dev, enum nl80211_iftype type, + struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params); int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index fbef7a1ada7a..b1edf60fbba7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1373,7 +1373,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, } int ieee80211_if_add(struct ieee80211_local *local, const char *name, - struct net_device **new_dev, enum nl80211_iftype type, + struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params) { struct net_device *ndev; @@ -1463,8 +1463,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, list_add_tail_rcu(&sdata->list, &local->interfaces); mutex_unlock(&local->iflist_mtx); - if (new_dev) - *new_dev = ndev; + if (new_wdev) + *new_wdev = &sdata->wdev; return 0; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0dc3356eea40..789d0c7b287e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1971,7 +1971,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct vif_params params; - struct net_device *dev; + struct wireless_dev *wdev; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; @@ -2001,16 +2001,14 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); - dev = rdev->ops->add_virtual_intf(&rdev->wiphy, + wdev = rdev->ops->add_virtual_intf(&rdev->wiphy, nla_data(info->attrs[NL80211_ATTR_IFNAME]), type, err ? NULL : &flags, ¶ms); - if (IS_ERR(dev)) - return PTR_ERR(dev); + if (IS_ERR(wdev)) + return PTR_ERR(wdev); if (type == NL80211_IFTYPE_MESH_POINT && info->attrs[NL80211_ATTR_MESH_ID]) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - wdev_lock(wdev); BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); @@ -2027,12 +2025,22 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; if (!rdev->ops->del_virtual_intf) return -EOPNOTSUPP; - return rdev->ops->del_virtual_intf(&rdev->wiphy, dev); + /* + * If we remove a wireless device without a netdev then clear + * user_ptr[1] so that nl80211_post_doit won't dereference it + * to check if it needs to do dev_put(). Otherwise it crashes + * since the wdev has been freed, unlike with a netdev where + * we need the dev_put() for the netdev to really be freed. + */ + if (!wdev->netdev) + info->user_ptr[1] = NULL; + + return rdev->ops->del_virtual_intf(&rdev->wiphy, wdev); } static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info) @@ -6874,7 +6882,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_del_interface, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, }, { -- cgit v1.2.3 From fd0142844efa85d89017c89227a0f03de1eee327 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 18 Jun 2012 19:17:03 +0200 Subject: nl80211: move scan API to wdev The new P2P Device will have to be able to scan for P2P search, so move scanning to use struct wireless_dev instead of struct net_device. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 6 ++--- .../net/wireless/brcm80211/brcmfmac/wl_cfg80211.c | 3 ++- drivers/net/wireless/iwmc3200wifi/cfg80211.c | 3 ++- drivers/net/wireless/libertas/cfg.c | 1 - drivers/net/wireless/mwifiex/cfg80211.c | 3 ++- drivers/net/wireless/orinoco/cfg.c | 2 +- drivers/net/wireless/rndis_wlan.c | 5 ++-- include/net/cfg80211.h | 7 ++--- net/mac80211/cfg.c | 5 ++-- net/wireless/core.c | 2 +- net/wireless/nl80211.c | 31 ++++++++++++---------- net/wireless/nl80211.h | 6 ++--- net/wireless/scan.c | 24 +++++++++-------- net/wireless/sme.c | 6 ++--- 14 files changed, 57 insertions(+), 47 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 88bed02f7521..86aeef4b9d7e 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -966,11 +966,11 @@ static int ath6kl_set_probed_ssids(struct ath6kl *ar, return 0; } -static int ath6kl_cfg80211_scan(struct wiphy *wiphy, struct net_device *ndev, +static int ath6kl_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { - struct ath6kl *ar = ath6kl_priv(ndev); - struct ath6kl_vif *vif = netdev_priv(ndev); + struct ath6kl_vif *vif = ath6kl_vif_from_wdev(request->wdev); + struct ath6kl *ar = ath6kl_priv(vif->ndev); s8 n_channels = 0; u16 *channels = NULL; int ret = 0; diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c index d13ae9c299f2..c6a10caec79f 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c @@ -691,9 +691,10 @@ scan_out: } static s32 -brcmf_cfg80211_scan(struct wiphy *wiphy, struct net_device *ndev, +brcmf_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { + struct net_device *ndev = request->wdev->netdev; s32 err = 0; WL_TRACE("Enter\n"); diff --git a/drivers/net/wireless/iwmc3200wifi/cfg80211.c b/drivers/net/wireless/iwmc3200wifi/cfg80211.c index 48e8218fd23b..523dd646f052 100644 --- a/drivers/net/wireless/iwmc3200wifi/cfg80211.c +++ b/drivers/net/wireless/iwmc3200wifi/cfg80211.c @@ -353,9 +353,10 @@ static int iwm_cfg80211_change_iface(struct wiphy *wiphy, return 0; } -static int iwm_cfg80211_scan(struct wiphy *wiphy, struct net_device *ndev, +static int iwm_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { + struct net_device *ndev = request->wdev->netdev; struct iwm_priv *iwm = ndev_to_iwm(ndev); int ret; diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c index f4a203049fb4..706781316195 100644 --- a/drivers/net/wireless/libertas/cfg.c +++ b/drivers/net/wireless/libertas/cfg.c @@ -805,7 +805,6 @@ void lbs_scan_done(struct lbs_private *priv) } static int lbs_cfg_scan(struct wiphy *wiphy, - struct net_device *dev, struct cfg80211_scan_request *request) { struct lbs_private *priv = wiphy_priv(wiphy); diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 1e8024ea6910..6ca571a1b8e2 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -1376,9 +1376,10 @@ mwifiex_cfg80211_leave_ibss(struct wiphy *wiphy, struct net_device *dev) * it also informs the results. */ static int -mwifiex_cfg80211_scan(struct wiphy *wiphy, struct net_device *dev, +mwifiex_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { + struct net_device *dev = request->wdev->netdev; struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); int i; struct ieee80211_channel *chan; diff --git a/drivers/net/wireless/orinoco/cfg.c b/drivers/net/wireless/orinoco/cfg.c index e15675585fb1..7b751fba7e1f 100644 --- a/drivers/net/wireless/orinoco/cfg.c +++ b/drivers/net/wireless/orinoco/cfg.c @@ -138,7 +138,7 @@ static int orinoco_change_vif(struct wiphy *wiphy, struct net_device *dev, return err; } -static int orinoco_scan(struct wiphy *wiphy, struct net_device *dev, +static int orinoco_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { struct orinoco_private *priv = wiphy_priv(wiphy); diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index dfcd02ab6cae..241162e8111d 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -484,7 +484,7 @@ static int rndis_change_virtual_intf(struct wiphy *wiphy, enum nl80211_iftype type, u32 *flags, struct vif_params *params); -static int rndis_scan(struct wiphy *wiphy, struct net_device *dev, +static int rndis_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request); static int rndis_set_wiphy_params(struct wiphy *wiphy, u32 changed); @@ -1941,9 +1941,10 @@ static int rndis_get_tx_power(struct wiphy *wiphy, int *dbm) } #define SCAN_DELAY_JIFFIES (6 * HZ) -static int rndis_scan(struct wiphy *wiphy, struct net_device *dev, +static int rndis_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { + struct net_device *dev = request->wdev->netdev; struct usbnet *usbdev = netdev_priv(dev); struct rndis_wlan_private *priv = get_rndis_wlan_priv(usbdev); int ret; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index aaaa3a255ed5..5a67165f3b19 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -999,7 +999,7 @@ struct cfg80211_ssid { * @ie_len: length of ie in octets * @rates: bitmap of rates to advertise for each band * @wiphy: the wiphy this was for - * @dev: the interface + * @wdev: the wireless device to scan for * @aborted: (internal) scan request was notified as aborted * @no_cck: used to send probe requests at non CCK rate in 2GHz band */ @@ -1012,9 +1012,10 @@ struct cfg80211_scan_request { u32 rates[IEEE80211_NUM_BANDS]; + struct wireless_dev *wdev; + /* internal */ struct wiphy *wiphy; - struct net_device *dev; bool aborted; bool no_cck; @@ -1700,7 +1701,7 @@ struct cfg80211_ops { struct ieee80211_channel *chan, enum nl80211_channel_type channel_type); - int (*scan)(struct wiphy *wiphy, struct net_device *dev, + int (*scan)(struct wiphy *wiphy, struct cfg80211_scan_request *request); int (*auth)(struct wiphy *wiphy, struct net_device *dev, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a752c7341d62..cfdc03f59e27 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1763,10 +1763,11 @@ static int ieee80211_resume(struct wiphy *wiphy) #endif static int ieee80211_scan(struct wiphy *wiphy, - struct net_device *dev, struct cfg80211_scan_request *req) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata; + + sdata = IEEE80211_WDEV_TO_SUB_IF(req->wdev); switch (ieee80211_vif_type_p2p(&sdata->vif)) { case NL80211_IFTYPE_STATION: diff --git a/net/wireless/core.c b/net/wireless/core.c index 2781a411cecc..0557bb159025 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -708,7 +708,7 @@ static void wdev_cleanup_work(struct work_struct *work) cfg80211_lock_rdev(rdev); - if (WARN_ON(rdev->scan_req && rdev->scan_req->dev == wdev->netdev)) { + if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { rdev->scan_req->aborted = true; ___cfg80211_scan_done(rdev, true); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6a9a1d7f51d1..6472c7f928dc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4130,7 +4130,7 @@ static int validate_scan_freqs(struct nlattr *freqs) static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_scan_request *request; struct nlattr *attr; struct wiphy *wiphy; @@ -4290,15 +4290,16 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); - request->dev = dev; + request->wdev = wdev; request->wiphy = &rdev->wiphy; rdev->scan_req = request; - err = rdev->ops->scan(&rdev->wiphy, dev, request); + err = rdev->ops->scan(&rdev->wiphy, request); if (!err) { - nl80211_send_scan_start(rdev, dev); - dev_hold(dev); + nl80211_send_scan_start(rdev, wdev); + if (wdev->netdev) + dev_hold(wdev->netdev); } else { out_free: rdev->scan_req = NULL; @@ -7066,7 +7067,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_trigger_scan, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -7458,7 +7459,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg, static int nl80211_send_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, - struct net_device *netdev, + struct wireless_dev *wdev, u32 pid, u32 seq, int flags, u32 cmd) { @@ -7469,7 +7470,9 @@ static int nl80211_send_scan_msg(struct sk_buff *msg, return -1; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) + (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) goto nla_put_failure; /* ignore errors and send incomplete event anyway */ @@ -7506,7 +7509,7 @@ nl80211_send_sched_scan_msg(struct sk_buff *msg, } void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, - struct net_device *netdev) + struct wireless_dev *wdev) { struct sk_buff *msg; @@ -7514,7 +7517,7 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, if (!msg) return; - if (nl80211_send_scan_msg(msg, rdev, netdev, 0, 0, 0, + if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0, NL80211_CMD_TRIGGER_SCAN) < 0) { nlmsg_free(msg); return; @@ -7525,7 +7528,7 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, } void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, - struct net_device *netdev) + struct wireless_dev *wdev) { struct sk_buff *msg; @@ -7533,7 +7536,7 @@ void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, if (!msg) return; - if (nl80211_send_scan_msg(msg, rdev, netdev, 0, 0, 0, + if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0, NL80211_CMD_NEW_SCAN_RESULTS) < 0) { nlmsg_free(msg); return; @@ -7544,7 +7547,7 @@ void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, } void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, - struct net_device *netdev) + struct wireless_dev *wdev) { struct sk_buff *msg; @@ -7552,7 +7555,7 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, if (!msg) return; - if (nl80211_send_scan_msg(msg, rdev, netdev, 0, 0, 0, + if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0, NL80211_CMD_SCAN_ABORTED) < 0) { nlmsg_free(msg); return; diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 0469303b5c3c..89ce99675e61 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -7,11 +7,11 @@ int nl80211_init(void); void nl80211_exit(void); void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev); void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, - struct net_device *netdev); + struct wireless_dev *wdev); void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, - struct net_device *netdev); + struct wireless_dev *wdev); void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, - struct net_device *netdev); + struct wireless_dev *wdev); void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, struct net_device *netdev, u32 cmd); void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/scan.c b/net/wireless/scan.c index af2b1caa37fa..848523a2b22f 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -23,7 +23,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) { struct cfg80211_scan_request *request; - struct net_device *dev; + struct wireless_dev *wdev; #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -35,29 +35,31 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) if (!request) return; - dev = request->dev; + wdev = request->wdev; /* * This must be before sending the other events! * Otherwise, wpa_supplicant gets completely confused with * wext events. */ - cfg80211_sme_scan_done(dev); + if (wdev->netdev) + cfg80211_sme_scan_done(wdev->netdev); if (request->aborted) - nl80211_send_scan_aborted(rdev, dev); + nl80211_send_scan_aborted(rdev, wdev); else - nl80211_send_scan_done(rdev, dev); + nl80211_send_scan_done(rdev, wdev); #ifdef CONFIG_CFG80211_WEXT - if (!request->aborted) { + if (wdev->netdev && !request->aborted) { memset(&wrqu, 0, sizeof(wrqu)); - wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL); + wireless_send_event(wdev->netdev, SIOCGIWSCAN, &wrqu, NULL); } #endif - dev_put(dev); + if (wdev->netdev) + dev_put(wdev->netdev); rdev->scan_req = NULL; @@ -955,7 +957,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, } creq->wiphy = wiphy; - creq->dev = dev; + creq->wdev = dev->ieee80211_ptr; /* SSIDs come after channels */ creq->ssids = (void *)&creq->channels[n_channels]; creq->n_channels = n_channels; @@ -1024,12 +1026,12 @@ int cfg80211_wext_siwscan(struct net_device *dev, creq->rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; rdev->scan_req = creq; - err = rdev->ops->scan(wiphy, dev, creq); + err = rdev->ops->scan(wiphy, creq); if (err) { rdev->scan_req = NULL; /* creq will be freed below */ } else { - nl80211_send_scan_start(rdev, dev); + nl80211_send_scan_start(rdev, dev->ieee80211_ptr); /* creq now owned by driver */ creq = NULL; dev_hold(dev); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index dec97981e689..6f39cb808302 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -136,15 +136,15 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) wdev->conn->params.ssid_len); request->ssids[0].ssid_len = wdev->conn->params.ssid_len; - request->dev = wdev->netdev; + request->wdev = wdev; request->wiphy = &rdev->wiphy; rdev->scan_req = request; - err = rdev->ops->scan(wdev->wiphy, wdev->netdev, request); + err = rdev->ops->scan(wdev->wiphy, request); if (!err) { wdev->conn->state = CFG80211_CONN_SCANNING; - nl80211_send_scan_start(rdev, wdev->netdev); + nl80211_send_scan_start(rdev, wdev); dev_hold(wdev->netdev); } else { rdev->scan_req = NULL; -- cgit v1.2.3 From 30f422925c39edf61cbcf6d35140d726402d4b04 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Jul 2012 13:14:18 +0200 Subject: mac80211: optimize ieee80211_rx_status struct layout We waste a lot of space in this struct because it uses int values where smaller ones would be sufficient. The upcoming A-MPDU information needs some space, optimize the struct now. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c5dbb46debb0..7300cb51dd5f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -709,13 +709,13 @@ enum mac80211_rx_flags { */ struct ieee80211_rx_status { u64 mactime; - enum ieee80211_band band; - int freq; - int signal; - int antenna; - int rate_idx; - int flag; - unsigned int rx_flags; + u16 flag; + u16 freq; + u8 rate_idx; + u8 rx_flags; + u8 band; + u8 antenna; + s8 signal; }; /** -- cgit v1.2.3 From 8c358bcd097fa1f63e57fb82525ba52f4a537bfa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 22 May 2012 22:13:05 +0200 Subject: mac80211: add time synchronisation with BSS for assoc Some drivers (iwlegacy, iwlwifi and rt2x00) today use the bss_conf.last_tsf value. By itself though that value is completely worthless since it may be ancient. What really is needed is synchronisation between some device time and the TSF. To clarify this, rename bss_conf.last_tsf to sync_tsf and add sync_device_ts which is obtained from rx_status which gets a new field device_timestamp for this purpose. This is intentionally not using the mactime field since that is used for other things and in IBSS is expected to sync with the IBSS's TSF which isn't necessarily true for the device timestamp. Also, since we have the information and it's useful even before the connection has been established, give all the timing details to the driver before authenticating. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlegacy/common.c | 2 +- drivers/net/wireless/iwlwifi/dvm/rxon.c | 2 +- drivers/net/wireless/rt2x00/rt2x00config.c | 2 +- include/net/mac80211.h | 10 ++++++++-- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mlme.c | 15 ++++++++------- net/mac80211/scan.c | 3 ++- net/mac80211/trace.h | 6 ++++-- 8 files changed, 27 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c index cbf2dc18341f..9cbed0b15b07 100644 --- a/drivers/net/wireless/iwlegacy/common.c +++ b/drivers/net/wireless/iwlegacy/common.c @@ -5360,7 +5360,7 @@ il_mac_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (changes & BSS_CHANGED_ASSOC) { D_MAC80211("ASSOC %d\n", bss_conf->assoc); if (bss_conf->assoc) { - il->timestamp = bss_conf->last_tsf; + il->timestamp = bss_conf->sync_tsf; if (!il_is_rfkill(il)) il->ops->post_associate(il); diff --git a/drivers/net/wireless/iwlwifi/dvm/rxon.c b/drivers/net/wireless/iwlwifi/dvm/rxon.c index 6ee940f497f9..10896393e5a0 100644 --- a/drivers/net/wireless/iwlwifi/dvm/rxon.c +++ b/drivers/net/wireless/iwlwifi/dvm/rxon.c @@ -1447,7 +1447,7 @@ void iwlagn_bss_info_changed(struct ieee80211_hw *hw, if (changes & BSS_CHANGED_ASSOC) { if (bss_conf->assoc) { - priv->timestamp = bss_conf->last_tsf; + priv->timestamp = bss_conf->sync_tsf; ctx->staging.filter_flags |= RXON_FILTER_ASSOC_MSK; } else { /* diff --git a/drivers/net/wireless/rt2x00/rt2x00config.c b/drivers/net/wireless/rt2x00/rt2x00config.c index e7361d913e8e..49a63e973934 100644 --- a/drivers/net/wireless/rt2x00/rt2x00config.c +++ b/drivers/net/wireless/rt2x00/rt2x00config.c @@ -102,7 +102,7 @@ void rt2x00lib_config_erp(struct rt2x00_dev *rt2x00dev, /* Update the AID, this is needed for dynamic PS support */ rt2x00dev->aid = bss_conf->assoc ? bss_conf->aid : 0; - rt2x00dev->last_beacon = bss_conf->last_tsf; + rt2x00dev->last_beacon = bss_conf->sync_tsf; /* Update global beacon interval time, this is needed for PS support */ rt2x00dev->beacon_int = bss_conf->beacon_int; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7300cb51dd5f..bb86aa6f98dd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -233,8 +233,10 @@ enum ieee80211_rssi_event { * valid in station mode only while @assoc is true and if also * requested by %IEEE80211_HW_NEED_DTIM_PERIOD (cf. also hw conf * @ps_dtim_period) - * @last_tsf: last beacon's/probe response's TSF timestamp (could be old + * @sync_tsf: last beacon's/probe response's TSF timestamp (could be old * as it may have been received during scanning long ago) + * @sync_device_ts: the device timestamp corresponding to the sync_tsf, + * the driver/device can use this to calculate synchronisation * @beacon_int: beacon interval * @assoc_capability: capabilities taken from assoc resp * @basic_rates: bitmap of basic rates, each bit stands for an @@ -281,7 +283,8 @@ struct ieee80211_bss_conf { u8 dtim_period; u16 beacon_int; u16 assoc_capability; - u64 last_tsf; + u64 sync_tsf; + u32 sync_device_ts; u32 basic_rates; int mcast_rate[IEEE80211_NUM_BANDS]; u16 ht_operation_mode; @@ -696,6 +699,8 @@ enum mac80211_rx_flags { * * @mactime: value in microseconds of the 64-bit Time Synchronization Function * (TSF) timer when the first data symbol (MPDU) arrived at the hardware. + * @device_timestamp: arbitrary timestamp for the device, mac80211 doesn't use + * it but can store it and pass it back to the driver for synchronisation * @band: the active band when this frame was received * @freq: frequency the radio was tuned to when receiving this frame, in MHz * @signal: signal strength when receiving this frame, either in dBm, in dB or @@ -709,6 +714,7 @@ enum mac80211_rx_flags { */ struct ieee80211_rx_status { u64 mactime; + u32 device_timestamp; u16 flag; u16 freq; u8 rate_idx; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2a97d668d2da..7998513ec831 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -85,6 +85,8 @@ struct ieee80211_bss { size_t ssid_len; u8 ssid[IEEE80211_MAX_SSID_LEN]; + u32 device_ts; + u8 dtim_period; bool wmm_used; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4b503ce893d8..4efcbf89a72d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1269,11 +1269,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; bss_info_changed |= BSS_CHANGED_ASSOC; - /* set timing information */ - bss_conf->beacon_int = cbss->beacon_interval; - bss_conf->last_tsf = cbss->tsf; - - bss_info_changed |= BSS_CHANGED_BEACON_INT; bss_info_changed |= ieee80211_handle_bss_capability(sdata, bss_conf->assoc_capability, bss->has_erp_value, bss->erp_value); @@ -3135,9 +3130,15 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, memcpy(ifmgd->bssid, cbss->bssid, ETH_ALEN); - /* tell driver about BSSID and basic rates */ + /* set timing information */ + sdata->vif.bss_conf.beacon_int = cbss->beacon_interval; + sdata->vif.bss_conf.sync_tsf = cbss->tsf; + sdata->vif.bss_conf.sync_device_ts = bss->device_ts; + + /* tell driver about BSSID, basic rates and timing */ ieee80211_bss_info_change_notify(sdata, - BSS_CHANGED_BSSID | BSS_CHANGED_BASIC_RATES); + BSS_CHANGED_BSSID | BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT); if (assoc) sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 1a893f3637c5..e80a8b644aa0 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -83,13 +83,14 @@ ieee80211_bss_info_update(struct ieee80211_local *local, cbss = cfg80211_inform_bss_frame(local->hw.wiphy, channel, mgmt, len, signal, GFP_ATOMIC); - if (!cbss) return NULL; cbss->free_priv = ieee80211_rx_bss_free; bss = (void *)cbss->priv; + bss->device_ts = rx_status->device_timestamp; + if (elems->parse_error) { if (beacon) bss->corrupt_data |= IEEE80211_BSS_CORRUPT_BEACON; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index e1e9d10ec2e7..c6d33b55b2df 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -306,7 +306,8 @@ TRACE_EVENT(drv_bss_info_changed, __field(u8, dtimper) __field(u16, bcnint) __field(u16, assoc_cap) - __field(u64, timestamp) + __field(u64, sync_tsf) + __field(u32, sync_device_ts) __field(u32, basic_rates) __field(u32, changed) __field(bool, enable_beacon) @@ -325,7 +326,8 @@ TRACE_EVENT(drv_bss_info_changed, __entry->dtimper = info->dtim_period; __entry->bcnint = info->beacon_int; __entry->assoc_cap = info->assoc_capability; - __entry->timestamp = info->last_tsf; + __entry->sync_tsf = info->sync_tsf; + __entry->sync_device_ts = info->sync_device_ts; __entry->basic_rates = info->basic_rates; __entry->enable_beacon = info->enable_beacon; __entry->ht_operation_mode = info->ht_operation_mode; -- cgit v1.2.3 From 391e5c22f5f4e55817f8ba18a08ea717ed2d4a1f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 09:39:28 -0700 Subject: ipv4: Remove tb_peers from fib_table. No longer used. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 - net/ipv4/fib_trie.c | 3 --- 2 files changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 000c4674e18e..e91fedd22db2 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -162,7 +162,6 @@ struct fib_table { u32 tb_id; int tb_default; int tb_num_default; - struct inet_peer_base tb_peers; unsigned long tb_data[0]; }; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 9b0f25930fbc..18cbc15b20d5 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1843,8 +1843,6 @@ int fib_table_flush(struct fib_table *tb) if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll); - inetpeer_invalidate_tree(&tb->tb_peers); - pr_debug("trie_flush found=%d\n", found); return found; } @@ -1993,7 +1991,6 @@ struct fib_table *fib_trie_table(u32 id) tb->tb_id = id; tb->tb_default = -1; tb->tb_num_default = 0; - inet_peer_base_init(&tb->tb_peers); t = (struct trie *) tb->tb_data; memset(t, 0, sizeof(*t)); -- cgit v1.2.3 From 5b7ccaf3fc7446e42b83a77fd7aa7ad92850acdd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Jul 2012 19:45:08 +0200 Subject: cfg80211/mac80211: re-add get_channel operation This essentially reverts commit 2e165b818456 but introduces the get_channel operation with a new wireless_dev argument so that you can retrieve the channel per interface. This is necessary as even though we can track all interface channels (except monitor) we can't track the channel type used. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 9 +++++++++ net/mac80211/cfg.c | 11 +++++++++++ net/wireless/nl80211.c | 16 +++++++++++----- net/wireless/wext-compat.c | 9 +++++++-- 4 files changed, 38 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5a67165f3b19..8115d68eb603 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1612,6 +1612,10 @@ struct cfg80211_gtk_rekey_data { * @get_et_strings: Ethtool API to get a set of strings to describe stats * and perhaps other supported types of ethtool data-sets. * See @ethtool_ops.get_strings + * + * @get_channel: Get the current operating channel for the virtual interface. + * For monitor interfaces, it should return %NULL unless there's a single + * current monitoring channel. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -1821,6 +1825,11 @@ struct cfg80211_ops { u32 sset, u8 *data); void (*set_monitor_enabled)(struct wiphy *wiphy, bool enabled); + + struct ieee80211_channel * + (*get_channel)(struct wiphy *wiphy, + struct wireless_dev *wdev, + enum nl80211_channel_type *type); }; /* diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e95f24eef870..10dd9631e4da 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2982,6 +2982,16 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, return 0; } +static struct ieee80211_channel * +ieee80211_cfg_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + enum nl80211_channel_type *type) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + *type = local->_oper_channel_type; + return local->oper_channel; +} + #ifdef CONFIG_PM static void ieee80211_set_wakeup(struct wiphy *wiphy, bool enabled) { @@ -3062,4 +3072,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_et_sset_count = ieee80211_get_et_sset_count, .get_et_stats = ieee80211_get_et_stats, .get_et_strings = ieee80211_get_et_strings, + .get_channel = ieee80211_cfg_get_channel, }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 079fc49e3975..6b001e445718 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1759,11 +1759,17 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, (cfg80211_rdev_list_generation << 2))) goto nla_put_failure; - if (rdev->monitor_channel) { - if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, - rdev->monitor_channel->center_freq) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, - rdev->monitor_channel_type)) + if (rdev->ops->get_channel) { + struct ieee80211_channel *chan; + enum nl80211_channel_type channel_type; + + chan = rdev->ops->get_channel(&rdev->wiphy, wdev, + &channel_type); + if (chan && + (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, + chan->center_freq) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, + channel_type))) goto nla_put_failure; } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 7df42f541873..494379eb464f 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -827,6 +827,8 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct ieee80211_channel *chan; + enum nl80211_channel_type channel_type; switch (wdev->iftype) { case NL80211_IFTYPE_STATION: @@ -834,10 +836,13 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); case NL80211_IFTYPE_MONITOR: - if (!rdev->monitor_channel) + if (!rdev->ops->get_channel) return -EINVAL; - freq->m = rdev->monitor_channel->center_freq; + chan = rdev->ops->get_channel(wdev->wiphy, wdev, &channel_type); + if (!chan) + return -EINVAL; + freq->m = chan->center_freq; freq->e = 6; return 0; default: -- cgit v1.2.3 From 4290cb4bf212112e3d6f860e25f000ca8a1ca6a4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Jul 2012 22:19:48 +0200 Subject: cfg80211: reduce monitor interface tracking Revert commit b78e8ceac23655e1e06b30aa95ab11742d1ac7c0 ("cfg80211: track monitor channel") and remove the set_monitor_enabled() callback. Due to the tracking happening in NETDEV_PRE_UP, it had introduced bugs because the monitor interface callback would be called before the device was started. It looks like there's no way to fix this, and using NETDEV_PRE_UP is broken anyway (since there's no NETDEV_UP_FAIL), so remove all that code, track interfaces in NETDEV_UP and also stop tracking the monitor channel in cfg80211. This mostly reverts to before the tracking, except that we keep the interface count tracking so that setting the monitor channel can be rejected properly. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ---- net/wireless/chan.c | 9 +-------- net/wireless/core.c | 48 +----------------------------------------------- net/wireless/core.h | 3 --- 4 files changed, 2 insertions(+), 62 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8115d68eb603..0245208c2978 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1504,8 +1504,6 @@ struct cfg80211_gtk_rekey_data { * interfaces are active this callback should reject the configuration. * If no interfaces are active or the device is down, the channel should * be stored for when a monitor interface becomes active. - * @set_monitor_enabled: Notify driver that there are only monitor - * interfaces running. * * @scan: Request to do a scan. If returning zero, the scan request is given * the driver, and will be valid until passed to cfg80211_scan_done(). @@ -1824,8 +1822,6 @@ struct cfg80211_ops { void (*get_et_strings)(struct wiphy *wiphy, struct net_device *dev, u32 sset, u8 *data); - void (*set_monitor_enabled)(struct wiphy *wiphy, bool enabled); - struct ieee80211_channel * (*get_channel)(struct wiphy *wiphy, struct wireless_dev *wdev, diff --git a/net/wireless/chan.c b/net/wireless/chan.c index a16cdffb24a9..d355f67d0cdd 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -82,7 +82,6 @@ int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, int freq, enum nl80211_channel_type chantype) { struct ieee80211_channel *chan; - int err; if (!rdev->ops->set_monitor_channel) return -EOPNOTSUPP; @@ -93,13 +92,7 @@ int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, if (!chan) return -EINVAL; - err = rdev->ops->set_monitor_channel(&rdev->wiphy, chan, chantype); - if (!err) { - rdev->monitor_channel = chan; - rdev->monitor_channel_type = chantype; - } - - return err; + return rdev->ops->set_monitor_channel(&rdev->wiphy, chan, chantype); } void diff --git a/net/wireless/core.c b/net/wireless/core.c index 0557bb159025..71b684b5a675 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -736,60 +736,14 @@ static struct device_type wiphy_type = { .name = "wlan", }; -static struct ieee80211_channel * -cfg80211_get_any_chan(struct cfg80211_registered_device *rdev) -{ - struct ieee80211_supported_band *sband; - int i; - - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { - sband = rdev->wiphy.bands[i]; - if (sband && sband->n_channels > 0) - return &sband->channels[0]; - } - - return NULL; -} - -static void cfg80211_init_mon_chan(struct cfg80211_registered_device *rdev) -{ - struct ieee80211_channel *chan; - - chan = cfg80211_get_any_chan(rdev); - if (WARN_ON(!chan)) - return; - - mutex_lock(&rdev->devlist_mtx); - WARN_ON(cfg80211_set_monitor_channel(rdev, chan->center_freq, - NL80211_CHAN_NO_HT)); - mutex_unlock(&rdev->devlist_mtx); -} - void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num) { - bool has_monitors_only_old = cfg80211_has_monitors_only(rdev); - bool has_monitors_only_new; - ASSERT_RTNL(); rdev->num_running_ifaces += num; if (iftype == NL80211_IFTYPE_MONITOR) rdev->num_running_monitor_ifaces += num; - - has_monitors_only_new = cfg80211_has_monitors_only(rdev); - if (has_monitors_only_new != has_monitors_only_old) { - if (rdev->ops->set_monitor_enabled) - rdev->ops->set_monitor_enabled(&rdev->wiphy, - has_monitors_only_new); - - if (!has_monitors_only_new) { - rdev->monitor_channel = NULL; - rdev->monitor_channel_type = NL80211_CHAN_NO_HT; - } else { - cfg80211_init_mon_chan(rdev); - } - } } static int cfg80211_netdev_notifier_call(struct notifier_block *nb, @@ -912,6 +866,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, mutex_unlock(&rdev->devlist_mtx); dev_put(dev); } + cfg80211_update_iface_num(rdev, wdev->iftype, 1); cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); @@ -1006,7 +961,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, mutex_unlock(&rdev->devlist_mtx); if (ret) return notifier_from_errno(ret); - cfg80211_update_iface_num(rdev, wdev->iftype, 1); break; } diff --git a/net/wireless/core.h b/net/wireless/core.h index bac97da751df..5206c6844fd7 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -61,9 +61,6 @@ struct cfg80211_registered_device { int num_running_ifaces; int num_running_monitor_ifaces; - struct ieee80211_channel *monitor_channel; - enum nl80211_channel_type monitor_channel_type; - /* BSSes/scanning */ spinlock_t bss_lock; struct list_head bss_list; -- cgit v1.2.3 From 85b91b0339e764f7e56ff5968fa10d85451378b4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 Jul 2012 08:21:29 -0700 Subject: ipv4: Don't store a rule pointer in fib_result. We only use it to fetch the rule's tclassid, so just store the tclassid there instead. This also decreases the size of fib_result by a full 8 bytes on 64-bit. On 32-bits it's a wash. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 12 +++--------- net/ipv4/fib_frontend.c | 8 -------- net/ipv4/fib_rules.c | 15 ++++++--------- net/ipv4/route.c | 6 ++---- 4 files changed, 11 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e91fedd22db2..5697acefeba3 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -106,12 +106,10 @@ struct fib_result { unsigned char nh_sel; unsigned char type; unsigned char scope; + u32 tclassid; struct fib_info *fi; struct fib_table *table; struct list_head *fa_head; -#ifdef CONFIG_IP_MULTIPLE_TABLES - struct fib_rule *r; -#endif }; struct fib_result_nl { @@ -215,10 +213,6 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, extern int __net_init fib4_rules_init(struct net *net); extern void __net_exit fib4_rules_exit(struct net *net); -#ifdef CONFIG_IP_ROUTE_CLASSID -extern u32 fib_rules_tclass(const struct fib_result *res); -#endif - extern struct fib_table *fib_new_table(struct net *net, u32 id); extern struct fib_table *fib_get_table(struct net *net, u32 id); @@ -229,7 +223,7 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { if (!net->ipv4.fib_has_custom_rules) { - res->r = NULL; + res->tclassid = 0; if (net->ipv4.fib_local && !fib_table_lookup(net->ipv4.fib_local, flp, res, FIB_LOOKUP_NOREF)) @@ -289,7 +283,7 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) #endif *itag = FIB_RES_NH(*res).nh_tclassid<<16; #ifdef CONFIG_IP_MULTIPLE_TABLES - rtag = fib_rules_tclass(res); + rtag = res->tclassid; if (*itag == 0) *itag = (rtag<<16); *itag |= (rtag>>16); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 81f85716a894..7a31194ec633 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -169,10 +169,6 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, if (ipv4_is_multicast(addr)) return RTN_MULTICAST; -#ifdef CONFIG_IP_MULTIPLE_TABLES - res.r = NULL; -#endif - local_table = fib_get_table(net, RT_TABLE_LOCAL); if (local_table) { ret = RTN_UNICAST; @@ -934,10 +930,6 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) .flowi4_scope = frn->fl_scope, }; -#ifdef CONFIG_IP_MULTIPLE_TABLES - res.r = NULL; -#endif - frn->err = -ENOENT; if (tb) { local_bh_disable(); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index c06da93b0b70..a83d74e498d2 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -47,13 +47,6 @@ struct fib4_rule { #endif }; -#ifdef CONFIG_IP_ROUTE_CLASSID -u32 fib_rules_tclass(const struct fib_result *res) -{ - return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; -} -#endif - int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { struct fib_lookup_arg arg = { @@ -63,8 +56,12 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) int err; err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); - res->r = arg.rule; - +#ifdef CONFIG_IP_ROUTE_CLASSID + if (arg.rule) + res->tclassid = ((struct fib4_rule *)arg.rule)->tclassid; + else + res->tclassid = 0; +#endif return err; } EXPORT_SYMBOL_GPL(__fib_lookup); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9319bf1f8354..aad21819316d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1735,7 +1735,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, #ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_MULTIPLE_TABLES - set_class_tag(rt, fib_rules_tclass(res)); + set_class_tag(rt, res->tclassid); #endif set_class_tag(rt, itag); #endif @@ -2353,11 +2353,9 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) __be32 orig_saddr; int orig_oif; + res.tclassid = 0; res.fi = NULL; res.table = NULL; -#ifdef CONFIG_IP_MULTIPLE_TABLES - res.r = NULL; -#endif orig_daddr = fl4->daddr; orig_saddr = fl4->saddr; -- cgit v1.2.3 From c20f8e35ca8b0583323d310ec63a0f0d17cfdcf5 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 10 Jul 2012 05:47:07 -0700 Subject: Bluetooth: Use tx window from config response for ack timing This change addresses an L2CAP ERTM throughput problem when a remote device does not fully utilize the available transmit window. The L2CAP ERTM transmit window size determines the maximum number of unacked frames that may be outstanding at any time. It is configured separately for each direction of an ERTM connection. Each side sends a configuration request with a tx_win field indicating how many unacked frames it is capable of receiving before sending an ack. The configuration response's tx_win field shows how many frames the transmitter will actually send before waiting for an ack. It's important to trace both the actual transmit window (to check for validity of incoming frames) and the number of frames that the transmitter will send before waiting (to send acks at the appropriate time). Now there are separate tx_win and ack_win values. ack_win is updated based on configuration responses, and is used to determine when acks are sent. Signed-off-by: Mat Martineau Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 59 +++++++++++++++++++++++++------------------ 2 files changed, 35 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index e5164ff55f27..a7679f8913d2 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -464,6 +464,7 @@ struct l2cap_chan { __u16 tx_win; __u16 tx_win_max; + __u16 ack_win; __u8 max_tx; __u16 retrans_timeout; __u16 monitor_timeout; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9fd05993f5b4..a8964db04bfb 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -431,6 +431,7 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan) chan->max_tx = L2CAP_DEFAULT_MAX_TX; chan->tx_win = L2CAP_DEFAULT_TX_WINDOW; chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW; + chan->ack_win = L2CAP_DEFAULT_TX_WINDOW; chan->sec_level = BT_SECURITY_LOW; set_bit(FLAG_FORCE_ACTIVE, &chan->flags); @@ -1877,10 +1878,10 @@ static void l2cap_send_ack(struct l2cap_chan *chan) frames_to_ack = 0; } - /* Ack now if the tx window is 3/4ths full. + /* Ack now if the window is 3/4ths full. * Calculate without mul or div */ - threshold = chan->tx_win; + threshold = chan->ack_win; threshold += threshold << 1; threshold >>= 2; @@ -2786,6 +2787,7 @@ static inline void l2cap_txwin_setup(struct l2cap_chan *chan) L2CAP_DEFAULT_TX_WINDOW); chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW; } + chan->ack_win = chan->tx_win; } static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data) @@ -3175,10 +3177,9 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, voi break; case L2CAP_CONF_EWS: - chan->tx_win = min_t(u16, val, - L2CAP_DEFAULT_EXT_WINDOW); + chan->ack_win = min_t(u16, val, chan->ack_win); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, - chan->tx_win); + chan->tx_win); break; case L2CAP_CONF_EFS: @@ -3207,6 +3208,9 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, voi chan->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); chan->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); chan->mps = le16_to_cpu(rfc.max_pdu_size); + if (!test_bit(FLAG_EXT_CTRL, &chan->flags)) + chan->ack_win = min_t(u16, chan->ack_win, + rfc.txwin_size); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->local_msdu = le16_to_cpu(efs.msdu); @@ -3268,7 +3272,17 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) { int type, olen; unsigned long val; - struct l2cap_conf_rfc rfc; + /* Use sane default values in case a misbehaving remote device + * did not send an RFC or extended window size option. + */ + u16 txwin_ext = chan->ack_win; + struct l2cap_conf_rfc rfc = { + .mode = chan->mode, + .retrans_timeout = __constant_cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO), + .monitor_timeout = __constant_cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO), + .max_pdu_size = cpu_to_le16(chan->imtu), + .txwin_size = min_t(u16, chan->ack_win, L2CAP_DEFAULT_TX_WINDOW), + }; BT_DBG("chan %p, rsp %p, len %d", chan, rsp, len); @@ -3278,32 +3292,27 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); - if (type != L2CAP_CONF_RFC) - continue; - - if (olen != sizeof(rfc)) + switch (type) { + case L2CAP_CONF_RFC: + if (olen == sizeof(rfc)) + memcpy(&rfc, (void *)val, olen); break; - - memcpy(&rfc, (void *)val, olen); - goto done; + case L2CAP_CONF_EWS: + txwin_ext = val; + break; + } } - /* Use sane default values in case a misbehaving remote device - * did not send an RFC option. - */ - rfc.mode = chan->mode; - rfc.retrans_timeout = __constant_cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO); - rfc.monitor_timeout = __constant_cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO); - rfc.max_pdu_size = cpu_to_le16(chan->imtu); - - BT_ERR("Expected RFC option was not found, using defaults"); - -done: switch (rfc.mode) { case L2CAP_MODE_ERTM: chan->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); chan->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); - chan->mps = le16_to_cpu(rfc.max_pdu_size); + chan->mps = le16_to_cpu(rfc.max_pdu_size); + if (test_bit(FLAG_EXT_CTRL, &chan->flags)) + chan->ack_win = min_t(u16, chan->ack_win, txwin_ext); + else + chan->ack_win = min_t(u16, chan->ack_win, + rfc.txwin_size); break; case L2CAP_MODE_STREAMING: chan->mps = le16_to_cpu(rfc.max_pdu_size); -- cgit v1.2.3 From 80d0a69fc57715dc9080c0567df1ed911b78abea Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 16 Jul 2012 03:28:06 -0700 Subject: ipv4: Add helper inet_csk_update_pmtu(). This abstracts away the call to dst_ops->update_pmtu() so that we can transparently handle the fact that, in the future, the dst itself can be invalidated by the PMTU update (when we have non-host routes cached in sockets). So we try to rebuild the socket cached route after the method invocation if necessary. This isn't used by SCTP because it needs to cache dsts per-transport, and thus will need it's own local version of this helper. Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 2 ++ net/dccp/ipv4.c | 11 ++------- net/ipv4/inet_connection_sock.c | 46 ++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 11 ++------- 4 files changed, 52 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 291e7cee14e7..2cf44b4ed2e6 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -337,4 +337,6 @@ extern int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); extern int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); + +extern struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 129ed8f74138..683902fcc8ed 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -161,17 +161,10 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, if (sk->sk_state == DCCP_LISTEN) return; - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) return; - dst->ops->update_pmtu(dst, mtu); - /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 76825be3b643..200d21809379 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -803,3 +803,49 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, } EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); #endif + +static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) +{ + struct inet_sock *inet = inet_sk(sk); + struct ip_options_rcu *inet_opt; + __be32 daddr = inet->inet_daddr; + struct flowi4 *fl4; + struct rtable *rt; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + fl4 = &fl->u.ip4; + rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, + inet->inet_saddr, inet->inet_dport, + inet->inet_sport, sk->sk_protocol, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); + if (IS_ERR(rt)) + rt = NULL; + if (rt) + sk_setup_caps(sk, &rt->dst); + rcu_read_unlock(); + + return &rt->dst; +} + +struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) +{ + struct dst_entry *dst = __sk_dst_check(sk, 0); + struct inet_sock *inet = inet_sk(sk); + + if (!dst) { + dst = inet_csk_rebuild_route(sk, &inet->cork.fl); + if (!dst) + goto out; + } + dst->ops->update_pmtu(dst, mtu); + + dst = __sk_dst_check(sk, 0); + if (!dst) + dst = inet_csk_rebuild_route(sk, &inet->cork.fl); +out: + return dst; +} +EXPORT_SYMBOL_GPL(inet_csk_update_pmtu); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7a0062cb4ed0..b8e7e0595407 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -289,17 +289,10 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) if (sk->sk_state == TCP_LISTEN) return; - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) return; - dst->ops->update_pmtu(dst, mtu); - /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. */ -- cgit v1.2.3 From 35ad9b9cf7d8a2e6259a0d24022e910adb6f3489 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 16 Jul 2012 03:44:56 -0700 Subject: ipv6: Add helper inet6_csk_update_pmtu(). This is the ipv6 version of inet_csk_update_pmtu(). Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 2 ++ net/dccp/ipv6.c | 35 +++----------------------- net/ipv6/inet6_connection_sock.c | 49 ++++++++++++++++++++++++++----------- net/ipv6/tcp_ipv6.c | 37 +++------------------------- 4 files changed, 45 insertions(+), 78 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index df2a857e853d..04642c920431 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -43,4 +43,6 @@ extern void inet6_csk_reqsk_queue_hash_add(struct sock *sk, extern void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); extern int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl); + +extern struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu); #endif /* _INET6_CONNECTION_SOCK_H */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 090c0800ce03..3ee0342e1cec 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -145,39 +145,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) goto out; - /* icmp should have updated the destination cache entry */ - dst = __sk_dst_check(sk, np->dst_cookie); - if (dst == NULL) { - struct inet_sock *inet = inet_sk(sk); - struct flowi6 fl6; - - /* BUGGG_FUTURE: Again, it is not clear how - to handle rthdr case. Ignore this complexity - for now. - */ - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = np->daddr; - fl6.saddr = np->saddr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.fl6_dport = inet->inet_dport; - fl6.fl6_sport = inet->inet_sport; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - - dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); - if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); - goto out; - } - } else - dst_hold(dst); - - dst->ops->update_pmtu(dst, ntohl(info)); + dst = inet6_csk_update_pmtu(sk, ntohl(info)); + if (!dst) + goto out; - if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) dccp_sync_mss(sk, dst_mtu(dst)); - } /* else let the usual retransmit timer handle it */ - dst_release(dst); goto out; } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index bceb14450a1d..62539a4b2dc7 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -203,15 +203,13 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) return dst; } -int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +static struct dst_entry *inet6_csk_route_socket(struct sock *sk) { - struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi6 fl6; - struct dst_entry *dst; struct in6_addr *final_p, final; - int res; + struct dst_entry *dst; + struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; @@ -228,18 +226,29 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) final_p = fl6_update_dst(&fl6, np->opt, &final); dst = __inet6_csk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { + if (!dst) { dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); - if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); - sk->sk_route_caps = 0; - kfree_skb(skb); - return PTR_ERR(dst); - } + if (!IS_ERR(dst)) + __inet6_csk_dst_store(sk, dst, NULL, NULL); + } + return dst; +} - __inet6_csk_dst_store(sk, dst, NULL, NULL); +int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +{ + struct sock *sk = skb->sk; + struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi6 fl6; + struct dst_entry *dst; + int res; + + dst = inet6_csk_route_socket(sk); + if (IS_ERR(dst)) { + sk->sk_err_soft = -PTR_ERR(dst); + sk->sk_route_caps = 0; + kfree_skb(skb); + return PTR_ERR(dst); } rcu_read_lock(); @@ -253,3 +262,15 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) return res; } EXPORT_SYMBOL_GPL(inet6_csk_xmit); + +struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) +{ + struct dst_entry *dst = inet6_csk_route_socket(sk); + + if (IS_ERR(dst)) + return NULL; + dst->ops->update_pmtu(dst, mtu); + + return inet6_csk_route_socket(sk); +} +EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3071f377145c..ecdf241cad02 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -378,43 +378,14 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) goto out; - /* icmp should have updated the destination cache entry */ - dst = __sk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { - struct inet_sock *inet = inet_sk(sk); - struct flowi6 fl6; - - /* BUGGG_FUTURE: Again, it is not clear how - to handle rthdr case. Ignore this complexity - for now. - */ - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = np->daddr; - fl6.saddr = np->saddr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet->inet_dport; - fl6.fl6_sport = inet->inet_sport; - security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); - - dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); - if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); - goto out; - } - - } else - dst_hold(dst); - - dst->ops->update_pmtu(dst, ntohl(info)); + dst = inet6_csk_update_pmtu(sk, ntohl(info)); + if (!dst) + goto out; if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); - } /* else let the usual retransmit timer handle it */ - dst_release(dst); + } goto out; } -- cgit v1.2.3 From 02f3d4ce9e81434a365f4643020b0402f6fe3332 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 16 Jul 2012 03:57:14 -0700 Subject: sctp: Adjust PMTU updates to accomodate route invalidation. This adjusts the call to dst_ops->update_pmtu() so that we can transparently handle the fact that, in the future, the dst itself can be invalidated by the PMTU update (when we have non-host routes cached in sockets). Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 4 ++-- include/net/sctp/structs.h | 4 ++-- net/sctp/associola.c | 4 ++-- net/sctp/input.c | 4 ++-- net/sctp/output.c | 2 +- net/sctp/socket.c | 6 +++--- net/sctp/transport.c | 12 ++++++++++-- 7 files changed, 22 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 1f2735dba753..ff499640528b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -519,10 +519,10 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) return frag; } -static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc) +static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_association *asoc) { - sctp_assoc_sync_pmtu(asoc); + sctp_assoc_sync_pmtu(sk, asoc); asoc->pmtu_pending = 0; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index fecdf31816f2..536e439ddf1d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1091,7 +1091,7 @@ void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *); -void sctp_transport_update_pmtu(struct sctp_transport *, u32); +void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32); void sctp_transport_immediate_rtx(struct sctp_transport *); @@ -2003,7 +2003,7 @@ void sctp_assoc_update(struct sctp_association *old, __u32 sctp_association_get_next_tsn(struct sctp_association *); -void sctp_assoc_sync_pmtu(struct sctp_association *); +void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *); void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int); void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int); void sctp_assoc_set_primary(struct sctp_association *, diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b16517ee1aaf..8cf348e62e74 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1360,7 +1360,7 @@ struct sctp_transport *sctp_assoc_choose_alter_transport( /* Update the association's pmtu and frag_point by going through all the * transports. This routine is called when a transport's PMTU has changed. */ -void sctp_assoc_sync_pmtu(struct sctp_association *asoc) +void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) { struct sctp_transport *t; __u32 pmtu = 0; @@ -1372,7 +1372,7 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(t, dst_mtu(t->dst)); + sctp_transport_update_pmtu(sk, t, dst_mtu(t->dst)); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/input.c b/net/sctp/input.c index f050d45faa98..a67bc31f49fd 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -408,10 +408,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, if (t->param_flags & SPP_PMTUD_ENABLE) { /* Update transports view of the MTU */ - sctp_transport_update_pmtu(t, pmtu); + sctp_transport_update_pmtu(sk, t, pmtu); /* Update association pmtu. */ - sctp_assoc_sync_pmtu(asoc); + sctp_assoc_sync_pmtu(sk, asoc); } /* Retransmit with the new pmtu setting. diff --git a/net/sctp/output.c b/net/sctp/output.c index 539f35d07f4e..838e18b4d7ea 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -410,7 +410,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) { - sctp_assoc_sync_pmtu(asoc); + sctp_assoc_sync_pmtu(sk, asoc); } } dst = dst_clone(tp->dst); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b3b8a8d813eb..74bd3c47350a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1853,7 +1853,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } if (asoc->pmtu_pending) - sctp_assoc_pending_pmtu(asoc); + sctp_assoc_pending_pmtu(sk, asoc); /* If fragmentation is disabled and the message length exceeds the * association fragmentation point, return EMSGSIZE. The I-D @@ -2365,7 +2365,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) { if (trans) { trans->pathmtu = params->spp_pathmtu; - sctp_assoc_sync_pmtu(asoc); + sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); } else if (asoc) { asoc->pathmtu = params->spp_pathmtu; sctp_frag_point(asoc, params->spp_pathmtu); @@ -2382,7 +2382,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, (trans->param_flags & ~SPP_PMTUD) | pmtud_change; if (update) { sctp_transport_pmtu(trans, sctp_opt2sk(sp)); - sctp_assoc_sync_pmtu(asoc); + sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); } } else if (asoc) { asoc->param_flags = diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 1dcceb6e0ce6..e69e1a2175a4 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -228,7 +228,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) +void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst; @@ -245,8 +245,16 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) } dst = sctp_transport_dst_check(t); - if (dst) + if (!dst) + t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); + + if (dst) { dst->ops->update_pmtu(dst, pmtu); + + dst = sctp_transport_dst_check(t); + if (!dst) + t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); + } } /* Caches the dst entry and source address for a transport's destination -- cgit v1.2.3 From 51d7cccf07238f5236c5b9269231a30dd5f8e714 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 16 Jul 2012 04:28:49 +0000 Subject: net: make sock diag per-namespace Before this patch sock_diag works for init_net only and dumps information about sockets from all namespaces. This patch expands sock_diag for all name-spaces. It creates a netlink kernel socket for each netns and filters data during dumping. v2: filter accoding with netns in all places remove an unused variable. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Pavel Emelyanov CC: Eric Dumazet Cc: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Andrew Vagin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 1 - include/net/net_namespace.h | 1 + net/core/sock_diag.c | 27 ++++++++++++++++++++------- net/ipv4/inet_diag.c | 21 ++++++++++++++++----- net/ipv4/udp_diag.c | 10 +++++++--- net/unix/diag.c | 9 +++++++-- 6 files changed, 51 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 6793fac5eab5..e3e395acc2fd 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -44,6 +44,5 @@ void sock_diag_save_cookie(void *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); -extern struct sock *sock_diag_nlsk; #endif /* KERNEL */ #endif diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index ac9195e6a062..ae1cd6c9ba52 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -101,6 +101,7 @@ struct net { struct netns_xfrm xfrm; #endif struct netns_ipvs *ipvs; + struct sock *diag_nlsk; }; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 07a29eb34a41..9d8755e4a7a5 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -166,23 +166,36 @@ static void sock_diag_rcv(struct sk_buff *skb) mutex_unlock(&sock_diag_mutex); } -struct sock *sock_diag_nlsk; -EXPORT_SYMBOL_GPL(sock_diag_nlsk); - -static int __init sock_diag_init(void) +static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = sock_diag_rcv, }; - sock_diag_nlsk = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, + net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, THIS_MODULE, &cfg); - return sock_diag_nlsk == NULL ? -ENOMEM : 0; + return net->diag_nlsk == NULL ? -ENOMEM : 0; +} + +static void __net_exit diag_net_exit(struct net *net) +{ + netlink_kernel_release(net->diag_nlsk); + net->diag_nlsk = NULL; +} + +static struct pernet_operations diag_net_ops = { + .init = diag_net_init, + .exit = diag_net_exit, +}; + +static int __init sock_diag_init(void) +{ + return register_pernet_subsys(&diag_net_ops); } static void __exit sock_diag_exit(void) { - netlink_kernel_release(sock_diag_nlsk); + unregister_pernet_subsys(&diag_net_ops); } module_init(sock_diag_init); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 38064a285cca..570e61f9611f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -272,16 +272,17 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s int err; struct sock *sk; struct sk_buff *rep; + struct net *net = sock_net(in_skb->sk); err = -EINVAL; if (req->sdiag_family == AF_INET) { - sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], + sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); } #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) { - sk = inet6_lookup(&init_net, hashinfo, + sk = inet6_lookup(net, hashinfo, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, @@ -317,7 +318,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s nlmsg_free(rep); goto out; } - err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -724,6 +725,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, { int i, num; int s_i, s_num; + struct net *net = sock_net(skb->sk); s_i = cb->args[1]; s_num = num = cb->args[2]; @@ -743,6 +745,9 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, sk_nulls_for_each(sk, node, &ilb->head) { struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) { num++; continue; @@ -813,6 +818,8 @@ skip_listen_ht: sk_nulls_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net)) + continue; if (num < s_num) goto next_normal; if (!(r->idiag_states & (1 << sk->sk_state))) @@ -839,6 +846,8 @@ next_normal: inet_twsk_for_each(tw, node, &head->twchain) { + if (!net_eq(twsk_net(tw), net)) + continue; if (num < s_num) goto next_dying; @@ -943,6 +952,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) { int hdrlen = sizeof(struct inet_diag_req); + struct net *net = sock_net(skb->sk); if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX || nlmsg_len(nlh) < hdrlen) @@ -963,7 +973,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) struct netlink_dump_control c = { .dump = inet_diag_dump_compat, }; - return netlink_dump_start(sock_diag_nlsk, skb, nlh, &c); + return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); } } @@ -973,6 +983,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); + struct net *net = sock_net(skb->sk); if (nlmsg_len(h) < hdrlen) return -EINVAL; @@ -991,7 +1002,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) struct netlink_dump_control c = { .dump = inet_diag_dump, }; - return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index a7f86a3cd502..16d0960062be 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -34,15 +34,16 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, int err = -EINVAL; struct sock *sk; struct sk_buff *rep; + struct net *net = sock_net(in_skb->sk); if (req->sdiag_family == AF_INET) - sk = __udp4_lib_lookup(&init_net, + sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_if, tbl); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) - sk = __udp6_lib_lookup(&init_net, + sk = __udp6_lib_lookup(net, (struct in6_addr *)req->id.idiag_src, req->id.idiag_sport, (struct in6_addr *)req->id.idiag_dst, @@ -75,7 +76,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, kfree_skb(rep); goto out; } - err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -90,6 +91,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin struct inet_diag_req_v2 *r, struct nlattr *bc) { int num, s_num, slot, s_slot; + struct net *net = sock_net(skb->sk); s_slot = cb->args[0]; num = s_num = cb->args[1]; @@ -106,6 +108,8 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin sk_nulls_for_each(sk, node, &hslot->head) { struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net)) + continue; if (num < s_num) goto next; if (!(r->idiag_states & (1 << sk->sk_state))) diff --git a/net/unix/diag.c b/net/unix/diag.c index a74864eedfcd..750b13408449 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -177,6 +177,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct unix_diag_req *req; int num, s_num, slot, s_slot; + struct net *net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); @@ -192,6 +193,8 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = 0; sk_for_each(sk, node, &unix_socket_table[slot]) { + if (!net_eq(sock_net(sk), net)) + continue; if (num < s_num) goto next; if (!(req->udiag_states & (1 << sk->sk_state))) @@ -243,6 +246,7 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, struct sock *sk; struct sk_buff *rep; unsigned int extra_len; + struct net *net = sock_net(in_skb->sk); if (req->udiag_ino == 0) goto out_nosk; @@ -273,7 +277,7 @@ again: goto again; } - err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -287,6 +291,7 @@ out_nosk: static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct unix_diag_req); + struct net *net = sock_net(skb->sk); if (nlmsg_len(h) < hdrlen) return -EINVAL; @@ -295,7 +300,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) struct netlink_dump_control c = { .dump = unix_diag_dump, }; - return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } else return unix_diag_get_exact(skb, h, nlmsg_data(h)); } -- cgit v1.2.3 From 282f23c6ee343126156dd41218b22ece96d747e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Jul 2012 10:13:05 +0200 Subject: tcp: implement RFC 5961 3.2 Implement the RFC 5691 mitigation against Blind Reset attack using RST bit. Idea is to validate incoming RST sequence, to match RCV.NXT value, instead of previouly accepted window : (RCV.NXT <= SEG.SEQ < RCV.NXT+RCV.WND) If sequence is in window but not an exact match, send a "challenge ACK", so that the other part can resend an RST with the appropriate sequence. Add a new sysctl, tcp_challenge_ack_limit, to limit number of challenge ACK sent per second. Add a new SNMP counter to count number of challenge acks sent. (netstat -s | grep TCPChallengeACK) Signed-off-by: Eric Dumazet Cc: Kiran Kumar Kella Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 5 +++++ include/linux/snmp.h | 1 + include/net/tcp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp_input.c | 31 ++++++++++++++++++++++++++++++- 6 files changed, 45 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index e20c17a7d34e..e1e021594cff 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -565,6 +565,11 @@ tcp_limit_output_bytes - INTEGER reduce the size of individual GSO packet (64KB being the max) Default: 131072 +tcp_challenge_ack_limit - INTEGER + Limits number of Challenge ACK sent per second, as recommended + in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks) + Default: 100 + UDP variables: udp_mem - vector of 3 INTEGERs: min, pressure, max diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 6e4c51123828..673e0e928b2b 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -237,6 +237,7 @@ enum LINUX_MIB_TCPOFOQUEUE, /* TCPOFOQueue */ LINUX_MIB_TCPOFODROP, /* TCPOFODrop */ LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */ + LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */ __LINUX_MIB_MAX }; diff --git a/include/net/tcp.h b/include/net/tcp.h index 439984b9af49..85c5090bfe25 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -254,6 +254,7 @@ extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; extern int sysctl_tcp_limit_output_bytes; +extern int sysctl_tcp_challenge_ack_limit; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index dae25e7622cf..3e8e78f12a38 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -261,6 +261,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPOFOQueue", LINUX_MIB_TCPOFOQUEUE), SNMP_MIB_ITEM("TCPOFODrop", LINUX_MIB_TCPOFODROP), SNMP_MIB_ITEM("TCPOFOMerge", LINUX_MIB_TCPOFOMERGE), + SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 70730f7aeafe..3f6a1e762e9c 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -605,6 +605,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_challenge_ack_limit", + .data = &sysctl_tcp_challenge_ack_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_NET_DMA { .procname = "tcp_dma_copybreak", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cc4e12f1f2f7..c841a8990377 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -88,6 +88,9 @@ int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); +/* rfc5961 challenge ack rate limiting */ +int sysctl_tcp_challenge_ack_limit = 100; + int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; @@ -5247,6 +5250,23 @@ out: } #endif /* CONFIG_NET_DMA */ +static void tcp_send_challenge_ack(struct sock *sk) +{ + /* unprotected vars, we dont care of overwrites */ + static u32 challenge_timestamp; + static unsigned int challenge_count; + u32 now = jiffies / HZ; + + if (now != challenge_timestamp) { + challenge_timestamp = now; + challenge_count = 0; + } + if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + tcp_send_ack(sk); + } +} + /* Does PAWS and seqno based validation of an incoming segment, flags will * play significant role here. */ @@ -5283,7 +5303,16 @@ static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, /* Step 2: check RST bit */ if (th->rst) { - tcp_reset(sk); + /* RFC 5961 3.2 : + * If sequence number exactly matches RCV.NXT, then + * RESET the connection + * else + * Send a challenge ACK + */ + if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) + tcp_reset(sk); + else + tcp_send_challenge_ack(sk); goto discard; } -- cgit v1.2.3 From 84f10708f73254878246772cead70a2eb6a123f2 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 12 Jul 2012 16:17:33 -0700 Subject: cfg80211: support TX error rate CQM Let the user configure serveral TX error conection quality monitoring parameters: % error rate, survey interval, and # of attempted packets. On exceeding the TX failure rate over the given interval, the driver will send a CQM notify event with the actual TX failure rate and packets attempted. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 16 ++++++++++ include/net/cfg80211.h | 21 ++++++++++++ net/wireless/mlme.c | 13 ++++++++ net/wireless/nl80211.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 5 +++ 5 files changed, 140 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e791487ead37..d6cfacc3ce4d 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1550,6 +1550,8 @@ enum nl80211_attrs { /* default RSSI threshold for scan results if none specified. */ #define NL80211_SCAN_RSSI_THOLD_OFF -300 +#define NL80211_CQM_TXE_MAX_INTVL 1800 + /** * enum nl80211_iftype - (virtual) interface types * @@ -2589,6 +2591,17 @@ enum nl80211_ps_state { * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event * @NL80211_ATTR_CQM_PKT_LOSS_EVENT: a u32 value indicating that this many * consecutive packets were not acknowledged by the peer + * @NL80211_ATTR_CQM_TXE_RATE: TX error rate in %. Minimum % of TX failures + * during the given %NL80211_ATTR_CQM_TXE_INTVL before an + * %NL80211_CMD_NOTIFY_CQM with reported %NL80211_ATTR_CQM_TXE_RATE and + * %NL80211_ATTR_CQM_TXE_PKTS is generated. + * @NL80211_ATTR_CQM_TXE_PKTS: number of attempted packets in a given + * %NL80211_ATTR_CQM_TXE_INTVL before %NL80211_ATTR_CQM_TXE_RATE is + * checked. + * @NL80211_ATTR_CQM_TXE_INTVL: interval in seconds. Specifies the periodic + * interval in which %NL80211_ATTR_CQM_TXE_PKTS and + * %NL80211_ATTR_CQM_TXE_RATE must be satisfied before generating an + * %NL80211_CMD_NOTIFY_CQM. Set to 0 to turn off TX error reporting. * @__NL80211_ATTR_CQM_AFTER_LAST: internal * @NL80211_ATTR_CQM_MAX: highest key attribute */ @@ -2598,6 +2611,9 @@ enum nl80211_attr_cqm { NL80211_ATTR_CQM_RSSI_HYST, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT, NL80211_ATTR_CQM_PKT_LOSS_EVENT, + NL80211_ATTR_CQM_TXE_RATE, + NL80211_ATTR_CQM_TXE_PKTS, + NL80211_ATTR_CQM_TXE_INTVL, /* keep last */ __NL80211_ATTR_CQM_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0245208c2978..493fa0c79005 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1573,6 +1573,8 @@ struct cfg80211_gtk_rekey_data { * @set_power_mgmt: Configure WLAN power management. A timeout value of -1 * allows the driver to adjust the dynamic ps timeout value. * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold. + * @set_cqm_txe_config: Configure connection quality monitor TX error + * thresholds. * @sched_scan_start: Tell the driver to start a scheduled scan. * @sched_scan_stop: Tell the driver to stop an ongoing scheduled * scan. The driver_initiated flag specifies whether the driver @@ -1783,6 +1785,10 @@ struct cfg80211_ops { struct net_device *dev, s32 rssi_thold, u32 rssi_hyst); + int (*set_cqm_txe_config)(struct wiphy *wiphy, + struct net_device *dev, + u32 rate, u32 pkts, u32 intvl); + void (*mgmt_frame_register)(struct wiphy *wiphy, struct wireless_dev *wdev, u16 frame_type, bool reg); @@ -3395,6 +3401,21 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, void cfg80211_cqm_pktloss_notify(struct net_device *dev, const u8 *peer, u32 num_packets, gfp_t gfp); +/** + * cfg80211_cqm_txe_notify - TX error rate event + * @dev: network device + * @peer: peer's MAC address + * @num_packets: how many packets were lost + * @rate: % of packets which failed transmission + * @intvl: interval (in s) over which the TX failure threshold was breached. + * @gfp: context flags + * + * Notify userspace when configured % TX failures over number of packets in a + * given interval is exceeded. + */ +void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer, + u32 num_packets, u32 rate, u32 intvl, gfp_t gfp); + /** * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying * @dev: network device diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index abe9f82d5a82..1cdb1d5e6b0f 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -919,6 +919,19 @@ void cfg80211_cqm_pktloss_notify(struct net_device *dev, } EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); +void cfg80211_cqm_txe_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, + u32 rate, u32 intvl, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_cqm_txe_notify(rdev, dev, peer, num_packets, + rate, intvl, gfp); +} +EXPORT_SYMBOL(cfg80211_cqm_txe_notify); + void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, const u8 *replay_ctr, gfp_t gfp) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index be8750f91d78..9216e45e53a0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6267,8 +6267,35 @@ nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] __read_mostly = { [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM_TXE_RATE] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM_TXE_PKTS] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM_TXE_INTVL] = { .type = NLA_U32 }, }; +static int nl80211_set_cqm_txe(struct genl_info *info, + u32 rate, u32 pkts, u32 intvl) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev; + struct net_device *dev = info->user_ptr[1]; + + if ((rate < 0 || rate > 100) || + (intvl < 0 || intvl > NL80211_CQM_TXE_MAX_INTVL)) + return -EINVAL; + + wdev = dev->ieee80211_ptr; + + if (!rdev->ops->set_cqm_txe_config) + return -EOPNOTSUPP; + + if (wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) + return -EOPNOTSUPP; + + return rdev->ops->set_cqm_txe_config(wdev->wiphy, dev, + rate, pkts, intvl); +} + static int nl80211_set_cqm_rssi(struct genl_info *info, s32 threshold, u32 hysteresis) { @@ -6316,6 +6343,14 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) threshold = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]); hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]); err = nl80211_set_cqm_rssi(info, threshold, hysteresis); + } else if (attrs[NL80211_ATTR_CQM_TXE_RATE] && + attrs[NL80211_ATTR_CQM_TXE_PKTS] && + attrs[NL80211_ATTR_CQM_TXE_INTVL]) { + u32 rate, pkts, intvl; + rate = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_RATE]); + pkts = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_PKTS]); + intvl = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_INTVL]); + err = nl80211_set_cqm_txe(info, rate, pkts, intvl); } else err = -EINVAL; @@ -8495,6 +8530,56 @@ void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void +nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, u32 rate, u32 intvl, gfp_t gfp) +{ + struct sk_buff *msg; + struct nlattr *pinfoattr; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer)) + goto nla_put_failure; + + pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); + if (!pinfoattr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_CQM_TXE_PKTS, num_packets)) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_CQM_TXE_RATE, rate)) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_CQM_TXE_INTVL, intvl)) + goto nla_put_failure; + + nla_nest_end(msg, pinfoattr); + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + void nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 89ce99675e61..9f2616fffb40 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -110,6 +110,11 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, u32 num_packets, gfp_t gfp); +void +nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, u32 rate, u32 intvl, gfp_t gfp); + void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, const u8 *replay_ctr, gfp_t gfp); -- cgit v1.2.3 From 9e33ce453f8ac8452649802bee1f410319408f4b Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Sat, 7 Jul 2012 18:26:10 +0800 Subject: ipvs: fix oops on NAT reply in br_nf context IPVS should not reset skb->nf_bridge in FORWARD hook by calling nf_reset for NAT replies. It triggers oops in br_nf_forward_finish. [ 579.781508] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 579.781669] IP: [] br_nf_forward_finish+0x58/0x112 [ 579.781792] PGD 218f9067 PUD 0 [ 579.781865] Oops: 0000 [#1] SMP [ 579.781945] CPU 0 [ 579.781983] Modules linked in: [ 579.782047] [ 579.782080] [ 579.782114] Pid: 4644, comm: qemu Tainted: G W 3.5.0-rc5-00006-g95e69f9 #282 Hewlett-Packard /30E8 [ 579.782300] RIP: 0010:[] [] br_nf_forward_finish+0x58/0x112 [ 579.782455] RSP: 0018:ffff88007b003a98 EFLAGS: 00010287 [ 579.782541] RAX: 0000000000000008 RBX: ffff8800762ead00 RCX: 000000000001670a [ 579.782653] RDX: 0000000000000000 RSI: 000000000000000a RDI: ffff8800762ead00 [ 579.782845] RBP: ffff88007b003ac8 R08: 0000000000016630 R09: ffff88007b003a90 [ 579.782957] R10: ffff88007b0038e8 R11: ffff88002da37540 R12: ffff88002da01a02 [ 579.783066] R13: ffff88002da01a80 R14: ffff88002d83c000 R15: ffff88002d82a000 [ 579.783177] FS: 0000000000000000(0000) GS:ffff88007b000000(0063) knlGS:00000000f62d1b70 [ 579.783306] CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b [ 579.783395] CR2: 0000000000000004 CR3: 00000000218fe000 CR4: 00000000000027f0 [ 579.783505] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 579.783684] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 579.783795] Process qemu (pid: 4644, threadinfo ffff880021b20000, task ffff880021aba760) [ 579.783919] Stack: [ 579.783959] ffff88007693cedc ffff8800762ead00 ffff88002da01a02 ffff8800762ead00 [ 579.784110] ffff88002da01a02 ffff88002da01a80 ffff88007b003b18 ffffffff817b26c7 [ 579.784260] ffff880080000000 ffffffff81ef59f0 ffff8800762ead00 ffffffff81ef58b0 [ 579.784477] Call Trace: [ 579.784523] [ 579.784562] [ 579.784603] [] br_nf_forward_ip+0x275/0x2c8 [ 579.784707] [] nf_iterate+0x47/0x7d [ 579.784797] [] ? br_dev_queue_push_xmit+0xae/0xae [ 579.784906] [] nf_hook_slow+0x6d/0x102 [ 579.784995] [] ? br_dev_queue_push_xmit+0xae/0xae [ 579.785175] [] ? _raw_write_unlock_bh+0x19/0x1b [ 579.785179] [] __br_forward+0x97/0xa2 [ 579.785179] [] br_handle_frame_finish+0x1a6/0x257 [ 579.785179] [] br_nf_pre_routing_finish+0x26d/0x2cb [ 579.785179] [] br_nf_pre_routing+0x55d/0x5c1 [ 579.785179] [] nf_iterate+0x47/0x7d [ 579.785179] [] ? br_handle_local_finish+0x44/0x44 [ 579.785179] [] nf_hook_slow+0x6d/0x102 [ 579.785179] [] ? br_handle_local_finish+0x44/0x44 [ 579.785179] [] ? sky2_poll+0xb35/0xb54 [ 579.785179] [] br_handle_frame+0x213/0x229 [ 579.785179] [] ? br_handle_frame_finish+0x257/0x257 [ 579.785179] [] __netif_receive_skb+0x2b4/0x3f1 [ 579.785179] [] process_backlog+0x99/0x1e2 [ 579.785179] [] net_rx_action+0xdf/0x242 [ 579.785179] [] __do_softirq+0xc1/0x1e0 [ 579.785179] [] ? trace_hardirqs_off_thunk+0x3a/0x6c [ 579.785179] [] call_softirq+0x1c/0x30 The steps to reproduce as follow, 1. On Host1, setup brige br0(192.168.1.106) 2. Boot a kvm guest(192.168.1.105) on Host1 and start httpd 3. Start IPVS service on Host1 ipvsadm -A -t 192.168.1.106:80 -s rr ipvsadm -a -t 192.168.1.106:80 -r 192.168.1.105:80 -m 4. Run apache benchmark on Host2(192.168.1.101) ab -n 1000 http://192.168.1.106/ ip_vs_reply4 ip_vs_out handle_response ip_vs_notrack nf_reset() { skb->nf_bridge = NULL; } Actually, IPVS wants in this case just to replace nfct with untracked version. So replace the nf_reset(skb) call in ip_vs_notrack() with a nf_conntrack_put(skb->nfct) call. Signed-off-by: Lin Ming Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d6146b4811c2..95374d1696a1 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1425,7 +1425,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb) struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (!ct || !nf_ct_is_untracked(ct)) { - nf_reset(skb); + nf_conntrack_put(skb->nfct); skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); -- cgit v1.2.3 From 57b5ce072e7361218a8e2ea1d62960cbb71d9cff Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 12 Jul 2012 11:49:18 -0700 Subject: cfg80211: add cellular base station regulatory hint support Cellular base stations can provide hints to cfg80211 about where they think we are. This can be done for example on a cell phone. To enable these hints we simply allow them through as user regulatory hints but we allow userspace to clasify the hint as either coming directly from the user or coming from a cellular base station. This option is only available when you enable CONFIG_CFG80211_CERTIFICATION_ONUS. The base station hints themselves will not be processed by the core unless at least one device on the system supports this feature. Signed-off-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 32 ++++++++++++++ include/net/regulatory.h | 5 +++ net/wireless/core.c | 1 + net/wireless/nl80211.c | 23 +++++++++- net/wireless/reg.c | 113 ++++++++++++++++++++++++++++++++++++++++++++--- net/wireless/reg.h | 5 ++- 6 files changed, 171 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index d6cfacc3ce4d..2f3878806403 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1245,6 +1245,12 @@ enum nl80211_commands { * @NL80211_ATTR_BG_SCAN_PERIOD: Background scan period in seconds * or 0 to disable background scan. * + * @NL80211_ATTR_USER_REG_HINT_TYPE: type of regulatory hint passed from + * userspace. If unset it is assumed the hint comes directly from + * a user. If set code could specify exactly what type of source + * was used to provide the hint. For the different types of + * allowed user regulatory hints see nl80211_user_reg_hint_type. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1498,6 +1504,8 @@ enum nl80211_attrs { NL80211_ATTR_WDEV, + NL80211_ATTR_USER_REG_HINT_TYPE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2060,6 +2068,26 @@ enum nl80211_dfs_regions { NL80211_DFS_JP = 3, }; +/** + * enum nl80211_user_reg_hint_type - type of user regulatory hint + * + * @NL80211_USER_REG_HINT_USER: a user sent the hint. This is always + * assumed if the attribute is not set. + * @NL80211_USER_REG_HINT_CELL_BASE: the hint comes from a cellular + * base station. Device drivers that have been tested to work + * properly to support this type of hint can enable these hints + * by setting the NL80211_FEATURE_CELL_BASE_REG_HINTS feature + * capability on the struct wiphy. The wireless core will + * ignore all cell base station hints until at least one device + * present has been registered with the wireless core that + * has listed NL80211_FEATURE_CELL_BASE_REG_HINTS as a + * supported feature. + */ +enum nl80211_user_reg_hint_type { + NL80211_USER_REG_HINT_USER = 0, + NL80211_USER_REG_HINT_CELL_BASE = 1, +}; + /** * enum nl80211_survey_info - survey information * @@ -2963,11 +2991,15 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_HT_IBSS: This driver supports IBSS with HT datarates. * @NL80211_FEATURE_INACTIVITY_TIMER: This driver takes care of freeing up * the connected inactive stations in AP mode. + * @NL80211_FEATURE_CELL_BASE_REG_HINTS: This driver has been tested + * to work properly to suppport receiving regulatory hints from + * cellular base stations. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, NL80211_FEATURE_HT_IBSS = 1 << 1, NL80211_FEATURE_INACTIVITY_TIMER = 1 << 2, + NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3, }; /** diff --git a/include/net/regulatory.h b/include/net/regulatory.h index a5f79933e211..7dcaa2794fde 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -52,6 +52,10 @@ enum environment_cap { * DFS master operation on a known DFS region (NL80211_DFS_*), * dfs_region represents that region. Drivers can use this and the * @alpha2 to adjust their device's DFS parameters as required. + * @user_reg_hint_type: if the @initiator was of type + * %NL80211_REGDOM_SET_BY_USER, this classifies the type + * of hint passed. This could be any of the %NL80211_USER_REG_HINT_* + * types. * @intersect: indicates whether the wireless core should intersect * the requested regulatory domain with the presently set regulatory * domain. @@ -70,6 +74,7 @@ enum environment_cap { struct regulatory_request { int wiphy_idx; enum nl80211_reg_initiator initiator; + enum nl80211_user_reg_hint_type user_reg_hint_type; char alpha2[2]; u8 dfs_region; bool intersect; diff --git a/net/wireless/core.c b/net/wireless/core.c index 71b684b5a675..c0307b05986c 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -542,6 +542,7 @@ int wiphy_register(struct wiphy *wiphy) } /* set up regulatory info */ + wiphy_regulatory_register(wiphy); regulatory_update(wiphy, NL80211_REGDOM_SET_BY_CORE); list_add_rcu(&rdev->list, &cfg80211_rdev_list); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9216e45e53a0..50b1a0e84f1a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -354,6 +354,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_INACTIVITY_TIMEOUT] = { .type = NLA_U16 }, [NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 }, [NL80211_ATTR_WDEV] = { .type = NLA_U64 }, + [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -3582,6 +3583,7 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) { int r; char *data = NULL; + enum nl80211_user_reg_hint_type user_reg_hint_type; /* * You should only get this when cfg80211 hasn't yet initialized @@ -3601,7 +3603,21 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); - r = regulatory_hint_user(data); + if (info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]) + user_reg_hint_type = + nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]); + else + user_reg_hint_type = NL80211_USER_REG_HINT_USER; + + switch (user_reg_hint_type) { + case NL80211_USER_REG_HINT_USER: + case NL80211_USER_REG_HINT_CELL_BASE: + break; + default: + return -EINVAL; + } + + r = regulatory_hint_user(data, user_reg_hint_type); return r; } @@ -3971,6 +3987,11 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) cfg80211_regdomain->dfs_region))) goto nla_put_failure; + if (reg_last_request_cell_base() && + nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, + NL80211_USER_REG_HINT_CELL_BASE)) + goto nla_put_failure; + nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) goto nla_put_failure; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ad6f9029c564..83583a9c15d9 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -97,9 +97,16 @@ const struct ieee80211_regdomain *cfg80211_regdomain; * - cfg80211_world_regdom * - cfg80211_regdom * - last_request + * - reg_num_devs_support_basehint */ static DEFINE_MUTEX(reg_mutex); +/* + * Number of devices that registered to the core + * that support cellular base station regulatory hints + */ +static int reg_num_devs_support_basehint; + static inline void assert_reg_lock(void) { lockdep_assert_held(®_mutex); @@ -911,6 +918,59 @@ static void handle_band(struct wiphy *wiphy, handle_channel(wiphy, initiator, band, i); } +static bool reg_request_cell_base(struct regulatory_request *request) +{ + if (request->initiator != NL80211_REGDOM_SET_BY_USER) + return false; + if (request->user_reg_hint_type != NL80211_USER_REG_HINT_CELL_BASE) + return false; + return true; +} + +bool reg_last_request_cell_base(void) +{ + assert_cfg80211_lock(); + + mutex_lock(®_mutex); + return reg_request_cell_base(last_request); + mutex_unlock(®_mutex); +} + +#ifdef CONFIG_CFG80211_CERTIFICATION_ONUS + +/* Core specific check */ +static int reg_ignore_cell_hint(struct regulatory_request *pending_request) +{ + if (!reg_num_devs_support_basehint) + return -EOPNOTSUPP; + + if (reg_request_cell_base(last_request)) { + if (!regdom_changes(pending_request->alpha2)) + return -EALREADY; + return 0; + } + return 0; +} + +/* Device specific check */ +static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) +{ + if (!(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS)) + return true; + return false; +} +#else +static int reg_ignore_cell_hint(struct regulatory_request *pending_request) +{ + return -EOPNOTSUPP; +} +static int reg_dev_ignore_cell_hint(struct wiphy *wiphy) +{ + return true; +} +#endif + + static bool ignore_reg_update(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { @@ -944,6 +1004,9 @@ static bool ignore_reg_update(struct wiphy *wiphy, return true; } + if (reg_request_cell_base(last_request)) + return reg_dev_ignore_cell_hint(wiphy); + return false; } @@ -1307,6 +1370,13 @@ static int ignore_request(struct wiphy *wiphy, return 0; case NL80211_REGDOM_SET_BY_COUNTRY_IE: + if (reg_request_cell_base(last_request)) { + /* Trust a Cell base station over the AP's country IE */ + if (regdom_changes(pending_request->alpha2)) + return -EOPNOTSUPP; + return -EALREADY; + } + last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); if (unlikely(!is_an_alpha2(pending_request->alpha2))) @@ -1351,6 +1421,12 @@ static int ignore_request(struct wiphy *wiphy, return REG_INTERSECT; case NL80211_REGDOM_SET_BY_USER: + if (reg_request_cell_base(pending_request)) + return reg_ignore_cell_hint(pending_request); + + if (reg_request_cell_base(last_request)) + return -EOPNOTSUPP; + if (last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) return REG_INTERSECT; /* @@ -1640,7 +1716,8 @@ static int regulatory_hint_core(const char *alpha2) } /* User hints */ -int regulatory_hint_user(const char *alpha2) +int regulatory_hint_user(const char *alpha2, + enum nl80211_user_reg_hint_type user_reg_hint_type) { struct regulatory_request *request; @@ -1654,6 +1731,7 @@ int regulatory_hint_user(const char *alpha2) request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_USER; + request->user_reg_hint_type = user_reg_hint_type; queue_regulatory_request(request); @@ -1906,7 +1984,7 @@ static void restore_regulatory_settings(bool reset_user) * settings, user regulatory settings takes precedence. */ if (is_an_alpha2(alpha2)) - regulatory_hint_user(user_alpha2); + regulatory_hint_user(user_alpha2, NL80211_USER_REG_HINT_USER); if (list_empty(&tmp_reg_req_list)) return; @@ -2081,9 +2159,16 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) else { if (is_unknown_alpha2(rd->alpha2)) pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n"); - else - pr_info("Regulatory domain changed to country: %c%c\n", - rd->alpha2[0], rd->alpha2[1]); + else { + if (reg_request_cell_base(last_request)) + pr_info("Regulatory domain changed " + "to country: %c%c by Cell Station\n", + rd->alpha2[0], rd->alpha2[1]); + else + pr_info("Regulatory domain changed " + "to country: %c%c\n", + rd->alpha2[0], rd->alpha2[1]); + } } print_dfs_region(rd->dfs_region); print_rd_rules(rd); @@ -2293,6 +2378,18 @@ int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) } #endif /* CONFIG_HOTPLUG */ +void wiphy_regulatory_register(struct wiphy *wiphy) +{ + assert_cfg80211_lock(); + + mutex_lock(®_mutex); + + if (!reg_dev_ignore_cell_hint(wiphy)) + reg_num_devs_support_basehint++; + + mutex_unlock(®_mutex); +} + /* Caller must hold cfg80211_mutex */ void reg_device_remove(struct wiphy *wiphy) { @@ -2302,6 +2399,9 @@ void reg_device_remove(struct wiphy *wiphy) mutex_lock(®_mutex); + if (!reg_dev_ignore_cell_hint(wiphy)) + reg_num_devs_support_basehint--; + kfree(wiphy->regd); if (last_request) @@ -2367,7 +2467,8 @@ int __init regulatory_init(void) * as a user hint. */ if (!is_world_regdom(ieee80211_regdom)) - regulatory_hint_user(ieee80211_regdom); + regulatory_hint_user(ieee80211_regdom, + NL80211_USER_REG_HINT_USER); return 0; } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index e2aaaf525a22..519492fdda3c 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -22,9 +22,11 @@ bool is_world_regdom(const char *alpha2); bool reg_is_valid_request(const char *alpha2); bool reg_supported_dfs_region(u8 dfs_region); -int regulatory_hint_user(const char *alpha2); +int regulatory_hint_user(const char *alpha2, + enum nl80211_user_reg_hint_type user_reg_hint_type); int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env); +void wiphy_regulatory_register(struct wiphy *wiphy); void reg_device_remove(struct wiphy *wiphy); int __init regulatory_init(void); @@ -33,6 +35,7 @@ void regulatory_exit(void); int set_regdom(const struct ieee80211_regdomain *rd); void regulatory_update(struct wiphy *wiphy, enum nl80211_reg_initiator setby); +bool reg_last_request_cell_base(void); /** * regulatory_hint_found_beacon - hints a beacon was found on a channel -- cgit v1.2.3 From 6700c2709c08d74ae2c3c29b84a30da012dbc7f1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 03:29:28 -0700 Subject: net: Pass optional SKB and SK arguments to dst_ops->{update_pmtu,redirect}() This will be used so that we can compose a full flow key. Even though we have a route in this context, we need more. In the future the routes will be without destination address, source address, etc. keying. One ipv4 route will cover entire subnets, etc. In this environment we have to have a way to possess persistent storage for redirects and PMTU information. This persistent storage will exist in the FIB tables, and that's why we'll need to be able to rebuild a full lookup flow key here. Using that flow key will do a fib_lookup() and create/update the persistent entry. Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 +- include/net/dst_ops.h | 6 ++++-- net/bridge/br_netfilter.c | 6 ++++-- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/decnet/dn_route.c | 12 ++++++++---- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/ip_gre.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/route.c | 21 +++++++++++++-------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/xfrm4_policy.c | 10 ++++++---- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_tunnel.c | 6 +++--- net/ipv6/route.c | 21 +++++++++++++-------- net/ipv6/sit.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/xfrm6_policy.c | 10 ++++++---- net/netfilter/ipvs/ip_vs_xmit.c | 4 ++-- net/sctp/input.c | 2 +- net/sctp/transport.c | 2 +- 21 files changed, 71 insertions(+), 49 deletions(-) (limited to 'include/net') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 014504d8e43c..1ca732201f33 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1397,7 +1397,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, int e = skb_queue_empty(&priv->cm.skb_queue); if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); skb_queue_tail(&priv->cm.skb_queue, skb); if (e) diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 085931fa7ce0..d079fc61c123 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -24,8 +24,10 @@ struct dst_ops { struct net_device *dev, int how); struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); - void (*update_pmtu)(struct dst_entry *dst, u32 mtu); - void (*redirect)(struct dst_entry *dst, struct sk_buff *skb); + void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); + void (*redirect)(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); int (*local_out)(struct sk_buff *skb); struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, struct sk_buff *skb, diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 81f76c402cf2..68e8f364bbf8 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -111,11 +111,13 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) -static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) +static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void fake_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void fake_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 683902fcc8ed..ab4f44c9bb21 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -193,7 +193,7 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) struct dst_entry *dst = __sk_dst_check(sk, 0); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3ee0342e1cec..56840b249f3b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -134,7 +134,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } if (type == ICMPV6_PKT_TOOBIG) { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index e9c4e2e864c6..47de90d8fe94 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -117,8 +117,10 @@ static void dn_dst_destroy(struct dst_entry *); static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); -static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); -static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb , u32 mtu); +static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); @@ -266,7 +268,8 @@ static int dn_dst_gc(struct dst_ops *ops) * We update both the mtu and the advertised mss (i.e. the segment size we * advertise to the other end). */ -static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) +static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct dn_route *rt = (struct dn_route *) dst; struct neighbour *n = rt->n; @@ -294,7 +297,8 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) } } -static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 200d21809379..3ea465286a39 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -840,7 +840,7 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) if (!dst) goto out; } - dst->ops->update_pmtu(dst, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu); dst = __sk_dst_check(sk, 0); if (!dst) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0c3123566d76..42c44b1403c9 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -833,7 +833,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index c2d0e6d8baaf..2c2c35bace76 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -519,7 +519,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if ((old_iph->frag_off & htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index aad21819316d..b35d3bfc66cd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -148,8 +148,10 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst); static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); -static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); -static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); +static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); static int rt_garbage_collect(struct dst_ops *ops); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1273,7 +1275,7 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; @@ -1506,7 +1508,8 @@ out: kfree_skb(skb); return 0; } -static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct rtable *rt = (struct rtable *) dst; @@ -1531,7 +1534,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, iph->daddr, iph->saddr, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_rt_update_pmtu(&rt->dst, mtu); + ip_rt_update_pmtu(&rt->dst, NULL, skb, mtu); ip_rt_put(rt); } } @@ -1559,7 +1562,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_do_redirect(&rt->dst, skb); + ip_do_redirect(&rt->dst, NULL, skb); ip_rt_put(rt); } } @@ -2587,11 +2590,13 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b8e7e0595407..d9caf5c07aae 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -319,7 +319,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) struct dst_entry *dst = __sk_dst_check(sk, 0); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 737131cef375..fcf7678bc009 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -194,20 +194,22 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops) return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); } -static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) +static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, mtu); + path->ops->update_pmtu(path, sk, skb, mtu); } -static void xfrm4_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->redirect(path, skb); + path->ops->redirect(path, sk, skb); } static void xfrm4_dst_destroy(struct dst_entry *dst) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 62539a4b2dc7..4a0c4d2d8b05 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -269,7 +269,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) if (IS_ERR(dst)) return NULL; - dst->ops->update_pmtu(dst, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu); return inet6_csk_route_socket(sk); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 61d106597296..db3284667968 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -609,10 +609,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); + skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); } if (rel_type == ICMP_REDIRECT) - skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2); + skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); @@ -952,7 +952,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { *pmtu = mtu; err = -EMSGSIZE; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2a4c8d48977f..31af1ed6c1dc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -78,8 +78,10 @@ static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); -static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, @@ -187,11 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } @@ -1071,7 +1075,8 @@ static void ip6_link_failure(struct sk_buff *skb) } } -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info*)dst; @@ -1108,7 +1113,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - ip6_rt_update_pmtu(dst, ntohl(mtu)); + ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); @@ -1136,7 +1141,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - rt6_do_redirect(dst, skb); + rt6_do_redirect(dst, NULL, skb); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_redirect); @@ -1639,7 +1644,7 @@ static int ip6_route_del(struct fib6_config *cfg) return err; } -static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fbf1622fdeef..3bd1bfc01f85 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -807,7 +807,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (tunnel->parms.iph.daddr && skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ecdf241cad02..c9dabdd832d7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -367,7 +367,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) - dst->ops->redirect(dst,skb); + dst->ops->redirect(dst, sk, skb); } if (type == ICMPV6_PKT_TOOBIG) { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f5a9cb8257b9..ef39812107b1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -207,20 +207,22 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) return dst_entries_get_fast(ops) > ops->gc_thresh * 2; } -static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) +static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, mtu); + path->ops->update_pmtu(path, sk, skb, mtu); } -static void xfrm6_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->redirect(path, skb); + path->ops->redirect(path, sk, skb); } static void xfrm6_dst_destroy(struct dst_entry *dst) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 71d6ecb65926..65b616ae1716 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -797,7 +797,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); df |= (old_iph->frag_off & htons(IP_DF)); @@ -913,7 +913,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) && !skb_is_gso(skb)) { diff --git a/net/sctp/input.c b/net/sctp/input.c index a67bc31f49fd..c201b26879a1 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -432,7 +432,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, return; dst = sctp_transport_dst_check(t); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e69e1a2175a4..a6b7ee9ce28a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -249,7 +249,7 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); if (dst) { - dst->ops->update_pmtu(dst, pmtu); + dst->ops->update_pmtu(dst, sk, NULL, pmtu); dst = sctp_transport_dst_check(t); if (!dst) -- cgit v1.2.3 From 4895c771c7f006b4b90f9d6b1d2210939ba57b38 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 04:19:00 -0700 Subject: ipv4: Add FIB nexthop exceptions. In a regime where we have subnetted route entries, we need a way to store persistent storage about destination specific learned values such as redirects and PMTU values. This is implemented here via nexthop exceptions. The initial implementation is a 2048 entry hash table with relaiming starting at chain length 5. A more sophisticated scheme can be devised if that proves necessary. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 18 ++++ net/ipv4/fib_semantics.c | 23 +++++ net/ipv4/route.c | 256 +++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 266 insertions(+), 31 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 5697acefeba3..e9ee1ca07087 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -46,6 +47,22 @@ struct fib_config { struct fib_info; +struct fib_nh_exception { + struct fib_nh_exception __rcu *fnhe_next; + __be32 fnhe_daddr; + u32 fnhe_pmtu; + u32 fnhe_gw; + unsigned long fnhe_expires; + unsigned long fnhe_stamp; +}; + +struct fnhe_hash_bucket { + struct fib_nh_exception __rcu *chain; +}; + +#define FNHE_HASH_SIZE 2048 +#define FNHE_RECLAIM_DEPTH 5 + struct fib_nh { struct net_device *nh_dev; struct hlist_node nh_hash; @@ -63,6 +80,7 @@ struct fib_nh { __be32 nh_gw; __be32 nh_saddr; int nh_saddr_genid; + struct fnhe_hash_bucket *nh_exceptions; }; /* diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d71bfbdc0bf4..1e09852df512 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -140,6 +140,27 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }, }; +static void free_nh_exceptions(struct fib_nh *nh) +{ + struct fnhe_hash_bucket *hash = nh->nh_exceptions; + int i; + + for (i = 0; i < FNHE_HASH_SIZE; i++) { + struct fib_nh_exception *fnhe; + + fnhe = rcu_dereference(hash[i].chain); + while (fnhe) { + struct fib_nh_exception *next; + + next = rcu_dereference(fnhe->fnhe_next); + kfree(fnhe); + + fnhe = next; + } + } + kfree(hash); +} + /* Release a nexthop info record */ static void free_fib_info_rcu(struct rcu_head *head) { @@ -148,6 +169,8 @@ static void free_fib_info_rcu(struct rcu_head *head) change_nexthops(fi) { if (nexthop_nh->nh_dev) dev_put(nexthop_nh->nh_dev); + if (nexthop_nh->nh_exceptions) + free_nh_exceptions(nexthop_nh); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b35d3bfc66cd..a5bd0b4acc61 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1275,14 +1275,130 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) +static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, + const struct iphdr *iph, + int oif, u8 tos, + u8 prot, u32 mark, int flow_flags) +{ + if (sk) { + const struct inet_sock *inet = inet_sk(sk); + + oif = sk->sk_bound_dev_if; + mark = sk->sk_mark; + tos = RT_CONN_FLAGS(sk); + prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol; + } + flowi4_init_output(fl4, oif, mark, tos, + RT_SCOPE_UNIVERSE, prot, + flow_flags, + iph->daddr, iph->saddr, 0, 0); +} + +static void build_skb_flow_key(struct flowi4 *fl4, struct sk_buff *skb, struct sock *sk) +{ + const struct iphdr *iph = ip_hdr(skb); + int oif = skb->dev->ifindex; + u8 tos = RT_TOS(iph->tos); + u8 prot = iph->protocol; + u32 mark = skb->mark; + + __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); +} + +static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + struct ip_options_rcu *inet_opt; + __be32 daddr = inet->inet_daddr; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + inet_sk_flowi_flags(sk), + daddr, inet->inet_saddr, 0, 0); + rcu_read_unlock(); +} + +static void ip_rt_build_flow_key(struct flowi4 *fl4, struct sock *sk, + struct sk_buff *skb) +{ + if (skb) + build_skb_flow_key(fl4, skb, sk); + else + build_sk_flow_key(fl4, sk); +} + +static DEFINE_SPINLOCK(fnhe_lock); + +static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash, __be32 daddr) +{ + struct fib_nh_exception *fnhe, *oldest; + + oldest = rcu_dereference(hash->chain); + for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; + fnhe = rcu_dereference(fnhe->fnhe_next)) { + if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) + oldest = fnhe; + } + return oldest; +} + +static struct fib_nh_exception *find_or_create_fnhe(struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash = nh->nh_exceptions; + struct fib_nh_exception *fnhe; + int depth; + u32 hval; + + if (!hash) { + hash = nh->nh_exceptions = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), + GFP_ATOMIC); + if (!hash) + return NULL; + } + + hval = (__force u32) daddr; + hval ^= (hval >> 11) ^ (hval >> 22); + hash += hval; + + depth = 0; + for (fnhe = rcu_dereference(hash->chain); fnhe; + fnhe = rcu_dereference(fnhe->fnhe_next)) { + if (fnhe->fnhe_daddr == daddr) + goto out; + depth++; + } + + if (depth > FNHE_RECLAIM_DEPTH) { + fnhe = fnhe_oldest(hash + hval, daddr); + goto out_daddr; + } + fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); + if (!fnhe) + return NULL; + + fnhe->fnhe_next = hash->chain; + rcu_assign_pointer(hash->chain, fnhe); + +out_daddr: + fnhe->fnhe_daddr = daddr; +out: + fnhe->fnhe_stamp = jiffies; + return fnhe; +} + +static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4) { __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; struct net_device *dev = skb->dev; struct in_device *in_dev; + struct fib_result res; struct neighbour *n; - struct rtable *rt; struct net *net; switch (icmp_hdr(skb)->code & 7) { @@ -1296,7 +1412,6 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf return; } - rt = (struct rtable *) dst; if (rt->rt_gateway != old_gw) return; @@ -1320,11 +1435,21 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf goto reject_redirect; } - n = ipv4_neigh_lookup(dst, NULL, &new_gw); + n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); if (n) { if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); } else { + if (fib_lookup(net, fl4, &res) == 0) { + struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh_exception *fnhe; + + spin_lock_bh(&fnhe_lock); + fnhe = find_or_create_fnhe(nh, fl4->daddr); + if (fnhe) + fnhe->fnhe_gw = new_gw; + spin_unlock_bh(&fnhe_lock); + } rt->rt_gateway = new_gw; rt->rt_flags |= RTCF_REDIRECTED; call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); @@ -1349,6 +1474,17 @@ reject_redirect: ; } +static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) +{ + struct rtable *rt; + struct flowi4 fl4; + + rt = (struct rtable *) dst; + + ip_rt_build_flow_key(&fl4, sk, skb); + __ip_do_redirect(rt, skb, &fl4); +} + static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; @@ -1508,33 +1644,51 @@ out: kfree_skb(skb); return 0; } -static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) +static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { - struct rtable *rt = (struct rtable *) dst; - - dst_confirm(dst); + struct fib_result res; if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { + struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh_exception *fnhe; + + spin_lock_bh(&fnhe_lock); + fnhe = find_or_create_fnhe(nh, fl4->daddr); + if (fnhe) { + fnhe->fnhe_pmtu = mtu; + fnhe->fnhe_expires = jiffies + ip_rt_mtu_expires; + } + spin_unlock_bh(&fnhe_lock); + } rt->rt_pmtu = mtu; dst_set_expires(&rt->dst, ip_rt_mtu_expires); } +static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) +{ + struct rtable *rt = (struct rtable *) dst; + struct flowi4 fl4; + + ip_rt_build_flow_key(&fl4, sk, skb); + __ip_rt_update_pmtu(rt, &fl4, mtu); +} + void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u32 mark, u8 protocol, int flow_flags) { - const struct iphdr *iph = (const struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; - flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, - protocol, flow_flags, - iph->daddr, iph->saddr, 0, 0); + __build_flow_key(&fl4, NULL, iph, oif, + RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_rt_update_pmtu(&rt->dst, NULL, skb, mtu); + __ip_rt_update_pmtu(rt, &fl4, mtu); ip_rt_put(rt); } } @@ -1542,27 +1696,31 @@ EXPORT_SYMBOL_GPL(ipv4_update_pmtu); void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) { - const struct inet_sock *inet = inet_sk(sk); + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct flowi4 fl4; + struct rtable *rt; - return ipv4_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark, - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk)); + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(sock_net(sk), &fl4); + if (!IS_ERR(rt)) { + __ip_rt_update_pmtu(rt, &fl4, mtu); + ip_rt_put(rt); + } } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, u8 protocol, int flow_flags) { - const struct iphdr *iph = (const struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; - flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, - protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); + __build_flow_key(&fl4, NULL, iph, oif, + RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_do_redirect(&rt->dst, NULL, skb); + __ip_do_redirect(rt, skb, &fl4); ip_rt_put(rt); } } @@ -1570,12 +1728,16 @@ EXPORT_SYMBOL_GPL(ipv4_redirect); void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) { - const struct inet_sock *inet = inet_sk(sk); + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct flowi4 fl4; + struct rtable *rt; - return ipv4_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, - sk->sk_mark, - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk)); + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(sock_net(sk), &fl4); + if (!IS_ERR(rt)) { + __ip_do_redirect(rt, skb, &fl4); + ip_rt_put(rt); + } } EXPORT_SYMBOL_GPL(ipv4_sk_redirect); @@ -1722,14 +1884,46 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, dst_init_metrics(&rt->dst, fi->fib_metrics, true); } +static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash = nh->nh_exceptions; + struct fib_nh_exception *fnhe; + u32 hval; + + hval = (__force u32) daddr; + hval ^= (hval >> 11) ^ (hval >> 22); + + for (fnhe = rcu_dereference(hash[hval].chain); fnhe; + fnhe = rcu_dereference(fnhe->fnhe_next)) { + if (fnhe->fnhe_daddr == daddr) { + if (fnhe->fnhe_pmtu) { + unsigned long expires = fnhe->fnhe_expires; + unsigned long diff = jiffies - expires; + + if (time_before(jiffies, expires)) { + rt->rt_pmtu = fnhe->fnhe_pmtu; + dst_set_expires(&rt->dst, diff); + } + } + if (fnhe->fnhe_gw) + rt->rt_gateway = fnhe->fnhe_gw; + fnhe->fnhe_stamp = jiffies; + break; + } + } +} + static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, const struct fib_result *res, struct fib_info *fi, u16 type, u32 itag) { if (fi) { - if (FIB_RES_GW(*res) && - FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - rt->rt_gateway = FIB_RES_GW(*res); + struct fib_nh *nh = &FIB_RES_NH(*res); + + if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) + rt->rt_gateway = nh->nh_gw; + if (unlikely(nh->nh_exceptions)) + rt_bind_exception(rt, nh, fl4->daddr); rt_init_metrics(rt, fl4, fi); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; -- cgit v1.2.3 From d3818c92afabecfe6b8e5d2e3734c8753522987c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Jul 2012 21:38:04 +0000 Subject: ipv6: fix inet6_csk_xmit() We should provide to inet6_csk_route_socket a struct flowi6 pointer, so that net6_csk_xmit() works correctly instead of sending garbage. Also add some consts Signed-off-by: Eric Dumazet Reported-by: Yuchung Cheng Cc: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++-- include/net/ip6_route.h | 3 ++- net/ipv6/inet6_connection_sock.c | 40 +++++++++++++++++++++------------------- 3 files changed, 25 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bc6c8fd8ed01..379e433e15e0 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -299,9 +299,9 @@ struct ipv6_pinfo { struct in6_addr rcv_saddr; struct in6_addr daddr; struct in6_pktinfo sticky_pktinfo; - struct in6_addr *daddr_cache; + const struct in6_addr *daddr_cache; #ifdef CONFIG_IPV6_SUBTREES - struct in6_addr *saddr_cache; + const struct in6_addr *saddr_cache; #endif __be32 flow_label; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index b6b6f7d6f3c0..5fa2af00634a 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -158,7 +158,8 @@ extern void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); * Store a destination cache entry in a socket */ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) + const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct ipv6_pinfo *np = inet6_sk(sk); struct rt6_info *rt = (struct rt6_info *) dst; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 4a0c4d2d8b05..0251a6005be8 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -171,7 +171,8 @@ EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); static inline void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) + const struct in6_addr *daddr, + const struct in6_addr *saddr) { __ip6_dst_store(sk, dst, daddr, saddr); @@ -203,31 +204,31 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) return dst; } -static struct dst_entry *inet6_csk_route_socket(struct sock *sk) +static struct dst_entry *inet6_csk_route_socket(struct sock *sk, + struct flowi6 *fl6) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; - struct flowi6 fl6; - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = np->daddr; - fl6.saddr = np->saddr; - fl6.flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl6.flowlabel); - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_sport = inet->inet_sport; - fl6.fl6_dport = inet->inet_dport; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_proto = sk->sk_protocol; + fl6->daddr = np->daddr; + fl6->saddr = np->saddr; + fl6->flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl6->flowlabel); + fl6->flowi6_oif = sk->sk_bound_dev_if; + fl6->flowi6_mark = sk->sk_mark; + fl6->fl6_sport = inet->inet_sport; + fl6->fl6_dport = inet->inet_dport; + security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + final_p = fl6_update_dst(fl6, np->opt, &final); dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (!dst) { - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); if (!IS_ERR(dst)) __inet6_csk_dst_store(sk, dst, NULL, NULL); @@ -243,7 +244,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) struct dst_entry *dst; int res; - dst = inet6_csk_route_socket(sk); + dst = inet6_csk_route_socket(sk, &fl6); if (IS_ERR(dst)) { sk->sk_err_soft = -PTR_ERR(dst); sk->sk_route_caps = 0; @@ -265,12 +266,13 @@ EXPORT_SYMBOL_GPL(inet6_csk_xmit); struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) { - struct dst_entry *dst = inet6_csk_route_socket(sk); + struct flowi6 fl6; + struct dst_entry *dst = inet6_csk_route_socket(sk, &fl6); if (IS_ERR(dst)) return NULL; dst->ops->update_pmtu(dst, sk, NULL, mtu); - return inet6_csk_route_socket(sk); + return inet6_csk_route_socket(sk, &fl6); } EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu); -- cgit v1.2.3 From eb8637cd4a0d651cf4fcc1559231facee829a0ac Mon Sep 17 00:00:00 2001 From: Saurabh Date: Tue, 17 Jul 2012 09:44:49 +0000 Subject: net/ipv4: VTI support rx-path hook in xfrm4_mode_tunnel. Incorporated David and Steffen's comments. Add hook for rx-path xfmr4_mode_tunnel for VTI tunnel module. Signed-off-by: Saurabh Mohan Reviewed-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 ++ net/ipv4/xfrm4_mode_tunnel.c | 68 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 17acbc92476d..d9509eb29b80 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1475,6 +1475,8 @@ extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_output_finish(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); +extern int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler); +extern int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler); extern int xfrm6_extract_header(struct sk_buff *skb); extern int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index ed4bf11ef9f4..ddee0a099a2c 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -15,6 +15,65 @@ #include #include +/* Informational hook. The decap is still done here. */ +static struct xfrm_tunnel __rcu *rcv_notify_handlers __read_mostly; +static DEFINE_MUTEX(xfrm4_mode_tunnel_input_mutex); + +int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler) +{ + struct xfrm_tunnel __rcu **pprev; + struct xfrm_tunnel *t; + int ret = -EEXIST; + int priority = handler->priority; + + mutex_lock(&xfrm4_mode_tunnel_input_mutex); + + for (pprev = &rcv_notify_handlers; + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm4_mode_tunnel_input_mutex))) != NULL; + pprev = &t->next) { + if (t->priority > priority) + break; + if (t->priority == priority) + goto err; + + } + + handler->next = *pprev; + rcu_assign_pointer(*pprev, handler); + + ret = 0; + +err: + mutex_unlock(&xfrm4_mode_tunnel_input_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_register); + +int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler) +{ + struct xfrm_tunnel __rcu **pprev; + struct xfrm_tunnel *t; + int ret = -ENOENT; + + mutex_lock(&xfrm4_mode_tunnel_input_mutex); + for (pprev = &rcv_notify_handlers; + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm4_mode_tunnel_input_mutex))) != NULL; + pprev = &t->next) { + if (t == handler) { + *pprev = handler->next; + ret = 0; + break; + } + } + mutex_unlock(&xfrm4_mode_tunnel_input_mutex); + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_deregister); + static inline void ipip_ecn_decapsulate(struct sk_buff *skb) { struct iphdr *inner_iph = ipip_hdr(skb); @@ -64,8 +123,14 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } +#define for_each_input_rcu(head, handler) \ + for (handler = rcu_dereference(head); \ + handler != NULL; \ + handler = rcu_dereference(handler->next)) + static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_tunnel *handler; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) @@ -74,6 +139,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; + for_each_input_rcu(rcv_notify_handlers, handler) + handler->handler(skb); + if (skb_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) goto out; -- cgit v1.2.3 From ddbe503203855939946430e39bae58de11b70b69 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Jul 2012 08:11:12 +0000 Subject: ipv6: add ipv6_addr_hash() helper Introduce ipv6_addr_hash() helper doing a XOR on all bits of an IPv6 address, with an optimized x86_64 version. Use it in flow dissector, as suggested by Andrew McGregor, to reduce hash collision probabilities in fq_codel (and other users of flow dissector) Use it in ip6_tunnel.c and use more bit shuffling, as suggested by David Laight, as existing hash was ignoring most of them. Use it in sunrpc and use more bit shuffling, using hash_32(). Use it in net/ipv6/addrconf.c, using hash_32() as well. As a cleanup, use it in net/ipv4/tcp_metrics.c Signed-off-by: Eric Dumazet Reported-by: Andrew McGregor Cc: Dave Taht Cc: Tom Herbert Cc: David Laight Cc: Joe Perches Signed-off-by: David S. Miller --- include/net/addrconf.h | 3 ++- include/net/ipv6.h | 13 +++++++++++++ net/core/flow_dissector.c | 5 +++-- net/ipv4/tcp_metrics.c | 15 +++------------ net/ipv6/addrconf.c | 21 ++++++++------------- net/ipv6/ip6_tunnel.c | 20 ++++++++++++-------- net/sunrpc/svcauth_unix.c | 22 ++++------------------ 7 files changed, 45 insertions(+), 54 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f2b801c4b555..089a09d001d1 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -46,7 +46,8 @@ struct prefix_info { #include #include -#define IN6_ADDR_HSIZE 16 +#define IN6_ADDR_HSIZE_SHIFT 4 +#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) extern int addrconf_init(void); extern void addrconf_cleanup(void); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f695f39e8926..01c34b363a34 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -419,6 +419,19 @@ static inline bool ipv6_addr_any(const struct in6_addr *a) #endif } +static inline u32 ipv6_addr_hash(const struct in6_addr *a) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ul = (const unsigned long *)a; + unsigned long x = ul[0] ^ ul[1]; + + return (u32)(x ^ (x >> 32)); +#else + return (__force u32)(a->s6_addr32[0] ^ a->s6_addr32[1] ^ + a->s6_addr32[2] ^ a->s6_addr32[3]); +#endif +} + static inline bool ipv6_addr_loopback(const struct in6_addr *a) { return (a->s6_addr32[0] | a->s6_addr32[1] | diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index a225089df5b6..466820b6e344 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -55,8 +56,8 @@ ipv6: return false; ip_proto = iph->nexthdr; - flow->src = iph->saddr.s6_addr32[3]; - flow->dst = iph->daddr.s6_addr32[3]; + flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); + flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); nhoff += sizeof(struct ipv6hdr); break; } diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 5a38a2d5a95b..1a115b665792 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -211,10 +211,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, break; case AF_INET6: *(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr; - hash = ((__force unsigned int) addr.addr.a6[0] ^ - (__force unsigned int) addr.addr.a6[1] ^ - (__force unsigned int) addr.addr.a6[2] ^ - (__force unsigned int) addr.addr.a6[3]); + hash = ipv6_addr_hash(&inet6_rsk(req)->rmt_addr); break; default: return NULL; @@ -251,10 +248,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock case AF_INET6: tw6 = inet6_twsk((struct sock *)tw); *(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr; - hash = ((__force unsigned int) addr.addr.a6[0] ^ - (__force unsigned int) addr.addr.a6[1] ^ - (__force unsigned int) addr.addr.a6[2] ^ - (__force unsigned int) addr.addr.a6[3]); + hash = ipv6_addr_hash(&tw6->tw_v6_daddr); break; default: return NULL; @@ -291,10 +285,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, break; case AF_INET6: *(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr; - hash = ((__force unsigned int) addr.addr.a6[0] ^ - (__force unsigned int) addr.addr.a6[1] ^ - (__force unsigned int) addr.addr.a6[2] ^ - (__force unsigned int) addr.addr.a6[3]); + hash = ipv6_addr_hash(&inet6_sk(sk)->daddr); break; default: return NULL; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8f6411c97189..79181819a24f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -579,15 +580,9 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) list_add_tail(&ifp->if_list, p); } -static u32 ipv6_addr_hash(const struct in6_addr *addr) +static u32 inet6_addr_hash(const struct in6_addr *addr) { - /* - * We perform the hash function over the last 64 bits of the address - * This will include the IEEE address token on links that support it. - */ - return jhash_2words((__force u32)addr->s6_addr32[2], - (__force u32)addr->s6_addr32[3], 0) - & (IN6_ADDR_HSIZE - 1); + return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT); } /* On success it returns ifp with increased reference count */ @@ -662,7 +657,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, in6_ifa_hold(ifa); /* Add to big hash table */ - hash = ipv6_addr_hash(addr); + hash = inet6_addr_hash(addr); hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); spin_unlock(&addrconf_hash_lock); @@ -1270,7 +1265,7 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; struct hlist_node *node; - unsigned int hash = ipv6_addr_hash(addr); + unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { @@ -1293,7 +1288,7 @@ EXPORT_SYMBOL(ipv6_chk_addr); static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev) { - unsigned int hash = ipv6_addr_hash(addr); + unsigned int hash = inet6_addr_hash(addr); struct inet6_ifaddr *ifp; struct hlist_node *node; @@ -1336,7 +1331,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add struct net_device *dev, int strict) { struct inet6_ifaddr *ifp, *result = NULL; - unsigned int hash = ipv6_addr_hash(addr); + unsigned int hash = inet6_addr_hash(addr); struct hlist_node *node; rcu_read_lock_bh(); @@ -3223,7 +3218,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) int ret = 0; struct inet6_ifaddr *ifp = NULL; struct hlist_node *n; - unsigned int hash = ipv6_addr_hash(addr); + unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index db3284667968..9a1d5fe6aef8 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -70,11 +71,15 @@ MODULE_ALIAS_NETDEV("ip6tnl0"); #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 -#define HASH_SIZE 32 +#define HASH_SIZE_SHIFT 5 +#define HASH_SIZE (1 << HASH_SIZE_SHIFT) -#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \ - (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ - (HASH_SIZE - 1)) +static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) +{ + u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); + + return hash_32(hash, HASH_SIZE_SHIFT); +} static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); @@ -166,12 +171,11 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) static struct ip6_tnl * ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local) { - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(local); + unsigned int hash = HASH(remote, local); struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) { + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr) && (t->dev->flags & IFF_UP)) @@ -205,7 +209,7 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p) if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { prio = 1; - h = HASH(remote) ^ HASH(local); + h = HASH(remote, local); } return &ip6n->tnls[prio][h]; } diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 2777fa896645..4d0129203733 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -104,23 +104,9 @@ static void ip_map_put(struct kref *kref) kfree(im); } -#if IP_HASHBITS == 8 -/* hash_long on a 64 bit machine is currently REALLY BAD for - * IP addresses in reverse-endian (i.e. on a little-endian machine). - * So use a trivial but reliable hash instead - */ -static inline int hash_ip(__be32 ip) -{ - int hash = (__force u32)ip ^ ((__force u32)ip>>16); - return (hash ^ (hash>>8)) & 0xff; -} -#endif -static inline int hash_ip6(struct in6_addr ip) +static inline int hash_ip6(const struct in6_addr *ip) { - return (hash_ip(ip.s6_addr32[0]) ^ - hash_ip(ip.s6_addr32[1]) ^ - hash_ip(ip.s6_addr32[2]) ^ - hash_ip(ip.s6_addr32[3])); + return hash_32(ipv6_addr_hash(ip), IP_HASHBITS); } static int ip_map_match(struct cache_head *corig, struct cache_head *cnew) { @@ -301,7 +287,7 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, ip.m_addr = *addr; ch = sunrpc_cache_lookup(cd, &ip.h, hash_str(class, IP_HASHBITS) ^ - hash_ip6(*addr)); + hash_ip6(addr)); if (ch) return container_of(ch, struct ip_map, h); @@ -331,7 +317,7 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, ip.h.expiry_time = expiry; ch = sunrpc_cache_update(cd, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ - hash_ip6(ipm->m_addr)); + hash_ip6(&ipm->m_addr)); if (!ch) return -ENOMEM; cache_put(ch, cd); -- cgit v1.2.3 From aee06da6726d4981c51928c2d6d1e2cabeec7a10 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 18 Jul 2012 10:15:35 +0000 Subject: ipv4: use seqlock for nh_exceptions Use global seqlock for the nh_exceptions. Call fnhe_oldest with the right hash chain. Correct the diff value for dst_set_expires. v2: after suggestions from Eric Dumazet: * get rid of spin lock fnhe_lock, rearrange update_or_create_fnhe * continue daddr search in rt_bind_exception v3: * remove the daddr check before seqlock in rt_bind_exception * restart lookup in rt_bind_exception on detected seqlock change, as suggested by David Miller Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- net/ipv4/route.c | 118 +++++++++++++++++++++++++++++---------------------- 2 files changed, 69 insertions(+), 51 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e9ee1ca07087..2daf096dfc60 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -51,7 +51,7 @@ struct fib_nh_exception { struct fib_nh_exception __rcu *fnhe_next; __be32 fnhe_daddr; u32 fnhe_pmtu; - u32 fnhe_gw; + __be32 fnhe_gw; unsigned long fnhe_expires; unsigned long fnhe_stamp; }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2c25581bf25c..89e39dc5336b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1333,9 +1333,9 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, build_sk_flow_key(fl4, sk); } -static DEFINE_SPINLOCK(fnhe_lock); +static DEFINE_SEQLOCK(fnhe_seqlock); -static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash, __be32 daddr) +static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) { struct fib_nh_exception *fnhe, *oldest; @@ -1358,47 +1358,63 @@ static inline u32 fnhe_hashfun(__be32 daddr) return hval & (FNHE_HASH_SIZE - 1); } -static struct fib_nh_exception *find_or_create_fnhe(struct fib_nh *nh, __be32 daddr) +static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, + u32 pmtu, unsigned long expires) { - struct fnhe_hash_bucket *hash = nh->nh_exceptions; + struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; int depth; - u32 hval; + u32 hval = fnhe_hashfun(daddr); + + write_seqlock_bh(&fnhe_seqlock); + hash = nh->nh_exceptions; if (!hash) { - hash = nh->nh_exceptions = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), - GFP_ATOMIC); + hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC); if (!hash) - return NULL; + goto out_unlock; + nh->nh_exceptions = hash; } - hval = fnhe_hashfun(daddr); hash += hval; depth = 0; for (fnhe = rcu_dereference(hash->chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { if (fnhe->fnhe_daddr == daddr) - goto out; + break; depth++; } - if (depth > FNHE_RECLAIM_DEPTH) { - fnhe = fnhe_oldest(hash + hval, daddr); - goto out_daddr; + if (fnhe) { + if (gw) + fnhe->fnhe_gw = gw; + if (pmtu) { + fnhe->fnhe_pmtu = pmtu; + fnhe->fnhe_expires = expires; + } + } else { + if (depth > FNHE_RECLAIM_DEPTH) + fnhe = fnhe_oldest(hash); + else { + fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); + if (!fnhe) + goto out_unlock; + + fnhe->fnhe_next = hash->chain; + rcu_assign_pointer(hash->chain, fnhe); + } + fnhe->fnhe_daddr = daddr; + fnhe->fnhe_gw = gw; + fnhe->fnhe_pmtu = pmtu; + fnhe->fnhe_expires = expires; } - fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); - if (!fnhe) - return NULL; - - fnhe->fnhe_next = hash->chain; - rcu_assign_pointer(hash->chain, fnhe); -out_daddr: - fnhe->fnhe_daddr = daddr; -out: fnhe->fnhe_stamp = jiffies; - return fnhe; + +out_unlock: + write_sequnlock_bh(&fnhe_seqlock); + return; } static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4) @@ -1452,13 +1468,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow } else { if (fib_lookup(net, fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); - struct fib_nh_exception *fnhe; - spin_lock_bh(&fnhe_lock); - fnhe = find_or_create_fnhe(nh, fl4->daddr); - if (fnhe) - fnhe->fnhe_gw = new_gw; - spin_unlock_bh(&fnhe_lock); + update_or_create_fnhe(nh, fl4->daddr, new_gw, + 0, 0); } rt->rt_gateway = new_gw; rt->rt_flags |= RTCF_REDIRECTED; @@ -1663,15 +1675,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); - struct fib_nh_exception *fnhe; - spin_lock_bh(&fnhe_lock); - fnhe = find_or_create_fnhe(nh, fl4->daddr); - if (fnhe) { - fnhe->fnhe_pmtu = mtu; - fnhe->fnhe_expires = jiffies + ip_rt_mtu_expires; - } - spin_unlock_bh(&fnhe_lock); + update_or_create_fnhe(nh, fl4->daddr, 0, mtu, + jiffies + ip_rt_mtu_expires); } rt->rt_pmtu = mtu; dst_set_expires(&rt->dst, ip_rt_mtu_expires); @@ -1902,23 +1908,35 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr hval = fnhe_hashfun(daddr); +restart: for (fnhe = rcu_dereference(hash[hval].chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { - if (fnhe->fnhe_daddr == daddr) { - if (fnhe->fnhe_pmtu) { - unsigned long expires = fnhe->fnhe_expires; - unsigned long diff = expires - jiffies; - - if (time_before(jiffies, expires)) { - rt->rt_pmtu = fnhe->fnhe_pmtu; - dst_set_expires(&rt->dst, diff); - } + __be32 fnhe_daddr, gw; + unsigned long expires; + unsigned int seq; + u32 pmtu; + + seq = read_seqbegin(&fnhe_seqlock); + fnhe_daddr = fnhe->fnhe_daddr; + gw = fnhe->fnhe_gw; + pmtu = fnhe->fnhe_pmtu; + expires = fnhe->fnhe_expires; + if (read_seqretry(&fnhe_seqlock, seq)) + goto restart; + if (daddr != fnhe_daddr) + continue; + if (pmtu) { + unsigned long diff = jiffies - expires; + + if (time_before(jiffies, expires)) { + rt->rt_pmtu = pmtu; + dst_set_expires(&rt->dst, diff); } - if (fnhe->fnhe_gw) - rt->rt_gateway = fnhe->fnhe_gw; - fnhe->fnhe_stamp = jiffies; - break; } + if (gw) + rt->rt_gateway = gw; + fnhe->fnhe_stamp = jiffies; + break; } } -- cgit v1.2.3 From be9f4a44e7d41cee50ddb5f038fc2391cbbb4046 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Jul 2012 07:34:03 +0000 Subject: ipv4: tcp: remove per net tcp_sock tcp_v4_send_reset() and tcp_v4_send_ack() use a single socket per network namespace. This leads to bad behavior on multiqueue NICS, because many cpus contend for the socket lock and once socket lock is acquired, extra false sharing on various socket fields slow down the operations. To better resist to attacks, we use a percpu socket. Each cpu can run without contention, using appropriate memory (local node) Additional features : 1) We also mirror the queue_mapping of the incoming skb, so that answers use the same queue if possible. 2) Setting SOCK_USE_WRITE_QUEUE socket flag speedup sock_wfree() 3) We now limit the number of in-flight RST/ACK [1] packets per cpu, instead of per namespace, and we honor the sysctl_wmem_default limit dynamically. (Prior to this patch, sysctl_wmem_default value was copied at boot time, so any further change would not affect tcp_sock limit) [1] These packets are only generated when no socket was matched for the incoming packet. Reported-by: Bill Sommerfeld Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- include/net/netns/ipv4.h | 1 - net/ipv4/ip_output.c | 50 +++++++++++++++++++++++++++++++----------------- net/ipv4/tcp_ipv4.c | 8 +++----- 4 files changed, 36 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index ec5cfde85e9a..bd5e444a19ce 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -158,7 +158,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, +void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, unsigned int len); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2e089a99d603..d909c7fc3da1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -38,7 +38,6 @@ struct netns_ipv4 { struct sock *fibnl; struct sock **icmp_sk; - struct sock *tcp_sock; struct inet_peer_base *peers; struct tcpm_hash_bucket *tcp_metrics_hash; unsigned int tcp_metrics_hash_mask; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index cc52679790b2..c528f841ca4b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1463,20 +1463,33 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, /* * Generic function to send a packet as reply to another packet. - * Used to send TCP resets so far. + * Used to send some TCP resets/acks so far. * - * Should run single threaded per socket because it uses the sock - * structure to pass arguments. + * Use a fake percpu inet socket to avoid false sharing and contention. */ -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, +static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = { + .sk = { + .__sk_common = { + .skc_refcnt = ATOMIC_INIT(1), + }, + .sk_wmem_alloc = ATOMIC_INIT(1), + .sk_allocation = GFP_ATOMIC, + .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE), + }, + .pmtudisc = IP_PMTUDISC_WANT, +}; + +void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, unsigned int len) { - struct inet_sock *inet = inet_sk(sk); struct ip_options_data replyopts; struct ipcm_cookie ipc; struct flowi4 fl4; struct rtable *rt = skb_rtable(skb); + struct sk_buff *nskb; + struct sock *sk; + struct inet_sock *inet; if (ip_options_echo(&replyopts.opt.opt, skb)) return; @@ -1494,38 +1507,39 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, flowi4_init_output(&fl4, arg->bound_dev_if, 0, RT_TOS(arg->tos), - RT_SCOPE_UNIVERSE, sk->sk_protocol, + RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, tcp_hdr(skb)->source, tcp_hdr(skb)->dest); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); - rt = ip_route_output_key(sock_net(sk), &fl4); + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return; - /* And let IP do all the hard work. + inet = &get_cpu_var(unicast_sock); - This chunk is not reenterable, hence spinlock. - Note that it uses the fact, that this function is called - with locally disabled BH and that sk cannot be already spinlocked. - */ - bh_lock_sock(sk); inet->tos = arg->tos; + sk = &inet->sk; sk->sk_priority = skb->priority; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; + sock_net_set(sk, net); + __skb_queue_head_init(&sk->sk_write_queue); + sk->sk_sndbuf = sysctl_wmem_default; ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); - if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { + nskb = skb_peek(&sk->sk_write_queue); + if (nskb) { if (arg->csumoffset >= 0) - *((__sum16 *)skb_transport_header(skb) + - arg->csumoffset) = csum_fold(csum_add(skb->csum, + *((__sum16 *)skb_transport_header(nskb) + + arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); - skb->ip_summed = CHECKSUM_NONE; + nskb->ip_summed = CHECKSUM_NONE; + skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } - bh_unlock_sock(sk); + put_cpu_var(unicast_sock); ip_rt_put(rt); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d9caf5c07aae..d7d2fa50f07f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -688,7 +688,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); arg.tos = ip_hdr(skb)->tos; - ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, + ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); @@ -771,7 +771,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, if (oif) arg.bound_dev_if = oif; arg.tos = tos; - ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, + ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); @@ -2624,13 +2624,11 @@ EXPORT_SYMBOL(tcp_prot); static int __net_init tcp_sk_init(struct net *net) { - return inet_ctl_sock_create(&net->ipv4.tcp_sock, - PF_INET, SOCK_RAW, IPPROTO_TCP, net); + return 0; } static void __net_exit tcp_sk_exit(struct net *net) { - inet_ctl_sock_destroy(net->ipv4.tcp_sock); } static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) -- cgit v1.2.3 From d8f1641b5829629d3af8e52750ba3b542f8f56c8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 19 Jul 2012 10:43:03 -0700 Subject: net: Fix warnings in dst_ops.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit include/net/dst_ops.h:28:20: warning: ‘struct sock’ declared inside parameter list Signed-off-by: David S. Miller --- include/net/dst_ops.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index d079fc61c123..2f26dfb8450e 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -8,6 +8,7 @@ struct dst_entry; struct kmem_cachep; struct net_device; struct sk_buff; +struct sock; struct dst_ops { unsigned short family; -- cgit v1.2.3 From 2100c8d2d9db23c0a09901a782bb4e3b21bee298 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jul 2012 06:43:05 +0000 Subject: net-tcp: Fast Open base This patch impelements the common code for both the client and server. 1. TCP Fast Open option processing. Since Fast Open does not have an option number assigned by IANA yet, it shares the experiment option code 254 by implementing draft-ietf-tcpm-experimental-options with a 16 bits magic number 0xF989. This enables global experiments without clashing the scarce(2) experimental options available for TCP. When the draft status becomes standard (maybe), the client should switch to the new option number assigned while the server supports both numbers for transistion. 2. The new sysctl tcp_fastopen 3. A place holder init function Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 10 ++++++++++ include/net/tcp.h | 9 ++++++++- net/ipv4/Makefile | 2 +- net/ipv4/syncookies.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp_fastopen.c | 11 +++++++++++ net/ipv4/tcp_input.c | 26 ++++++++++++++++++++++---- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 4 ++-- net/ipv4/tcp_output.c | 25 +++++++++++++++++++++---- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 12 files changed, 86 insertions(+), 16 deletions(-) create mode 100644 net/ipv4/tcp_fastopen.c (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1888169e07c7..12948f543839 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -243,6 +243,16 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) return (tcp_hdr(skb)->doff - 5) * 4; } +/* TCP Fast Open */ +#define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ +#define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */ + +/* TCP Fast Open Cookie as stored in memory */ +struct tcp_fastopen_cookie { + s8 len; + u8 val[TCP_FASTOPEN_COOKIE_MAX]; +}; + /* This defines a selective acknowledgement block. */ struct tcp_sack_block_wire { __be32 start_seq; diff --git a/include/net/tcp.h b/include/net/tcp.h index 85c5090bfe25..5aed3718fde8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -170,6 +170,11 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ #define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ +#define TCPOPT_EXP 254 /* Experimental */ +/* Magic number to be after the option value for sharing TCP + * experimental options. See draft-ietf-tcpm-experimental-options-00.txt + */ +#define TCPOPT_FASTOPEN_MAGIC 0xF989 /* * TCP option lengths @@ -180,6 +185,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_SACK_PERM 2 #define TCPOLEN_TIMESTAMP 10 #define TCPOLEN_MD5SIG 18 +#define TCPOLEN_EXP_FASTOPEN_BASE 4 #define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */ #define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */ #define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN) @@ -222,6 +228,7 @@ extern int sysctl_tcp_retries1; extern int sysctl_tcp_retries2; extern int sysctl_tcp_orphan_retries; extern int sysctl_tcp_syncookies; +extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; @@ -418,7 +425,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); extern void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, const u8 **hvpp, - int estab); + int estab, struct tcp_fastopen_cookie *foc); extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index a677d804e53e..ae2ccf2890e4 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -7,7 +7,7 @@ obj-y := route.o inetpeer.o protocol.o \ ip_output.o ip_sockglue.o inet_hashtables.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ - tcp_minisocks.o tcp_cong.o tcp_metrics.o \ + tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index eab2a7fb15d1..650e1528e1e6 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -293,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) goto out; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3f6a1e762e9c..5840c3255721 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -366,6 +366,13 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, #endif + { + .procname = "tcp_fastopen", + .data = &sysctl_tcp_fastopen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "tcp_tw_recycle", .data = &tcp_death_row.sysctl_tw_recycle, diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c new file mode 100644 index 000000000000..a7f729c409d7 --- /dev/null +++ b/net/ipv4/tcp_fastopen.c @@ -0,0 +1,11 @@ +#include +#include + +int sysctl_tcp_fastopen; + +static int __init tcp_fastopen_init(void) +{ + return 0; +} + +late_initcall(tcp_fastopen_init); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fdd49f1b7a52..a06bb8959e7e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3732,7 +3732,8 @@ old_ack: * the fast version below fails. */ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, - const u8 **hvpp, int estab) + const u8 **hvpp, int estab, + struct tcp_fastopen_cookie *foc) { const unsigned char *ptr; const struct tcphdr *th = tcp_hdr(skb); @@ -3839,8 +3840,25 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o break; } break; - } + case TCPOPT_EXP: + /* Fast Open option shares code 254 using a + * 16 bits magic number. It's valid only in + * SYN or SYN-ACK with an even size. + */ + if (opsize < TCPOLEN_EXP_FASTOPEN_BASE || + get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC || + foc == NULL || !th->syn || (opsize & 1)) + break; + foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE; + if (foc->len >= TCP_FASTOPEN_COOKIE_MIN && + foc->len <= TCP_FASTOPEN_COOKIE_MAX) + memcpy(foc->val, ptr + 2, foc->len); + else if (foc->len != 0) + foc->len = -1; + break; + + } ptr += opsize-2; length -= opsize; } @@ -3882,7 +3900,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, if (tcp_parse_aligned_timestamp(tp, th)) return true; } - tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); return true; } @@ -5637,7 +5655,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_cookie_values *cvp = tp->cookie_values; int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0); + tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, NULL); if (th->ack) { /* rfc793: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d7d2fa50f07f..01aa77a97020 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1307,7 +1307,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index c66f2ede160e..5912ac3fd240 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -97,7 +97,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; @@ -534,7 +534,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 15a7c7bc3e58..4849be76ccd6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -385,15 +385,17 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) #define OPTION_COOKIE_EXTENSION (1 << 4) +#define OPTION_FAST_OPEN_COOKIE (1 << 8) struct tcp_out_options { - u8 options; /* bit field of OPTION_* */ + u16 options; /* bit field of OPTION_* */ + u16 mss; /* 0 to disable */ u8 ws; /* window scale, 0 to disable */ u8 num_sack_blocks; /* number of SACK blocks to include */ u8 hash_size; /* bytes in hash_location */ - u16 mss; /* 0 to disable */ - __u32 tsval, tsecr; /* need to include OPTION_TS */ __u8 *hash_location; /* temporary pointer, overloaded */ + __u32 tsval, tsecr; /* need to include OPTION_TS */ + struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ }; /* The sysctl int routines are generic, so check consistency here. @@ -442,7 +444,7 @@ static u8 tcp_cookie_size_check(u8 desired) static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, struct tcp_out_options *opts) { - u8 options = opts->options; /* mungable copy */ + u16 options = opts->options; /* mungable copy */ /* Having both authentication and cookies for security is redundant, * and there's certainly not enough room. Instead, the cookie-less @@ -564,6 +566,21 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, tp->rx_opt.dsack = 0; } + + if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { + struct tcp_fastopen_cookie *foc = opts->fastopen_cookie; + + *ptr++ = htonl((TCPOPT_EXP << 24) | + ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) | + TCPOPT_FASTOPEN_MAGIC); + + memcpy(ptr, foc->val, foc->len); + if ((foc->len & 3) == 2) { + u8 *align = ((u8 *)ptr) + foc->len; + align[0] = align[1] = TCPOPT_NOP; + } + ptr += (foc->len + 3) >> 2; + } } /* Compute TCP options for SYN packets. This is not the final diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7bf3cc427c28..bb46061c813a 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -177,7 +177,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c9dabdd832d7..0302ec3fecfc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1033,7 +1033,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && -- cgit v1.2.3 From 1fe4c481ba637660793217769695c146a037bd54 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jul 2012 06:43:06 +0000 Subject: net-tcp: Fast Open client - cookie cache With help from Eric Dumazet, add Fast Open metrics in tcp metrics cache. The basic ones are MSS and the cookies. Later patch will cache more to handle unfriendly middleboxes. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++++ net/ipv4/tcp_metrics.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 5aed3718fde8..e601da197361 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -405,6 +405,10 @@ extern void tcp_metrics_init(void); extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check); extern bool tcp_remember_stamp(struct sock *sk); extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); +extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie); +extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss, + struct tcp_fastopen_cookie *cookie); extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 1a115b665792..d02ff3777785 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -30,6 +30,11 @@ enum tcp_metric_index { TCP_METRIC_MAX, }; +struct tcp_fastopen_metrics { + u16 mss; + struct tcp_fastopen_cookie cookie; +}; + struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; struct inetpeer_addr tcpm_addr; @@ -38,6 +43,7 @@ struct tcp_metrics_block { u32 tcpm_ts_stamp; u32 tcpm_lock; u32 tcpm_vals[TCP_METRIC_MAX]; + struct tcp_fastopen_metrics tcpm_fastopen; }; static bool tcp_metric_locked(struct tcp_metrics_block *tm, @@ -118,6 +124,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); tm->tcpm_ts = 0; tm->tcpm_ts_stamp = 0; + tm->tcpm_fastopen.mss = 0; + tm->tcpm_fastopen.cookie.len = 0; } static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, @@ -633,6 +641,49 @@ bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) return ret; } +static DEFINE_SEQLOCK(fastopen_seqlock); + +void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie) +{ + struct tcp_metrics_block *tm; + + rcu_read_lock(); + tm = tcp_get_metrics(sk, __sk_dst_get(sk), false); + if (tm) { + struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen; + unsigned int seq; + + do { + seq = read_seqbegin(&fastopen_seqlock); + if (tfom->mss) + *mss = tfom->mss; + *cookie = tfom->cookie; + } while (read_seqretry(&fastopen_seqlock, seq)); + } + rcu_read_unlock(); +} + + +void tcp_fastopen_cache_set(struct sock *sk, u16 mss, + struct tcp_fastopen_cookie *cookie) +{ + struct tcp_metrics_block *tm; + + rcu_read_lock(); + tm = tcp_get_metrics(sk, __sk_dst_get(sk), true); + if (tm) { + struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen; + + write_seqlock_bh(&fastopen_seqlock); + tfom->mss = mss; + if (cookie->len > 0) + tfom->cookie = *cookie; + write_sequnlock_bh(&fastopen_seqlock); + } + rcu_read_unlock(); +} + static unsigned long tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { -- cgit v1.2.3 From 783237e8daf13481ee234997cbbbb823872ac388 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jul 2012 06:43:07 +0000 Subject: net-tcp: Fast Open client - sending SYN-data This patch implements sending SYN-data in tcp_connect(). The data is from tcp_sendmsg() with flag MSG_FASTOPEN (implemented in a later patch). The length of the cookie in tcp_fastopen_req, init'd to 0, controls the type of the SYN. If the cookie is not cached (len==0), the host sends data-less SYN with Fast Open cookie request option to solicit a cookie from the remote. If cookie is not available (len > 0), the host sends a SYN-data with Fast Open cookie option. If cookie length is negative, the SYN will not include any Fast Open option (for fall back operations). To deal with middleboxes that may drop SYN with data or experimental TCP option, the SYN-data is only sent once. SYN retransmits do not include data or Fast Open options. The connection will fall back to regular TCP handshake. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/snmp.h | 1 + include/linux/tcp.h | 6 ++- include/net/tcp.h | 9 ++++ net/ipv4/af_inet.c | 10 ++++- net/ipv4/proc.c | 1 + net/ipv4/tcp_output.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++---- 6 files changed, 130 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index e5fcbd079e4a..00bc189cb395 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -238,6 +238,7 @@ enum LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */ LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */ LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */ + LINUX_MIB_TCPFASTOPENACTIVE, /* TCPFastOpenActive */ __LINUX_MIB_MAX }; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 12948f543839..1edf96afab44 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -386,7 +386,8 @@ struct tcp_sock { unused : 1; u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ - early_retrans_delayed:1; /* Delayed ER timer installed */ + early_retrans_delayed:1, /* Delayed ER timer installed */ + syn_fastopen:1; /* SYN includes Fast Open option */ /* RTT measurement */ u32 srtt; /* smoothed round trip time << 3 */ @@ -500,6 +501,9 @@ struct tcp_sock { struct tcp_md5sig_info __rcu *md5sig_info; #endif +/* TCP fastopen related information */ + struct tcp_fastopen_request *fastopen_req; + /* When the cookie options are generated and exchanged, then this * object holds a reference to them (cookie_values->kref). Also * contains related tcp_cookie_transactions fields. diff --git a/include/net/tcp.h b/include/net/tcp.h index e601da197361..867557b4244a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1289,6 +1289,15 @@ extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key); +struct tcp_fastopen_request { + /* Fast Open cookie. Size 0 means a cookie request */ + struct tcp_fastopen_cookie cookie; + struct msghdr *data; /* data in MSG_FASTOPEN */ + u16 copied; /* queued in tcp_connect() */ +}; + +void tcp_free_fastopen_req(struct tcp_sock *tp); + /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 07a02f6e9696..edc414625be2 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -556,11 +556,12 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, } EXPORT_SYMBOL(inet_dgram_connect); -static long inet_wait_for_connect(struct sock *sk, long timeo) +static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) { DEFINE_WAIT(wait); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + sk->sk_write_pending += writebias; /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. @@ -576,6 +577,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } finish_wait(sk_sleep(sk), &wait); + sk->sk_write_pending -= writebias; return timeo; } @@ -634,8 +636,12 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { + int writebias = (sk->sk_protocol == IPPROTO_TCP) && + tcp_sk(sk)->fastopen_req && + tcp_sk(sk)->fastopen_req->data ? 1 : 0; + /* Error code is set above */ - if (!timeo || !inet_wait_for_connect(sk, timeo)) + if (!timeo || !inet_wait_for_connect(sk, timeo, writebias)) goto out; err = sock_intr_errno(timeo); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 2a5240b2ea61..957acd12250b 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -262,6 +262,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPOFOMerge", LINUX_MIB_TCPOFOMERGE), SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), + SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4849be76ccd6..88693281da4c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -596,6 +596,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? tcp_cookie_size_check(cvp->cookie_desired) : 0; + struct tcp_fastopen_request *fastopen = tp->fastopen_req; #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); @@ -636,6 +637,16 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, remaining -= TCPOLEN_SACKPERM_ALIGNED; } + if (fastopen && fastopen->cookie.len >= 0) { + u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len; + need = (need + 3) & ~3U; /* Align to 32 bits */ + if (remaining >= need) { + opts->options |= OPTION_FAST_OPEN_COOKIE; + opts->fastopen_cookie = &fastopen->cookie; + remaining -= need; + tp->syn_fastopen = 1; + } + } /* Note that timestamps are required by the specification. * * Odd numbers of bytes are prohibited by the specification, ensuring @@ -2824,6 +2835,96 @@ void tcp_connect_init(struct sock *sk) tcp_clear_retrans(tp); } +static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + + tcb->end_seq += skb->len; + skb_header_release(skb); + __tcp_add_write_queue_tail(sk, skb); + sk->sk_wmem_queued += skb->truesize; + sk_mem_charge(sk, skb->truesize); + tp->write_seq = tcb->end_seq; + tp->packets_out += tcp_skb_pcount(skb); +} + +/* Build and send a SYN with data and (cached) Fast Open cookie. However, + * queue a data-only packet after the regular SYN, such that regular SYNs + * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges + * only the SYN sequence, the data are retransmitted in the first ACK. + * If cookie is not cached or other error occurs, falls back to send a + * regular SYN with Fast Open cookie request option. + */ +static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_fastopen_request *fo = tp->fastopen_req; + int space, i, err = 0, iovlen = fo->data->msg_iovlen; + struct sk_buff *syn_data = NULL, *data; + + tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie); + if (fo->cookie.len <= 0) + goto fallback; + + /* MSS for SYN-data is based on cached MSS and bounded by PMTU and + * user-MSS. Reserve maximum option space for middleboxes that add + * private TCP options. The cost is reduced data space in SYN :( + */ + if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp) + tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; + space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - + MAX_TCP_OPTION_SPACE; + + syn_data = skb_copy_expand(syn, skb_headroom(syn), space, + sk->sk_allocation); + if (syn_data == NULL) + goto fallback; + + for (i = 0; i < iovlen && syn_data->len < space; ++i) { + struct iovec *iov = &fo->data->msg_iov[i]; + unsigned char __user *from = iov->iov_base; + int len = iov->iov_len; + + if (syn_data->len + len > space) + len = space - syn_data->len; + else if (i + 1 == iovlen) + /* No more data pending in inet_wait_for_connect() */ + fo->data = NULL; + + if (skb_add_data(syn_data, from, len)) + goto fallback; + } + + /* Queue a data-only packet after the regular SYN for retransmission */ + data = pskb_copy(syn_data, sk->sk_allocation); + if (data == NULL) + goto fallback; + TCP_SKB_CB(data)->seq++; + TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN; + TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH); + tcp_connect_queue_skb(sk, data); + fo->copied = data->len; + + if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); + goto done; + } + syn_data = NULL; + +fallback: + /* Send a regular SYN with Fast Open cookie request option */ + if (fo->cookie.len > 0) + fo->cookie.len = 0; + err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); + if (err) + tp->syn_fastopen = 0; + kfree_skb(syn_data); +done: + fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ + return err; +} + /* Build a SYN and send it off. */ int tcp_connect(struct sock *sk) { @@ -2841,17 +2942,13 @@ int tcp_connect(struct sock *sk) skb_reserve(buff, MAX_TCP_HEADER); tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); + tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp; + tcp_connect_queue_skb(sk, buff); TCP_ECN_send_syn(sk, buff); - /* Send it off. */ - TCP_SKB_CB(buff)->when = tcp_time_stamp; - tp->retrans_stamp = TCP_SKB_CB(buff)->when; - skb_header_release(buff); - __tcp_add_write_queue_tail(sk, buff); - sk->sk_wmem_queued += buff->truesize; - sk_mem_charge(sk, buff->truesize); - tp->packets_out += tcp_skb_pcount(buff); - err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); + /* Send off SYN; include data in Fast Open. */ + err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : + tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); if (err == -ECONNREFUSED) return err; -- cgit v1.2.3 From cf60af03ca4e71134206809ea892e49b92a88896 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jul 2012 06:43:09 +0000 Subject: net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN) sendmsg() (or sendto()) with MSG_FASTOPEN is a combo of connect(2) and write(2). The application should replace connect() with it to send data in the opening SYN packet. For blocking socket, sendmsg() blocks until all the data are buffered locally and the handshake is completed like connect() call. It returns similar errno like connect() if the TCP handshake fails. For non-blocking socket, it returns the number of bytes queued (and transmitted in the SYN-data packet) if cookie is available. If cookie is not available, it transmits a data-less SYN packet with Fast Open cookie request option and returns -EINPROGRESS like connect(). Using MSG_FASTOPEN on connecting or connected socket will result in simlar errno like repeating connect() calls. Therefore the application should only use this flag on new sockets. The buffer size of sendmsg() is independent of the MSS of the connection. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 11 ++++++ include/linux/socket.h | 1 + include/net/inet_common.h | 6 ++-- include/net/tcp.h | 3 ++ net/ipv4/af_inet.c | 19 ++++++++--- net/ipv4/tcp.c | 61 +++++++++++++++++++++++++++++++--- net/ipv4/tcp_ipv4.c | 3 ++ 7 files changed, 92 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index e1e021594cff..03964e088180 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -468,6 +468,17 @@ tcp_syncookies - BOOLEAN SYN flood warnings in logs not being really flooded, your server is seriously misconfigured. +tcp_fastopen - INTEGER + Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data + in the opening SYN packet. To use this feature, the client application + must not use connect(). Instead, it should use sendmsg() or sendto() + with MSG_FASTOPEN flag which performs a TCP handshake automatically. + + The values (bitmap) are: + 1: Enables sending data in the opening SYN on the client + + Default: 0 + tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt will be retransmitted. Should not be higher than 255. Default value diff --git a/include/linux/socket.h b/include/linux/socket.h index 25d6322fb635..ba7b2e817cfa 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -268,6 +268,7 @@ struct ucred { #define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ #define MSG_EOF MSG_FIN +#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file descriptor received through SCM_RIGHTS */ diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 22fac9892b16..234008782c8c 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -14,9 +14,11 @@ struct sockaddr; struct socket; extern int inet_release(struct socket *sock); -extern int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr, +extern int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); -extern int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, +extern int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags); +extern int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); extern int inet_accept(struct socket *sock, struct socket *newsock, int flags); extern int inet_sendmsg(struct kiocb *iocb, struct socket *sock, diff --git a/include/net/tcp.h b/include/net/tcp.h index 867557b4244a..c0258100d70c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -212,6 +212,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); /* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */ #define TCP_INIT_CWND 10 +/* Bit Flags for sysctl_tcp_fastopen */ +#define TFO_CLIENT_ENABLE 1 + extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index edc414625be2..fe4582ca969a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -585,8 +585,8 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) * Connect to a remote host. There is regrettably still a little * TCP 'magic' in here. */ -int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) +int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) { struct sock *sk = sock->sk; int err; @@ -595,8 +595,6 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; - lock_sock(sk); - if (uaddr->sa_family == AF_UNSPEC) { err = sk->sk_prot->disconnect(sk, flags); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; @@ -663,7 +661,6 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTED; err = 0; out: - release_sock(sk); return err; sock_error: @@ -673,6 +670,18 @@ sock_error: sock->state = SS_DISCONNECTING; goto out; } +EXPORT_SYMBOL(__inet_stream_connect); + +int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + int err; + + lock_sock(sock->sk); + err = __inet_stream_connect(sock, uaddr, addr_len, flags); + release_sock(sock->sk); + return err; +} EXPORT_SYMBOL(inet_stream_connect); /* diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4252cd8f39fd..581ecf02c6b5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -270,6 +270,7 @@ #include #include +#include #include #include #include @@ -982,26 +983,67 @@ static inline int select_size(const struct sock *sk, bool sg) return tmp; } +void tcp_free_fastopen_req(struct tcp_sock *tp) +{ + if (tp->fastopen_req != NULL) { + kfree(tp->fastopen_req); + tp->fastopen_req = NULL; + } +} + +static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) +{ + struct tcp_sock *tp = tcp_sk(sk); + int err, flags; + + if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) + return -EOPNOTSUPP; + if (tp->fastopen_req != NULL) + return -EALREADY; /* Another Fast Open is in progress */ + + tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), + sk->sk_allocation); + if (unlikely(tp->fastopen_req == NULL)) + return -ENOBUFS; + tp->fastopen_req->data = msg; + + flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; + err = __inet_stream_connect(sk->sk_socket, msg->msg_name, + msg->msg_namelen, flags); + *size = tp->fastopen_req->copied; + tcp_free_fastopen_req(tp); + return err; +} + int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int iovlen, flags, err, copied; - int mss_now = 0, size_goal; + int iovlen, flags, err, copied = 0; + int mss_now = 0, size_goal, copied_syn = 0, offset = 0; bool sg; long timeo; lock_sock(sk); flags = msg->msg_flags; + if (flags & MSG_FASTOPEN) { + err = tcp_sendmsg_fastopen(sk, msg, &copied_syn); + if (err == -EINPROGRESS && copied_syn > 0) + goto out; + else if (err) + goto out_err; + offset = copied_syn; + } + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); /* Wait for a connection to finish. */ if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) - goto out_err; + goto do_error; if (unlikely(tp->repair)) { if (tp->repair_queue == TCP_RECV_QUEUE) { @@ -1037,6 +1079,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, unsigned char __user *from = iov->iov_base; iov++; + if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */ + if (offset >= seglen) { + offset -= seglen; + continue; + } + seglen -= offset; + from += offset; + offset = 0; + } while (seglen > 0) { int copy = 0; @@ -1199,7 +1250,7 @@ out: if (copied && likely(!tp->repair)) tcp_push(sk, flags, mss_now, tp->nonagle); release_sock(sk); - return copied; + return copied + copied_syn; do_fault: if (!skb->len) { @@ -1212,7 +1263,7 @@ do_fault: } do_error: - if (copied) + if (copied + copied_syn) goto out; out_err: err = sk_stream_error(sk, flags, err); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 01aa77a97020..1d8b75a58981 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1952,6 +1952,9 @@ void tcp_v4_destroy_sock(struct sock *sk) tp->cookie_values = NULL; } + /* If socket is aborted during connect operation */ + tcp_free_fastopen_req(tp); + sk_sockets_allocated_dec(sk); sock_release_memcg(sk); } -- cgit v1.2.3 From aab4874355679c70f93993cf3b3fd74643b9ac33 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jul 2012 06:43:10 +0000 Subject: net-tcp: Fast Open client - detecting SYN-data drops On paths with firewalls dropping SYN with data or experimental TCP options, Fast Open connections will have experience SYN timeout and bad performance. The solution is to track such incidents in the cookie cache and disables Fast Open temporarily. Since only the original SYN includes data and/or Fast Open option, the SYN-ACK has some tell-tale sign (tcp_rcv_fastopen_synack()) to detect such drops. If a path has recurring Fast Open SYN drops, Fast Open is disabled for 2^(recurring_losses) minutes starting from four minutes up to roughly one and half day. sendmsg with MSG_FASTOPEN flag will succeed but it behaves as connect() then write(). Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ++++-- net/ipv4/tcp_input.c | 10 +++++++++- net/ipv4/tcp_metrics.c | 16 +++++++++++++--- net/ipv4/tcp_output.c | 13 +++++++++++-- 4 files changed, 37 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index c0258100d70c..e07878d246aa 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -409,9 +409,11 @@ extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, extern bool tcp_remember_stamp(struct sock *sk); extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie); + struct tcp_fastopen_cookie *cookie, + int *syn_loss, unsigned long *last_syn_loss); extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss, - struct tcp_fastopen_cookie *cookie); + struct tcp_fastopen_cookie *cookie, + bool syn_lost); extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 38b6a811edfc..c49a4fc175bd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5652,6 +5652,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *data = tcp_write_queue_head(sk); u16 mss = tp->rx_opt.mss_clamp; + bool syn_drop; if (mss == tp->rx_opt.user_mss) { struct tcp_options_received opt; @@ -5664,7 +5665,14 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, mss = opt.mss_clamp; } - tcp_fastopen_cache_set(sk, mss, cookie); + /* The SYN-ACK neither has cookie nor acknowledges the data. Presumably + * the remote receives only the retransmitted (regular) SYNs: either + * the original SYN-data or the corresponding SYN-ACK is lost. + */ + syn_drop = (cookie->len <= 0 && data && + inet_csk(sk)->icsk_retransmits); + + tcp_fastopen_cache_set(sk, mss, cookie, syn_drop); if (data) { /* Retransmit unacked data in SYN */ tcp_retransmit_skb(sk, data); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index d02ff3777785..99779ae44f64 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -32,6 +32,8 @@ enum tcp_metric_index { struct tcp_fastopen_metrics { u16 mss; + u16 syn_loss:10; /* Recurring Fast Open SYN losses */ + unsigned long last_syn_loss; /* Last Fast Open SYN loss */ struct tcp_fastopen_cookie cookie; }; @@ -125,6 +127,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) tm->tcpm_ts = 0; tm->tcpm_ts_stamp = 0; tm->tcpm_fastopen.mss = 0; + tm->tcpm_fastopen.syn_loss = 0; tm->tcpm_fastopen.cookie.len = 0; } @@ -644,7 +647,8 @@ bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) static DEFINE_SEQLOCK(fastopen_seqlock); void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie) + struct tcp_fastopen_cookie *cookie, + int *syn_loss, unsigned long *last_syn_loss) { struct tcp_metrics_block *tm; @@ -659,14 +663,15 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, if (tfom->mss) *mss = tfom->mss; *cookie = tfom->cookie; + *syn_loss = tfom->syn_loss; + *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0; } while (read_seqretry(&fastopen_seqlock, seq)); } rcu_read_unlock(); } - void tcp_fastopen_cache_set(struct sock *sk, u16 mss, - struct tcp_fastopen_cookie *cookie) + struct tcp_fastopen_cookie *cookie, bool syn_lost) { struct tcp_metrics_block *tm; @@ -679,6 +684,11 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, tfom->mss = mss; if (cookie->len > 0) tfom->cookie = *cookie; + if (syn_lost) { + ++tfom->syn_loss; + tfom->last_syn_loss = jiffies; + } else + tfom->syn_loss = 0; write_sequnlock_bh(&fastopen_seqlock); } rcu_read_unlock(); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 88693281da4c..c5cfd5ec3184 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2860,10 +2860,19 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int space, i, err = 0, iovlen = fo->data->msg_iovlen; + int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen; struct sk_buff *syn_data = NULL, *data; + unsigned long last_syn_loss = 0; + + tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, + &syn_loss, &last_syn_loss); + /* Recurring FO SYN losses: revert to regular handshake temporarily */ + if (syn_loss > 1 && + time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) { + fo->cookie.len = -1; + goto fallback; + } - tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie); if (fo->cookie.len <= 0) goto fallback; -- cgit v1.2.3 From 67da22d23fa6f3324e03bcd0580b914b2e4afbf3 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jul 2012 06:43:11 +0000 Subject: net-tcp: Fast Open client - cookie-less mode In trusted networks, e.g., intranet, data-center, the client does not need to use Fast Open cookie to mitigate DoS attacks. In cookie-less mode, sendmsg() with MSG_FASTOPEN flag will send SYN-data regardless of cookie availability. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 2 ++ include/linux/tcp.h | 1 + include/net/tcp.h | 1 + net/ipv4/tcp_input.c | 8 ++++++-- net/ipv4/tcp_output.c | 6 +++++- 5 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 03964e088180..5f3ef7f7fcec 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -476,6 +476,8 @@ tcp_fastopen - INTEGER The values (bitmap) are: 1: Enables sending data in the opening SYN on the client + 5: Enables sending data in the opening SYN on the client regardless + of cookie availability. Default: 0 diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1edf96afab44..9febfb685c33 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -387,6 +387,7 @@ struct tcp_sock { u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ early_retrans_delayed:1, /* Delayed ER timer installed */ + syn_data:1, /* SYN includes data */ syn_fastopen:1; /* SYN includes Fast Open option */ /* RTT measurement */ diff --git a/include/net/tcp.h b/include/net/tcp.h index e07878d246aa..bc7c134ec054 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -214,6 +214,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); /* Bit Flags for sysctl_tcp_fastopen */ #define TFO_CLIENT_ENABLE 1 +#define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */ extern struct inet_timewait_death_row tcp_death_row; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c49a4fc175bd..e67d685a6c0e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5650,7 +5650,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, struct tcp_fastopen_cookie *cookie) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *data = tcp_write_queue_head(sk); + struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL; u16 mss = tp->rx_opt.mss_clamp; bool syn_drop; @@ -5665,6 +5665,9 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, mss = opt.mss_clamp; } + if (!tp->syn_fastopen) /* Ignore an unsolicited cookie */ + cookie->len = -1; + /* The SYN-ACK neither has cookie nor acknowledges the data. Presumably * the remote receives only the retransmitted (regular) SYNs: either * the original SYN-data or the corresponding SYN-ACK is lost. @@ -5816,7 +5819,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_finish_connect(sk, skb); - if (tp->syn_fastopen && tcp_rcv_fastopen_synack(sk, skb, &foc)) + if ((tp->syn_fastopen || tp->syn_data) && + tcp_rcv_fastopen_synack(sk, skb, &foc)) return -1; if (sk->sk_write_pending || diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c5cfd5ec3184..27a32acfdb62 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2864,6 +2864,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) struct sk_buff *syn_data = NULL, *data; unsigned long last_syn_loss = 0; + tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, &syn_loss, &last_syn_loss); /* Recurring FO SYN losses: revert to regular handshake temporarily */ @@ -2873,7 +2874,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) goto fallback; } - if (fo->cookie.len <= 0) + if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) + fo->cookie.len = -1; + else if (fo->cookie.len <= 0) goto fallback; /* MSS for SYN-data is based on cached MSS and bounded by PMTU and @@ -2916,6 +2919,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) fo->copied = data->len; if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) { + tp->syn_data = (fo->copied > 0); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); goto done; } -- cgit v1.2.3 From 5815d5e7aae3cc9c5e85af83094d4d6498c3f4fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Jul 2012 23:02:34 +0000 Subject: tcp: use hash_32() in tcp_metrics Fix a missing roundup_pow_of_two(), since tcpmhash_entries is not guaranteed to be a power of two. Uses hash_32() instead of custom hash. tcpmhash_entries should be an unsigned int. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 +- net/ipv4/tcp_metrics.c | 25 ++++++++++--------------- 2 files changed, 11 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d909c7fc3da1..0ffb8e31f3cd 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,7 +40,7 @@ struct netns_ipv4 { struct sock **icmp_sk; struct inet_peer_base *peers; struct tcpm_hash_bucket *tcp_metrics_hash; - unsigned int tcp_metrics_hash_mask; + unsigned int tcp_metrics_hash_log; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 99779ae44f64..992f1bff4fc6 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -228,10 +229,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return NULL; } - hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); - net = dev_net(dst->dev); - hash &= net->ipv4.tcp_metrics_hash_mask; + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { @@ -265,10 +264,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock return NULL; } - hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); - net = twsk_net(tw); - hash &= net->ipv4.tcp_metrics_hash_mask; + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { @@ -302,10 +299,8 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, return NULL; } - hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); - net = dev_net(dst->dev); - hash &= net->ipv4.tcp_metrics_hash_mask; + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); tm = __tcp_get_metrics(&addr, net, hash); reclaim = false; @@ -694,7 +689,7 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, rcu_read_unlock(); } -static unsigned long tcpmhash_entries; +static unsigned int tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { ssize_t ret; @@ -702,7 +697,7 @@ static int __init set_tcpmhash_entries(char *str) if (!str) return 0; - ret = kstrtoul(str, 0, &tcpmhash_entries); + ret = kstrtouint(str, 0, &tcpmhash_entries); if (ret) return 0; @@ -712,7 +707,8 @@ __setup("tcpmhash_entries=", set_tcpmhash_entries); static int __net_init tcp_net_metrics_init(struct net *net) { - int slots, size; + size_t size; + unsigned int slots; slots = tcpmhash_entries; if (!slots) { @@ -722,14 +718,13 @@ static int __net_init tcp_net_metrics_init(struct net *net) slots = 8 * 1024; } - size = slots * sizeof(struct tcpm_hash_bucket); + net->ipv4.tcp_metrics_hash_log = order_base_2(slots); + size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log; net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL); if (!net->ipv4.tcp_metrics_hash) return -ENOMEM; - net->ipv4.tcp_metrics_hash_mask = (slots - 1); - return 0; } -- cgit v1.2.3 From 6f458dfb409272082c9bfa412f77ff2fc21c626f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jul 2012 05:45:50 +0000 Subject: tcp: improve latencies of timer triggered events Modern TCP stack highly depends on tcp_write_timer() having a small latency, but current implementation doesn't exactly meet the expectations. When a timer fires but finds the socket is owned by the user, it rearms itself for an additional delay hoping next run will be more successful. tcp_write_timer() for example uses a 50ms delay for next try, and it defeats many attempts to get predictable TCP behavior in term of latencies. Use the recently introduced tcp_release_cb(), so that the user owning the socket will call various handlers right before socket release. This will permit us to post a followup patch to address the tcp_tso_should_defer() syndrome (some deferred packets have to wait RTO timer to be transmitted, while cwnd should allow us to send them sooner) Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Nandita Dukkipati Cc: H.K. Jerry Chu Cc: John Heffner Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++- include/net/tcp.h | 2 ++ net/ipv4/tcp_output.c | 46 ++++++++++++++++++++------------- net/ipv4/tcp_timer.c | 70 +++++++++++++++++++++++++++------------------------ 4 files changed, 71 insertions(+), 51 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9febfb685c33..2761856987b2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -515,7 +515,9 @@ struct tcp_sock { enum tsq_flags { TSQ_THROTTLED, TSQ_QUEUED, - TSQ_OWNED, /* tcp_tasklet_func() found socket was locked */ + TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */ + TCP_WRITE_TIMER_DEFERRED, /* tcp_write_timer() found socket was owned */ + TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */ }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) diff --git a/include/net/tcp.h b/include/net/tcp.h index bc7c134ec054..e19124b84cd2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -350,6 +350,8 @@ extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, extern int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); extern void tcp_release_cb(struct sock *sk); +extern void tcp_write_timer_handler(struct sock *sk); +extern void tcp_delack_timer_handler(struct sock *sk); extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 27a32acfdb62..950aebfd9967 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -837,6 +837,13 @@ struct tsq_tasklet { }; static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet); +static void tcp_tsq_handler(struct sock *sk) +{ + if ((1 << sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | + TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) + tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); +} /* * One tasklest per cpu tries to send more skbs. * We run in tasklet context but need to disable irqs when @@ -864,16 +871,10 @@ static void tcp_tasklet_func(unsigned long data) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { - if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | - TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) - tcp_write_xmit(sk, - tcp_current_mss(sk), - 0, 0, - GFP_ATOMIC); + tcp_tsq_handler(sk); } else { /* defer the work to tcp_release_cb() */ - set_bit(TSQ_OWNED, &tp->tsq_flags); + set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags); } bh_unlock_sock(sk); @@ -882,6 +883,9 @@ static void tcp_tasklet_func(unsigned long data) } } +#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ + (1UL << TCP_WRITE_TIMER_DEFERRED) | \ + (1UL << TCP_DELACK_TIMER_DEFERRED)) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -892,16 +896,24 @@ static void tcp_tasklet_func(unsigned long data) void tcp_release_cb(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + unsigned long flags, nflags; - if (test_and_clear_bit(TSQ_OWNED, &tp->tsq_flags)) { - if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | - TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) - tcp_write_xmit(sk, - tcp_current_mss(sk), - 0, 0, - GFP_ATOMIC); - } + /* perform an atomic operation only if at least one flag is set */ + do { + flags = tp->tsq_flags; + if (!(flags & TCP_DEFERRED_ALL)) + return; + nflags = flags & ~TCP_DEFERRED_ALL; + } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags); + + if (flags & (1UL << TCP_TSQ_DEFERRED)) + tcp_tsq_handler(sk); + + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) + tcp_write_timer_handler(sk); + + if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) + tcp_delack_timer_handler(sk); } EXPORT_SYMBOL(tcp_release_cb); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index e911e6c523ec..6df36ad55a38 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -32,17 +32,6 @@ int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; int sysctl_tcp_thin_linear_timeouts __read_mostly; -static void tcp_write_timer(unsigned long); -static void tcp_delack_timer(unsigned long); -static void tcp_keepalive_timer (unsigned long data); - -void tcp_init_xmit_timers(struct sock *sk) -{ - inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, - &tcp_keepalive_timer); -} -EXPORT_SYMBOL(tcp_init_xmit_timers); - static void tcp_write_err(struct sock *sk) { sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; @@ -205,21 +194,11 @@ static int tcp_write_timeout(struct sock *sk) return 0; } -static void tcp_delack_timer(unsigned long data) +void tcp_delack_timer_handler(struct sock *sk) { - struct sock *sk = (struct sock *)data; struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later. */ - icsk->icsk_ack.blocked = 1; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); - sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); - goto out_unlock; - } - sk_mem_reclaim_partial(sk); if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) @@ -260,7 +239,21 @@ static void tcp_delack_timer(unsigned long data) out: if (sk_under_memory_pressure(sk)) sk_mem_reclaim(sk); -out_unlock: +} + +static void tcp_delack_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { + tcp_delack_timer_handler(sk); + } else { + inet_csk(sk)->icsk_ack.blocked = 1; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); + /* deleguate our work to tcp_release_cb() */ + set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + } bh_unlock_sock(sk); sock_put(sk); } @@ -450,19 +443,11 @@ out_reset_timer: out:; } -static void tcp_write_timer(unsigned long data) +void tcp_write_timer_handler(struct sock *sk) { - struct sock *sk = (struct sock *)data; struct inet_connection_sock *icsk = inet_csk(sk); int event; - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later */ - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); - goto out_unlock; - } - if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) goto out; @@ -485,7 +470,19 @@ static void tcp_write_timer(unsigned long data) out: sk_mem_reclaim(sk); -out_unlock: +} + +static void tcp_write_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { + tcp_write_timer_handler(sk); + } else { + /* deleguate our work to tcp_release_cb() */ + set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + } bh_unlock_sock(sk); sock_put(sk); } @@ -602,3 +599,10 @@ out: bh_unlock_sock(sk); sock_put(sk); } + +void tcp_init_xmit_timers(struct sock *sk) +{ + inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, + &tcp_keepalive_timer); +} +EXPORT_SYMBOL(tcp_init_xmit_timers); -- cgit v1.2.3 From d40156aa5ecbd51fed932ed4813df82b56e5ff4d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 20 Jul 2012 02:28:47 +0000 Subject: rtnl: allow to specify different num for rx and tx queue count Also cut out unused function parameters and possible err in return value. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 14 ++++++++------ include/net/rtnetlink.h | 10 ++++++---- net/core/rtnetlink.c | 16 ++++++++-------- 3 files changed, 22 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3960b1b26178..f41ddc2d48be 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4845,17 +4845,19 @@ static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int bond_get_tx_queues(struct net *net, struct nlattr *tb[]) +static unsigned int bond_get_num_tx_queues(void) { return tx_queues; } static struct rtnl_link_ops bond_link_ops __read_mostly = { - .kind = "bond", - .priv_size = sizeof(struct bonding), - .setup = bond_setup, - .validate = bond_validate, - .get_tx_queues = bond_get_tx_queues, + .kind = "bond", + .priv_size = sizeof(struct bonding), + .setup = bond_setup, + .validate = bond_validate, + .get_num_tx_queues = bond_get_num_tx_queues, + .get_num_rx_queues = bond_get_num_tx_queues, /* Use the same number + as for TX queues */ }; /* Create a new bond based on the specified name and bonding parameters. diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index bbcfd0993432..6b00c4fc4291 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -44,8 +44,10 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) * @get_xstats_size: Function to calculate required room for dumping device * specific statistics * @fill_xstats: Function to dump device specific statistics - * @get_tx_queues: Function to determine number of transmit queues to create when - * creating a new device. + * @get_num_tx_queues: Function to determine number of transmit queues + * to create when creating a new device. + * @get_num_rx_queues: Function to determine number of receive queues + * to create when creating a new device. */ struct rtnl_link_ops { struct list_head list; @@ -77,8 +79,8 @@ struct rtnl_link_ops { size_t (*get_xstats_size)(const struct net_device *dev); int (*fill_xstats)(struct sk_buff *skb, const struct net_device *dev); - int (*get_tx_queues)(struct net *net, - struct nlattr *tb[]); + unsigned int (*get_num_tx_queues)(void); + unsigned int (*get_num_rx_queues)(void); }; extern int __rtnl_link_register(struct rtnl_link_ops *ops); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 045db8ad87c8..db5a8ad8a79b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1624,17 +1624,17 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, { int err; struct net_device *dev; - unsigned int num_queues = 1; + unsigned int num_tx_queues = 1; + unsigned int num_rx_queues = 1; - if (ops->get_tx_queues) { - err = ops->get_tx_queues(src_net, tb); - if (err < 0) - goto err; - num_queues = err; - } + if (ops->get_num_tx_queues) + num_tx_queues = ops->get_num_tx_queues(); + if (ops->get_num_rx_queues) + num_rx_queues = ops->get_num_rx_queues(); err = -ENOMEM; - dev = alloc_netdev_mq(ops->priv_size, ifname, ops->setup, num_queues); + dev = alloc_netdev_mqs(ops->priv_size, ifname, ops->setup, + num_tx_queues, num_rx_queues); if (!dev) goto err; -- cgit v1.2.3 From df4ab5b3c295050da09153fa9760042e4de3ffff Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 20 Jul 2012 02:28:49 +0000 Subject: net: rename bond_queue_mapping to slave_dev_queue_mapping As this is going to be used not only by bonding. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 +++--- include/net/sch_generic.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f41ddc2d48be..6fae5f3ec7f6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -395,8 +395,8 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, skb->dev = slave_dev; BUILD_BUG_ON(sizeof(skb->queue_mapping) != - sizeof(qdisc_skb_cb(skb)->bond_queue_mapping)); - skb->queue_mapping = qdisc_skb_cb(skb)->bond_queue_mapping; + sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping)); + skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; if (unlikely(netpoll_tx_running(slave_dev))) bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); @@ -4184,7 +4184,7 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb) /* * Save the original txq to restore before passing to the driver */ - qdisc_skb_cb(skb)->bond_queue_mapping = skb->queue_mapping; + qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; if (unlikely(txq >= dev->real_num_tx_queues)) { do { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9d7d54a00e63..d9611e032418 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -220,7 +220,7 @@ struct tcf_proto { struct qdisc_skb_cb { unsigned int pkt_len; - u16 bond_queue_mapping; + u16 slave_dev_queue_mapping; u16 _pad; unsigned char data[20]; }; -- cgit v1.2.3 From 89aef8921bfbac22f00e04f8450f6e447db13e42 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 11:00:09 -0700 Subject: ipv4: Delete routing cache. The ipv4 routing cache is non-deterministic, performance wise, and is subject to reasonably easy to launch denial of service attacks. The routing cache works great for well behaved traffic, and the world was a much friendlier place when the tradeoffs that led to the routing cache's design were considered. What it boils down to is that the performance of the routing cache is a product of the traffic patterns seen by a system rather than being a product of the contents of the routing tables. The former of which is controllable by external entitites. Even for "well behaved" legitimate traffic, high volume sites can see hit rates in the routing cache of only ~%10. Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/fib_frontend.c | 5 - net/ipv4/route.c | 940 +----------------------------------------------- 3 files changed, 13 insertions(+), 933 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index ace3cb442519..5dcfeb621e06 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -109,7 +109,6 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; extern int ip_rt_init(void); extern void rt_cache_flush(struct net *net, int how); -extern void rt_cache_flush_batch(struct net *net); extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, struct sock *sk); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b83203658ee3..f277cf0e6321 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1072,11 +1072,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo rt_cache_flush(dev_net(dev), 0); break; case NETDEV_UNREGISTER_BATCH: - /* The batch unregister is only called on the first - * device in the list of devices being unregistered. - * Therefore we should not pass dev_net(dev) in here. - */ - rt_cache_flush_batch(NULL); break; } return NOTIFY_DONE; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d547f6fae20d..6d6146d31f22 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -133,10 +133,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8; static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; -static int rt_chain_length_max __read_mostly = 20; - -static struct delayed_work expires_work; -static unsigned long expires_ljiffies; /* * Interface to generic destination cache. @@ -152,7 +148,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); -static int rt_garbage_collect(struct dst_ops *ops); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) @@ -172,7 +167,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, static struct dst_ops ipv4_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), - .gc = rt_garbage_collect, .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, @@ -209,184 +203,30 @@ const __u8 ip_tos2prio[16] = { }; EXPORT_SYMBOL(ip_tos2prio); -/* - * Route cache. - */ - -/* The locking scheme is rather straight forward: - * - * 1) Read-Copy Update protects the buckets of the central route hash. - * 2) Only writers remove entries, and they hold the lock - * as they look at rtable reference counts. - * 3) Only readers acquire references to rtable entries, - * they do so with atomic increments and with the - * lock held. - */ - -struct rt_hash_bucket { - struct rtable __rcu *chain; -}; - -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ - defined(CONFIG_PROVE_LOCKING) -/* - * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks - * The size of this table is a power of two and depends on the number of CPUS. - * (on lockdep we have a quite big spinlock_t, so keep the size down there) - */ -#ifdef CONFIG_LOCKDEP -# define RT_HASH_LOCK_SZ 256 -#else -# if NR_CPUS >= 32 -# define RT_HASH_LOCK_SZ 4096 -# elif NR_CPUS >= 16 -# define RT_HASH_LOCK_SZ 2048 -# elif NR_CPUS >= 8 -# define RT_HASH_LOCK_SZ 1024 -# elif NR_CPUS >= 4 -# define RT_HASH_LOCK_SZ 512 -# else -# define RT_HASH_LOCK_SZ 256 -# endif -#endif - -static spinlock_t *rt_hash_locks; -# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)] - -static __init void rt_hash_lock_init(void) -{ - int i; - - rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, - GFP_KERNEL); - if (!rt_hash_locks) - panic("IP: failed to allocate rt_hash_locks\n"); - - for (i = 0; i < RT_HASH_LOCK_SZ; i++) - spin_lock_init(&rt_hash_locks[i]); -} -#else -# define rt_hash_lock_addr(slot) NULL - -static inline void rt_hash_lock_init(void) -{ -} -#endif - -static struct rt_hash_bucket *rt_hash_table __read_mostly; -static unsigned int rt_hash_mask __read_mostly; -static unsigned int rt_hash_log __read_mostly; - static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) -static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, - int genid) -{ - return jhash_3words((__force u32)daddr, (__force u32)saddr, - idx, genid) - & rt_hash_mask; -} - static inline int rt_genid(struct net *net) { return atomic_read(&net->ipv4.rt_genid); } #ifdef CONFIG_PROC_FS -struct rt_cache_iter_state { - struct seq_net_private p; - int bucket; - int genid; -}; - -static struct rtable *rt_cache_get_first(struct seq_file *seq) -{ - struct rt_cache_iter_state *st = seq->private; - struct rtable *r = NULL; - - for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { - if (!rcu_access_pointer(rt_hash_table[st->bucket].chain)) - continue; - rcu_read_lock_bh(); - r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); - while (r) { - if (dev_net(r->dst.dev) == seq_file_net(seq) && - r->rt_genid == st->genid) - return r; - r = rcu_dereference_bh(r->dst.rt_next); - } - rcu_read_unlock_bh(); - } - return r; -} - -static struct rtable *__rt_cache_get_next(struct seq_file *seq, - struct rtable *r) -{ - struct rt_cache_iter_state *st = seq->private; - - r = rcu_dereference_bh(r->dst.rt_next); - while (!r) { - rcu_read_unlock_bh(); - do { - if (--st->bucket < 0) - return NULL; - } while (!rcu_access_pointer(rt_hash_table[st->bucket].chain)); - rcu_read_lock_bh(); - r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); - } - return r; -} - -static struct rtable *rt_cache_get_next(struct seq_file *seq, - struct rtable *r) -{ - struct rt_cache_iter_state *st = seq->private; - while ((r = __rt_cache_get_next(seq, r)) != NULL) { - if (dev_net(r->dst.dev) != seq_file_net(seq)) - continue; - if (r->rt_genid == st->genid) - break; - } - return r; -} - -static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) -{ - struct rtable *r = rt_cache_get_first(seq); - - if (r) - while (pos && (r = rt_cache_get_next(seq, r))) - --pos; - return pos ? NULL : r; -} - static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { - struct rt_cache_iter_state *st = seq->private; if (*pos) - return rt_cache_get_idx(seq, *pos - 1); - st->genid = rt_genid(seq_file_net(seq)); + return NULL; return SEQ_START_TOKEN; } static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct rtable *r; - - if (v == SEQ_START_TOKEN) - r = rt_cache_get_first(seq); - else - r = rt_cache_get_next(seq, v); ++*pos; - return r; + return NULL; } static void rt_cache_seq_stop(struct seq_file *seq, void *v) { - if (v && v != SEQ_START_TOKEN) - rcu_read_unlock_bh(); } static int rt_cache_seq_show(struct seq_file *seq, void *v) @@ -396,24 +236,6 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t" "HHUptod\tSpecDst"); - else { - struct rtable *r = v; - int len; - - seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" - "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", - r->dst.dev ? r->dst.dev->name : "*", - (__force u32)r->rt_dst, - (__force u32)r->rt_gateway, - r->rt_flags, atomic_read(&r->dst.__refcnt), - r->dst.__use, 0, (__force u32)r->rt_src, - dst_metric_advmss(&r->dst) + 40, - dst_metric(&r->dst, RTAX_WINDOW), 0, - r->rt_key_tos, - -1, 0, 0, &len); - - seq_printf(seq, "%*s\n", 127 - len, ""); - } return 0; } @@ -426,8 +248,7 @@ static const struct seq_operations rt_cache_seq_ops = { static int rt_cache_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &rt_cache_seq_ops, - sizeof(struct rt_cache_iter_state)); + return seq_open(file, &rt_cache_seq_ops); } static const struct file_operations rt_cache_seq_fops = { @@ -435,7 +256,7 @@ static const struct file_operations rt_cache_seq_fops = { .open = rt_cache_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_net, + .release = seq_release, }; @@ -625,262 +446,11 @@ static inline int ip_rt_proc_init(void) } #endif /* CONFIG_PROC_FS */ -static inline void rt_free(struct rtable *rt) -{ - call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); -} - -static inline void rt_drop(struct rtable *rt) -{ - ip_rt_put(rt); - call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); -} - -static inline int rt_fast_clean(struct rtable *rth) -{ - /* Kill broadcast/multicast entries very aggresively, if they - collide in hash table with more useful entries */ - return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && - rt_is_input_route(rth) && rth->dst.rt_next; -} - -static inline int rt_valuable(struct rtable *rth) -{ - return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - rth->dst.expires; -} - -static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) -{ - unsigned long age; - int ret = 0; - - if (atomic_read(&rth->dst.__refcnt)) - goto out; - - age = jiffies - rth->dst.lastuse; - if ((age <= tmo1 && !rt_fast_clean(rth)) || - (age <= tmo2 && rt_valuable(rth))) - goto out; - ret = 1; -out: return ret; -} - -/* Bits of score are: - * 31: very valuable - * 30: not quite useless - * 29..0: usage counter - */ -static inline u32 rt_score(struct rtable *rt) -{ - u32 score = jiffies - rt->dst.lastuse; - - score = ~score & ~(3<<30); - - if (rt_valuable(rt)) - score |= (1<<31); - - if (rt_is_output_route(rt) || - !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL))) - score |= (1<<30); - - return score; -} - -static inline bool rt_caching(const struct net *net) -{ - return net->ipv4.current_rt_cache_rebuild_count <= - net->ipv4.sysctl_rt_cache_rebuild_count; -} - -static inline bool compare_hash_inputs(const struct rtable *rt1, - const struct rtable *rt2) -{ - return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | - ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | - (rt1->rt_route_iif ^ rt2->rt_route_iif)) == 0); -} - -static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) -{ - return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | - ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | - (rt1->rt_mark ^ rt2->rt_mark) | - (rt1->rt_key_tos ^ rt2->rt_key_tos) | - (rt1->rt_route_iif ^ rt2->rt_route_iif) | - (rt1->rt_oif ^ rt2->rt_oif)) == 0; -} - -static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) -{ - return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev)); -} - static inline int rt_is_expired(struct rtable *rth) { return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } -/* - * Perform a full scan of hash table and free all entries. - * Can be called by a softirq or a process. - * In the later case, we want to be reschedule if necessary - */ -static void rt_do_flush(struct net *net, int process_context) -{ - unsigned int i; - struct rtable *rth, *next; - - for (i = 0; i <= rt_hash_mask; i++) { - struct rtable __rcu **pprev; - struct rtable *list; - - if (process_context && need_resched()) - cond_resched(); - rth = rcu_access_pointer(rt_hash_table[i].chain); - if (!rth) - continue; - - spin_lock_bh(rt_hash_lock_addr(i)); - - list = NULL; - pprev = &rt_hash_table[i].chain; - rth = rcu_dereference_protected(*pprev, - lockdep_is_held(rt_hash_lock_addr(i))); - - while (rth) { - next = rcu_dereference_protected(rth->dst.rt_next, - lockdep_is_held(rt_hash_lock_addr(i))); - - if (!net || - net_eq(dev_net(rth->dst.dev), net)) { - rcu_assign_pointer(*pprev, next); - rcu_assign_pointer(rth->dst.rt_next, list); - list = rth; - } else { - pprev = &rth->dst.rt_next; - } - rth = next; - } - - spin_unlock_bh(rt_hash_lock_addr(i)); - - for (; list; list = next) { - next = rcu_dereference_protected(list->dst.rt_next, 1); - rt_free(list); - } - } -} - -/* - * While freeing expired entries, we compute average chain length - * and standard deviation, using fixed-point arithmetic. - * This to have an estimation of rt_chain_length_max - * rt_chain_length_max = max(elasticity, AVG + 4*SD) - * We use 3 bits for frational part, and 29 (or 61) for magnitude. - */ - -#define FRACT_BITS 3 -#define ONE (1UL << FRACT_BITS) - -/* - * Given a hash chain and an item in this hash chain, - * find if a previous entry has the same hash_inputs - * (but differs on tos, mark or oif) - * Returns 0 if an alias is found. - * Returns ONE if rth has no alias before itself. - */ -static int has_noalias(const struct rtable *head, const struct rtable *rth) -{ - const struct rtable *aux = head; - - while (aux != rth) { - if (compare_hash_inputs(aux, rth)) - return 0; - aux = rcu_dereference_protected(aux->dst.rt_next, 1); - } - return ONE; -} - -static void rt_check_expire(void) -{ - static unsigned int rover; - unsigned int i = rover, goal; - struct rtable *rth; - struct rtable __rcu **rthp; - unsigned long samples = 0; - unsigned long sum = 0, sum2 = 0; - unsigned long delta; - u64 mult; - - delta = jiffies - expires_ljiffies; - expires_ljiffies = jiffies; - mult = ((u64)delta) << rt_hash_log; - if (ip_rt_gc_timeout > 1) - do_div(mult, ip_rt_gc_timeout); - goal = (unsigned int)mult; - if (goal > rt_hash_mask) - goal = rt_hash_mask + 1; - for (; goal > 0; goal--) { - unsigned long tmo = ip_rt_gc_timeout; - unsigned long length; - - i = (i + 1) & rt_hash_mask; - rthp = &rt_hash_table[i].chain; - - if (need_resched()) - cond_resched(); - - samples++; - - if (rcu_dereference_raw(*rthp) == NULL) - continue; - length = 0; - spin_lock_bh(rt_hash_lock_addr(i)); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { - prefetch(rth->dst.rt_next); - if (rt_is_expired(rth) || - rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { - *rthp = rth->dst.rt_next; - rt_free(rth); - continue; - } - - /* We only count entries on a chain with equal - * hash inputs once so that entries for - * different QOS levels, and other non-hash - * input attributes don't unfairly skew the - * length computation - */ - tmo >>= 1; - rthp = &rth->dst.rt_next; - length += has_noalias(rt_hash_table[i].chain, rth); - } - spin_unlock_bh(rt_hash_lock_addr(i)); - sum += length; - sum2 += length*length; - } - if (samples) { - unsigned long avg = sum / samples; - unsigned long sd = int_sqrt(sum2 / samples - avg*avg); - rt_chain_length_max = max_t(unsigned long, - ip_rt_gc_elasticity, - (avg + 4*sd) >> FRACT_BITS); - } - rover = i; -} - -/* - * rt_worker_func() is run in process context. - * we call rt_check_expire() to scan part of the hash table - */ -static void rt_worker_func(struct work_struct *work) -{ - rt_check_expire(); - schedule_delayed_work(&expires_work, ip_rt_gc_interval); -} - /* * Perturbation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() @@ -902,167 +472,6 @@ static void rt_cache_invalidate(struct net *net) void rt_cache_flush(struct net *net, int delay) { rt_cache_invalidate(net); - if (delay >= 0) - rt_do_flush(net, !in_softirq()); -} - -/* Flush previous cache invalidated entries from the cache */ -void rt_cache_flush_batch(struct net *net) -{ - rt_do_flush(net, !in_softirq()); -} - -static void rt_emergency_hash_rebuild(struct net *net) -{ - net_warn_ratelimited("Route hash chain too long!\n"); - rt_cache_invalidate(net); -} - -/* - Short description of GC goals. - - We want to build algorithm, which will keep routing cache - at some equilibrium point, when number of aged off entries - is kept approximately equal to newly generated ones. - - Current expiration strength is variable "expire". - We try to adjust it dynamically, so that if networking - is idle expires is large enough to keep enough of warm entries, - and when load increases it reduces to limit cache size. - */ - -static int rt_garbage_collect(struct dst_ops *ops) -{ - static unsigned long expire = RT_GC_TIMEOUT; - static unsigned long last_gc; - static int rover; - static int equilibrium; - struct rtable *rth; - struct rtable __rcu **rthp; - unsigned long now = jiffies; - int goal; - int entries = dst_entries_get_fast(&ipv4_dst_ops); - - /* - * Garbage collection is pretty expensive, - * do not make it too frequently. - */ - - RT_CACHE_STAT_INC(gc_total); - - if (now - last_gc < ip_rt_gc_min_interval && - entries < ip_rt_max_size) { - RT_CACHE_STAT_INC(gc_ignored); - goto out; - } - - entries = dst_entries_get_slow(&ipv4_dst_ops); - /* Calculate number of entries, which we want to expire now. */ - goal = entries - (ip_rt_gc_elasticity << rt_hash_log); - if (goal <= 0) { - if (equilibrium < ipv4_dst_ops.gc_thresh) - equilibrium = ipv4_dst_ops.gc_thresh; - goal = entries - equilibrium; - if (goal > 0) { - equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1); - goal = entries - equilibrium; - } - } else { - /* We are in dangerous area. Try to reduce cache really - * aggressively. - */ - goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1); - equilibrium = entries - goal; - } - - if (now - last_gc >= ip_rt_gc_min_interval) - last_gc = now; - - if (goal <= 0) { - equilibrium += goal; - goto work_done; - } - - do { - int i, k; - - for (i = rt_hash_mask, k = rover; i >= 0; i--) { - unsigned long tmo = expire; - - k = (k + 1) & rt_hash_mask; - rthp = &rt_hash_table[k].chain; - spin_lock_bh(rt_hash_lock_addr(k)); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) { - if (!rt_is_expired(rth) && - !rt_may_expire(rth, tmo, expire)) { - tmo >>= 1; - rthp = &rth->dst.rt_next; - continue; - } - *rthp = rth->dst.rt_next; - rt_free(rth); - goal--; - } - spin_unlock_bh(rt_hash_lock_addr(k)); - if (goal <= 0) - break; - } - rover = k; - - if (goal <= 0) - goto work_done; - - /* Goal is not achieved. We stop process if: - - - if expire reduced to zero. Otherwise, expire is halfed. - - if table is not full. - - if we are called from interrupt. - - jiffies check is just fallback/debug loop breaker. - We will not spin here for long time in any case. - */ - - RT_CACHE_STAT_INC(gc_goal_miss); - - if (expire == 0) - break; - - expire >>= 1; - - if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) - goto out; - } while (!in_softirq() && time_before_eq(jiffies, now)); - - if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) - goto out; - if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) - goto out; - net_warn_ratelimited("dst cache overflow\n"); - RT_CACHE_STAT_INC(gc_dst_overflow); - return 1; - -work_done: - expire += ip_rt_gc_min_interval; - if (expire > ip_rt_gc_timeout || - dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh || - dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh) - expire = ip_rt_gc_timeout; -out: return 0; -} - -/* - * Returns number of entries in a hash chain that have different hash_inputs - */ -static int slow_chain_length(const struct rtable *head) -{ - int length = 0; - const struct rtable *rth = head; - - while (rth) { - length += has_noalias(head, rth); - rth = rcu_dereference_protected(rth->dst.rt_next, 1); - } - return length >> FRACT_BITS; } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -1086,139 +495,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } -static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt, - struct sk_buff *skb, int ifindex) -{ - struct rtable *rth, *cand; - struct rtable __rcu **rthp, **candp; - unsigned long now; - u32 min_score; - int chain_length; - -restart: - chain_length = 0; - min_score = ~(u32)0; - cand = NULL; - candp = NULL; - now = jiffies; - - if (!rt_caching(dev_net(rt->dst.dev)) || (rt->dst.flags & DST_NOCACHE)) { - /* - * If we're not caching, just tell the caller we - * were successful and don't touch the route. The - * caller hold the sole reference to the cache entry, and - * it will be released when the caller is done with it. - * If we drop it here, the callers have no way to resolve routes - * when we're not caching. Instead, just point *rp at rt, so - * the caller gets a single use out of the route - * Note that we do rt_free on this new route entry, so that - * once its refcount hits zero, we are still able to reap it - * (Thanks Alexey) - * Note: To avoid expensive rcu stuff for this uncached dst, - * we set DST_NOCACHE so that dst_release() can free dst without - * waiting a grace period. - */ - - rt->dst.flags |= DST_NOCACHE; - goto skip_hashing; - } - - rthp = &rt_hash_table[hash].chain; - - spin_lock_bh(rt_hash_lock_addr(hash)); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) { - if (rt_is_expired(rth)) { - *rthp = rth->dst.rt_next; - rt_free(rth); - continue; - } - if (compare_keys(rth, rt) && compare_netns(rth, rt)) { - /* Put it first */ - *rthp = rth->dst.rt_next; - /* - * Since lookup is lockfree, the deletion - * must be visible to another weakly ordered CPU before - * the insertion at the start of the hash chain. - */ - rcu_assign_pointer(rth->dst.rt_next, - rt_hash_table[hash].chain); - /* - * Since lookup is lockfree, the update writes - * must be ordered for consistency on SMP. - */ - rcu_assign_pointer(rt_hash_table[hash].chain, rth); - - dst_use(&rth->dst, now); - spin_unlock_bh(rt_hash_lock_addr(hash)); - - rt_drop(rt); - if (skb) - skb_dst_set(skb, &rth->dst); - return rth; - } - - if (!atomic_read(&rth->dst.__refcnt)) { - u32 score = rt_score(rth); - - if (score <= min_score) { - cand = rth; - candp = rthp; - min_score = score; - } - } - - chain_length++; - - rthp = &rth->dst.rt_next; - } - - if (cand) { - /* ip_rt_gc_elasticity used to be average length of chain - * length, when exceeded gc becomes really aggressive. - * - * The second limit is less certain. At the moment it allows - * only 2 entries per bucket. We will see. - */ - if (chain_length > ip_rt_gc_elasticity) { - *candp = cand->dst.rt_next; - rt_free(cand); - } - } else { - if (chain_length > rt_chain_length_max && - slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { - struct net *net = dev_net(rt->dst.dev); - int num = ++net->ipv4.current_rt_cache_rebuild_count; - if (!rt_caching(net)) { - pr_warn("%s: %d rebuilds is over limit, route caching disabled\n", - rt->dst.dev->name, num); - } - rt_emergency_hash_rebuild(net); - spin_unlock_bh(rt_hash_lock_addr(hash)); - - hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, - ifindex, rt_genid(net)); - goto restart; - } - } - - rt->dst.rt_next = rt_hash_table[hash].chain; - - /* - * Since lookup is lockfree, we must make sure - * previous writes to rt are committed to memory - * before making rt visible to other CPUS. - */ - rcu_assign_pointer(rt_hash_table[hash].chain, rt); - - spin_unlock_bh(rt_hash_lock_addr(hash)); - -skip_hashing: - if (skb) - skb_dst_set(skb, &rt->dst); - return rt; -} - /* * Peer allocation may fail only in serious out-of-memory conditions. However * we still can generate some output. @@ -1255,26 +531,6 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) } EXPORT_SYMBOL(__ip_select_ident); -static void rt_del(unsigned int hash, struct rtable *rt) -{ - struct rtable __rcu **rthp; - struct rtable *aux; - - rthp = &rt_hash_table[hash].chain; - spin_lock_bh(rt_hash_lock_addr(hash)); - ip_rt_put(rt); - while ((aux = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) { - if (aux == rt || rt_is_expired(aux)) { - *rthp = aux->dst.rt_next; - rt_free(aux); - continue; - } - rthp = &aux->dst.rt_next; - } - spin_unlock_bh(rt_hash_lock_addr(hash)); -} - static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, @@ -1518,10 +774,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || rt->dst.expires) { - unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, - rt->rt_oif, - rt_genid(dev_net(dst->dev))); - rt_del(hash, rt); + ip_rt_put(rt); ret = NULL; } } @@ -1969,7 +1222,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, bool nopolicy, bool noxfrm) { return dst_alloc(&ipv4_dst_ops, dev, 1, -1, - DST_HOST | + DST_HOST | DST_NOCACHE | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); } @@ -1978,7 +1231,6 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { - unsigned int hash; struct rtable *rth; struct in_device *in_dev = __in_dev_get_rcu(dev); u32 itag = 0; @@ -2042,9 +1294,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif RT_CACHE_STAT_INC(in_slow_mc); - hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); - rth = rt_intern_hash(hash, rth, skb, dev->ifindex); - return IS_ERR(rth) ? PTR_ERR(rth) : 0; + skb_dst_set(skb, &rth->dst); + return 0; e_nobufs: return -ENOBUFS; @@ -2176,7 +1427,6 @@ static int ip_mkroute_input(struct sk_buff *skb, { struct rtable *rth = NULL; int err; - unsigned int hash; #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) @@ -2188,12 +1438,7 @@ static int ip_mkroute_input(struct sk_buff *skb, if (err) return err; - /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl4->flowi4_iif, - rt_genid(dev_net(rth->dst.dev))); - rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); - if (IS_ERR(rth)) - return PTR_ERR(rth); + skb_dst_set(skb, &rth->dst); return 0; } @@ -2217,7 +1462,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, unsigned int flags = 0; u32 itag = 0; struct rtable *rth; - unsigned int hash; int err = -EINVAL; struct net *net = dev_net(dev); @@ -2339,11 +1583,8 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); - rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); + skb_dst_set(skb, &rth->dst); err = 0; - if (IS_ERR(rth)) - err = PTR_ERR(rth); goto out; no_route: @@ -2382,46 +1623,10 @@ martian_source_keep_err: int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, bool noref) { - struct rtable *rth; - unsigned int hash; - int iif = dev->ifindex; - struct net *net; int res; - net = dev_net(dev); - rcu_read_lock(); - if (!rt_caching(net)) - goto skip_cache; - - tos &= IPTOS_RT_MASK; - hash = rt_hash(daddr, saddr, iif, rt_genid(net)); - - for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->dst.rt_next)) { - if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | - ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | - (rth->rt_route_iif ^ iif) | - (rth->rt_key_tos ^ tos)) == 0 && - rth->rt_mark == skb->mark && - net_eq(dev_net(rth->dst.dev), net) && - !rt_is_expired(rth)) { - if (noref) { - dst_use_noref(&rth->dst, jiffies); - skb_dst_set_noref(skb, &rth->dst); - } else { - dst_use(&rth->dst, jiffies); - skb_dst_set(skb, &rth->dst); - } - RT_CACHE_STAT_INC(in_hit); - rcu_read_unlock(); - return 0; - } - RT_CACHE_STAT_INC(in_hlist_search); - } - -skip_cache: /* Multicast recognition logic is moved from route cache to here. The problem was that too many Ethernet cards have broken/missing hardware multicast filters :-( As result the host on multicasting @@ -2563,10 +1768,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, /* * Major route resolver routine. - * called with rcu_read_lock(); */ -static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) +struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) { struct net_device *dev_out = NULL; __u8 tos = RT_FL_TOS(fl4); @@ -2746,57 +1950,11 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) make_route: rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, tos, dev_out, flags); - if (!IS_ERR(rth)) { - unsigned int hash; - - hash = rt_hash(orig_daddr, orig_saddr, orig_oif, - rt_genid(dev_net(dev_out))); - rth = rt_intern_hash(hash, rth, NULL, orig_oif); - } out: rcu_read_unlock(); return rth; } - -struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) -{ - struct rtable *rth; - unsigned int hash; - - if (!rt_caching(net)) - goto slow_output; - - hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); - - rcu_read_lock_bh(); - for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; - rth = rcu_dereference_bh(rth->dst.rt_next)) { - if (rth->rt_key_dst == flp4->daddr && - rth->rt_key_src == flp4->saddr && - rt_is_output_route(rth) && - rth->rt_oif == flp4->flowi4_oif && - rth->rt_mark == flp4->flowi4_mark && - !((rth->rt_key_tos ^ flp4->flowi4_tos) & - (IPTOS_RT_MASK | RTO_ONLINK)) && - net_eq(dev_net(rth->dst.dev), net) && - !rt_is_expired(rth)) { - dst_use(&rth->dst, jiffies); - RT_CACHE_STAT_INC(out_hit); - rcu_read_unlock_bh(); - if (!flp4->saddr) - flp4->saddr = rth->rt_src; - if (!flp4->daddr) - flp4->daddr = rth->rt_dst; - return rth; - } - RT_CACHE_STAT_INC(out_hlist_search); - } - rcu_read_unlock_bh(); - -slow_output: - return ip_route_output_slow(net, flp4); -} EXPORT_SYMBOL_GPL(__ip_route_output_key); static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) @@ -3106,43 +2264,6 @@ errout_free: int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct rtable *rt; - int h, s_h; - int idx, s_idx; - struct net *net; - - net = sock_net(skb->sk); - - s_h = cb->args[0]; - if (s_h < 0) - s_h = 0; - s_idx = idx = cb->args[1]; - for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) { - if (!rt_hash_table[h].chain) - continue; - rcu_read_lock_bh(); - for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference_bh(rt->dst.rt_next), idx++) { - if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx) - continue; - if (rt_is_expired(rt)) - continue; - skb_dst_set_noref(skb, &rt->dst); - if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE, - 1, NLM_F_MULTI) <= 0) { - skb_dst_drop(skb); - rcu_read_unlock_bh(); - goto done; - } - skb_dst_drop(skb); - } - rcu_read_unlock_bh(); - } - -done: - cb->args[0] = h; - cb->args[1] = idx; return skb->len; } @@ -3376,22 +2497,6 @@ static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; #endif /* CONFIG_IP_ROUTE_CLASSID */ -static __initdata unsigned long rhash_entries; -static int __init set_rhash_entries(char *str) -{ - ssize_t ret; - - if (!str) - return 0; - - ret = kstrtoul(str, 0, &rhash_entries); - if (ret) - return 0; - - return 1; -} -__setup("rhash_entries=", set_rhash_entries); - int __init ip_rt_init(void) { int rc = 0; @@ -3414,31 +2519,12 @@ int __init ip_rt_init(void) if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); - rt_hash_table = (struct rt_hash_bucket *) - alloc_large_system_hash("IP route cache", - sizeof(struct rt_hash_bucket), - rhash_entries, - (totalram_pages >= 128 * 1024) ? - 15 : 17, - 0, - &rt_hash_log, - &rt_hash_mask, - 0, - rhash_entries ? 0 : 512 * 1024); - memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); - rt_hash_lock_init(); - - ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); - ip_rt_max_size = (rt_hash_mask + 1) * 16; + ipv4_dst_ops.gc_thresh = ~0; + ip_rt_max_size = INT_MAX; devinet_init(); ip_fib_init(); - INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); - expires_ljiffies = jiffies; - schedule_delayed_work(&expires_work, - net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - if (ip_rt_proc_init()) pr_err("Unable to create route proc files\n"); #ifdef CONFIG_XFRM -- cgit v1.2.3 From 38a424e4657462fe9f8b76f01a0e879abde99ab4 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 1 Jul 2012 02:02:53 +0000 Subject: ipv4: Kill ip_route_input_noref(). The "noref" argument to ip_route_input_common() is now always ignored because we do not cache routes, and in that case we must always grab a reference to the resulting 'dst'. Signed-off-by: David S. Miller --- include/net/route.h | 16 ++-------------- net/ipv4/arp.c | 2 +- net/ipv4/ip_fragment.c | 4 ++-- net/ipv4/ip_input.c | 4 ++-- net/ipv4/route.c | 6 +++--- net/ipv4/xfrm4_input.c | 4 ++-- 6 files changed, 12 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 5dcfeb621e06..5c86c4773b2b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -160,20 +160,8 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 return ip_route_output_key(net, fl4); } -extern int ip_route_input_common(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin, bool noref); - -static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin) -{ - return ip_route_input_common(skb, dst, src, tos, devin, false); -} - -static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin) -{ - return ip_route_input_common(skb, dst, src, tos, devin, true); -} +extern int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin); extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u32 mark, u8 protocol, int flow_flags); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 2e560f0c757d..c38293f38161 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -828,7 +828,7 @@ static int arp_process(struct sk_buff *skb) } if (arp->ar_op == htons(ARPOP_REQUEST) && - ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { + ip_route_input(skb, tip, sip, 0, dev) == 0) { rt = skb_rtable(skb); addr_type = rt->rt_type; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8d07c973409c..7ad88e5e7110 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -258,8 +258,8 @@ static void ip_expire(unsigned long arg) /* skb dst is stale, drop it, and perform route lookup again */ skb_dst_drop(head); iph = ip_hdr(head); - err = ip_route_input_noref(head, iph->daddr, iph->saddr, - iph->tos, head->dev); + err = ip_route_input(head, iph->daddr, iph->saddr, + iph->tos, head->dev); if (err) goto out_rcu_unlock; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index b27d4440f523..4ebc6feee250 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -336,8 +336,8 @@ static int ip_rcv_finish(struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (!skb_dst(skb)) { - int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); + int err = ip_route_input(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb->dev), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6d6146d31f22..55eb4634ed60 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1620,8 +1620,8 @@ martian_source_keep_err: goto out; } -int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, bool noref) +int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev) { int res; @@ -1664,7 +1664,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, rcu_read_unlock(); return res; } -EXPORT_SYMBOL(ip_route_input_common); +EXPORT_SYMBOL(ip_route_input); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 06814b6216dc..58d23a572509 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) if (skb_dst(skb) == NULL) { const struct iphdr *iph = ip_hdr(skb); - if (ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) + if (ip_route_input(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) goto drop; } return dst_input(skb); -- cgit v1.2.3 From 1a00fee4ffb22312a0ac40045ecd6f222b55eb3d Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 1 Jul 2012 02:02:56 +0000 Subject: ipv4: Remove rt_key_{src,dst,tos} from struct rtable. They are always used in contexts where they can be reconstituted, or where the finally resolved rt->rt_{src,dst} is semantically equivalent. Signed-off-by: David S. Miller --- include/net/route.h | 5 ----- net/ipv4/route.c | 39 +++++++++------------------------------ net/ipv4/xfrm4_policy.c | 3 --- 3 files changed, 9 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 5c86c4773b2b..935fa59630b0 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -44,14 +44,9 @@ struct fib_info; struct rtable { struct dst_entry dst; - /* Lookup key. */ - __be32 rt_key_dst; - __be32 rt_key_src; - int rt_genid; unsigned int rt_flags; __u16 rt_type; - __u8 rt_key_tos; __be32 rt_dst; /* Path destination */ __be32 rt_src; /* Path source */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 55eb4634ed60..c89d690acdfa 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1268,12 +1268,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif rth->dst.output = ip_rt_bug; - rth->rt_key_dst = daddr; - rth->rt_key_src = saddr; rth->rt_genid = rt_genid(dev_net(dev)); rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; - rth->rt_key_tos = tos; rth->rt_dst = daddr; rth->rt_src = saddr; rth->rt_route_iif = dev->ifindex; @@ -1392,12 +1389,9 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - rth->rt_key_dst = daddr; - rth->rt_key_src = saddr; rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); rth->rt_flags = flags; rth->rt_type = res->type; - rth->rt_key_tos = tos; rth->rt_dst = daddr; rth->rt_src = saddr; rth->rt_route_iif = in_dev->dev->ifindex; @@ -1563,12 +1557,9 @@ local_input: rth->dst.tclassid = itag; #endif - rth->rt_key_dst = daddr; - rth->rt_key_src = saddr; rth->rt_genid = rt_genid(net); rth->rt_flags = flags|RTCF_LOCAL; rth->rt_type = res.type; - rth->rt_key_tos = tos; rth->rt_dst = daddr; rth->rt_src = saddr; rth->rt_route_iif = dev->ifindex; @@ -1668,9 +1659,7 @@ EXPORT_SYMBOL(ip_route_input); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, - const struct flowi4 *fl4, - __be32 orig_daddr, __be32 orig_saddr, - int orig_oif, __u8 orig_rtos, + const struct flowi4 *fl4, int orig_oif, struct net_device *dev_out, unsigned int flags) { @@ -1721,12 +1710,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->dst.output = ip_output; - rth->rt_key_dst = orig_daddr; - rth->rt_key_src = orig_saddr; rth->rt_genid = rt_genid(dev_net(dev_out)); rth->rt_flags = flags; rth->rt_type = type; - rth->rt_key_tos = orig_rtos; rth->rt_dst = fl4->daddr; rth->rt_src = fl4->saddr; rth->rt_route_iif = 0; @@ -1777,16 +1763,12 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) unsigned int flags = 0; struct fib_result res; struct rtable *rth; - __be32 orig_daddr; - __be32 orig_saddr; int orig_oif; res.tclassid = 0; res.fi = NULL; res.table = NULL; - orig_daddr = fl4->daddr; - orig_saddr = fl4->saddr; orig_oif = fl4->flowi4_oif; fl4->flowi4_iif = net->loopback_dev->ifindex; @@ -1948,8 +1930,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) make_route: - rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, - tos, dev_out, flags); + rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags); out: rcu_read_unlock(); @@ -2014,9 +1995,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or if (new->dev) dev_hold(new->dev); - rt->rt_key_dst = ort->rt_key_dst; - rt->rt_key_src = ort->rt_key_src; - rt->rt_key_tos = ort->rt_key_tos; rt->rt_route_iif = ort->rt_route_iif; rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; @@ -2058,7 +2036,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); -static int rt_fill_info(struct net *net, +static int rt_fill_info(struct net *net, __be32 src, u8 tos, struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait, unsigned int flags) { @@ -2077,7 +2055,7 @@ static int rt_fill_info(struct net *net, r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; - r->rtm_tos = rt->rt_key_tos; + r->rtm_tos = tos; r->rtm_table = RT_TABLE_MAIN; if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN)) goto nla_put_failure; @@ -2090,9 +2068,9 @@ static int rt_fill_info(struct net *net, if (nla_put_be32(skb, RTA_DST, rt->rt_dst)) goto nla_put_failure; - if (rt->rt_key_src) { + if (src) { r->rtm_src_len = 32; - if (nla_put_be32(skb, RTA_SRC, rt->rt_key_src)) + if (nla_put_be32(skb, RTA_SRC, src)) goto nla_put_failure; } if (rt->dst.dev && @@ -2104,7 +2082,7 @@ static int rt_fill_info(struct net *net, goto nla_put_failure; #endif if (!rt_is_input_route(rt) && - rt->rt_src != rt->rt_key_src) { + rt->rt_src != src) { if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) goto nla_put_failure; } @@ -2248,7 +2226,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = rt_fill_info(net, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + err = rt_fill_info(net, src, rtm->rtm_tos, skb, + NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err <= 0) goto errout_free; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index fcf7678bc009..2a8d5cfc340f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -79,9 +79,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, struct rtable *rt = (struct rtable *)xdst->route; const struct flowi4 *fl4 = &fl->u.ip4; - xdst->u.rt.rt_key_dst = fl4->daddr; - xdst->u.rt.rt_key_src = fl4->saddr; - xdst->u.rt.rt_key_tos = fl4->flowi4_tos; xdst->u.rt.rt_route_iif = fl4->flowi4_iif; xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.rt.rt_oif = fl4->flowi4_oif; -- cgit v1.2.3 From d6c0a4f609847d6e65658913f9ccbcb1c137cff3 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 1 Jul 2012 02:02:59 +0000 Subject: ipv4: Kill 'rt_src' from 'struct rtable' Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/route.c | 34 +++++++++++++++------------------- net/ipv4/xfrm4_policy.c | 1 - 3 files changed, 15 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 935fa59630b0..85d1093e42de 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -49,7 +49,6 @@ struct rtable { __u16 rt_type; __be32 rt_dst; /* Path destination */ - __be32 rt_src; /* Path source */ int rt_route_iif; int rt_iif; int rt_oif; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c89d690acdfa..fc1199dc23e7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1272,7 +1272,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; rth->rt_dst = daddr; - rth->rt_src = saddr; rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; @@ -1393,7 +1392,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_flags = flags; rth->rt_type = res->type; rth->rt_dst = daddr; - rth->rt_src = saddr; rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; @@ -1561,7 +1559,6 @@ local_input: rth->rt_flags = flags|RTCF_LOCAL; rth->rt_type = res.type; rth->rt_dst = daddr; - rth->rt_src = saddr; rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; @@ -1714,7 +1711,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_flags = flags; rth->rt_type = type; rth->rt_dst = fl4->daddr; - rth->rt_src = fl4->saddr; rth->rt_route_iif = 0; rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_oif = orig_oif; @@ -2005,7 +2001,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; - rt->rt_src = ort->rt_src; rt->rt_gateway = ort->rt_gateway; rt->fi = ort->fi; if (rt->fi) @@ -2036,7 +2031,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); -static int rt_fill_info(struct net *net, __be32 src, u8 tos, +static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4, struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait, unsigned int flags) { @@ -2055,7 +2050,7 @@ static int rt_fill_info(struct net *net, __be32 src, u8 tos, r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; - r->rtm_tos = tos; + r->rtm_tos = fl4->flowi4_tos; r->rtm_table = RT_TABLE_MAIN; if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN)) goto nla_put_failure; @@ -2082,11 +2077,11 @@ static int rt_fill_info(struct net *net, __be32 src, u8 tos, goto nla_put_failure; #endif if (!rt_is_input_route(rt) && - rt->rt_src != src) { - if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) + fl4->saddr != src) { + if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } - if (rt->rt_dst != rt->rt_gateway && + if (fl4->daddr != rt->rt_gateway && nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; @@ -2116,7 +2111,7 @@ static int rt_fill_info(struct net *net, __be32 src, u8 tos, if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, - rt->rt_src, rt->rt_dst, + fl4->saddr, fl4->daddr, r, nowait); if (err <= 0) { if (!nowait) { @@ -2151,6 +2146,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; struct rtable *rt = NULL; + struct flowi4 fl4; __be32 dst = 0; __be32 src = 0; u32 iif; @@ -2185,6 +2181,13 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = dst; + fl4.saddr = src; + fl4.flowi4_tos = rtm->rtm_tos; + fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; + fl4.flowi4_mark = mark; + if (iif) { struct net_device *dev; @@ -2205,13 +2208,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (err == 0 && rt->dst.error) err = -rt->dst.error; } else { - struct flowi4 fl4 = { - .daddr = dst, - .saddr = src, - .flowi4_tos = rtm->rtm_tos, - .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, - .flowi4_mark = mark, - }; rt = ip_route_output_key(net, &fl4); err = 0; @@ -2226,7 +2222,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = rt_fill_info(net, src, rtm->rtm_tos, skb, + err = rt_fill_info(net, src, &fl4, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err <= 0) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 2a8d5cfc340f..00d49e415113 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -92,7 +92,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; - xdst->u.rt.rt_src = rt->rt_src; xdst->u.rt.rt_dst = rt->rt_dst; xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; -- cgit v1.2.3 From b48698895de86e07b685f8e4b8db0f1cd5a97e9a Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 1 Jul 2012 02:03:01 +0000 Subject: ipv4: Remove 'rt_mark' from 'struct rtable' Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/ipmr.c | 2 +- net/ipv4/route.c | 9 ++------- net/ipv4/xfrm4_policy.c | 1 - 4 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 85d1093e42de..757fe40b6cea 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -52,7 +52,6 @@ struct rtable { int rt_route_iif; int rt_iif; int rt_oif; - __u32 rt_mark; /* Info on neighbour */ __be32 rt_gateway; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5716c6b808d6..eee3bf6676fe 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1797,7 +1797,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) .flowi4_tos = RT_TOS(iph->tos), .flowi4_oif = rt->rt_oif, .flowi4_iif = rt->rt_iif, - .flowi4_mark = rt->rt_mark, + .flowi4_mark = skb->mark, }; struct mr_table *mrt; int err; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fc1199dc23e7..264617c98c25 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1275,7 +1275,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; - rth->rt_mark = skb->mark; rth->rt_pmtu = 0; rth->rt_gateway = daddr; rth->fi = NULL; @@ -1395,7 +1394,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; - rth->rt_mark = skb->mark; rth->rt_pmtu = 0; rth->rt_gateway = daddr; rth->fi = NULL; @@ -1562,7 +1560,6 @@ local_input: rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; - rth->rt_mark = skb->mark; rth->rt_pmtu = 0; rth->rt_gateway = daddr; rth->fi = NULL; @@ -1714,7 +1711,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_route_iif = 0; rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_oif = orig_oif; - rth->rt_mark = fl4->flowi4_mark; rth->rt_pmtu = 0; rth->rt_gateway = fl4->daddr; rth->fi = NULL; @@ -1994,7 +1990,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_route_iif = ort->rt_route_iif; rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; - rt->rt_mark = ort->rt_mark; rt->rt_pmtu = ort->rt_pmtu; rt->rt_genid = rt_genid(net); @@ -2091,8 +2086,8 @@ static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4, if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; - if (rt->rt_mark && - nla_put_be32(skb, RTA_MARK, rt->rt_mark)) + if (fl4->flowi4_mark && + nla_put_be32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; error = rt->dst.error; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 00d49e415113..f73ba8210bd3 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -82,7 +82,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_route_iif = fl4->flowi4_iif; xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.rt.rt_oif = fl4->flowi4_oif; - xdst->u.rt.rt_mark = fl4->flowi4_mark; xdst->u.dst.dev = dev; dev_hold(dev); -- cgit v1.2.3 From f1ce3062c53809d862d8a04e7a0566c3cc4e0bda Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 10:10:17 -0700 Subject: ipv4: Remove 'rt_dst' from 'struct rtable' Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/route.c | 45 +++++++++------------------------------------ net/ipv4/xfrm4_policy.c | 1 - 3 files changed, 9 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 757fe40b6cea..6d111bceb160 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -48,7 +48,6 @@ struct rtable { unsigned int rt_flags; __u16 rt_type; - __be32 rt_dst; /* Path destination */ int rt_route_iif; int rt_iif; int rt_oif; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 264617c98c25..85d103fee88e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -850,7 +850,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) peer->rate_tokens == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, rt->rt_iif, - &rt->rt_dst, &rt->rt_gateway); + &ip_hdr(skb)->daddr, &rt->rt_gateway); #endif } out_put_peer: @@ -1132,8 +1132,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { - - if (rt->rt_gateway != rt->rt_dst && mtu > 576) + if (rt->rt_gateway != 0 && mtu > 576) mtu = 576; } @@ -1271,7 +1270,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_genid = rt_genid(dev_net(dev)); rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; - rth->rt_dst = daddr; rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; @@ -1390,7 +1388,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); rth->rt_flags = flags; rth->rt_type = res->type; - rth->rt_dst = daddr; rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; @@ -1556,7 +1553,6 @@ local_input: rth->rt_genid = rt_genid(net); rth->rt_flags = flags|RTCF_LOCAL; rth->rt_type = res.type; - rth->rt_dst = daddr; rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; @@ -1707,7 +1703,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_genid = rt_genid(dev_net(dev_out)); rth->rt_flags = flags; rth->rt_type = type; - rth->rt_dst = fl4->daddr; rth->rt_route_iif = 0; rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_oif = orig_oif; @@ -1995,7 +1990,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; - rt->rt_dst = ort->rt_dst; rt->rt_gateway = ort->rt_gateway; rt->fi = ort->fi; if (rt->fi) @@ -2026,9 +2020,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); -static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4, - struct sk_buff *skb, u32 pid, u32 seq, int event, - int nowait, unsigned int flags) +static int rt_fill_info(struct net *net, __be32 dst, __be32 src, + struct flowi4 *fl4, struct sk_buff *skb, u32 pid, + u32 seq, int event, int nowait, unsigned int flags) { struct rtable *rt = skb_rtable(skb); struct rtmsg *r; @@ -2056,7 +2050,7 @@ static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4, if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; - if (nla_put_be32(skb, RTA_DST, rt->rt_dst)) + if (nla_put_be32(skb, RTA_DST, dst)) goto nla_put_failure; if (src) { r->rtm_src_len = 32; @@ -2100,29 +2094,8 @@ static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4, } if (rt_is_input_route(rt)) { -#ifdef CONFIG_IP_MROUTE - __be32 dst = rt->rt_dst; - - if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && - IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { - int err = ipmr_get_route(net, skb, - fl4->saddr, fl4->daddr, - r, nowait); - if (err <= 0) { - if (!nowait) { - if (err == 0) - return 0; - goto nla_put_failure; - } else { - if (err == -EMSGSIZE) - goto nla_put_failure; - error = err; - } - } - } else -#endif - if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) - goto nla_put_failure; + if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) + goto nla_put_failure; } if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) @@ -2217,7 +2190,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = rt_fill_info(net, src, &fl4, skb, + err = rt_fill_info(net, dst, src, &fl4, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err <= 0) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index f73ba8210bd3..6074b694f118 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,7 +91,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; - xdst->u.rt.rt_dst = rt->rt_dst; xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; -- cgit v1.2.3 From f8126f1d5136be1ca1a3536d43ad7a710b5620f8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 Jul 2012 05:03:45 -0700 Subject: ipv4: Adjust semantics of rt->rt_gateway. In order to allow prefixed routes, we have to adjust how rt_gateway is set and interpreted. The new interpretation is: 1) rt_gateway == 0, destination is on-link, nexthop is iph->daddr 2) rt_gateway != 0, destination requires a nexthop gateway Abstract the fetching of the proper nexthop value using a new inline helper, rt_nexthop(), as suggested by Joe Perches. Signed-off-by: David S. Miller Tested-by: Vijay Subramanian --- include/net/route.h | 7 +++++++ net/ipv4/arp.c | 3 +-- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 5 +++-- net/ipv4/route.c | 17 +++++++++-------- 8 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 6d111bceb160..3c1eeab9749b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -70,6 +70,13 @@ static inline bool rt_is_output_route(const struct rtable *rt) return rt->rt_route_iif == 0; } +static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr) +{ + if (rt->rt_gateway) + return rt->rt_gateway; + return daddr; +} + struct ip_rt_acct { __u32 o_bytes; __u32 o_packets; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c38293f38161..a0124eb7dbea 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -475,8 +475,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) return 1; } - paddr = skb_rtable(skb)->rt_gateway; - + paddr = rt_nexthop(skb_rtable(skb), ip_hdr(skb)->daddr); if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev)) return 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c7a4de05ca04..0a290d719bc7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -389,7 +389,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_gateway) goto route_err; return &rt->dst; @@ -422,7 +422,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_gateway) goto route_err; return &rt->dst; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 42c44b1403c9..b062a98574f2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -766,7 +766,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb->protocol == htons(ETH_P_IP)) { rt = skb_rtable(skb); - dst = rt->rt_gateway; + dst = rt_nexthop(rt, old_iph->daddr); } #if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c528f841ca4b..4494015f7e32 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -371,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) skb_dst_set_noref(skb, &rt->dst); packet_routed: - if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) + if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway) goto no_route; /* OK, we know where to send it, allocate and build IP header. */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2c2c35bace76..99af1f0cc658 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -487,7 +487,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_fifo_errors++; goto tx_error; } - dst = rt->rt_gateway; + dst = rt_nexthop(rt, old_iph->daddr); } rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 2f210c79dc87..cbb6a1a6f6f7 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -52,7 +52,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) struct nf_nat_ipv4_range newrange; const struct nf_nat_ipv4_multi_range_compat *mr; const struct rtable *rt; - __be32 newsrc; + __be32 newsrc, nh; NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); @@ -70,7 +70,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) mr = par->targinfo; rt = skb_rtable(skb); - newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); + nh = rt_nexthop(rt, ip_hdr(skb)->daddr); + newsrc = inet_select_addr(par->out, nh, RT_SCOPE_UNIVERSE); if (!newsrc) { pr_info("%s ate my IP address\n", par->out->name); return NF_DROP; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 85d103fee88e..d1d579638092 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1085,8 +1085,9 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); else - src = inet_select_addr(rt->dst.dev, rt->rt_gateway, - RT_SCOPE_UNIVERSE); + src = inet_select_addr(rt->dst.dev, + rt_nexthop(rt, iph->daddr), + RT_SCOPE_UNIVERSE); rcu_read_unlock(); } memcpy(addr, &src, 4); @@ -1132,7 +1133,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { - if (rt->rt_gateway != 0 && mtu > 576) + if (rt->rt_gateway && mtu > 576) mtu = 576; } @@ -1274,7 +1275,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_pmtu = 0; - rth->rt_gateway = daddr; + rth->rt_gateway = 0; rth->fi = NULL; if (our) { rth->dst.input= ip_local_deliver; @@ -1392,7 +1393,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; rth->rt_pmtu = 0; - rth->rt_gateway = daddr; + rth->rt_gateway = 0; rth->fi = NULL; rth->dst.input = ip_forward; @@ -1557,7 +1558,7 @@ local_input: rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_pmtu = 0; - rth->rt_gateway = daddr; + rth->rt_gateway = 0; rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -1707,7 +1708,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_oif = orig_oif; rth->rt_pmtu = 0; - rth->rt_gateway = fl4->daddr; + rth->rt_gateway = 0; rth->fi = NULL; RT_CACHE_STAT_INC(out_slow_tot); @@ -2070,7 +2071,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } - if (fl4->daddr != rt->rt_gateway && + if (rt->rt_gateway && nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; -- cgit v1.2.3 From f5b0a8743601a4477419171f5046bd07d1c080a0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 19 Jul 2012 12:31:33 -0700 Subject: net: Document dst->obsolete better. Add a big comment explaining how the field works, and use defines instead of magic constants for the values assigned to it. Suggested by Joe Perches. Signed-off-by: David S. Miller --- include/net/dst.h | 14 +++++++++++++- net/core/dst.c | 4 ++-- net/decnet/dn_route.c | 4 ++-- net/ipv4/route.c | 5 +++-- net/ipv6/route.c | 4 ++-- net/sctp/transport.c | 2 +- net/xfrm/xfrm_policy.c | 23 ++++++++++++----------- 7 files changed, 35 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 51610468c63d..0df661a0c295 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -65,7 +65,19 @@ struct dst_entry { unsigned short pending_confirm; short error; + + /* A non-zero value of dst->obsolete forces by-hand validation + * of the route entry. Positive values are set by the generic + * dst layer to indicate that the entry has been forcefully + * destroyed. + * + * Negative values are used by the implementation layer code to + * force invocation of the dst_ops->check() method. + */ short obsolete; +#define DST_OBSOLETE_NONE 0 +#define DST_OBSOLETE_DEAD 2 +#define DST_OBSOLETE_FORCE_CHK -1 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ #ifdef CONFIG_IP_ROUTE_CLASSID @@ -359,7 +371,7 @@ extern struct dst_entry *dst_destroy(struct dst_entry *dst); static inline void dst_free(struct dst_entry *dst) { - if (dst->obsolete > 1) + if (dst->obsolete > 0) return; if (!atomic_read(&dst->__refcnt)) { dst = dst_destroy(dst); diff --git a/net/core/dst.c b/net/core/dst.c index 07bacff84aa4..069d51d29414 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -94,7 +94,7 @@ loop: * But we do not have state "obsoleted, but * referenced by parent", so it is right. */ - if (dst->obsolete > 1) + if (dst->obsolete > 0) continue; ___dst_free(dst); @@ -202,7 +202,7 @@ static void ___dst_free(struct dst_entry *dst) */ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) dst->input = dst->output = dst_discard; - dst->obsolete = 2; + dst->obsolete = DST_OBSOLETE_DEAD; } void __dst_free(struct dst_entry *dst) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 47de90d8fe94..23cc11dd4e40 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1176,7 +1176,7 @@ make_route: if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - rt = dst_alloc(&dn_dst_ops, dev_out, 1, 0, DST_HOST); + rt = dst_alloc(&dn_dst_ops, dev_out, 1, DST_OBSOLETE_NONE, DST_HOST); if (rt == NULL) goto e_nobufs; @@ -1444,7 +1444,7 @@ static int dn_route_input_slow(struct sk_buff *skb) } make_route: - rt = dst_alloc(&dn_dst_ops, out_dev, 0, 0, DST_HOST); + rt = dst_alloc(&dn_dst_ops, out_dev, 0, DST_OBSOLETE_NONE, DST_HOST); if (rt == NULL) goto e_nobufs; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d1d579638092..50d2498c9284 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1221,7 +1221,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, static struct rtable *rt_dst_alloc(struct net_device *dev, bool nopolicy, bool noxfrm) { - return dst_alloc(&ipv4_dst_ops, dev, 1, -1, + return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, DST_HOST | DST_NOCACHE | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); @@ -1969,9 +1969,10 @@ static struct dst_ops ipv4_dst_blackhole_ops = { struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0); struct rtable *ort = (struct rtable *) dst_orig; + struct rtable *rt; + rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0); if (rt) { struct dst_entry *new = &rt->dst; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 84f6564dd372..cf02cb97bbdd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -281,7 +281,7 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct fib6_table *table) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 0, 0, flags); + 0, DST_OBSOLETE_NONE, flags); if (rt) { struct dst_entry *dst = &rt->dst; @@ -985,7 +985,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; struct dst_entry *new = NULL; - rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0); + rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0); if (rt) { new = &rt->dst; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index a6b7ee9ce28a..ec3a12b9b802 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -216,7 +216,7 @@ void sctp_transport_set_owner(struct sctp_transport *transport, void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) { /* If we don't have a fresh route, look one up */ - if (!transport->dst || transport->dst->obsolete > 1) { + if (!transport->dst || transport->dst->obsolete) { dst_release(transport->dst); transport->af_specific->get_dst(transport, &transport->saddr, &transport->fl, sk); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 65bd1ca51517..c5a5165a5927 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1350,7 +1350,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) default: BUG(); } - xdst = dst_alloc(dst_ops, NULL, 0, 0, 0); + xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0); if (likely(xdst)) { struct dst_entry *dst = &xdst->u.dst; @@ -1477,7 +1477,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst1->xfrm = xfrm[i]; xdst->xfrm_genid = xfrm[i]->genid; - dst1->obsolete = -1; + dst1->obsolete = DST_OBSOLETE_FORCE_CHK; dst1->flags |= DST_HOST; dst1->lastuse = now; @@ -2219,12 +2219,13 @@ EXPORT_SYMBOL(__xfrm_route_forward); static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) { /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete - * to "-1" to force all XFRM destinations to get validated by - * dst_ops->check on every use. We do this because when a - * normal route referenced by an XFRM dst is obsoleted we do - * not go looking around for all parent referencing XFRM dsts - * so that we can invalidate them. It is just too much work. - * Instead we make the checks here on every use. For example: + * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to + * get validated by dst_ops->check on every use. We do this + * because when a normal route referenced by an XFRM dst is + * obsoleted we do not go looking around for all parent + * referencing XFRM dsts so that we can invalidate them. It + * is just too much work. Instead we make the checks here on + * every use. For example: * * XFRM dst A --> IPv4 dst X * @@ -2234,9 +2235,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) * stale_bundle() check. * * When a policy's bundle is pruned, we dst_free() the XFRM - * dst which causes it's ->obsolete field to be set to a - * positive non-zero integer. If an XFRM dst has been pruned - * like this, we want to force a new route lookup. + * dst which causes it's ->obsolete field to be set to + * DST_OBSOLETE_DEAD. If an XFRM dst has been pruned like + * this, we want to force a new route lookup. */ if (dst->obsolete < 0 && !stale_bundle(dst)) return dst; -- cgit v1.2.3 From ceb3320610d6f15ff20dd4c042b36473d77de76f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 11:31:28 -0700 Subject: ipv4: Kill routes during PMTU/redirect updates. Mark them obsolete so there will be a re-lookup to fetch the FIB nexthop exception info. Signed-off-by: David S. Miller --- include/net/dst.h | 1 + net/ipv4/route.c | 41 +++++++++++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 0df661a0c295..baf597890064 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -78,6 +78,7 @@ struct dst_entry { #define DST_OBSOLETE_NONE 0 #define DST_OBSOLETE_DEAD 2 #define DST_OBSOLETE_FORCE_CHK -1 +#define DST_OBSOLETE_KILL -2 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ #ifdef CONFIG_IP_ROUTE_CLASSID diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 50d2498c9284..d52f7699c2fa 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -673,7 +673,8 @@ out_unlock: return; } -static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4) +static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4, + bool kill_route) { __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; @@ -728,8 +729,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow update_or_create_fnhe(nh, fl4->daddr, new_gw, 0, 0); } - rt->rt_gateway = new_gw; - rt->rt_flags |= RTCF_REDIRECTED; + if (kill_route) + rt->dst.obsolete = DST_OBSOLETE_KILL; call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } neigh_release(n); @@ -760,7 +761,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; ip_rt_build_flow_key(&fl4, sk, skb); - __ip_do_redirect(rt, skb, &fl4); + __ip_do_redirect(rt, skb, &fl4, true); } static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) @@ -919,7 +920,7 @@ out: kfree_skb(skb); return 0; } -static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) +static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct fib_result res; @@ -932,8 +933,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) update_or_create_fnhe(nh, fl4->daddr, 0, mtu, jiffies + ip_rt_mtu_expires); } - rt->rt_pmtu = mtu; - dst_set_expires(&rt->dst, ip_rt_mtu_expires); + return mtu; } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, @@ -943,7 +943,14 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); - __ip_rt_update_pmtu(rt, &fl4, mtu); + mtu = __ip_rt_update_pmtu(rt, &fl4, mtu); + + if (!rt->rt_pmtu) { + dst->obsolete = DST_OBSOLETE_KILL; + } else { + rt->rt_pmtu = mtu; + dst_set_expires(&rt->dst, ip_rt_mtu_expires); + } } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, @@ -989,7 +996,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - __ip_do_redirect(rt, skb, &fl4); + __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); } } @@ -1004,7 +1011,7 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); rt = __ip_route_output_key(sock_net(sk), &fl4); if (!IS_ERR(rt)) { - __ip_do_redirect(rt, skb, &fl4); + __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); } } @@ -1014,7 +1021,15 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { struct rtable *rt = (struct rtable *) dst; - if (rt_is_expired(rt)) + /* All IPV4 dsts are created with ->obsolete set to the value + * DST_OBSOLETE_FORCE_CHK which forces validation calls down + * into this function always. + * + * When a PMTU/redirect information update invalidates a + * route, this is indicated by setting obsolete to + * DST_OBSOLETE_KILL. + */ + if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt)) return NULL; return dst; } @@ -1186,8 +1201,10 @@ restart: dst_set_expires(&rt->dst, diff); } } - if (gw) + if (gw) { + rt->rt_flags |= RTCF_REDIRECTED; rt->rt_gateway = gw; + } fnhe->fnhe_stamp = jiffies; break; } -- cgit v1.2.3 From f2bb4bedf35d5167a073dcdddf16543f351ef3ae Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 12:20:47 -0700 Subject: ipv4: Cache output routes in fib_info nexthops. If we have an output route that lacks nexthop exceptions, we can cache it in the FIB info nexthop. Such routes will have DST_HOST cleared because such routes refer to a family of destinations, rather than just one. The sequence of the handling of exceptions during route lookup is adjusted to make the logic work properly. Before we allocate the route, we lookup the exception. Then we know if we will cache this route or not, and therefore whether DST_HOST should be set on the allocated route. Then we use DST_HOST to key off whether we should store the resulting route, during rt_set_nexthop(), in the FIB nexthop cache. With help from Eric Dumazet. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 + net/ipv4/fib_semantics.c | 2 + net/ipv4/route.c | 140 ++++++++++++++++++++++++++++++++--------------- 3 files changed, 101 insertions(+), 43 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 2daf096dfc60..fb62c590360e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -46,6 +46,7 @@ struct fib_config { }; struct fib_info; +struct rtable; struct fib_nh_exception { struct fib_nh_exception __rcu *fnhe_next; @@ -80,6 +81,7 @@ struct fib_nh { __be32 nh_gw; __be32 nh_saddr; int nh_saddr_genid; + struct rtable *nh_rth_output; struct fnhe_hash_bucket *nh_exceptions; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2b57d768240d..83d0f42b619a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -171,6 +171,8 @@ static void free_fib_info_rcu(struct rcu_head *head) dev_put(nexthop_nh->nh_dev); if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); + if (nexthop_nh->nh_rth_output) + dst_release(&nexthop_nh->nh_rth_output->dst); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d52f7699c2fa..8a0260010ea1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1158,8 +1158,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu; } -static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, - struct fib_info *fi) +static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) { if (fi->fib_metrics != (u32 *) dst_default_metrics) { rt->fi = fi; @@ -1168,50 +1167,83 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, dst_init_metrics(&rt->dst, fi->fib_metrics, true); } -static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr) +static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) { struct fnhe_hash_bucket *hash = nh->nh_exceptions; struct fib_nh_exception *fnhe; u32 hval; + if (!hash) + return NULL; + hval = fnhe_hashfun(daddr); -restart: for (fnhe = rcu_dereference(hash[hval].chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { - __be32 fnhe_daddr, gw; - unsigned long expires; - unsigned int seq; - u32 pmtu; - - seq = read_seqbegin(&fnhe_seqlock); - fnhe_daddr = fnhe->fnhe_daddr; - gw = fnhe->fnhe_gw; - pmtu = fnhe->fnhe_pmtu; - expires = fnhe->fnhe_expires; - if (read_seqretry(&fnhe_seqlock, seq)) - goto restart; - if (daddr != fnhe_daddr) - continue; - if (pmtu) { - unsigned long diff = expires - jiffies; + if (fnhe->fnhe_daddr == daddr) + return fnhe; + } + return NULL; +} - if (time_before(jiffies, expires)) { - rt->rt_pmtu = pmtu; - dst_set_expires(&rt->dst, diff); - } - } - if (gw) { - rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = gw; +static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, + __be32 daddr) +{ + __be32 fnhe_daddr, gw; + unsigned long expires; + unsigned int seq; + u32 pmtu; + +restart: + seq = read_seqbegin(&fnhe_seqlock); + fnhe_daddr = fnhe->fnhe_daddr; + gw = fnhe->fnhe_gw; + pmtu = fnhe->fnhe_pmtu; + expires = fnhe->fnhe_expires; + if (read_seqretry(&fnhe_seqlock, seq)) + goto restart; + + if (daddr != fnhe_daddr) + return; + + if (pmtu) { + unsigned long diff = expires - jiffies; + + if (time_before(jiffies, expires)) { + rt->rt_pmtu = pmtu; + dst_set_expires(&rt->dst, diff); } - fnhe->fnhe_stamp = jiffies; - break; + } + if (gw) { + rt->rt_flags |= RTCF_REDIRECTED; + rt->rt_gateway = gw; + } + fnhe->fnhe_stamp = jiffies; +} + +static inline void rt_release_rcu(struct rcu_head *head) +{ + struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); + dst_release(dst); +} + +static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) +{ + struct rtable *orig, *prev, **p = &nh->nh_rth_output; + + orig = *p; + + prev = cmpxchg(p, orig, rt); + if (prev == orig) { + dst_clone(&rt->dst); + if (orig) + call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu); } } -static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, +static void rt_set_nexthop(struct rtable *rt, __be32 daddr, const struct fib_result *res, + struct fib_nh_exception *fnhe, struct fib_info *fi, u16 type, u32 itag) { if (fi) { @@ -1219,12 +1251,15 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) rt->rt_gateway = nh->nh_gw; - if (unlikely(nh->nh_exceptions)) - rt_bind_exception(rt, nh, fl4->daddr); - rt_init_metrics(rt, fl4, fi); + if (unlikely(fnhe)) + rt_bind_exception(rt, fnhe, daddr); + rt_init_metrics(rt, fi); #ifdef CONFIG_IP_ROUTE_CLASSID - rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; + rt->dst.tclassid = nh->nh_tclassid; #endif + if (!(rt->dst.flags & DST_HOST) && + rt_is_output_route(rt)) + rt_cache_route(nh, rt); } #ifdef CONFIG_IP_ROUTE_CLASSID @@ -1236,10 +1271,10 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, } static struct rtable *rt_dst_alloc(struct net_device *dev, - bool nopolicy, bool noxfrm) + bool nopolicy, bool noxfrm, bool will_cache) { return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, - DST_HOST | DST_NOCACHE | + (will_cache ? 0 : DST_HOST) | DST_NOCACHE | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); } @@ -1276,7 +1311,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto e_err; } rth = rt_dst_alloc(dev_net(dev)->loopback_dev, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); if (!rth) goto e_nobufs; @@ -1349,6 +1384,7 @@ static int __mkroute_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) { + struct fib_nh_exception *fnhe; struct rtable *rth; int err; struct in_device *out_dev; @@ -1395,9 +1431,13 @@ static int __mkroute_input(struct sk_buff *skb, } } + fnhe = NULL; + if (res->fi) + fnhe = find_exception(&FIB_RES_NH(*res), daddr); + rth = rt_dst_alloc(out_dev->dev, IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(out_dev, NOXFRM)); + IN_DEV_CONF_GET(out_dev, NOXFRM), false); if (!rth) { err = -ENOBUFS; goto cleanup; @@ -1416,7 +1456,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.input = ip_forward; rth->dst.output = ip_output; - rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); + rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); *result = rth; err = 0; @@ -1558,7 +1598,7 @@ brd_input: local_input: rth = rt_dst_alloc(net->loopback_dev, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); if (!rth) goto e_nobufs; @@ -1672,6 +1712,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, unsigned int flags) { struct fib_info *fi = res->fi; + struct fib_nh_exception *fnhe; struct in_device *in_dev; u16 type = res->type; struct rtable *rth; @@ -1710,9 +1751,22 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fi = NULL; } + fnhe = NULL; + if (fi) { + fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); + if (!fnhe) { + rth = FIB_RES_NH(*res).nh_rth_output; + if (rth && + rth->dst.obsolete == DST_OBSOLETE_FORCE_CHK) { + dst_use(&rth->dst, jiffies); + return rth; + } + } + } rth = rt_dst_alloc(dev_out, IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(in_dev, NOXFRM)); + IN_DEV_CONF_GET(in_dev, NOXFRM), + fi && !fnhe); if (!rth) return ERR_PTR(-ENOBUFS); @@ -1749,7 +1803,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, #endif } - rt_set_nexthop(rth, fl4, res, fi, type, 0); + rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0); if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) rth->dst.flags |= DST_NOCACHE; -- cgit v1.2.3 From d2d68ba9fe8b38eb03124b3176a013bb8aa2b5e5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 12:58:50 -0700 Subject: ipv4: Cache input routes in fib_info nexthops. Caching input routes is slightly simpler than output routes, since we don't need to be concerned with nexthop exceptions. (locally destined, and routed packets, never trigger PMTU events or redirects that will be processed by us). However, we have to elide caching for the DIRECTSRC and non-zero itag cases. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_semantics.c | 2 ++ net/ipv4/route.c | 55 +++++++++++++++++++++++++++++++++++++----------- 3 files changed, 46 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index fb62c590360e..e69c3a47153d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -82,6 +82,7 @@ struct fib_nh { __be32 nh_saddr; int nh_saddr_genid; struct rtable *nh_rth_output; + struct rtable *nh_rth_input; struct fnhe_hash_bucket *nh_exceptions; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 83d0f42b619a..e55171f184f9 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -173,6 +173,8 @@ static void free_fib_info_rcu(struct rcu_head *head) free_nh_exceptions(nexthop_nh); if (nexthop_nh->nh_rth_output) dst_release(&nexthop_nh->nh_rth_output->dst); + if (nexthop_nh->nh_rth_input) + dst_release(&nexthop_nh->nh_rth_input->dst); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8a0260010ea1..97cca8a03d94 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1231,6 +1231,9 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) { struct rtable *orig, *prev, **p = &nh->nh_rth_output; + if (rt_is_input_route(rt)) + p = &nh->nh_rth_input; + orig = *p; prev = cmpxchg(p, orig, rt); @@ -1241,6 +1244,11 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) } } +static bool rt_cache_valid(struct rtable *rt) +{ + return (rt && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK); +} + static void rt_set_nexthop(struct rtable *rt, __be32 daddr, const struct fib_result *res, struct fib_nh_exception *fnhe, @@ -1257,8 +1265,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif - if (!(rt->dst.flags & DST_HOST) && - rt_is_output_route(rt)) + if (!(rt->dst.flags & DST_HOST)) rt_cache_route(nh, rt); } @@ -1384,11 +1391,11 @@ static int __mkroute_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) { - struct fib_nh_exception *fnhe; struct rtable *rth; int err; struct in_device *out_dev; unsigned int flags = 0; + bool do_cache; u32 itag; /* get a working reference to the output device */ @@ -1431,13 +1438,21 @@ static int __mkroute_input(struct sk_buff *skb, } } - fnhe = NULL; - if (res->fi) - fnhe = find_exception(&FIB_RES_NH(*res), daddr); + do_cache = false; + if (res->fi) { + if (!(flags & RTCF_DIRECTSRC) && !itag) { + rth = FIB_RES_NH(*res).nh_rth_input; + if (rt_cache_valid(rth)) { + dst_use(&rth->dst, jiffies); + goto out; + } + do_cache = true; + } + } rth = rt_dst_alloc(out_dev->dev, IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(out_dev, NOXFRM), false); + IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache); if (!rth) { err = -ENOBUFS; goto cleanup; @@ -1456,8 +1471,8 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.input = ip_forward; rth->dst.output = ip_output; - rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); - + rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); +out: *result = rth; err = 0; cleanup: @@ -1509,6 +1524,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct rtable *rth; int err = -EINVAL; struct net *net = dev_net(dev); + bool do_cache; /* IP on this device is disabled. */ @@ -1522,6 +1538,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; + res.fi = NULL; if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) goto brd_input; @@ -1597,8 +1614,20 @@ brd_input: RT_CACHE_STAT_INC(in_brd); local_input: + do_cache = false; + if (res.fi) { + if (!(flags & RTCF_DIRECTSRC) && !itag) { + rth = FIB_RES_NH(res).nh_rth_input; + if (rt_cache_valid(rth)) { + dst_use(&rth->dst, jiffies); + goto set_and_out; + } + do_cache = true; + } + } + rth = rt_dst_alloc(net->loopback_dev, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); + IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; @@ -1622,6 +1651,9 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } + if (do_cache) + rt_cache_route(&FIB_RES_NH(res), rth); +set_and_out: skb_dst_set(skb, &rth->dst); err = 0; goto out; @@ -1756,8 +1788,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); if (!fnhe) { rth = FIB_RES_NH(*res).nh_rth_output; - if (rth && - rth->dst.obsolete == DST_OBSOLETE_FORCE_CHK) { + if (rt_cache_valid(rth)) { dst_use(&rth->dst, jiffies); return rth; } -- cgit v1.2.3 From ba3f7f04ef2b19aace38f855aedd17fe43035d50 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 14:02:46 -0700 Subject: ipv4: Kill FLOWI_FLAG_RT_NOCACHE and associated code. Signed-off-by: David S. Miller --- include/net/flow.h | 1 - include/net/inet_connection_sock.h | 3 +-- net/dccp/ipv4.c | 2 +- net/ipv4/inet_connection_sock.c | 5 +---- net/ipv4/route.c | 3 --- net/ipv4/tcp_ipv4.c | 4 ++-- 6 files changed, 5 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index ce9cb7656b47..e1dd5082ec7e 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -21,7 +21,6 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_CAN_SLEEP 0x02 -#define FLOWI_FLAG_RT_NOCACHE 0x04 __u32 flowic_secid; }; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 2cf44b4ed2e6..5ee66f517b4f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -250,8 +250,7 @@ extern int inet_csk_get_port(struct sock *sk, unsigned short snum); extern struct dst_entry* inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req, - bool nocache); + const struct request_sock *req); extern struct dst_entry* inet_csk_route_child_sock(struct sock *sk, struct sock *newsk, const struct request_sock *req); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ab4f44c9bb21..25428d0c50c9 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -508,7 +508,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, struct dst_entry *dst; struct flowi4 fl4; - dst = inet_csk_route_req(sk, &fl4, req, false); + dst = inet_csk_route_req(sk, &fl4, req); if (dst == NULL) goto out; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0a290d719bc7..db0cf17c00f7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -368,8 +368,7 @@ EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); struct dst_entry *inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req, - bool nocache) + const struct request_sock *req) { struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); @@ -377,8 +376,6 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); int flags = inet_sk_flowi_flags(sk); - if (nocache) - flags |= FLOWI_FLAG_RT_NOCACHE; flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 97cca8a03d94..7e1c0ed0ef70 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1836,9 +1836,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0); - if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) - rth->dst.flags |= DST_NOCACHE; - return rth; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1d8b75a58981..59110caeb074 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -824,7 +824,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct sk_buff * skb; /* First, grab a route. */ - if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL) + if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; skb = tcp_make_synack(sk, dst, req, rvp); @@ -1378,7 +1378,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && + (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && fl4.daddr == saddr) { if (!tcp_peer_is_proven(req, dst, true)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); -- cgit v1.2.3 From 4fd551d7bed93af60af61c5a324b8f5dff37953a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 14:39:44 -0700 Subject: ipv4: Kill rt->rt_oif Never actually used. It was being set on output routes to the original OIF specified in the flow key used for the lookup. Adjust the only user, ipmr_rt_fib_lookup(), for greater correctness of the flowi4_oif and flowi4_iif values, thanks to feedback from Julian Anastasov. Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/ipmr.c | 7 +++++-- net/ipv4/route.c | 5 ----- net/ipv4/xfrm4_policy.c | 1 - 4 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 3c1eeab9749b..e789a92fd602 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -50,7 +50,6 @@ struct rtable { int rt_route_iif; int rt_iif; - int rt_oif; /* Info on neighbour */ __be32 rt_gateway; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index eee3bf6676fe..8eec8f4a0536 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1795,8 +1795,11 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) .daddr = iph->daddr, .saddr = iph->saddr, .flowi4_tos = RT_TOS(iph->tos), - .flowi4_oif = rt->rt_oif, - .flowi4_iif = rt->rt_iif, + .flowi4_oif = (rt_is_output_route(rt) ? + skb->dev->ifindex : 0), + .flowi4_iif = (rt_is_output_route(rt) ? + net->loopback_dev->ifindex : + skb->dev->ifindex), .flowi4_mark = skb->mark, }; struct mr_table *mrt; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b8707779b85d..a280b6ac8eb2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1332,7 +1332,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_type = RTN_MULTICAST; rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; - rth->rt_oif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; rth->fi = NULL; @@ -1463,7 +1462,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_type = res->type; rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; - rth->rt_oif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; rth->fi = NULL; @@ -1642,7 +1640,6 @@ local_input: rth->rt_type = res.type; rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; - rth->rt_oif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; rth->fi = NULL; @@ -1808,7 +1805,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_type = type; rth->rt_route_iif = 0; rth->rt_iif = orig_oif ? : dev_out->ifindex; - rth->rt_oif = orig_oif; rth->rt_pmtu = 0; rth->rt_gateway = 0; rth->fi = NULL; @@ -2085,7 +2081,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_route_iif = ort->rt_route_iif; rt->rt_iif = ort->rt_iif; - rt->rt_oif = ort->rt_oif; rt->rt_pmtu = ort->rt_pmtu; rt->rt_genid = rt_genid(net); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 6074b694f118..3c99b4cdd290 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -81,7 +81,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_route_iif = fl4->flowi4_iif; xdst->u.rt.rt_iif = fl4->flowi4_iif; - xdst->u.rt.rt_oif = fl4->flowi4_oif; xdst->u.dst.dev = dev; dev_hold(dev); -- cgit v1.2.3 From 9917e1e8762745191eba5a3bf2040278cbddbee1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 14:44:26 -0700 Subject: ipv4: Turn rt->rt_route_iif into rt->rt_is_input. That is this value's only use, as a boolean to indicate whether a route is an input route or not. So implement it that way, using a u16 gap present in the struct already. Signed-off-by: David S. Miller --- include/net/route.h | 6 +++--- net/ipv4/route.c | 10 +++++----- net/ipv4/xfrm4_policy.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index e789a92fd602..4bafe0bfe829 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -47,8 +47,8 @@ struct rtable { int rt_genid; unsigned int rt_flags; __u16 rt_type; + __u16 rt_is_input; - int rt_route_iif; int rt_iif; /* Info on neighbour */ @@ -61,12 +61,12 @@ struct rtable { static inline bool rt_is_input_route(const struct rtable *rt) { - return rt->rt_route_iif != 0; + return rt->rt_is_input != 0; } static inline bool rt_is_output_route(const struct rtable *rt) { - return rt->rt_route_iif == 0; + return rt->rt_is_input == 0; } static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a280b6ac8eb2..fac4c4acdbac 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1330,7 +1330,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_genid = rt_genid(dev_net(dev)); rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; - rth->rt_route_iif = dev->ifindex; + rth->rt_is_input= 1; rth->rt_iif = dev->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; @@ -1460,7 +1460,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); rth->rt_flags = flags; rth->rt_type = res->type; - rth->rt_route_iif = in_dev->dev->ifindex; + rth->rt_is_input = 1; rth->rt_iif = in_dev->dev->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; @@ -1638,7 +1638,7 @@ local_input: rth->rt_genid = rt_genid(net); rth->rt_flags = flags|RTCF_LOCAL; rth->rt_type = res.type; - rth->rt_route_iif = dev->ifindex; + rth->rt_is_input = 1; rth->rt_iif = dev->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; @@ -1803,7 +1803,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_genid = rt_genid(dev_net(dev_out)); rth->rt_flags = flags; rth->rt_type = type; - rth->rt_route_iif = 0; + rth->rt_is_input = 0; rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; @@ -2079,7 +2079,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or if (new->dev) dev_hold(new->dev); - rt->rt_route_iif = ort->rt_route_iif; + rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; rt->rt_pmtu = ort->rt_pmtu; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 3c99b4cdd290..c6281847f16a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -79,7 +79,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, struct rtable *rt = (struct rtable *)xdst->route; const struct flowi4 *fl4 = &fl->u.ip4; - xdst->u.rt.rt_route_iif = fl4->flowi4_iif; xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.dst.dev = dev; @@ -87,6 +86,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ + xdst->u.rt.rt_is_input = rt->rt_is_input; xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; -- cgit v1.2.3 From 2860583fe840d972573363dfa190b2149a604534 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 14:55:59 -0700 Subject: ipv4: Kill rt->fi It's not really needed. We only grabbed a reference to the fib_info for the sake of fib_info local metrics. However, fib_info objects are freed using RCU, as are therefore their private metrics (if any). We would have triggered a route cache flush if we eliminated a reference to a fib_info object in the routing tables. Therefore, any existing cached routes will first check and see that they have been invalidated before an errant reference to these metric values would occur. Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/route.c | 32 +------------------------------- 2 files changed, 1 insertion(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 4bafe0bfe829..60d611dc5cee 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -56,7 +56,6 @@ struct rtable { /* Miscellaneous cached information */ u32 rt_pmtu; - struct fib_info *fi; /* for client ref to shared metrics */ }; static inline bool rt_is_input_route(const struct rtable *rt) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fac4c4acdbac..9add08869c75 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -141,7 +141,6 @@ static int ip_rt_min_advmss __read_mostly = 256; static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); static unsigned int ipv4_mtu(const struct dst_entry *dst); -static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, @@ -171,7 +170,6 @@ static struct dst_ops ipv4_dst_ops = { .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, - .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, @@ -1034,17 +1032,6 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) return dst; } -static void ipv4_dst_destroy(struct dst_entry *dst) -{ - struct rtable *rt = (struct rtable *) dst; - - if (rt->fi) { - fib_info_put(rt->fi); - rt->fi = NULL; - } -} - - static void ipv4_link_failure(struct sk_buff *skb) { struct rtable *rt; @@ -1158,15 +1145,6 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu; } -static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) -{ - if (fi->fib_metrics != (u32 *) dst_default_metrics) { - rt->fi = fi; - atomic_inc(&fi->fib_clntref); - } - dst_init_metrics(&rt->dst, fi->fib_metrics, true); -} - static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) { struct fnhe_hash_bucket *hash = nh->nh_exceptions; @@ -1261,7 +1239,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, rt->rt_gateway = nh->nh_gw; if (unlikely(fnhe)) rt_bind_exception(rt, fnhe, daddr); - rt_init_metrics(rt, fi); + dst_init_metrics(&rt->dst, fi->fib_metrics, true); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif @@ -1334,7 +1312,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = dev->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; - rth->fi = NULL; if (our) { rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -1464,7 +1441,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = in_dev->dev->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; - rth->fi = NULL; rth->dst.input = ip_forward; rth->dst.output = ip_output; @@ -1642,7 +1618,6 @@ local_input: rth->rt_iif = dev->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; - rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; @@ -1807,7 +1782,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; - rth->fi = NULL; RT_CACHE_STAT_INC(out_slow_tot); @@ -2052,7 +2026,6 @@ static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), - .destroy = ipv4_dst_destroy, .check = ipv4_blackhole_dst_check, .mtu = ipv4_blackhole_mtu, .default_advmss = ipv4_default_advmss, @@ -2087,9 +2060,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_gateway = ort->rt_gateway; - rt->fi = ort->fi; - if (rt->fi) - atomic_inc(&rt->fi->fib_clntref); dst_free(new); } -- cgit v1.2.3 From 0bb4087cbec0ef74fd416789d6aad67957063057 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 20 Jul 2012 16:00:53 -0700 Subject: ipv4: Fix neigh lookup keying over loopback/point-to-point devices. We were using a special key "0" for all loopback and point-to-point device neigh lookups under ipv4, but we wouldn't use that special key for the neigh creation. So basically we'd make a new neigh at each and every lookup :-) This special case to use only one neigh for these device types is of dubious value, so just remove it entirely. Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/arp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/arp.h b/include/net/arp.h index 4617d9841132..7f7df93f37cd 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -21,9 +21,6 @@ static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev struct neighbour *n; u32 hash_val; - if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) - key = 0; - hash_val = arp_hashfn(key, dev, nht->hash_rnd[0]) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; -- cgit v1.2.3 From 5aa93bcf66f4af094d6f11096e81d5501a0b4ba5 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Sat, 21 Jul 2012 07:56:07 +0000 Subject: sctp: Implement quick failover draft from tsvwg I've seen several attempts recently made to do quick failover of sctp transports by reducing various retransmit timers and counters. While its possible to implement a faster failover on multihomed sctp associations, its not particularly robust, in that it can lead to unneeded retransmits, as well as false connection failures due to intermittent latency on a network. Instead, lets implement the new ietf quick failover draft found here: http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05 This will let the sctp stack identify transports that have had a small number of errors, and avoid using them quickly until their reliability can be re-established. I've tested this out on two virt guests connected via multiple isolated virt networks and believe its in compliance with the above draft and works well. Signed-off-by: Neil Horman CC: Vlad Yasevich CC: Sridhar Samudrala CC: "David S. Miller" CC: linux-sctp@vger.kernel.org CC: joe@perches.com Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 14 +++++ include/net/sctp/constants.h | 1 + include/net/sctp/structs.h | 20 ++++++- include/net/sctp/user.h | 11 ++++ net/sctp/associola.c | 37 +++++++++--- net/sctp/outqueue.c | 6 +- net/sctp/sm_sideeffect.c | 33 +++++++++-- net/sctp/socket.c | 101 +++++++++++++++++++++++++++++++++ net/sctp/sysctl.c | 9 +++ net/sctp/transport.c | 4 +- 10 files changed, 221 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5f3ef7f7fcec..406a5226220d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1440,6 +1440,20 @@ path_max_retrans - INTEGER Default: 5 +pf_retrans - INTEGER + The number of retransmissions that will be attempted on a given path + before traffic is redirected to an alternate transport (should one + exist). Note this is distinct from path_max_retrans, as a path that + passes the pf_retrans threshold can still be used. Its only + deprioritized when a transmission path is selected by the stack. This + setting is primarily used to enable fast failover mechanisms without + having to reduce path_max_retrans to a very low value. See: + http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt + for details. Note also that a value of pf_retrans > path_max_retrans + disables this feature + + Default: 0 + rto_initial - INTEGER The initial round trip timeout value in milliseconds that will be used in calculating round trip times. This is the initial time interval diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 942b864f6135..d053d2e99876 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -334,6 +334,7 @@ typedef enum { typedef enum { SCTP_TRANSPORT_UP, SCTP_TRANSPORT_DOWN, + SCTP_TRANSPORT_PF, } sctp_transport_cmd_t; /* These are the address scopes defined mainly for IPv4 addresses diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 536e439ddf1d..fc5e60016e37 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -161,6 +161,12 @@ extern struct sctp_globals { int max_retrans_path; int max_retrans_init; + /* Potentially-Failed.Max.Retrans sysctl value + * taken from: + * http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05 + */ + int pf_retrans; + /* * Policy for preforming sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_sndbuf @@ -258,6 +264,7 @@ extern struct sctp_globals { #define sctp_sndbuf_policy (sctp_globals.sndbuf_policy) #define sctp_rcvbuf_policy (sctp_globals.rcvbuf_policy) #define sctp_max_retrans_path (sctp_globals.max_retrans_path) +#define sctp_pf_retrans (sctp_globals.pf_retrans) #define sctp_max_retrans_init (sctp_globals.max_retrans_init) #define sctp_sack_timeout (sctp_globals.sack_timeout) #define sctp_hb_interval (sctp_globals.hb_interval) @@ -990,10 +997,15 @@ struct sctp_transport { /* This is the max_retrans value for the transport and will * be initialized from the assocs value. This can be changed - * using SCTP_SET_PEER_ADDR_PARAMS socket option. + * using the SCTP_SET_PEER_ADDR_PARAMS socket option. */ __u16 pathmaxrxt; + /* This is the partially failed retrans value for the transport + * and will be initialized from the assocs value. This can be changed + * using the SCTP_PEER_ADDR_THLDS socket option + */ + int pf_retrans; /* PMTU : The current known path MTU. */ __u32 pathmtu; @@ -1664,6 +1676,12 @@ struct sctp_association { */ int max_retrans; + /* This is the partially failed retrans value for the transport + * and will be initialized from the assocs value. This can be + * changed using the SCTP_PEER_ADDR_THLDS socket option + */ + int pf_retrans; + /* Maximum number of times the endpoint will retransmit INIT */ __u16 max_init_attempts; diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index 0842ef00b2fe..1b02d7ad453b 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -93,6 +93,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_GET_ASSOC_NUMBER 28 /* Read only */ #define SCTP_GET_ASSOC_ID_LIST 29 /* Read only */ #define SCTP_AUTO_ASCONF 30 +#define SCTP_PEER_ADDR_THLDS 31 /* Internal Socket Options. Some of the sctp library functions are * implemented using these socket options. @@ -649,6 +650,7 @@ struct sctp_paddrinfo { */ enum sctp_spinfo_state { SCTP_INACTIVE, + SCTP_PF, SCTP_ACTIVE, SCTP_UNCONFIRMED, SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */ @@ -741,4 +743,13 @@ typedef struct { int sd; } sctp_peeloff_arg_t; +/* + * Peer Address Thresholds socket option + */ +struct sctp_paddrthlds { + sctp_assoc_t spt_assoc_id; + struct sockaddr_storage spt_address; + __u16 spt_pathmaxrxt; + __u16 spt_pathpfthld; +}; #endif /* __net_sctp_user_h__ */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8cf348e62e74..ebaef3ed6065 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -124,6 +124,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * socket values. */ asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt; + asoc->pf_retrans = sctp_pf_retrans; + asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial); asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max); asoc->rto_min = msecs_to_jiffies(sp->rtoinfo.srto_min); @@ -686,6 +688,9 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Set the path max_retrans. */ peer->pathmaxrxt = asoc->pathmaxrxt; + /* And the partial failure retrnas threshold */ + peer->pf_retrans = asoc->pf_retrans; + /* Initialize the peer's SACK delay timeout based on the * association configured value. */ @@ -841,6 +846,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, struct sctp_ulpevent *event; struct sockaddr_storage addr; int spc_state = 0; + bool ulp_notify = true; /* Record the transition on the transport. */ switch (command) { @@ -854,6 +860,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, spc_state = SCTP_ADDR_CONFIRMED; else spc_state = SCTP_ADDR_AVAILABLE; + /* Don't inform ULP about transition from PF to + * active state and set cwnd to 1, see SCTP + * Quick failover draft section 5.1, point 5 + */ + if (transport->state == SCTP_PF) { + ulp_notify = false; + transport->cwnd = 1; + } transport->state = SCTP_ACTIVE; break; @@ -872,6 +886,11 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, spc_state = SCTP_ADDR_UNREACHABLE; break; + case SCTP_TRANSPORT_PF: + transport->state = SCTP_PF; + ulp_notify = false; + break; + default: return; } @@ -879,12 +898,15 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, /* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the * user. */ - memset(&addr, 0, sizeof(struct sockaddr_storage)); - memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len); - event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, - 0, spc_state, error, GFP_ATOMIC); - if (event) - sctp_ulpq_tail_event(&asoc->ulpq, event); + if (ulp_notify) { + memset(&addr, 0, sizeof(struct sockaddr_storage)); + memcpy(&addr, &transport->ipaddr, + transport->af_specific->sockaddr_len); + event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, + 0, spc_state, error, GFP_ATOMIC); + if (event) + sctp_ulpq_tail_event(&asoc->ulpq, event); + } /* Select new active and retran paths. */ @@ -900,7 +922,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, transports) { if ((t->state == SCTP_INACTIVE) || - (t->state == SCTP_UNCONFIRMED)) + (t->state == SCTP_UNCONFIRMED) || + (t->state == SCTP_PF)) continue; if (!first || t->last_time_heard > first->last_time_heard) { second = first; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index a0fa19f5650c..e7aa177c9522 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -792,7 +792,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) if (!new_transport) new_transport = asoc->peer.active_path; } else if ((new_transport->state == SCTP_INACTIVE) || - (new_transport->state == SCTP_UNCONFIRMED)) { + (new_transport->state == SCTP_UNCONFIRMED) || + (new_transport->state == SCTP_PF)) { /* If the chunk is Heartbeat or Heartbeat Ack, * send it to chunk->transport, even if it's * inactive. @@ -987,7 +988,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) new_transport = chunk->transport; if (!new_transport || ((new_transport->state == SCTP_INACTIVE) || - (new_transport->state == SCTP_UNCONFIRMED))) + (new_transport->state == SCTP_UNCONFIRMED) || + (new_transport->state == SCTP_PF))) new_transport = asoc->peer.active_path; if (new_transport->state == SCTP_UNCONFIRMED) continue; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8716da1a8592..fe99628e1257 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -76,6 +76,8 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, sctp_cmd_seq_t *commands, gfp_t gfp); +static void sctp_cmd_hb_timer_update(sctp_cmd_seq_t *cmds, + struct sctp_transport *t); /******************************************************************** * Helper functions ********************************************************************/ @@ -470,7 +472,8 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = { * notification SHOULD be sent to the upper layer. * */ -static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, +static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, + struct sctp_association *asoc, struct sctp_transport *transport, int is_hb) { @@ -495,6 +498,23 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, transport->error_count++; } + /* If the transport error count is greater than the pf_retrans + * threshold, and less than pathmaxrtx, then mark this transport + * as Partially Failed, ee SCTP Quick Failover Draft, secon 5.1, + * point 1 + */ + if ((transport->state != SCTP_PF) && + (asoc->pf_retrans < transport->pathmaxrxt) && + (transport->error_count > asoc->pf_retrans)) { + + sctp_assoc_control_transport(asoc, transport, + SCTP_TRANSPORT_PF, + 0); + + /* Update the hb timer to resend a heartbeat every rto */ + sctp_cmd_hb_timer_update(commands, transport); + } + if (transport->state != SCTP_INACTIVE && (transport->error_count > transport->pathmaxrxt)) { SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", @@ -699,6 +719,10 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, SCTP_HEARTBEAT_SUCCESS); } + if (t->state == SCTP_PF) + sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, + SCTP_HEARTBEAT_SUCCESS); + /* The receiver of the HEARTBEAT ACK should also perform an * RTT measurement for that destination transport address * using the time value carried in the HEARTBEAT ACK chunk. @@ -1565,8 +1589,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_STRIKE: /* Mark one strike against a transport. */ - sctp_do_8_2_transport_strike(asoc, cmd->obj.transport, - 0); + sctp_do_8_2_transport_strike(commands, asoc, + cmd->obj.transport, 0); break; case SCTP_CMD_TRANSPORT_IDLE: @@ -1576,7 +1600,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_TRANSPORT_HB_SENT: t = cmd->obj.transport; - sctp_do_8_2_transport_strike(asoc, t, 1); + sctp_do_8_2_transport_strike(commands, asoc, + t, 1); t->hb_sent = 1; break; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5d488cdcf679..5e259817a7f3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3478,6 +3478,56 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, } +/* + * SCTP_PEER_ADDR_THLDS + * + * This option allows us to alter the partially failed threshold for one or all + * transports in an association. See Section 6.1 of: + * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt + */ +static int sctp_setsockopt_paddr_thresholds(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_paddrthlds val; + struct sctp_transport *trans; + struct sctp_association *asoc; + + if (optlen < sizeof(struct sctp_paddrthlds)) + return -EINVAL; + if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, + sizeof(struct sctp_paddrthlds))) + return -EFAULT; + + + if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { + asoc = sctp_id2assoc(sk, val.spt_assoc_id); + if (!asoc) + return -ENOENT; + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) { + if (val.spt_pathmaxrxt) + trans->pathmaxrxt = val.spt_pathmaxrxt; + trans->pf_retrans = val.spt_pathpfthld; + } + + if (val.spt_pathmaxrxt) + asoc->pathmaxrxt = val.spt_pathmaxrxt; + asoc->pf_retrans = val.spt_pathpfthld; + } else { + trans = sctp_addr_id2transport(sk, &val.spt_address, + val.spt_assoc_id); + if (!trans) + return -ENOENT; + + if (val.spt_pathmaxrxt) + trans->pathmaxrxt = val.spt_pathmaxrxt; + trans->pf_retrans = val.spt_pathpfthld; + } + + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3627,6 +3677,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_AUTO_ASCONF: retval = sctp_setsockopt_auto_asconf(sk, optval, optlen); break; + case SCTP_PEER_ADDR_THLDS: + retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -5498,6 +5551,51 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len, return 0; } +/* + * SCTP_PEER_ADDR_THLDS + * + * This option allows us to fetch the partially failed threshold for one or all + * transports in an association. See Section 6.1 of: + * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt + */ +static int sctp_getsockopt_paddr_thresholds(struct sock *sk, + char __user *optval, + int len, + int __user *optlen) +{ + struct sctp_paddrthlds val; + struct sctp_transport *trans; + struct sctp_association *asoc; + + if (len < sizeof(struct sctp_paddrthlds)) + return -EINVAL; + len = sizeof(struct sctp_paddrthlds); + if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len)) + return -EFAULT; + + if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { + asoc = sctp_id2assoc(sk, val.spt_assoc_id); + if (!asoc) + return -ENOENT; + + val.spt_pathpfthld = asoc->pf_retrans; + val.spt_pathmaxrxt = asoc->pathmaxrxt; + } else { + trans = sctp_addr_id2transport(sk, &val.spt_address, + val.spt_assoc_id); + if (!trans) + return -ENOENT; + + val.spt_pathmaxrxt = trans->pathmaxrxt; + val.spt_pathpfthld = trans->pf_retrans; + } + + if (put_user(len, optlen) || copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; +} + SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -5636,6 +5734,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_AUTO_ASCONF: retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen); break; + case SCTP_PEER_ADDR_THLDS: + retval = sctp_getsockopt_paddr_thresholds(sk, optval, len, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index e5fe639c89e7..2b2bfe933ff1 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -140,6 +140,15 @@ static ctl_table sctp_table[] = { .extra1 = &one, .extra2 = &int_max }, + { + .procname = "pf_retrans", + .data = &sctp_pf_retrans, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max + }, { .procname = "max_init_retransmits", .data = &sctp_max_retrans_init, diff --git a/net/sctp/transport.c b/net/sctp/transport.c index a6b7ee9ce28a..d1c652ed2f3d 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -87,6 +87,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, /* Initialize the default path max_retrans. */ peer->pathmaxrxt = sctp_max_retrans_path; + peer->pf_retrans = sctp_pf_retrans; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); @@ -595,7 +596,8 @@ unsigned long sctp_transport_timeout(struct sctp_transport *t) { unsigned long timeout; timeout = t->rto + sctp_jitter(t->rto); - if (t->state != SCTP_UNCONFIRMED) + if ((t->state != SCTP_UNCONFIRMED) && + (t->state != SCTP_PF)) timeout += t->hbinterval; timeout += jiffies; return timeout; -- cgit v1.2.3 From 406a3c638ce8b17d9704052c07955490f732c2b8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 20 Jul 2012 10:39:25 +0000 Subject: net: netprio_cgroup: rework update socket logic Instead of updating the sk_cgrp_prioidx struct field on every send this only updates the field when a task is moved via cgroup infrastructure. This allows sockets that may be used by a kernel worker thread to be managed. For example in the iscsi case today a user can put iscsid in a netprio cgroup and control traffic will be sent with the correct sk_cgrp_prioidx value set but as soon as data is sent the kernel worker thread isssues a send and sk_cgrp_prioidx is updated with the kernel worker threads value which is the default case. It seems more correct to only update the field when the user explicitly sets it via control group infrastructure. This allows the users to manage sockets that may be used with other threads. Signed-off-by: John Fastabend Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/net.h | 1 + include/net/netprio_cgroup.h | 4 ++-- net/core/netprio_cgroup.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ net/core/sock.c | 6 ++--- net/socket.c | 5 ++--- 5 files changed, 61 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/linux/net.h b/include/linux/net.h index dc95700de5df..99276c3dc89a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -248,6 +248,7 @@ extern int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); extern int sock_map_fd(struct socket *sock, int flags); extern struct socket *sockfd_lookup(int fd, int *err); +extern struct socket *sock_from_file(struct file *file, int *err); #define sockfd_put(sock) fput(sock->file) extern int net_ratelimit(void); diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index d58fdec47597..2719dec6b5a8 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -35,7 +35,7 @@ struct cgroup_netprio_state { extern int net_prio_subsys_id; #endif -extern void sock_update_netprioidx(struct sock *sk); +extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task); #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) @@ -82,7 +82,7 @@ static inline u32 task_netprioidx(struct task_struct *p) #endif /* CONFIG_NETPRIO_CGROUP */ #else -#define sock_update_netprioidx(sk) +#define sock_update_netprioidx(sk, task) #endif #endif /* _NET_CLS_CGROUP_H */ diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index b2e9caa1ad1a..63d15e8f80e9 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -25,6 +25,8 @@ #include #include +#include + #define PRIOIDX_SZ 128 static unsigned long prioidx_map[PRIOIDX_SZ]; @@ -272,6 +274,56 @@ out_free_devname: return ret; } +void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +{ + struct task_struct *p; + char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); + + if (!tmp) { + pr_warn("Unable to attach cgrp due to alloc failure!\n"); + return; + } + + cgroup_taskset_for_each(p, cgrp, tset) { + unsigned int fd; + struct fdtable *fdt; + struct files_struct *files; + + task_lock(p); + files = p->files; + if (!files) { + task_unlock(p); + continue; + } + + rcu_read_lock(); + fdt = files_fdtable(files); + for (fd = 0; fd < fdt->max_fds; fd++) { + char *path; + struct file *file; + struct socket *sock; + unsigned long s; + int rv, err = 0; + + file = fcheck_files(files, fd); + if (!file) + continue; + + path = d_path(&file->f_path, tmp, PAGE_SIZE); + rv = sscanf(path, "socket:[%lu]", &s); + if (rv <= 0) + continue; + + sock = sock_from_file(file, &err); + if (!err) + sock_update_netprioidx(sock->sk, p); + } + rcu_read_unlock(); + task_unlock(p); + } + kfree(tmp); +} + static struct cftype ss_files[] = { { .name = "prioidx", @@ -289,6 +341,7 @@ struct cgroup_subsys net_prio_subsys = { .name = "net_prio", .create = cgrp_create, .destroy = cgrp_destroy, + .attach = net_prio_attach, #ifdef CONFIG_NETPRIO_CGROUP .subsys_id = net_prio_subsys_id, #endif diff --git a/net/core/sock.c b/net/core/sock.c index 24039ac12426..2676a88f533e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1180,12 +1180,12 @@ void sock_update_classid(struct sock *sk) } EXPORT_SYMBOL(sock_update_classid); -void sock_update_netprioidx(struct sock *sk) +void sock_update_netprioidx(struct sock *sk, struct task_struct *task) { if (in_interrupt()) return; - sk->sk_cgrp_prioidx = task_netprioidx(current); + sk->sk_cgrp_prioidx = task_netprioidx(task); } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif @@ -1215,7 +1215,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, atomic_set(&sk->sk_wmem_alloc, 1); sock_update_classid(sk); - sock_update_netprioidx(sk); + sock_update_netprioidx(sk, current); } return sk; diff --git a/net/socket.c b/net/socket.c index 0452dca4cd24..dfe5b66c97e0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -398,7 +398,7 @@ int sock_map_fd(struct socket *sock, int flags) } EXPORT_SYMBOL(sock_map_fd); -static struct socket *sock_from_file(struct file *file, int *err) +struct socket *sock_from_file(struct file *file, int *err) { if (file->f_op == &socket_file_ops) return file->private_data; /* set in sock_map_fd */ @@ -406,6 +406,7 @@ static struct socket *sock_from_file(struct file *file, int *err) *err = -ENOTSOCK; return NULL; } +EXPORT_SYMBOL(sock_from_file); /** * sockfd_lookup - Go from a file number to its socket slot @@ -554,8 +555,6 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, sock_update_classid(sock->sk); - sock_update_netprioidx(sock->sk); - si->sock = sock; si->scm = NULL; si->msg = msg; -- cgit v1.2.3 From 6120d3dbb1220792ebea88cd475e1ec8f8620a93 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jun 2012 10:03:05 +0400 Subject: get rid of ->scm_work_list recursion in __scm_destroy() will be cut by delaying final fput() Signed-off-by: Al Viro --- include/linux/sched.h | 1 - include/net/scm.h | 1 - net/core/scm.c | 22 +++------------------- 3 files changed, 3 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/linux/sched.h b/include/linux/sched.h index af3555cc760f..598ba2da7865 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1546,7 +1546,6 @@ struct task_struct { unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; - struct list_head *scm_work_list; #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack */ int curr_ret_stack; diff --git a/include/net/scm.h b/include/net/scm.h index d456f4c71a32..079d7887dac1 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -13,7 +13,6 @@ #define SCM_MAX_FD 253 struct scm_fp_list { - struct list_head list; short count; short max; struct file *fp[SCM_MAX_FD]; diff --git a/net/core/scm.c b/net/core/scm.c index 611c5efd4cb0..8f6ccfd68ef4 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -109,25 +109,9 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - if (current->scm_work_list) { - list_add_tail(&fpl->list, current->scm_work_list); - } else { - LIST_HEAD(work_list); - - current->scm_work_list = &work_list; - - list_add(&fpl->list, &work_list); - while (!list_empty(&work_list)) { - fpl = list_first_entry(&work_list, struct scm_fp_list, list); - - list_del(&fpl->list); - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); - } - - current->scm_work_list = NULL; - } + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); } } EXPORT_SYMBOL(__scm_destroy); -- cgit v1.2.3 From 563d34d05786263893ba4a1042eb9b9374127cf5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jul 2012 09:48:52 +0200 Subject: tcp: dont drop MTU reduction indications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ICMP messages generated in output path if frame length is bigger than mtu are actually lost because socket is owned by user (doing the xmit) One example is the ipgre_tunnel_xmit() calling icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); We had a similar case fixed in commit a34a101e1e6 (ipv6: disable GSO on sockets hitting dst_allfrag). Problem of such fix is that it relied on retransmit timers, so short tcp sessions paid a too big latency increase price. This patch uses the tcp_release_cb() infrastructure so that MTU reduction messages (ICMP messages) are not lost, and no extra delay is added in TCP transmits. Reported-by: Maciej Å»enczykowski Diagnosed-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: Nandita Dukkipati Cc: Tom Herbert Cc: Tore Anderson Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++++ include/net/sock.h | 1 + net/ipv4/tcp_ipv4.c | 19 +++++++++++++++---- net/ipv4/tcp_output.c | 6 +++++- net/ipv6/tcp_ipv6.c | 40 ++++++++++++++++++++++++---------------- 5 files changed, 51 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2761856987b2..eb125a4c30b3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -493,6 +493,9 @@ struct tcp_sock { u32 probe_seq_start; u32 probe_seq_end; } mtu_probe; + u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG + * while socket was owned by user. + */ #ifdef CONFIG_TCP_MD5SIG /* TCP AF-Specific parts; only used by MD5 Signature support so far */ @@ -518,6 +521,9 @@ enum tsq_flags { TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */ TCP_WRITE_TIMER_DEFERRED, /* tcp_write_timer() found socket was owned */ TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */ + TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call + * tcp_v{4|6}_mtu_reduced() + */ }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) diff --git a/include/net/sock.h b/include/net/sock.h index 88de092df50f..e067f8c18f88 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -859,6 +859,7 @@ struct proto { struct sk_buff *skb); void (*release_cb)(struct sock *sk); + void (*mtu_reduced)(struct sock *sk); /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59110caeb074..bc5432e3c778 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -275,12 +275,15 @@ failure: EXPORT_SYMBOL(tcp_v4_connect); /* - * This routine does path mtu discovery as defined in RFC1191. + * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. + * It can be called through tcp_release_cb() if socket was owned by user + * at the time tcp_v4_err() was called to handle ICMP message. */ -static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) +static void tcp_v4_mtu_reduced(struct sock *sk) { struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); + u32 mtu = tcp_sk(sk)->mtu_info; /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs * send out by Linux are always <576bytes so they should go through @@ -373,8 +376,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) bh_lock_sock(sk); /* If too many ICMPs get dropped on busy * servers this needs to be solved differently. + * We do take care of PMTU discovery (RFC1191) special case : + * we can receive locally generated ICMP messages while socket is held. */ - if (sock_owned_by_user(sk)) + if (sock_owned_by_user(sk) && + type != ICMP_DEST_UNREACH && + code != ICMP_FRAG_NEEDED) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) @@ -409,8 +416,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + tp->mtu_info = info; if (!sock_owned_by_user(sk)) - do_pmtu_discovery(sk, iph, info); + tcp_v4_mtu_reduced(sk); + else + set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); goto out; } @@ -2596,6 +2606,7 @@ struct proto tcp_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v4_do_rcv, .release_cb = tcp_release_cb, + .mtu_reduced = tcp_v4_mtu_reduced, .hash = inet_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 950aebfd9967..33cd065cfbd8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -885,7 +885,8 @@ static void tcp_tasklet_func(unsigned long data) #define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ (1UL << TCP_WRITE_TIMER_DEFERRED) | \ - (1UL << TCP_DELACK_TIMER_DEFERRED)) + (1UL << TCP_DELACK_TIMER_DEFERRED) | \ + (1UL << TCP_MTU_REDUCED_DEFERRED)) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -914,6 +915,9 @@ void tcp_release_cb(struct sock *sk) if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) tcp_delack_timer_handler(sk); + + if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) + sk->sk_prot->mtu_reduced(sk); } EXPORT_SYMBOL(tcp_release_cb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0302ec3fecfc..f49476e2d884 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -315,6 +315,23 @@ failure: return err; } +static void tcp_v6_mtu_reduced(struct sock *sk) +{ + struct dst_entry *dst; + + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + return; + + dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info); + if (!dst) + return; + + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { + tcp_sync_mss(sk, dst_mtu(dst)); + tcp_simple_retransmit(sk); + } +} + static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { @@ -342,7 +359,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } bh_lock_sock(sk); - if (sock_owned_by_user(sk)) + if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) @@ -371,21 +388,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (type == ICMPV6_PKT_TOOBIG) { - struct dst_entry *dst; - - if (sock_owned_by_user(sk)) - goto out; - if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) - goto out; - - dst = inet6_csk_update_pmtu(sk, ntohl(info)); - if (!dst) - goto out; - - if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { - tcp_sync_mss(sk, dst_mtu(dst)); - tcp_simple_retransmit(sk); - } + tp->mtu_info = ntohl(info); + if (!sock_owned_by_user(sk)) + tcp_v6_mtu_reduced(sk); + else + set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); goto out; } @@ -1949,6 +1956,7 @@ struct proto tcpv6_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, + .mtu_reduced = tcp_v6_mtu_reduced, .hash = tcp_v6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, -- cgit v1.2.3 From 92101b3b2e3178087127709a556b091dae314e9e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 Jul 2012 16:29:00 -0700 Subject: ipv4: Prepare for change of rt->rt_iif encoding. Use inet_iif() consistently, and for TCP record the input interface of cached RX dst in inet sock. rt->rt_iif is going to be encoded differently, so that we can legitimately cache input routes in the FIB info more aggressively. When the input interface is "use SKB device index" the rt->rt_iif will be set to zero. This forces us to move the TCP RX dst cache installation into the ipv4 specific code, and as well it should since doing the route caching for ipv6 is pointless at the moment since it is not inspected in the ipv6 input paths yet. Also, remove the unlikely on dst->obsolete, all ipv4 dsts have obsolete set to a non-zero value to force invocation of the check callback. Signed-off-by: David S. Miller --- include/net/inet_sock.h | 1 + net/dccp/ipv4.c | 2 +- net/ipv4/icmp.c | 2 +- net/ipv4/ip_sockglue.c | 5 ++--- net/ipv4/route.c | 2 +- net/ipv4/tcp_input.c | 12 ------------ net/ipv4/tcp_ipv4.c | 24 ++++++++++++++++++------ net/sched/cls_route.c | 2 +- net/sched/em_meta.c | 2 +- net/sctp/protocol.c | 2 +- 10 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 924d7b98ab60..613cfa401672 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -172,6 +172,7 @@ struct inet_sock { int uc_index; int mc_index; __be32 mc_addr; + int rx_dst_ifindex; struct ip_mc_socklist __rcu *mc_list; struct inet_cork_full cork; }; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 25428d0c50c9..176ecdba4a22 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -481,7 +481,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct rtable *rt; const struct iphdr *iph = ip_hdr(skb); struct flowi4 fl4 = { - .flowi4_oif = skb_rtable(skb)->rt_iif, + .flowi4_oif = inet_iif(skb), .daddr = iph->saddr, .saddr = iph->daddr, .flowi4_tos = RT_CONN_FLAGS(sk), diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f2a06beffbd3..f2eccd531746 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -571,7 +571,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) rcu_read_lock(); if (rt_is_input_route(rt) && net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) - dev = dev_get_by_index_rcu(net, rt->rt_iif); + dev = dev_get_by_index_rcu(net, inet_iif(skb_in)); if (dev) saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index de29f46f68b0..5eea4a811042 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1027,10 +1027,9 @@ e_inval: void ipv4_pktinfo_prepare(struct sk_buff *skb) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); - const struct rtable *rt = skb_rtable(skb); - if (rt) { - pktinfo->ipi_ifindex = rt->rt_iif; + if (skb_rtable(skb)) { + pktinfo->ipi_ifindex = inet_iif(skb); pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 34017be87c85..f6be78119396 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -848,7 +848,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) if (log_martians && peer->rate_tokens == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", - &ip_hdr(skb)->saddr, rt->rt_iif, + &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &rt->rt_gateway); #endif } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 21d7f8f3a7a5..3e07a64ca44e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5391,18 +5391,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); - if (sk->sk_rx_dst) { - struct dst_entry *dst = sk->sk_rx_dst; - if (unlikely(dst->obsolete)) { - if (dst->ops->check(dst, 0) == NULL) { - dst_release(dst); - sk->sk_rx_dst = NULL; - } - } - } - if (unlikely(sk->sk_rx_dst == NULL)) - sk->sk_rx_dst = dst_clone(skb_dst(skb)); - /* * Header prediction. * The code loosely follows the one in the famous diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bc5432e3c778..3e30548ac32a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1618,6 +1618,20 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ sock_rps_save_rxhash(sk, skb); + if (sk->sk_rx_dst) { + struct dst_entry *dst = sk->sk_rx_dst; + if (dst->ops->check(dst, 0) == NULL) { + dst_release(dst); + sk->sk_rx_dst = NULL; + } + } + if (unlikely(sk->sk_rx_dst == NULL)) { + struct inet_sock *icsk = inet_sk(sk); + struct rtable *rt = skb_rtable(skb); + + sk->sk_rx_dst = dst_clone(&rt->dst); + icsk->rx_dst_ifindex = inet_iif(skb); + } if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; @@ -1700,14 +1714,12 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { struct dst_entry *dst = sk->sk_rx_dst; + struct inet_sock *icsk = inet_sk(sk); if (dst) dst = dst_check(dst, 0); - if (dst) { - struct rtable *rt = (struct rtable *) dst; - - if (rt->rt_iif == dev->ifindex) - skb_dst_set_noref(skb, dst); - } + if (dst && + icsk->rx_dst_ifindex == dev->ifindex) + skb_dst_set_noref(skb, dst); } } } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 36fec4227401..44f405cb9aaf 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -143,7 +143,7 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (head == NULL) goto old_method; - iif = ((struct rtable *)dst)->rt_iif; + iif = inet_iif(skb); h = route4_fastmap_hash(id, iif); if (id == head->fastmap[h].id && diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 4790c696cbce..4ab6e3325573 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -264,7 +264,7 @@ META_COLLECTOR(int_rtiif) if (unlikely(skb_rtable(skb) == NULL)) *err = -1; else - dst->value = skb_rtable(skb)->rt_iif; + dst->value = inet_iif(skb); } /************************************************************************** diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 9c90811d1134..1f89c4e69645 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -568,7 +568,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk, /* What interface did this skb arrive on? */ static int sctp_v4_skb_iif(const struct sk_buff *skb) { - return skb_rtable(skb)->rt_iif; + return inet_iif(skb); } /* Was this packet marked by Explicit Congestion Notification? */ -- cgit v1.2.3 From 13378cad02afc2adc6c0e07fca03903c7ada0b37 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 Jul 2012 13:57:45 -0700 Subject: ipv4: Change rt->rt_iif encoding. On input packet processing, rt->rt_iif will be zero if we should use skb->dev->ifindex. Since we access rt->rt_iif consistently via inet_iif(), that is the only spot whose interpretation have to adjust. Signed-off-by: David S. Miller --- include/net/route.h | 6 +++++- net/ipv4/route.c | 8 ++++---- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 60d611dc5cee..c29ef2733f2d 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -277,7 +277,11 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable static inline int inet_iif(const struct sk_buff *skb) { - return skb_rtable(skb)->rt_iif; + int iif = skb_rtable(skb)->rt_iif; + + if (iif) + return iif; + return skb->skb_iif; } extern int sysctl_ip_default_ttl; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6be78119396..6bcb8fc71cbc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1309,7 +1309,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; rth->rt_is_input= 1; - rth->rt_iif = dev->ifindex; + rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; if (our) { @@ -1435,7 +1435,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_flags = flags; rth->rt_type = res->type; rth->rt_is_input = 1; - rth->rt_iif = in_dev->dev->ifindex; + rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; @@ -1608,7 +1608,7 @@ local_input: rth->rt_flags = flags|RTCF_LOCAL; rth->rt_type = res.type; rth->rt_is_input = 1; - rth->rt_iif = dev->ifindex; + rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; if (res.type == RTN_UNREACHABLE) { @@ -1772,7 +1772,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_flags = flags; rth->rt_type = type; rth->rt_is_input = 0; - rth->rt_iif = orig_oif ? : dev_out->ifindex; + rth->rt_iif = orig_oif ? : 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; -- cgit v1.2.3 From c6cffba4ffa26a8ffacd0bb9f3144e34f20da7de Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 26 Jul 2012 11:14:38 +0000 Subject: ipv4: Fix input route performance regression. With the routing cache removal we lost the "noref" code paths on input, and this can kill some routing workloads. Reinstate the noref path when we hit a cached route in the FIB nexthops. With help from Eric Dumazet. Reported-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/route.h | 19 +++++++++++++++++-- net/ipv4/arp.c | 2 +- net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/ip_fragment.c | 4 ++-- net/ipv4/ip_input.c | 4 ++-- net/ipv4/route.c | 48 ++++++++++++++++++++++-------------------------- net/ipv4/xfrm4_input.c | 4 ++-- 7 files changed, 48 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index c29ef2733f2d..8c52bc6f1c90 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -157,8 +158,22 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 return ip_route_output_key(net, fl4); } -extern int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin); +extern int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin); + +static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin) +{ + int err; + + rcu_read_lock(); + err = ip_route_input_noref(skb, dst, src, tos, devin); + if (!err) + skb_dst_force(skb); + rcu_read_unlock(); + + return err; +} extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u32 mark, u8 protocol, int flow_flags); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a0124eb7dbea..77e87aff419a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -827,7 +827,7 @@ static int arp_process(struct sk_buff *skb) } if (arp->ar_op == htons(ARPOP_REQUEST) && - ip_route_input(skb, tip, sip, 0, dev) == 0) { + ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { rt = skb_rtable(skb); addr_type = rt->rt_type; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e55171f184f9..da0cc2e6b250 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -172,9 +172,9 @@ static void free_fib_info_rcu(struct rcu_head *head) if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); if (nexthop_nh->nh_rth_output) - dst_release(&nexthop_nh->nh_rth_output->dst); + dst_free(&nexthop_nh->nh_rth_output->dst); if (nexthop_nh->nh_rth_input) - dst_release(&nexthop_nh->nh_rth_input->dst); + dst_free(&nexthop_nh->nh_rth_input->dst); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7ad88e5e7110..8d07c973409c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -258,8 +258,8 @@ static void ip_expire(unsigned long arg) /* skb dst is stale, drop it, and perform route lookup again */ skb_dst_drop(head); iph = ip_hdr(head); - err = ip_route_input(head, iph->daddr, iph->saddr, - iph->tos, head->dev); + err = ip_route_input_noref(head, iph->daddr, iph->saddr, + iph->tos, head->dev); if (err) goto out_rcu_unlock; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 93134b0eab0c..bda8cac2ae91 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -339,8 +339,8 @@ static int ip_rcv_finish(struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (!skb_dst(skb)) { - int err = ip_route_input(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); + int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb->dev), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3f7bb7185c50..fc1a81ca79a7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1199,10 +1199,9 @@ restart: fnhe->fnhe_stamp = jiffies; } -static inline void rt_release_rcu(struct rcu_head *head) +static inline void rt_free(struct rtable *rt) { - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst_release(dst); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) @@ -1216,9 +1215,15 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) prev = cmpxchg(p, orig, rt); if (prev == orig) { - dst_clone(&rt->dst); if (orig) - call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu); + rt_free(orig); + } else { + /* Routes we intend to cache in the FIB nexthop have + * the DST_NOCACHE bit clear. However, if we are + * unsuccessful at storing this route into the cache + * we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; } } @@ -1245,7 +1250,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif - if (!(rt->dst.flags & DST_HOST)) + if (!(rt->dst.flags & DST_NOCACHE)) rt_cache_route(nh, rt); } @@ -1261,7 +1266,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, bool nopolicy, bool noxfrm, bool will_cache) { return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, - (will_cache ? 0 : DST_HOST) | DST_NOCACHE | + (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); } @@ -1366,8 +1371,7 @@ static void ip_handle_martian_source(struct net_device *dev, static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos, - struct rtable **result) + __be32 daddr, __be32 saddr, u32 tos) { struct rtable *rth; int err; @@ -1418,7 +1422,7 @@ static int __mkroute_input(struct sk_buff *skb, if (!itag) { rth = FIB_RES_NH(*res).nh_rth_input; if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); + skb_dst_set_noref(skb, &rth->dst); goto out; } do_cache = true; @@ -1445,8 +1449,8 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.output = ip_output; rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); + skb_dst_set(skb, &rth->dst); out: - *result = rth; err = 0; cleanup: return err; @@ -1458,21 +1462,13 @@ static int ip_mkroute_input(struct sk_buff *skb, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { - struct rtable *rth = NULL; - int err; - #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) fib_select_multipath(res); #endif /* create a routing cache entry */ - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); - if (err) - return err; - - skb_dst_set(skb, &rth->dst); - return 0; + return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); } /* @@ -1588,8 +1584,9 @@ local_input: if (!itag) { rth = FIB_RES_NH(res).nh_rth_input; if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); - goto set_and_out; + skb_dst_set_noref(skb, &rth->dst); + err = 0; + goto out; } do_cache = true; } @@ -1620,7 +1617,6 @@ local_input: } if (do_cache) rt_cache_route(&FIB_RES_NH(res), rth); -set_and_out: skb_dst_set(skb, &rth->dst); err = 0; goto out; @@ -1658,8 +1654,8 @@ martian_source_keep_err: goto out; } -int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev) +int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev) { int res; @@ -1702,7 +1698,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rcu_read_unlock(); return res; } -EXPORT_SYMBOL(ip_route_input); +EXPORT_SYMBOL(ip_route_input_noref); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 58d23a572509..06814b6216dc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) if (skb_dst(skb) == NULL) { const struct iphdr *iph = ip_hdr(skb); - if (ip_route_input(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) + if (ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) goto drop; } return dst_input(skb); -- cgit v1.2.3 From c7109986db3c945f50ceed884a30e0fd8af3b89b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Jul 2012 12:18:11 +0000 Subject: ipv6: Early TCP socket demux This is the IPv6 missing bits for infrastructure added in commit 41063e9dd1195 (ipv4: Early TCP socket demux.) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 13 +++++++------ include/net/protocol.h | 2 ++ net/ipv4/ip_input.c | 1 + net/ipv6/ip6_input.c | 13 +++++++++++-- net/ipv6/tcp_ipv6.c | 38 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 00cbb4384c79..9e34c877a770 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -96,14 +96,15 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, const __be16 sport, const __be16 dport) { - struct sock *sk; + struct sock *sk = skb_steal_sock(skb); - if (unlikely(sk = skb_steal_sock(skb))) + if (sk) return sk; - else return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, - &ipv6_hdr(skb)->saddr, sport, - &ipv6_hdr(skb)->daddr, ntohs(dport), - inet6_iif(skb)); + + return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, + &ipv6_hdr(skb)->saddr, sport, + &ipv6_hdr(skb)->daddr, ntohs(dport), + inet6_iif(skb)); } extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, diff --git a/include/net/protocol.h b/include/net/protocol.h index 057f2d315567..929528c73fe8 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -52,6 +52,8 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { + void (*early_demux)(struct sk_buff *skb); + int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index bda8cac2ae91..981ff1eef28c 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -314,6 +314,7 @@ drop: } int sysctl_ip_early_demux __read_mostly = 1; +EXPORT_SYMBOL(sysctl_ip_early_demux); static int ip_rcv_finish(struct sk_buff *skb) { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5ab923e51af3..47975e363fcd 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -47,9 +47,18 @@ -inline int ip6_rcv_finish( struct sk_buff *skb) +int ip6_rcv_finish(struct sk_buff *skb) { - if (skb_dst(skb) == NULL) + if (sysctl_ip_early_demux && !skb_dst(skb)) { + const struct inet6_protocol *ipprot; + + rcu_read_lock(); + ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); + if (ipprot && ipprot->early_demux) + ipprot->early_demux(skb); + rcu_read_unlock(); + } + if (!skb_dst(skb)) ip6_route_input(skb); return dst_input(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f49476e2d884..221224e72507 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1674,6 +1674,43 @@ do_time_wait: goto discard_it; } +static void tcp_v6_early_demux(struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + const struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type != PACKET_HOST) + return; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) + return; + + hdr = ipv6_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) + return; + + sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, + &hdr->saddr, th->source, + &hdr->daddr, ntohs(th->dest), + inet6_iif(skb)); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk->sk_state != TCP_TIME_WAIT) { + struct dst_entry *dst = sk->sk_rx_dst; + struct inet_sock *icsk = inet_sk(sk); + if (dst) + dst = dst_check(dst, 0); + if (dst && + icsk->rx_dst_ifindex == inet6_iif(skb)) + skb_dst_set_noref(skb, dst); + } + } +} + static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, @@ -1984,6 +2021,7 @@ struct proto tcpv6_prot = { }; static const struct inet6_protocol tcpv6_protocol = { + .early_demux = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .gso_send_check = tcp_v6_gso_send_check, -- cgit v1.2.3 From 404e0a8b6a55d5e1cd138c6deb1bca9abdf75d8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Jul 2012 23:20:37 +0000 Subject: net: ipv4: fix RCU races on dst refcounts commit c6cffba4ffa2 (ipv4: Fix input route performance regression.) added various fatal races with dst refcounts. crashes happen on tcp workloads if routes are added/deleted at the same time. The dst_free() calls from free_fib_info_rcu() are clearly racy. We need instead regular dst refcounting (dst_release()) and make sure dst_release() is aware of RCU grace periods : Add DST_RCU_FREE flag so that dst_release() respects an RCU grace period before dst destruction for cached dst Introduce a new inet_sk_rx_dst_set() helper, using atomic_inc_not_zero() to make sure we dont increase a zero refcount (On a dst currently waiting an rcu grace period before destruction) rt_cache_route() must take a reference on the new cached route, and release it if was not able to install it. With this patch, my machines survive various benchmarks. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 7 +------ include/net/inet_sock.h | 13 +++++++++++++ net/core/dst.c | 26 +++++++++++++++++++++----- net/decnet/dn_route.c | 6 ++++++ net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/route.c | 16 ++++------------ net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_ipv4.c | 12 ++++++------ net/ipv4/tcp_minisocks.c | 3 +-- 9 files changed, 55 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index baf597890064..31a9fd39edb6 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -61,6 +61,7 @@ struct dst_entry { #define DST_NOPEER 0x0040 #define DST_FAKE_RTABLE 0x0080 #define DST_XFRM_TUNNEL 0x0100 +#define DST_RCU_FREE 0x0200 unsigned short pending_confirm; @@ -382,12 +383,6 @@ static inline void dst_free(struct dst_entry *dst) __dst_free(dst); } -static inline void dst_rcu_free(struct rcu_head *head) -{ - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst_free(dst); -} - static inline void dst_confirm(struct dst_entry *dst) { dst->pending_confirm = 1; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 613cfa401672..e3fd34c83ac9 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -249,4 +249,17 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) return flags; } +static inline void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + if (atomic_inc_not_zero(&dst->__refcnt)) { + if (!(dst->flags & DST_RCU_FREE)) + dst->flags |= DST_RCU_FREE; + + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + } +} + #endif /* _INET_SOCK_H */ diff --git a/net/core/dst.c b/net/core/dst.c index 069d51d29414..d9e33ebe170f 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -258,6 +258,15 @@ again: } EXPORT_SYMBOL(dst_destroy); +static void dst_rcu_destroy(struct rcu_head *head) +{ + struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); + + dst = dst_destroy(dst); + if (dst) + __dst_free(dst); +} + void dst_release(struct dst_entry *dst) { if (dst) { @@ -265,10 +274,14 @@ void dst_release(struct dst_entry *dst) newrefcnt = atomic_dec_return(&dst->__refcnt); WARN_ON(newrefcnt < 0); - if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) { - dst = dst_destroy(dst); - if (dst) - __dst_free(dst); + if (unlikely(dst->flags & (DST_NOCACHE | DST_RCU_FREE)) && !newrefcnt) { + if (dst->flags & DST_RCU_FREE) { + call_rcu_bh(&dst->rcu_head, dst_rcu_destroy); + } else { + dst = dst_destroy(dst); + if (dst) + __dst_free(dst); + } } } } @@ -320,11 +333,14 @@ EXPORT_SYMBOL(__dst_destroy_metrics_generic); */ void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { + bool hold; + WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); /* If dst not in cache, we must take a reference, because * dst_release() will destroy dst as soon as its refcount becomes zero */ - if (unlikely(dst->flags & DST_NOCACHE)) { + hold = (dst->flags & (DST_NOCACHE | DST_RCU_FREE)) == DST_NOCACHE; + if (unlikely(hold)) { dst_hold(dst); skb_dst_set(skb, dst); } else { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 85a3604c87c8..26719779ad8e 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -184,6 +184,12 @@ static __inline__ unsigned int dn_hash(__le16 src, __le16 dst) return dn_rt_hash_mask & (unsigned int)tmp; } +static inline void dst_rcu_free(struct rcu_head *head) +{ + struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); + dst_free(dst); +} + static inline void dnrt_free(struct dn_route *rt) { call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index da0cc2e6b250..e55171f184f9 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -172,9 +172,9 @@ static void free_fib_info_rcu(struct rcu_head *head) if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); if (nexthop_nh->nh_rth_output) - dst_free(&nexthop_nh->nh_rth_output->dst); + dst_release(&nexthop_nh->nh_rth_output->dst); if (nexthop_nh->nh_rth_input) - dst_free(&nexthop_nh->nh_rth_input->dst); + dst_release(&nexthop_nh->nh_rth_input->dst); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fc1a81ca79a7..d6eabcfe8a90 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1199,11 +1199,6 @@ restart: fnhe->fnhe_stamp = jiffies; } -static inline void rt_free(struct rtable *rt) -{ - call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); -} - static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) { struct rtable *orig, *prev, **p = &nh->nh_rth_output; @@ -1213,17 +1208,14 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) orig = *p; + rt->dst.flags |= DST_RCU_FREE; + dst_hold(&rt->dst); prev = cmpxchg(p, orig, rt); if (prev == orig) { if (orig) - rt_free(orig); + dst_release(&orig->dst); } else { - /* Routes we intend to cache in the FIB nexthop have - * the DST_NOCACHE bit clear. However, if we are - * unsuccessful at storing this route into the cache - * we really need to set it. - */ - rt->dst.flags |= DST_NOCACHE; + dst_release(&rt->dst); } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a356e1fecf9a..9be30b039ae3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5604,8 +5604,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_ESTABLISHED); if (skb != NULL) { - sk->sk_rx_dst = dst_clone(skb_dst(skb)); - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + inet_sk_rx_dst_set(sk, skb); security_inet_conn_established(sk, skb); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2fbd9921253f..7f91e5ac8277 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1617,19 +1617,19 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + struct dst_entry *dst = sk->sk_rx_dst; + sock_rps_save_rxhash(sk, skb); - if (sk->sk_rx_dst) { - struct dst_entry *dst = sk->sk_rx_dst; + if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || dst->ops->check(dst, 0) == NULL) { dst_release(dst); sk->sk_rx_dst = NULL; } } - if (unlikely(sk->sk_rx_dst == NULL)) { - sk->sk_rx_dst = dst_clone(skb_dst(skb)); - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - } + if (unlikely(sk->sk_rx_dst == NULL)) + inet_sk_rx_dst_set(sk, skb); + if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3f1cc2028edd..232a90c3ec86 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -387,8 +387,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - newsk->sk_rx_dst = dst_clone(skb_dst(skb)); - inet_sk(newsk)->rx_dst_ifindex = skb->skb_iif; + inet_sk_rx_dst_set(newsk, skb); /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to -- cgit v1.2.3 From 0c7462a2351b4cc502f326aad7fedd04909928be Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jul 2012 07:14:29 +0000 Subject: ipv4: remove rt_cache_rebuild_count After IP route cache removal, rt_cache_rebuild_count is no longer used. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 6 ------ include/net/netns/ipv4.h | 2 -- net/ipv4/sysctl_net_ipv4.c | 11 ----------- 3 files changed, 19 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 406a5226220d..ca447b35b833 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -48,12 +48,6 @@ min_adv_mss - INTEGER The advertised MSS depends on the first hop route MTU, but will never be lower than this setting. -rt_cache_rebuild_count - INTEGER - The per net-namespace route cache emergency rebuild threshold. - Any net-namespace having its route cache rebuilt due to - a hash bucket chain being too long more than this many times - will have its route caching disabled - IP Fragmentation: ipfrag_high_thresh - INTEGER diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 0ffb8e31f3cd..1474dd65c66f 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -61,8 +61,6 @@ struct netns_ipv4 { int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; int sysctl_icmp_errors_use_inbound_ifaddr; - int sysctl_rt_cache_rebuild_count; - int current_rt_cache_rebuild_count; unsigned int sysctl_ping_group_range[2]; long sysctl_tcp_mem[3]; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5840c3255721..4b6487a68279 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -783,13 +783,6 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "rt_cache_rebuild_count", - .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "ping_group_range", .data = &init_net.ipv4.sysctl_ping_group_range, @@ -829,8 +822,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) table[5].data = &net->ipv4.sysctl_icmp_ratemask; table[6].data = - &net->ipv4.sysctl_rt_cache_rebuild_count; - table[7].data = &net->ipv4.sysctl_ping_group_range; } @@ -842,8 +833,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_ping_group_range[0] = 1; net->ipv4.sysctl_ping_group_range[1] = 0; - net->ipv4.sysctl_rt_cache_rebuild_count = 4; - tcp_init_mem(net); net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); -- cgit v1.2.3 From 54764bb647b2e847c512acf8d443df965da35000 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jul 2012 01:08:23 +0000 Subject: ipv4: Restore old dst_free() behavior. commit 404e0a8b6a55 (net: ipv4: fix RCU races on dst refcounts) tried to solve a race but added a problem at device/fib dismantle time : We really want to call dst_free() as soon as possible, even if sockets still have dst in their cache. dst_release() calls in free_fib_info_rcu() are not welcomed. Root of the problem was that now we also cache output routes (in nh_rth_output), we must use call_rcu() instead of call_rcu_bh() in rt_free(), because output route lookups are done in process context. Based on feedback and initial patch from David Miller (adding another call_rcu_bh() call in fib, but it appears it was not the right fix) I left the inet_sk_rx_dst_set() helper and added __rcu attributes to nh_rth_output and nh_rth_input to better document what is going on in this code. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 7 ++++++- include/net/inet_sock.h | 10 +++------- include/net/ip_fib.h | 4 ++-- net/core/dst.c | 26 +++++--------------------- net/decnet/dn_route.c | 6 ------ net/ipv4/fib_semantics.c | 21 +++++++++++++++++---- net/ipv4/route.c | 26 +++++++++++++++++--------- 7 files changed, 50 insertions(+), 50 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 31a9fd39edb6..baf597890064 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -61,7 +61,6 @@ struct dst_entry { #define DST_NOPEER 0x0040 #define DST_FAKE_RTABLE 0x0080 #define DST_XFRM_TUNNEL 0x0100 -#define DST_RCU_FREE 0x0200 unsigned short pending_confirm; @@ -383,6 +382,12 @@ static inline void dst_free(struct dst_entry *dst) __dst_free(dst); } +static inline void dst_rcu_free(struct rcu_head *head) +{ + struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); + dst_free(dst); +} + static inline void dst_confirm(struct dst_entry *dst) { dst->pending_confirm = 1; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index e3fd34c83ac9..83b567fe1941 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -253,13 +253,9 @@ static inline void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb { struct dst_entry *dst = skb_dst(skb); - if (atomic_inc_not_zero(&dst->__refcnt)) { - if (!(dst->flags & DST_RCU_FREE)) - dst->flags |= DST_RCU_FREE; - - sk->sk_rx_dst = dst; - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - } + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } #endif /* _INET_SOCK_H */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e69c3a47153d..e521a03515b1 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -81,8 +81,8 @@ struct fib_nh { __be32 nh_gw; __be32 nh_saddr; int nh_saddr_genid; - struct rtable *nh_rth_output; - struct rtable *nh_rth_input; + struct rtable __rcu *nh_rth_output; + struct rtable __rcu *nh_rth_input; struct fnhe_hash_bucket *nh_exceptions; }; diff --git a/net/core/dst.c b/net/core/dst.c index d9e33ebe170f..069d51d29414 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -258,15 +258,6 @@ again: } EXPORT_SYMBOL(dst_destroy); -static void dst_rcu_destroy(struct rcu_head *head) -{ - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - - dst = dst_destroy(dst); - if (dst) - __dst_free(dst); -} - void dst_release(struct dst_entry *dst) { if (dst) { @@ -274,14 +265,10 @@ void dst_release(struct dst_entry *dst) newrefcnt = atomic_dec_return(&dst->__refcnt); WARN_ON(newrefcnt < 0); - if (unlikely(dst->flags & (DST_NOCACHE | DST_RCU_FREE)) && !newrefcnt) { - if (dst->flags & DST_RCU_FREE) { - call_rcu_bh(&dst->rcu_head, dst_rcu_destroy); - } else { - dst = dst_destroy(dst); - if (dst) - __dst_free(dst); - } + if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) { + dst = dst_destroy(dst); + if (dst) + __dst_free(dst); } } } @@ -333,14 +320,11 @@ EXPORT_SYMBOL(__dst_destroy_metrics_generic); */ void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { - bool hold; - WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); /* If dst not in cache, we must take a reference, because * dst_release() will destroy dst as soon as its refcount becomes zero */ - hold = (dst->flags & (DST_NOCACHE | DST_RCU_FREE)) == DST_NOCACHE; - if (unlikely(hold)) { + if (unlikely(dst->flags & DST_NOCACHE)) { dst_hold(dst); skb_dst_set(skb, dst); } else { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 26719779ad8e..85a3604c87c8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -184,12 +184,6 @@ static __inline__ unsigned int dn_hash(__le16 src, __le16 dst) return dn_rt_hash_mask & (unsigned int)tmp; } -static inline void dst_rcu_free(struct rcu_head *head) -{ - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst_free(dst); -} - static inline void dnrt_free(struct dn_route *rt) { call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e55171f184f9..625cf185c489 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -161,6 +161,21 @@ static void free_nh_exceptions(struct fib_nh *nh) kfree(hash); } +static void rt_nexthop_free(struct rtable __rcu **rtp) +{ + struct rtable *rt = rcu_dereference_protected(*rtp, 1); + + if (!rt) + return; + + /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); + * because we waited an RCU grace period before calling + * free_fib_info_rcu() + */ + + dst_free(&rt->dst); +} + /* Release a nexthop info record */ static void free_fib_info_rcu(struct rcu_head *head) { @@ -171,10 +186,8 @@ static void free_fib_info_rcu(struct rcu_head *head) dev_put(nexthop_nh->nh_dev); if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); - if (nexthop_nh->nh_rth_output) - dst_release(&nexthop_nh->nh_rth_output->dst); - if (nexthop_nh->nh_rth_input) - dst_release(&nexthop_nh->nh_rth_input->dst); + rt_nexthop_free(&nexthop_nh->nh_rth_output); + rt_nexthop_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d6eabcfe8a90..2bd107477469 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1199,23 +1199,31 @@ restart: fnhe->fnhe_stamp = jiffies; } +static inline void rt_free(struct rtable *rt) +{ + call_rcu(&rt->dst.rcu_head, dst_rcu_free); +} + static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) { - struct rtable *orig, *prev, **p = &nh->nh_rth_output; + struct rtable *orig, *prev, **p = (struct rtable **)&nh->nh_rth_output; if (rt_is_input_route(rt)) - p = &nh->nh_rth_input; + p = (struct rtable **)&nh->nh_rth_input; orig = *p; - rt->dst.flags |= DST_RCU_FREE; - dst_hold(&rt->dst); prev = cmpxchg(p, orig, rt); if (prev == orig) { if (orig) - dst_release(&orig->dst); + rt_free(orig); } else { - dst_release(&rt->dst); + /* Routes we intend to cache in the FIB nexthop have + * the DST_NOCACHE bit clear. However, if we are + * unsuccessful at storing this route into the cache + * we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; } } @@ -1412,7 +1420,7 @@ static int __mkroute_input(struct sk_buff *skb, do_cache = false; if (res->fi) { if (!itag) { - rth = FIB_RES_NH(*res).nh_rth_input; + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -1574,7 +1582,7 @@ local_input: do_cache = false; if (res.fi) { if (!itag) { - rth = FIB_RES_NH(res).nh_rth_input; + rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); err = 0; @@ -1742,7 +1750,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, if (fi) { fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); if (!fnhe) { - rth = FIB_RES_NH(*res).nh_rth_output; + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_output); if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; -- cgit v1.2.3 From d26b3a7c4b3b26319f18bb645de93eba8f4bdcd5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jul 2012 05:45:30 +0000 Subject: ipv4: percpu nh_rth_output cache Input path is mostly run under RCU and doesnt touch dst refcnt But output path on forwarding or UDP workloads hits badly dst refcount, and we have lot of false sharing, for example in ipv4_mtu() when reading rt->rt_pmtu Using a percpu cache for nh_rth_output gives a nice performance increase at a small cost. 24 udpflood test on my 24 cpu machine (dummy0 output device) (each process sends 1.000.000 udp frames, 24 processes are started) before : 5.24 s after : 2.06 s For reference, time on linux-3.5 : 6.60 s Signed-off-by: Eric Dumazet Tested-by: Alexander Duyck Signed-off-by: David S. Miller --- include/net/ip_fib.h | 3 ++- net/ipv4/fib_semantics.c | 20 +++++++++++++++++++- net/ipv4/route.c | 18 +++++++++++++----- 3 files changed, 34 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e521a03515b1..e331746029b4 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -21,6 +21,7 @@ #include #include #include +#include struct fib_config { u8 fc_dst_len; @@ -81,7 +82,7 @@ struct fib_nh { __be32 nh_gw; __be32 nh_saddr; int nh_saddr_genid; - struct rtable __rcu *nh_rth_output; + struct rtable __rcu * __percpu *nh_pcpu_rth_output; struct rtable __rcu *nh_rth_input; struct fnhe_hash_bucket *nh_exceptions; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 625cf185c489..fe2ca02a1979 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -176,6 +176,23 @@ static void rt_nexthop_free(struct rtable __rcu **rtp) dst_free(&rt->dst); } +static void rt_nexthop_free_cpus(struct rtable __rcu * __percpu *rtp) +{ + int cpu; + + if (!rtp) + return; + + for_each_possible_cpu(cpu) { + struct rtable *rt; + + rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); + if (rt) + dst_free(&rt->dst); + } + free_percpu(rtp); +} + /* Release a nexthop info record */ static void free_fib_info_rcu(struct rcu_head *head) { @@ -186,7 +203,7 @@ static void free_fib_info_rcu(struct rcu_head *head) dev_put(nexthop_nh->nh_dev); if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); - rt_nexthop_free(&nexthop_nh->nh_rth_output); + rt_nexthop_free_cpus(nexthop_nh->nh_pcpu_rth_output); rt_nexthop_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); @@ -817,6 +834,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_nhs = nhs; change_nexthops(fi) { nexthop_nh->nh_parent = fi; + nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); } endfor_nexthops(fi) if (cfg->fc_mx) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2bd107477469..4f6276ce0af3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1206,11 +1206,15 @@ static inline void rt_free(struct rtable *rt) static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) { - struct rtable *orig, *prev, **p = (struct rtable **)&nh->nh_rth_output; + struct rtable *orig, *prev, **p; - if (rt_is_input_route(rt)) + if (rt_is_input_route(rt)) { p = (struct rtable **)&nh->nh_rth_input; - + } else { + if (!nh->nh_pcpu_rth_output) + goto nocache; + p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output); + } orig = *p; prev = cmpxchg(p, orig, rt); @@ -1223,6 +1227,7 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) * unsuccessful at storing this route into the cache * we really need to set it. */ +nocache: rt->dst.flags |= DST_NOCACHE; } } @@ -1749,8 +1754,11 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = NULL; if (fi) { fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); - if (!fnhe) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_output); + if (!fnhe && FIB_RES_NH(*res).nh_pcpu_rth_output) { + struct rtable __rcu **prth; + + prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); + rth = rcu_dereference(*prth); if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; -- cgit v1.2.3 From c5038a8327b980a5b279fa193163c468011de009 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 Jul 2012 15:02:02 -0700 Subject: ipv4: Cache routes in nexthop exception entries. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_semantics.c | 39 +++++++++--------- net/ipv4/route.c | 103 ++++++++++++++++++++++++++--------------------- 3 files changed, 79 insertions(+), 64 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e331746029b4..926142ed8d7a 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -55,6 +55,7 @@ struct fib_nh_exception { u32 fnhe_pmtu; __be32 fnhe_gw; unsigned long fnhe_expires; + struct rtable __rcu *fnhe_rth; unsigned long fnhe_stamp; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index fe2ca02a1979..da80dc14cc76 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }, }; +static void rt_fibinfo_free(struct rtable __rcu **rtp) +{ + struct rtable *rt = rcu_dereference_protected(*rtp, 1); + + if (!rt) + return; + + /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); + * because we waited an RCU grace period before calling + * free_fib_info_rcu() + */ + + dst_free(&rt->dst); +} + static void free_nh_exceptions(struct fib_nh *nh) { struct fnhe_hash_bucket *hash = nh->nh_exceptions; @@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh) struct fib_nh_exception *next; next = rcu_dereference_protected(fnhe->fnhe_next, 1); + + rt_fibinfo_free(&fnhe->fnhe_rth); + kfree(fnhe); fnhe = next; @@ -161,22 +179,7 @@ static void free_nh_exceptions(struct fib_nh *nh) kfree(hash); } -static void rt_nexthop_free(struct rtable __rcu **rtp) -{ - struct rtable *rt = rcu_dereference_protected(*rtp, 1); - - if (!rt) - return; - - /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); - * because we waited an RCU grace period before calling - * free_fib_info_rcu() - */ - - dst_free(&rt->dst); -} - -static void rt_nexthop_free_cpus(struct rtable __rcu * __percpu *rtp) +static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) { int cpu; @@ -203,8 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head) dev_put(nexthop_nh->nh_dev); if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); - rt_nexthop_free_cpus(nexthop_nh->nh_pcpu_rth_output); - rt_nexthop_free(&nexthop_nh->nh_rth_input); + rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); + rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4f6276ce0af3..b102eeb16e34 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -587,11 +587,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, build_sk_flow_key(fl4, sk); } -static DEFINE_SEQLOCK(fnhe_seqlock); +static inline void rt_free(struct rtable *rt) +{ + call_rcu(&rt->dst.rcu_head, dst_rcu_free); +} + +static DEFINE_SPINLOCK(fnhe_lock); static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) { struct fib_nh_exception *fnhe, *oldest; + struct rtable *orig; oldest = rcu_dereference(hash->chain); for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; @@ -599,6 +605,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) oldest = fnhe; } + orig = rcu_dereference(oldest->fnhe_rth); + if (orig) { + RCU_INIT_POINTER(oldest->fnhe_rth, NULL); + rt_free(orig); + } return oldest; } @@ -620,7 +631,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, int depth; u32 hval = fnhe_hashfun(daddr); - write_seqlock_bh(&fnhe_seqlock); + spin_lock_bh(&fnhe_lock); hash = nh->nh_exceptions; if (!hash) { @@ -667,7 +678,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_stamp = jiffies; out_unlock: - write_sequnlock_bh(&fnhe_seqlock); + spin_unlock_bh(&fnhe_lock); return; } @@ -1167,41 +1178,40 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, __be32 daddr) { - __be32 fnhe_daddr, gw; - unsigned long expires; - unsigned int seq; - u32 pmtu; - -restart: - seq = read_seqbegin(&fnhe_seqlock); - fnhe_daddr = fnhe->fnhe_daddr; - gw = fnhe->fnhe_gw; - pmtu = fnhe->fnhe_pmtu; - expires = fnhe->fnhe_expires; - if (read_seqretry(&fnhe_seqlock, seq)) - goto restart; - - if (daddr != fnhe_daddr) - return; + spin_lock_bh(&fnhe_lock); - if (pmtu) { - unsigned long diff = expires - jiffies; + if (daddr == fnhe->fnhe_daddr) { + struct rtable *orig; - if (time_before(jiffies, expires)) { - rt->rt_pmtu = pmtu; - dst_set_expires(&rt->dst, diff); + if (fnhe->fnhe_pmtu) { + unsigned long expires = fnhe->fnhe_expires; + unsigned long diff = expires - jiffies; + + if (time_before(jiffies, expires)) { + rt->rt_pmtu = fnhe->fnhe_pmtu; + dst_set_expires(&rt->dst, diff); + } + } + if (fnhe->fnhe_gw) { + rt->rt_flags |= RTCF_REDIRECTED; + rt->rt_gateway = fnhe->fnhe_gw; } - } - if (gw) { - rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = gw; - } - fnhe->fnhe_stamp = jiffies; -} -static inline void rt_free(struct rtable *rt) -{ - call_rcu(&rt->dst.rcu_head, dst_rcu_free); + orig = rcu_dereference(fnhe->fnhe_rth); + rcu_assign_pointer(fnhe->fnhe_rth, rt); + if (orig) + rt_free(orig); + + fnhe->fnhe_stamp = jiffies; + } else { + /* Routes we intend to cache in nexthop exception have + * the DST_NOCACHE bit clear. However, if we are + * unsuccessful at storing this route into the cache + * we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; + } + spin_unlock_bh(&fnhe_lock); } static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) @@ -1249,13 +1259,13 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) rt->rt_gateway = nh->nh_gw; - if (unlikely(fnhe)) - rt_bind_exception(rt, fnhe, daddr); dst_init_metrics(&rt->dst, fi->fib_metrics, true); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif - if (!(rt->dst.flags & DST_NOCACHE)) + if (unlikely(fnhe)) + rt_bind_exception(rt, fnhe, daddr); + else if (!(rt->dst.flags & DST_NOCACHE)) rt_cache_route(nh, rt); } @@ -1753,22 +1763,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = NULL; if (fi) { - fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); - if (!fnhe && FIB_RES_NH(*res).nh_pcpu_rth_output) { - struct rtable __rcu **prth; + struct rtable __rcu **prth; + fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); + if (fnhe) + prth = &fnhe->fnhe_rth; + else prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); - rth = rcu_dereference(*prth); - if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); - return rth; - } + rth = rcu_dereference(*prth); + if (rt_cache_valid(rth)) { + dst_hold(&rth->dst); + return rth; } } rth = rt_dst_alloc(dev_out, IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOXFRM), - fi && !fnhe); + fi); if (!rth) return ERR_PTR(-ENOBUFS); -- cgit v1.2.3 From caacf05e5ad1abf0a2864863da4e33024bc68ec6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 Jul 2012 15:06:50 -0700 Subject: ipv4: Properly purge netdev references on uncached routes. When a device is unregistered, we have to purge all of the references to it that may exist in the entire system. If a route is uncached, we currently have no way of accomplishing this. So create a global list that is scanned when a network device goes down. This mirrors the logic in net/core/dst.c's dst_ifdown(). Signed-off-by: David S. Miller --- include/net/route.h | 3 +++ net/ipv4/fib_frontend.c | 1 + net/ipv4/route.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++--- net/ipv4/xfrm4_policy.c | 1 + 4 files changed, 69 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 8c52bc6f1c90..776a27f1ab78 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -57,6 +57,8 @@ struct rtable { /* Miscellaneous cached information */ u32 rt_pmtu; + + struct list_head rt_uncached; }; static inline bool rt_is_input_route(const struct rtable *rt) @@ -107,6 +109,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; extern int ip_rt_init(void); extern void rt_cache_flush(struct net *net, int how); +extern void rt_flush_dev(struct net_device *dev); extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, struct sock *sk); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8732cc7920ed..c43ae3fba792 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2, -1); + rt_flush_dev(dev); return NOTIFY_DONE; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b102eeb16e34..c035251beb07 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -147,6 +147,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static void ipv4_dst_destroy(struct dst_entry *dst); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) @@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = { .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, + .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, @@ -1175,9 +1177,11 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) return NULL; } -static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, +static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, __be32 daddr) { + bool ret = false; + spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { @@ -1203,6 +1207,7 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, rt_free(orig); fnhe->fnhe_stamp = jiffies; + ret = true; } else { /* Routes we intend to cache in nexthop exception have * the DST_NOCACHE bit clear. However, if we are @@ -1212,11 +1217,14 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, rt->dst.flags |= DST_NOCACHE; } spin_unlock_bh(&fnhe_lock); + + return ret; } -static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) +static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) { struct rtable *orig, *prev, **p; + bool ret = true; if (rt_is_input_route(rt)) { p = (struct rtable **)&nh->nh_rth_input; @@ -1239,6 +1247,48 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) */ nocache: rt->dst.flags |= DST_NOCACHE; + ret = false; + } + + return ret; +} + +static DEFINE_SPINLOCK(rt_uncached_lock); +static LIST_HEAD(rt_uncached_list); + +static void rt_add_uncached_list(struct rtable *rt) +{ + spin_lock_bh(&rt_uncached_lock); + list_add_tail(&rt->rt_uncached, &rt_uncached_list); + spin_unlock_bh(&rt_uncached_lock); +} + +static void ipv4_dst_destroy(struct dst_entry *dst) +{ + struct rtable *rt = (struct rtable *) dst; + + if (dst->flags & DST_NOCACHE) { + spin_lock_bh(&rt_uncached_lock); + list_del(&rt->rt_uncached); + spin_unlock_bh(&rt_uncached_lock); + } +} + +void rt_flush_dev(struct net_device *dev) +{ + if (!list_empty(&rt_uncached_list)) { + struct net *net = dev_net(dev); + struct rtable *rt; + + spin_lock_bh(&rt_uncached_lock); + list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { + if (rt->dst.dev != dev) + continue; + rt->dst.dev = net->loopback_dev; + dev_hold(rt->dst.dev); + dev_put(dev); + } + spin_unlock_bh(&rt_uncached_lock); } } @@ -1254,6 +1304,8 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, struct fib_nh_exception *fnhe, struct fib_info *fi, u16 type, u32 itag) { + bool cached = false; + if (fi) { struct fib_nh *nh = &FIB_RES_NH(*res); @@ -1264,10 +1316,12 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, rt->dst.tclassid = nh->nh_tclassid; #endif if (unlikely(fnhe)) - rt_bind_exception(rt, fnhe, daddr); + cached = rt_bind_exception(rt, fnhe, daddr); else if (!(rt->dst.flags & DST_NOCACHE)) - rt_cache_route(nh, rt); + cached = rt_cache_route(nh, rt); } + if (unlikely(!cached)) + rt_add_uncached_list(rt); #ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -1334,6 +1388,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); if (our) { rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -1459,6 +1514,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); rth->dst.input = ip_forward; rth->dst.output = ip_output; @@ -1625,6 +1681,7 @@ local_input: rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; @@ -1792,6 +1849,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); RT_CACHE_STAT_INC(out_slow_tot); @@ -2071,6 +2129,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_type = ort->rt_type; rt->rt_gateway = ort->rt_gateway; + INIT_LIST_HEAD(&rt->rt_uncached); + dst_free(new); } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c6281847f16a..681ea2f413e2 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_type = rt->rt_type; xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; + INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); return 0; } -- cgit v1.2.3 From c255a458055e459f65eb7b7f51dc5dbdd0caf1d8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 31 Jul 2012 16:43:02 -0700 Subject: memcg: rename config variables Sanity: CONFIG_CGROUP_MEM_RES_CTLR -> CONFIG_MEMCG CONFIG_CGROUP_MEM_RES_CTLR_SWAP -> CONFIG_MEMCG_SWAP CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED -> CONFIG_MEMCG_SWAP_ENABLED CONFIG_CGROUP_MEM_RES_CTLR_KMEM -> CONFIG_MEMCG_KMEM [mhocko@suse.cz: fix missed bits] Cc: Glauber Costa Acked-by: Michal Hocko Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Tejun Heo Cc: Aneesh Kumar K.V Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/memory.txt | 10 +++++----- arch/powerpc/configs/chroma_defconfig | 4 ++-- arch/s390/defconfig | 2 +- arch/sh/configs/apsh4ad0a_defconfig | 2 +- arch/sh/configs/sdk7786_defconfig | 4 ++-- arch/sh/configs/se7206_defconfig | 2 +- arch/sh/configs/shx3_defconfig | 2 +- arch/sh/configs/urquell_defconfig | 4 ++-- arch/tile/configs/tilegx_defconfig | 4 ++-- arch/tile/configs/tilepro_defconfig | 4 ++-- arch/um/defconfig | 8 ++++---- include/linux/cgroup_subsys.h | 2 +- include/linux/memcontrol.h | 14 +++++++------- include/linux/mmzone.h | 8 ++++---- include/linux/page_cgroup.h | 10 +++++----- include/linux/sched.h | 2 +- include/linux/swap.h | 6 +++--- include/net/sock.h | 4 ++-- init/Kconfig | 14 +++++++------- kernel/fork.c | 2 +- mm/Makefile | 2 +- mm/hwpoison-inject.c | 2 +- mm/memcontrol.c | 20 ++++++++++---------- mm/memory-failure.c | 2 +- mm/mmzone.c | 2 +- mm/oom_kill.c | 2 +- mm/page_cgroup.c | 2 +- mm/vmscan.c | 4 ++-- net/core/sock.c | 2 +- net/ipv4/Makefile | 2 +- net/ipv4/sysctl_net_ipv4.c | 4 ++-- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 33 files changed, 78 insertions(+), 78 deletions(-) (limited to 'include/net') diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index dd88540bb995..672676ac9615 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -187,12 +187,12 @@ the cgroup that brought it in -- this will happen on memory pressure). But see section 8.2: when moving a task to another cgroup, its pages may be recharged to the new cgroup, if move_charge_at_immigrate has been chosen. -Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used. +Exception: If CONFIG_CGROUP_CGROUP_MEMCG_SWAP is not used. When you do swapoff and make swapped-out pages of shmem(tmpfs) to be backed into memory in force, charges for pages are accounted against the caller of swapoff rather than the users of shmem. -2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP) +2.4 Swap Extension (CONFIG_MEMCG_SWAP) Swap Extension allows you to record charge for swap. A swapped-in page is charged back to original page allocator if possible. @@ -259,7 +259,7 @@ When oom event notifier is registered, event will be delivered. per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by zone->lru_lock, it has no lock of its own. -2.7 Kernel Memory Extension (CONFIG_CGROUP_MEM_RES_CTLR_KMEM) +2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM) With the Kernel memory extension, the Memory Controller is able to limit the amount of kernel memory used by the system. Kernel memory is fundamentally @@ -286,8 +286,8 @@ per cgroup, instead of globally. a. Enable CONFIG_CGROUPS b. Enable CONFIG_RESOURCE_COUNTERS -c. Enable CONFIG_CGROUP_MEM_RES_CTLR -d. Enable CONFIG_CGROUP_MEM_RES_CTLR_SWAP (to use swap extension) +c. Enable CONFIG_MEMCG +d. Enable CONFIG_MEMCG_SWAP (to use swap extension) 1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?) # mount -t tmpfs none /sys/fs/cgroup diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig index b1f9597fe312..29bb11ec6c64 100644 --- a/arch/powerpc/configs/chroma_defconfig +++ b/arch/powerpc/configs/chroma_defconfig @@ -21,8 +21,8 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_NAMESPACES=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 37d2bf267964..de57702a3f44 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -13,7 +13,7 @@ CONFIG_CGROUPS=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEMCG=y CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index e7583484cc07..95ae23fcfdd6 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -11,7 +11,7 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEMCG=y CONFIG_BLK_CGROUP=y CONFIG_NAMESPACES=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index 8a7dd7b59c5c..76a76a295d74 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -18,8 +18,8 @@ CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 72c3fad7383f..6bc30ab9fd18 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -11,7 +11,7 @@ CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEMCG=y CONFIG_RELAY=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 6bb413036892..cd6c519f8fad 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -13,7 +13,7 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEMCG=y CONFIG_RELAY=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index 8bfa4d056d7a..d7f89be9f474 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -15,8 +15,8 @@ CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index b8d99aca5431..0270620a1692 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -18,8 +18,8 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index 2b1fd31894f1..c11de27a9bcb 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -17,8 +17,8 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y diff --git a/arch/um/defconfig b/arch/um/defconfig index 7823ab12e6a4..fec0d5d27460 100644 --- a/arch/um/defconfig +++ b/arch/um/defconfig @@ -155,10 +155,10 @@ CONFIG_CPUSETS=y CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y -# CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED is not set -# CONFIG_CGROUP_MEM_RES_CTLR_KMEM is not set +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y +# CONFIG_CGROUP_MEMCG_SWAP_ENABLED is not set +# CONFIG_CGROUP_MEMCG_KMEM is not set CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y # CONFIG_CFS_BANDWIDTH is not set diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 5b41ce079024..dfae957398c3 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -31,7 +31,7 @@ SUBSYS(cpuacct) /* */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG SUBSYS(mem_cgroup) #endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 83e7ba90d6e5..170076222431 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -38,7 +38,7 @@ struct mem_cgroup_reclaim_cookie { unsigned int generation; }; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG /* * All "charge" functions with gfp_mask should use GFP_KERNEL or * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't @@ -124,7 +124,7 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, extern void mem_cgroup_replace_page_cache(struct page *oldpage, struct page *newpage); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP extern int do_swap_account; #endif @@ -193,7 +193,7 @@ void mem_cgroup_split_huge_fixup(struct page *head); bool mem_cgroup_bad_page_check(struct page *page); void mem_cgroup_print_bad_page(struct page *page); #endif -#else /* CONFIG_CGROUP_MEM_RES_CTLR */ +#else /* CONFIG_MEMCG */ struct mem_cgroup; static inline int mem_cgroup_newpage_charge(struct page *page, @@ -384,9 +384,9 @@ static inline void mem_cgroup_replace_page_cache(struct page *oldpage, struct page *newpage) { } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ +#endif /* CONFIG_MEMCG */ -#if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM) +#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM) static inline bool mem_cgroup_bad_page_check(struct page *page) { @@ -406,7 +406,7 @@ enum { }; struct sock; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM void sock_update_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk); #else @@ -416,6 +416,6 @@ static inline void sock_update_memcg(struct sock *sk) static inline void sock_release_memcg(struct sock *sk) { } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ +#endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 458988bd55a1..3bdfa15b2012 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -201,7 +201,7 @@ struct zone_reclaim_stat { struct lruvec { struct list_head lists[NR_LRU_LISTS]; struct zone_reclaim_stat reclaim_stat; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG struct zone *zone; #endif }; @@ -671,7 +671,7 @@ typedef struct pglist_data { int nr_zones; #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */ struct page *node_mem_map; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG struct page_cgroup *node_page_cgroup; #endif #endif @@ -736,7 +736,7 @@ extern void lruvec_init(struct lruvec *lruvec, struct zone *zone); static inline struct zone *lruvec_zone(struct lruvec *lruvec) { -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG return lruvec->zone; #else return container_of(lruvec, struct zone, lruvec); @@ -1052,7 +1052,7 @@ struct mem_section { /* See declaration of similar field in struct zone */ unsigned long *pageblock_flags; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG /* * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use * section. (see memcontrol.h/page_cgroup.h about this.) diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index a88cdba27809..777a524716db 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -12,7 +12,7 @@ enum { #ifndef __GENERATING_BOUNDS_H #include -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG #include /* @@ -82,7 +82,7 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc) bit_spin_unlock(PCG_LOCK, &pc->flags); } -#else /* CONFIG_CGROUP_MEM_RES_CTLR */ +#else /* CONFIG_MEMCG */ struct page_cgroup; static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) @@ -102,11 +102,11 @@ static inline void __init page_cgroup_init_flatmem(void) { } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ +#endif /* CONFIG_MEMCG */ #include -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new); extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id); @@ -138,7 +138,7 @@ static inline void swap_cgroup_swapoff(int type) return; } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */ +#endif /* CONFIG_MEMCG_SWAP */ #endif /* !__GENERATING_BOUNDS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 68dcffaa62a0..865725adb9d3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1584,7 +1584,7 @@ struct task_struct { /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ +#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */ struct memcg_batch_info { int do_batch; /* incremented when batch uncharge started */ struct mem_cgroup *memcg; /* target memcg of uncharge */ diff --git a/include/linux/swap.h b/include/linux/swap.h index c84ec68eaec9..9a16bb1cefd1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -301,7 +301,7 @@ static inline void scan_unevictable_unregister_node(struct node *node) extern int kswapd_run(int nid); extern void kswapd_stop(int nid); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG extern int mem_cgroup_swappiness(struct mem_cgroup *mem); #else static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) @@ -309,7 +309,7 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) return vm_swappiness; } #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP extern void mem_cgroup_uncharge_swap(swp_entry_t ent); #else static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) @@ -360,7 +360,7 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); #else diff --git a/include/net/sock.h b/include/net/sock.h index e067f8c18f88..cee528c119ca 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -913,7 +913,7 @@ struct proto { #ifdef SOCK_REFCNT_DEBUG atomic_t socks; #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM /* * cgroup specific init/deinit functions. Called once for all * protocols that implement it, from cgroups populate function. @@ -994,7 +994,7 @@ inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ -#if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET) extern struct static_key memcg_socket_limit_enabled; static inline struct cg_proto *parent_cg_proto(struct proto *proto, struct cg_proto *cg_proto) diff --git a/init/Kconfig b/init/Kconfig index 72437760e90e..af6c7f8ba019 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -686,7 +686,7 @@ config RESOURCE_COUNTERS This option enables controller independent resource accounting infrastructure that works with cgroups. -config CGROUP_MEM_RES_CTLR +config MEMCG bool "Memory Resource Controller for Control Groups" depends on RESOURCE_COUNTERS select MM_OWNER @@ -709,9 +709,9 @@ config CGROUP_MEM_RES_CTLR This config option also selects MM_OWNER config option, which could in turn add some fork/exit overhead. -config CGROUP_MEM_RES_CTLR_SWAP +config MEMCG_SWAP bool "Memory Resource Controller Swap Extension" - depends on CGROUP_MEM_RES_CTLR && SWAP + depends on MEMCG && SWAP help Add swap management feature to memory resource controller. When you enable this, you can limit mem+swap usage per cgroup. In other words, @@ -726,9 +726,9 @@ config CGROUP_MEM_RES_CTLR_SWAP if boot option "swapaccount=0" is set, swap will not be accounted. Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page size is 4096bytes, 512k per 1Gbytes of swap. -config CGROUP_MEM_RES_CTLR_SWAP_ENABLED +config MEMCG_SWAP_ENABLED bool "Memory Resource Controller Swap Extension enabled by default" - depends on CGROUP_MEM_RES_CTLR_SWAP + depends on MEMCG_SWAP default y help Memory Resource Controller Swap Extension comes with its price in @@ -739,9 +739,9 @@ config CGROUP_MEM_RES_CTLR_SWAP_ENABLED For those who want to have the feature enabled by default should select this option (if, for some reason, they need to disable it then swapaccount=0 does the trick). -config CGROUP_MEM_RES_CTLR_KMEM +config MEMCG_KMEM bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)" - depends on CGROUP_MEM_RES_CTLR && EXPERIMENTAL + depends on MEMCG && EXPERIMENTAL default n help The Kernel Memory extension for Memory Resource Controller can limit diff --git a/kernel/fork.c b/kernel/fork.c index aaa8813c45d1..3bd2280d79f6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1306,7 +1306,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif diff --git a/mm/Makefile b/mm/Makefile index fd6fc1c1966c..290bbfe33698 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o -obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o +obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index cc448bb983ba..3a61efc518d5 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -123,7 +123,7 @@ static int pfn_inject_init(void) if (!dentry) goto fail; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP dentry = debugfs_create_u64("corrupt-filter-memcg", 0600, hwpoison_dir, &hwpoison_filter_memcg); if (!dentry) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a2677e0a6387..55a85e1a342f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -61,12 +61,12 @@ struct cgroup_subsys mem_cgroup_subsys __read_mostly; #define MEM_CGROUP_RECLAIM_RETRIES 5 static struct mem_cgroup *root_mem_cgroup __read_mostly; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ int do_swap_account __read_mostly; /* for remember boot option*/ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED +#ifdef CONFIG_MEMCG_SWAP_ENABLED static int really_do_swap_account __initdata = 1; #else static int really_do_swap_account __initdata = 0; @@ -407,7 +407,7 @@ static void mem_cgroup_get(struct mem_cgroup *memcg); static void mem_cgroup_put(struct mem_cgroup *memcg); /* Writing them here to avoid exposing memcg's inner layout */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM #include #include @@ -466,9 +466,9 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg) } EXPORT_SYMBOL(tcp_proto_cgroup); #endif /* CONFIG_INET */ -#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ +#endif /* CONFIG_MEMCG_KMEM */ -#if defined(CONFIG_INET) && defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) +#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) static void disarm_sock_keys(struct mem_cgroup *memcg) { if (!memcg_proto_activated(&memcg->tcp_mem.cg_proto)) @@ -3085,7 +3085,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) } #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP /* * called from swap_entry_free(). remove record in swap_cgroup and * uncharge "memsw" account. @@ -4518,7 +4518,7 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp, return 0; } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { return mem_cgroup_sockets_init(memcg, ss); @@ -4608,7 +4608,7 @@ static struct cftype mem_cgroup_files[] = { .read_seq_string = mem_control_numa_stat_show, }, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP { .name = "memsw.usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), @@ -4795,7 +4795,7 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) } EXPORT_SYMBOL(parent_mem_cgroup); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP static void __init enable_swap_cgroup(void) { if (!mem_cgroup_disabled() && really_do_swap_account) @@ -5526,7 +5526,7 @@ struct cgroup_subsys mem_cgroup_subsys = { .__DEPRECATED_clear_css_refs = true, }; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP static int __init enable_swap_account(char *s) { /* consider enabled if no parameter or 1 is given */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b04ff2d6f73d..a6e2141a6610 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -128,7 +128,7 @@ static int hwpoison_filter_flags(struct page *p) * can only guarantee that the page either belongs to the memcg tasks, or is * a freed page. */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP u64 hwpoison_filter_memcg; EXPORT_SYMBOL_GPL(hwpoison_filter_memcg); static int hwpoison_filter_task(struct page *p) diff --git a/mm/mmzone.c b/mm/mmzone.c index 6830eab5bf09..3cef80f6ac79 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -96,7 +96,7 @@ void lruvec_init(struct lruvec *lruvec, struct zone *zone) for_each_lru(lru) INIT_LIST_HEAD(&lruvec->lists[lru]); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG lruvec->zone = zone; #endif } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c82ede69bf3f..e6c10640e56b 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -541,7 +541,7 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, int order) { diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index eb750f851395..5ddad0c6daa6 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -317,7 +317,7 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP static DEFINE_MUTEX(swap_cgroup_mutex); struct swap_cgroup_ctrl { diff --git a/mm/vmscan.c b/mm/vmscan.c index 347b3ff2a478..6b1f89a91212 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -133,7 +133,7 @@ long vm_total_pages; /* The total number of pages which the VM controls */ static LIST_HEAD(shrinker_list); static DECLARE_RWSEM(shrinker_rwsem); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG static bool global_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup; @@ -2142,7 +2142,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, return nr_reclaimed; } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, gfp_t gfp_mask, bool noswap, diff --git a/net/core/sock.c b/net/core/sock.c index 2676a88f533e..a67b06280e4c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -142,7 +142,7 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { struct proto *proto; diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ae2ccf2890e4..15ca63ec604e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o -obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o +obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5840c3255721..ed7db3f1b6f2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -184,7 +184,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, int ret; unsigned long vec[3]; struct net *net = current->nsproxy->net_ns; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM struct mem_cgroup *memcg; #endif @@ -203,7 +203,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, if (ret) return ret; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM rcu_read_lock(); memcg = mem_cgroup_from_task(current); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2fbd9921253f..4bc8f6769b57 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2633,7 +2633,7 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM .init_cgroup = tcp_init_cgroup, .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 221224e72507..61c7b6d83176 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2015,7 +2015,7 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM .proto_cgroup = tcp_proto_cgroup, #endif }; -- cgit v1.2.3 From 99a1dec70d5acbd8c6b3928cdebb4a2d1da676c8 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:14 -0700 Subject: net: introduce sk_gfp_atomic() to allow addition of GFP flags depending on the individual socket Introduce sk_gfp_atomic(), this function allows to inject sock specific flags to each sock related allocation. It is only used on allocation paths that may be required for writing pages back to network storage. [davem@davemloft.net: Use sk_gfp_atomic only when necessary] Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Acked-by: David S. Miller Cc: Neil Brown Cc: Mike Christie Cc: Eric B Munson Cc: Eric Dumazet Cc: Sebastian Andrzej Siewior Cc: Mel Gorman Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/sock.h | 5 +++++ net/ipv4/tcp_output.c | 12 +++++++----- net/ipv6/tcp_ipv6.c | 8 +++++--- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index cee528c119ca..11ccde65c4cf 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -658,6 +658,11 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag) return test_bit(flag, &sk->sk_flags); } +static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask) +{ + return GFP_ATOMIC; +} + static inline void sk_acceptq_removed(struct sock *sk) { sk->sk_ack_backlog--; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 33cd065cfbd8..3f1bcff0b10b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2045,7 +2045,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, if (unlikely(sk->sk_state == TCP_CLOSE)) return; - if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC)) + if (tcp_write_xmit(sk, cur_mss, nonagle, 0, + sk_gfp_atomic(sk, GFP_ATOMIC))) tcp_check_probe_timer(sk); } @@ -2666,7 +2667,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) s_data_desired = cvp->s_data_desired; - skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC); + skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, + sk_gfp_atomic(sk, GFP_ATOMIC)); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -3064,7 +3066,7 @@ void tcp_send_ack(struct sock *sk) * tcp_transmit_skb() will set the ownership to this * sock. */ - buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); + buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); if (buff == NULL) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; @@ -3079,7 +3081,7 @@ void tcp_send_ack(struct sock *sk) /* Send it off, this clears delayed acks for us. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; - tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); + tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); } /* This routine sends a packet with an out of date sequence @@ -3099,7 +3101,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) struct sk_buff *skb; /* We don't queue it, tcp_transmit_skb() sets ownership. */ - skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); + skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); if (skb == NULL) return -1; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 61c7b6d83176..c66b90f71c9b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1299,7 +1299,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; if (treq->pktopts != NULL) { - newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC); + newnp->pktoptions = skb_clone(treq->pktopts, + sk_gfp_atomic(sk, GFP_ATOMIC)); consume_skb(treq->pktopts); treq->pktopts = NULL; if (newnp->pktoptions) @@ -1349,7 +1350,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * across. Shucks. */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, - AF_INET6, key->key, key->keylen, GFP_ATOMIC); + AF_INET6, key->key, key->keylen, + sk_gfp_atomic(sk, GFP_ATOMIC)); } #endif @@ -1442,7 +1444,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, GFP_ATOMIC); + opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ sock_rps_save_rxhash(sk, skb); -- cgit v1.2.3 From 7cb0240492caea2f6467f827313478f41877e6ef Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:16 -0700 Subject: netvm: allow the use of __GFP_MEMALLOC by specific sockets Allow specific sockets to be tagged SOCK_MEMALLOC and use __GFP_MEMALLOC for their allocations. These sockets will be able to go below watermarks and allocate from the emergency reserve. Such sockets are to be used to service the VM (iow. to swap over). They must be handled kernel side, exposing such a socket to user-space is a bug. There is a risk that the reserves be depleted so for now, the administrator is responsible for increasing min_free_kbytes as necessary to prevent deadlock for their workloads. [a.p.zijlstra@chello.nl: Original patches] Signed-off-by: Mel Gorman Acked-by: David S. Miller Cc: Neil Brown Cc: Peter Zijlstra Cc: Mike Christie Cc: Eric B Munson Cc: Eric Dumazet Cc: Sebastian Andrzej Siewior Cc: Mel Gorman Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/sock.h | 5 ++++- net/core/sock.c | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 11ccde65c4cf..bfa7d20e6646 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -621,6 +621,7 @@ enum sock_flags { SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */ SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ + SOCK_MEMALLOC, /* VM depends on this socket for swapping */ SOCK_TIMESTAMPING_TX_HARDWARE, /* %SOF_TIMESTAMPING_TX_HARDWARE */ SOCK_TIMESTAMPING_TX_SOFTWARE, /* %SOF_TIMESTAMPING_TX_SOFTWARE */ SOCK_TIMESTAMPING_RX_HARDWARE, /* %SOF_TIMESTAMPING_RX_HARDWARE */ @@ -660,7 +661,7 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag) static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask) { - return GFP_ATOMIC; + return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC); } static inline void sk_acceptq_removed(struct sock *sk) @@ -803,6 +804,8 @@ extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p); extern void sk_stream_wait_close(struct sock *sk, long timeo_p); extern int sk_stream_error(struct sock *sk, int flags, int err); extern void sk_stream_kill_queues(struct sock *sk); +extern void sk_set_memalloc(struct sock *sk); +extern void sk_clear_memalloc(struct sock *sk); extern int sk_wait_data(struct sock *sk, long *timeo); diff --git a/net/core/sock.c b/net/core/sock.c index a67b06280e4c..3617f652f6b0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -271,6 +271,28 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); +/** + * sk_set_memalloc - sets %SOCK_MEMALLOC + * @sk: socket to set it on + * + * Set %SOCK_MEMALLOC on a socket for access to emergency reserves. + * It's the responsibility of the admin to adjust min_free_kbytes + * to meet the requirements + */ +void sk_set_memalloc(struct sock *sk) +{ + sock_set_flag(sk, SOCK_MEMALLOC); + sk->sk_allocation |= __GFP_MEMALLOC; +} +EXPORT_SYMBOL_GPL(sk_set_memalloc); + +void sk_clear_memalloc(struct sock *sk) +{ + sock_reset_flag(sk, SOCK_MEMALLOC); + sk->sk_allocation &= ~__GFP_MEMALLOC; +} +EXPORT_SYMBOL_GPL(sk_clear_memalloc); + #if defined(CONFIG_CGROUPS) #if !defined(CONFIG_NET_CLS_CGROUP) int net_cls_subsys_id = -1; -- cgit v1.2.3 From c93bdd0e03e848555d144eb44a1f275b871a8dd5 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:19 -0700 Subject: netvm: allow skb allocation to use PFMEMALLOC reserves Change the skb allocation API to indicate RX usage and use this to fall back to the PFMEMALLOC reserve when needed. SKBs allocated from the reserve are tagged in skb->pfmemalloc. If an SKB is allocated from the reserve and the socket is later found to be unrelated to page reclaim, the packet is dropped so that the memory remains available for page reclaim. Network protocols are expected to recover from this packet loss. [a.p.zijlstra@chello.nl: Ideas taken from various patches] [davem@davemloft.net: Use static branches, coding style corrections] [sebastian@breakpoint.cc: Avoid unnecessary cast, fix !CONFIG_NET build] Signed-off-by: Mel Gorman Acked-by: David S. Miller Cc: Neil Brown Cc: Peter Zijlstra Cc: Mike Christie Cc: Eric B Munson Cc: Eric Dumazet Cc: Sebastian Andrzej Siewior Cc: Mel Gorman Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 3 ++ include/linux/skbuff.h | 14 +++++- include/net/sock.h | 15 ++++++ mm/internal.h | 3 -- net/core/filter.c | 8 ++++ net/core/skbuff.c | 124 +++++++++++++++++++++++++++++++++++++++---------- net/core/sock.c | 5 ++ 7 files changed, 142 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index cbd7400e5862..4883f393f50a 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -385,6 +385,9 @@ void drain_local_pages(void *dummy); */ extern gfp_t gfp_allowed_mask; +/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */ +bool gfp_pfmemalloc_allowed(gfp_t gfp_mask); + extern void pm_restrict_gfp_mask(void); extern void pm_restore_gfp_mask(void); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d205c4be7f5b..0336f02e3667 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -462,6 +462,7 @@ struct sk_buff { #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif + __u8 pfmemalloc:1; __u8 ooo_okay:1; __u8 l4_rxhash:1; __u8 wifi_acked_valid:1; @@ -502,6 +503,15 @@ struct sk_buff { #include +#define SKB_ALLOC_FCLONE 0x01 +#define SKB_ALLOC_RX 0x02 + +/* Returns true if the skb was allocated from PFMEMALLOC reserves */ +static inline bool skb_pfmemalloc(const struct sk_buff *skb) +{ + return unlikely(skb->pfmemalloc); +} + /* * skb might have a dst pointer attached, refcounted or not. * _skb_refdst low order bit is set if refcount was _not_ taken @@ -565,7 +575,7 @@ extern bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize); extern struct sk_buff *__alloc_skb(unsigned int size, - gfp_t priority, int fclone, int node); + gfp_t priority, int flags, int node); extern struct sk_buff *build_skb(void *data, unsigned int frag_size); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) @@ -576,7 +586,7 @@ static inline struct sk_buff *alloc_skb(unsigned int size, static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 1, NUMA_NO_NODE); + return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } extern void skb_recycle(struct sk_buff *skb); diff --git a/include/net/sock.h b/include/net/sock.h index bfa7d20e6646..81198632ac2a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -659,6 +659,21 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag) return test_bit(flag, &sk->sk_flags); } +#ifdef CONFIG_NET +extern struct static_key memalloc_socks; +static inline int sk_memalloc_socks(void) +{ + return static_key_false(&memalloc_socks); +} +#else + +static inline int sk_memalloc_socks(void) +{ + return 0; +} + +#endif + static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask) { return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC); diff --git a/mm/internal.h b/mm/internal.h index eb76b67890d0..3314f79d775a 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -279,9 +279,6 @@ static inline struct page *mem_map_next(struct page *iter, #define __paginginit __init #endif -/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */ -bool gfp_pfmemalloc_allowed(gfp_t gfp_mask); - /* Memory initialisation debug and verification */ enum mminit_level { MMINIT_WARNING, diff --git a/net/core/filter.c b/net/core/filter.c index d4ce2dc712e3..907efd27ec77 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -83,6 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) int err; struct sk_filter *filter; + /* + * If the skb was allocated from pfmemalloc reserves, only + * allow SOCK_MEMALLOC sockets to use it as this socket is + * helping free memory + */ + if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) + return -ENOMEM; + err = security_sock_rcv_skb(sk, skb); if (err) return err; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 368f65c15e4f..fe00d1208167 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) BUG(); } + +/* + * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells + * the caller if emergency pfmemalloc reserves are being used. If it is and + * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves + * may be used. Otherwise, the packet data may be discarded until enough + * memory is free + */ +#define kmalloc_reserve(size, gfp, node, pfmemalloc) \ + __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) +void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip, + bool *pfmemalloc) +{ + void *obj; + bool ret_pfmemalloc = false; + + /* + * Try a regular allocation, when that fails and we're not entitled + * to the reserves, fail. + */ + obj = kmalloc_node_track_caller(size, + flags | __GFP_NOMEMALLOC | __GFP_NOWARN, + node); + if (obj || !(gfp_pfmemalloc_allowed(flags))) + goto out; + + /* Try again but now we are using pfmemalloc reserves */ + ret_pfmemalloc = true; + obj = kmalloc_node_track_caller(size, flags, node); + +out: + if (pfmemalloc) + *pfmemalloc = ret_pfmemalloc; + + return obj; +} + /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. @@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask - * @fclone: allocate from fclone cache instead of head cache - * and allocate a cloned (child) skb + * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache + * instead of head cache and allocate a cloned (child) skb. + * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for + * allocations in case the data is required for writeback * @node: numa node to allocate memory on * * Allocate a new &sk_buff. The returned buffer has no headroom and a @@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, - int fclone, int node) + int flags, int node) { struct kmem_cache *cache; struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; + bool pfmemalloc; - cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; + cache = (flags & SKB_ALLOC_FCLONE) + ? skbuff_fclone_cache : skbuff_head_cache; + + if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) + gfp_mask |= __GFP_MEMALLOC; /* Get the HEAD */ skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); @@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, */ size = SKB_DATA_ALIGN(size); size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - data = kmalloc_node_track_caller(size, gfp_mask, node); + data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc); if (!data) goto nodata; /* kmalloc(size) might give us more room than requested. @@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, memset(skb, 0, offsetof(struct sk_buff, tail)); /* Account for allocated memory : skb + skb->head */ skb->truesize = SKB_TRUESIZE(size); + skb->pfmemalloc = pfmemalloc; atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, atomic_set(&shinfo->dataref, 1); kmemcheck_annotate_variable(shinfo->destructor_arg); - if (fclone) { + if (flags & SKB_ALLOC_FCLONE) { struct sk_buff *child = skb + 1; atomic_t *fclone_ref = (atomic_t *) (child + 1); @@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, atomic_set(fclone_ref, 1); child->fclone = SKB_FCLONE_UNAVAILABLE; + child->pfmemalloc = pfmemalloc; } out: return skb; @@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); #define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES) -/** - * netdev_alloc_frag - allocate a page fragment - * @fragsz: fragment size - * - * Allocates a frag from a page for receive buffer. - * Uses GFP_ATOMIC allocations. - */ -void *netdev_alloc_frag(unsigned int fragsz) +static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { struct netdev_alloc_cache *nc; void *data = NULL; @@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz) nc = &__get_cpu_var(netdev_alloc_cache); if (unlikely(!nc->page)) { refill: - nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD); + nc->page = alloc_page(gfp_mask); if (unlikely(!nc->page)) goto end; recycle: @@ -343,6 +382,18 @@ end: local_irq_restore(flags); return data; } + +/** + * netdev_alloc_frag - allocate a page fragment + * @fragsz: fragment size + * + * Allocates a frag from a page for receive buffer. + * Uses GFP_ATOMIC allocations. + */ +void *netdev_alloc_frag(unsigned int fragsz) +{ + return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); +} EXPORT_SYMBOL(netdev_alloc_frag); /** @@ -366,7 +417,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { - void *data = netdev_alloc_frag(fragsz); + void *data; + + if (sk_memalloc_socks()) + gfp_mask |= __GFP_MEMALLOC; + + data = __netdev_alloc_frag(fragsz, gfp_mask); if (likely(data)) { skb = build_skb(data, fragsz); @@ -374,7 +430,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, put_page(virt_to_head_page(data)); } } else { - skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE); + skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, + SKB_ALLOC_RX, NUMA_NO_NODE); } if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); @@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #if IS_ENABLED(CONFIG_IP_VS) new->ipvs_property = old->ipvs_property; #endif + new->pfmemalloc = old->pfmemalloc; new->protocol = old->protocol; new->mark = old->mark; new->skb_iif = old->skb_iif; @@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->fclone = SKB_FCLONE_CLONE; atomic_inc(fclone_ref); } else { + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); if (!n) return NULL; @@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } +static inline int skb_alloc_rx_flag(const struct sk_buff *skb) +{ + if (skb_pfmemalloc(skb)) + return SKB_ALLOC_RX; + return 0; +} + /** * skb_copy - create private copy of an sk_buff * @skb: buffer to copy @@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { int headerlen = skb_headroom(skb); unsigned int size = skb_end_offset(skb) + skb->data_len; - struct sk_buff *n = alloc_skb(size, gfp_mask); + struct sk_buff *n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; @@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy); struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) { unsigned int size = skb_headlen(skb) + headroom; - struct sk_buff *n = alloc_skb(size, gfp_mask); + struct sk_buff *n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) goto out; @@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, size = SKB_DATA_ALIGN(size); - data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), - gfp_mask); + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), + gfp_mask, NUMA_NO_NODE, NULL); if (!data) goto nodata; size = SKB_WITH_OVERHEAD(ksize(data)); @@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, /* * Allocate the copy buffer */ - struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask); + struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, + gfp_mask, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; int off; @@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_release_head_state(nskb); __skb_push(nskb, doffset); } else { - nskb = alloc_skb(hsize + doffset + headroom, - GFP_ATOMIC); + nskb = __alloc_skb(hsize + doffset + headroom, + GFP_ATOMIC, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); if (unlikely(!nskb)) goto err; diff --git a/net/core/sock.c b/net/core/sock.c index 3617f652f6b0..c8c5816289fe 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -271,6 +271,9 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); +struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE; +EXPORT_SYMBOL_GPL(memalloc_socks); + /** * sk_set_memalloc - sets %SOCK_MEMALLOC * @sk: socket to set it on @@ -283,6 +286,7 @@ void sk_set_memalloc(struct sock *sk) { sock_set_flag(sk, SOCK_MEMALLOC); sk->sk_allocation |= __GFP_MEMALLOC; + static_key_slow_inc(&memalloc_socks); } EXPORT_SYMBOL_GPL(sk_set_memalloc); @@ -290,6 +294,7 @@ void sk_clear_memalloc(struct sock *sk) { sock_reset_flag(sk, SOCK_MEMALLOC); sk->sk_allocation &= ~__GFP_MEMALLOC; + static_key_slow_dec(&memalloc_socks); } EXPORT_SYMBOL_GPL(sk_clear_memalloc); -- cgit v1.2.3 From b4b9e3558508980fc0cd161a545ffb55a1f13ee9 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:26 -0700 Subject: netvm: set PF_MEMALLOC as appropriate during SKB processing In order to make sure pfmemalloc packets receive all memory needed to proceed, ensure processing of pfmemalloc SKBs happens under PF_MEMALLOC. This is limited to a subset of protocols that are expected to be used for writing to swap. Taps are not allowed to use PF_MEMALLOC as these are expected to communicate with userspace processes which could be paged out. [a.p.zijlstra@chello.nl: Ideas taken from various patches] [jslaby@suse.cz: Lock imbalance fix] Signed-off-by: Mel Gorman Acked-by: David S. Miller Cc: Neil Brown Cc: Peter Zijlstra Cc: Mike Christie Cc: Eric B Munson Cc: Eric Dumazet Cc: Sebastian Andrzej Siewior Cc: Mel Gorman Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/sock.h | 5 +++++ net/core/dev.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++------ net/core/sock.c | 16 ++++++++++++++++ 3 files changed, 68 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 81198632ac2a..43a470d40d76 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -754,8 +754,13 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s return 0; } +extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + return __sk_backlog_rcv(sk, skb); + return sk->sk_backlog_rcv(sk, skb); } diff --git a/net/core/dev.c b/net/core/dev.c index 0ebaea16632f..ce132443d5d1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3155,6 +3155,23 @@ void netdev_rx_handler_unregister(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); +/* + * Limit the use of PFMEMALLOC reserves to those protocols that implement + * the special handling of PFMEMALLOC skbs. + */ +static bool skb_pfmemalloc_protocol(struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_ARP): + case __constant_htons(ETH_P_IP): + case __constant_htons(ETH_P_IPV6): + case __constant_htons(ETH_P_8021Q): + return true; + default: + return false; + } +} + static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -3164,14 +3181,27 @@ static int __netif_receive_skb(struct sk_buff *skb) bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; + unsigned long pflags = current->flags; net_timestamp_check(!netdev_tstamp_prequeue, skb); trace_netif_receive_skb(skb); + /* + * PFMEMALLOC skbs are special, they should + * - be delivered to SOCK_MEMALLOC sockets only + * - stay away from userspace + * - have bounded memory usage + * + * Use PF_MEMALLOC as this saves us from propagating the allocation + * context down to all allocation sites. + */ + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + current->flags |= PF_MEMALLOC; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) - return NET_RX_DROP; + goto out; orig_dev = skb->dev; @@ -3191,7 +3221,7 @@ another_round: if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { skb = vlan_untag(skb); if (unlikely(!skb)) - goto out; + goto unlock; } #ifdef CONFIG_NET_CLS_ACT @@ -3201,6 +3231,9 @@ another_round: } #endif + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + goto skip_taps; + list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) @@ -3209,13 +3242,18 @@ another_round: } } +skip_taps: #ifdef CONFIG_NET_CLS_ACT skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto out; + goto unlock; ncls: #endif + if (sk_memalloc_socks() && skb_pfmemalloc(skb) + && !skb_pfmemalloc_protocol(skb)) + goto drop; + rx_handler = rcu_dereference(skb->dev->rx_handler); if (vlan_tx_tag_present(skb)) { if (pt_prev) { @@ -3225,7 +3263,7 @@ ncls: if (vlan_do_receive(&skb, !rx_handler)) goto another_round; else if (unlikely(!skb)) - goto out; + goto unlock; } if (rx_handler) { @@ -3235,7 +3273,7 @@ ncls: } switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: - goto out; + goto unlock; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3268,6 +3306,7 @@ ncls: else ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { +drop: atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); /* Jamal, now you will not able to escape explaining @@ -3276,8 +3315,10 @@ ncls: ret = NET_RX_DROP; } -out: +unlock: rcu_read_unlock(); +out: + tsk_restore_flags(current, pflags, PF_MEMALLOC); return ret; } diff --git a/net/core/sock.c b/net/core/sock.c index c8c5816289fe..32fdcd2d6e8f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -298,6 +298,22 @@ void sk_clear_memalloc(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_clear_memalloc); +int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + unsigned long pflags = current->flags; + + /* these should have been dropped before queueing */ + BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); + + current->flags |= PF_MEMALLOC; + ret = sk->sk_backlog_rcv(sk, skb); + tsk_restore_flags(current, pflags, PF_MEMALLOC); + + return ret; +} +EXPORT_SYMBOL(__sk_backlog_rcv); + #if defined(CONFIG_CGROUPS) #if !defined(CONFIG_NET_CLS_CGROUP) int net_cls_subsys_id = -1; -- cgit v1.2.3 From c76562b6709fee5eff8a6a779be41c0bce661fd7 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:41 -0700 Subject: netvm: prevent a stream-specific deadlock This patch series is based on top of "Swap-over-NBD without deadlocking v15" as it depends on the same reservation of PF_MEMALLOC reserves logic. When a user or administrator requires swap for their application, they create a swap partition and file, format it with mkswap and activate it with swapon. In diskless systems this is not an option so if swap if required then swapping over the network is considered. The two likely scenarios are when blade servers are used as part of a cluster where the form factor or maintenance costs do not allow the use of disks and thin clients. The Linux Terminal Server Project recommends the use of the Network Block Device (NBD) for swap but this is not always an option. There is no guarantee that the network attached storage (NAS) device is running Linux or supports NBD. However, it is likely that it supports NFS so there are users that want support for swapping over NFS despite any performance concern. Some distributions currently carry patches that support swapping over NFS but it would be preferable to support it in the mainline kernel. Patch 1 avoids a stream-specific deadlock that potentially affects TCP. Patch 2 is a small modification to SELinux to avoid using PFMEMALLOC reserves. Patch 3 adds three helpers for filesystems to handle swap cache pages. For example, page_file_mapping() returns page->mapping for file-backed pages and the address_space of the underlying swap file for swap cache pages. Patch 4 adds two address_space_operations to allow a filesystem to pin all metadata relevant to a swapfile in memory. Upon successful activation, the swapfile is marked SWP_FILE and the address space operation ->direct_IO is used for writing and ->readpage for reading in swap pages. Patch 5 notes that patch 3 is bolting filesystem-specific-swapfile-support onto the side and that the default handlers have different information to what is available to the filesystem. This patch refactors the code so that there are generic handlers for each of the new address_space operations. Patch 6 adds an API to allow a vector of kernel addresses to be translated to struct pages and pinned for IO. Patch 7 adds support for using highmem pages for swap by kmapping the pages before calling the direct_IO handler. Patch 8 updates NFS to use the helpers from patch 3 where necessary. Patch 9 avoids setting PF_private on PG_swapcache pages within NFS. Patch 10 implements the new swapfile-related address_space operations for NFS and teaches the direct IO handler how to manage kernel addresses. Patch 11 prevents page allocator recursions in NFS by using GFP_NOIO where appropriate. Patch 12 fixes a NULL pointer dereference that occurs when using swap-over-NFS. With the patches applied, it is possible to mount a swapfile that is on an NFS filesystem. Swap performance is not great with a swap stress test taking roughly twice as long to complete than if the swap device was backed by NBD. This patch: netvm: prevent a stream-specific deadlock It could happen that all !SOCK_MEMALLOC sockets have buffered so much data that we're over the global rmem limit. This will prevent SOCK_MEMALLOC buffers from receiving data, which will prevent userspace from running, which is needed to reduce the buffered data. Fix this by exempting the SOCK_MEMALLOC sockets from the rmem limit. Once this change it applied, it is important that sockets that set SOCK_MEMALLOC do not clear the flag until the socket is being torn down. If this happens, a warning is generated and the tokens reclaimed to avoid accounting errors until the bug is fixed. [davem@davemloft.net: Warning about clearing SOCK_MEMALLOC] Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Acked-by: David S. Miller Acked-by: Rik van Riel Cc: Trond Myklebust Cc: Neil Brown Cc: Christoph Hellwig Cc: Mike Christie Cc: Eric B Munson Cc: Sebastian Andrzej Siewior Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/sock.h | 8 +++++--- net/caif/caif_socket.c | 2 +- net/core/sock.c | 14 +++++++++++++- net/ipv4/tcp_input.c | 21 +++++++++++---------- net/sctp/ulpevent.c | 3 ++- 5 files changed, 32 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 43a470d40d76..b3730239bf18 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1329,12 +1329,14 @@ static inline bool sk_wmem_schedule(struct sock *sk, int size) __sk_mem_schedule(sk, size, SK_MEM_SEND); } -static inline bool sk_rmem_schedule(struct sock *sk, int size) +static inline bool +sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, unsigned int size) { if (!sk_has_account(sk)) return true; - return size <= sk->sk_forward_alloc || - __sk_mem_schedule(sk, size, SK_MEM_RECV); + return size<= sk->sk_forward_alloc || + __sk_mem_schedule(sk, size, SK_MEM_RECV) || + skb_pfmemalloc(skb); } static inline void sk_mem_reclaim(struct sock *sk) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 78f1cdad5b33..095259f83902 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -141,7 +141,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) err = sk_filter(sk, skb); if (err) return err; - if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) { + if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) { set_rx_flow_off(cf_sk); net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n"); caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); diff --git a/net/core/sock.c b/net/core/sock.c index 32fdcd2d6e8f..6b654b3ddfda 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -295,6 +295,18 @@ void sk_clear_memalloc(struct sock *sk) sock_reset_flag(sk, SOCK_MEMALLOC); sk->sk_allocation &= ~__GFP_MEMALLOC; static_key_slow_dec(&memalloc_socks); + + /* + * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward + * progress of swapping. However, if SOCK_MEMALLOC is cleared while + * it has rmem allocations there is a risk that the user of the + * socket cannot make forward progress due to exceeding the rmem + * limits. By rights, sk_clear_memalloc() should only be called + * on sockets being torn down but warn and reset the accounting if + * that assumption breaks. + */ + if (WARN_ON(sk->sk_forward_alloc)) + sk_mem_reclaim(sk); } EXPORT_SYMBOL_GPL(sk_clear_memalloc); @@ -396,7 +408,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) return err; - if (!sk_rmem_schedule(sk, skb->truesize)) { + if (!sk_rmem_schedule(sk, skb, skb->truesize)) { atomic_inc(&sk->sk_drops); return -ENOBUFS; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a356e1fecf9a..00b91b4b8665 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4351,19 +4351,20 @@ static void tcp_ofo_queue(struct sock *sk) static bool tcp_prune_ofo_queue(struct sock *sk); static int tcp_prune_queue(struct sock *sk); -static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) +static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, + unsigned int size) { if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - !sk_rmem_schedule(sk, size)) { + !sk_rmem_schedule(sk, skb, size)) { if (tcp_prune_queue(sk) < 0) return -1; - if (!sk_rmem_schedule(sk, size)) { + if (!sk_rmem_schedule(sk, skb, size)) { if (!tcp_prune_ofo_queue(sk)) return -1; - if (!sk_rmem_schedule(sk, size)) + if (!sk_rmem_schedule(sk, skb, size)) return -1; } } @@ -4418,7 +4419,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) TCP_ECN_check_ce(tp, skb); - if (unlikely(tcp_try_rmem_schedule(sk, skb->truesize))) { + if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); __kfree_skb(skb); return; @@ -4552,17 +4553,17 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct tcphdr *th; bool fragstolen; - if (tcp_try_rmem_schedule(sk, size + sizeof(*th))) - goto err; - skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); if (!skb) goto err; + if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th))) + goto err_free; + th = (struct tcphdr *)skb_put(skb, sizeof(*th)); skb_reset_transport_header(skb); memset(th, 0, sizeof(*th)); @@ -4633,7 +4634,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (eaten <= 0) { queue_and_out: if (eaten < 0 && - tcp_try_rmem_schedule(sk, skb->truesize)) + tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto drop; eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 33d894776192..10c018a5b9fe 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -702,7 +702,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, if (rx_count >= asoc->base.sk->sk_rcvbuf) { if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || - (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize))) + (!sk_rmem_schedule(asoc->base.sk, chunk->skb, + chunk->skb->truesize))) goto fail; } -- cgit v1.2.3 From 1485348d2424e1131ea42efc033cbd9366462b01 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 30 Jul 2012 16:11:42 +0000 Subject: tcp: Apply device TSO segment limit earlier Cache the device gso_max_segs in sock::sk_gso_max_segs and use it to limit the size of TSO skbs. This avoids the need to fall back to software GSO for local TCP senders. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ net/core/sock.c | 1 + net/ipv4/tcp.c | 4 +++- net/ipv4/tcp_cong.c | 3 ++- net/ipv4/tcp_output.c | 21 ++++++++++++--------- 5 files changed, 20 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index b3730239bf18..72132aef53fc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -218,6 +218,7 @@ struct cg_proto; * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build + * @sk_gso_max_segs: Maximum number of GSO segments * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct @@ -338,6 +339,7 @@ struct sock { netdev_features_t sk_route_nocaps; int sk_gso_type; unsigned int sk_gso_max_size; + u16 sk_gso_max_segs; int sk_rcvlowat; unsigned long sk_lingertime; struct sk_buff_head sk_error_queue; diff --git a/net/core/sock.c b/net/core/sock.c index 6b654b3ddfda..8f67ced8d6a8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1458,6 +1458,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; sk->sk_gso_max_size = dst->dev->gso_max_size; + sk->sk_gso_max_segs = dst->dev->gso_max_segs; } } } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e7e6eeae49c0..2109ff4a1daf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -811,7 +811,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, old_size_goal + mss_now > xmit_size_goal)) { xmit_size_goal = old_size_goal; } else { - tp->xmit_size_goal_segs = xmit_size_goal / mss_now; + tp->xmit_size_goal_segs = + min_t(u16, xmit_size_goal / mss_now, + sk->sk_gso_max_segs); xmit_size_goal = tp->xmit_size_goal_segs * mss_now; } } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4d4db16e336e..1432cdb0644c 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) left = tp->snd_cwnd - in_flight; if (sk_can_gso(sk) && left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && - left * tp->mss_cache < sk->sk_gso_max_size) + left * tp->mss_cache < sk->sk_gso_max_size && + left < sk->sk_gso_max_segs) return true; return left <= tcp_max_tso_deferred_mss(tp); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3f1bcff0b10b..a7b3ec9b6c3e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1522,21 +1522,21 @@ static void tcp_cwnd_validate(struct sock *sk) * when we would be allowed to send the split-due-to-Nagle skb fully. */ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, - unsigned int mss_now, unsigned int cwnd) + unsigned int mss_now, unsigned int max_segs) { const struct tcp_sock *tp = tcp_sk(sk); - u32 needed, window, cwnd_len; + u32 needed, window, max_len; window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; - cwnd_len = mss_now * cwnd; + max_len = mss_now * max_segs; - if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk))) - return cwnd_len; + if (likely(max_len <= window && skb != tcp_write_queue_tail(sk))) + return max_len; needed = min(skb->len, window); - if (cwnd_len <= needed) - return cwnd_len; + if (max_len <= needed) + return max_len; return needed - needed % mss_now; } @@ -1765,7 +1765,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) limit = min(send_win, cong_win); /* If a full-sized TSO skb can be sent, do it. */ - if (limit >= sk->sk_gso_max_size) + if (limit >= min_t(unsigned int, sk->sk_gso_max_size, + sk->sk_gso_max_segs * tp->mss_cache)) goto send_now; /* Middle in queue won't get any more data, full sendable already? */ @@ -1999,7 +2000,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, - cwnd_quota); + min_t(unsigned int, + cwnd_quota, + sk->sk_gso_max_segs)); if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) -- cgit v1.2.3 From e3c0d04750751389d5116267f8cf4687444d9a50 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Mon, 30 Jul 2012 21:43:54 +0000 Subject: Fix unexpected SA hard expiration after changing date After SA is setup, one timer is armed to detect soft/hard expiration, however the timer handler uses xtime to do the math. This makes hard expiration occurs first before soft expiration after setting new date with big interval. As a result new child SA is deleted before rekeying the new one. Signed-off-by: Fan Du Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++++ net/xfrm/xfrm_state.c | 21 +++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d9509eb29b80..62b619e82a90 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -213,6 +213,9 @@ struct xfrm_state { struct xfrm_lifetime_cur curlft; struct tasklet_hrtimer mtimer; + /* used to fix curlft->add_time when changing date */ + long saved_tmo; + /* Last used time */ unsigned long lastused; @@ -238,6 +241,7 @@ static inline struct net *xs_net(struct xfrm_state *x) /* xflags - make enum if more show up */ #define XFRM_TIME_DEFER 1 +#define XFRM_SOFT_EXPIRE 2 enum { XFRM_STATE_VOID, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5b228f97d4b3..87cd0e4d4282 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -415,8 +415,17 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) if (x->lft.hard_add_expires_seconds) { long tmo = x->lft.hard_add_expires_seconds + x->curlft.add_time - now; - if (tmo <= 0) - goto expired; + if (tmo <= 0) { + if (x->xflags & XFRM_SOFT_EXPIRE) { + /* enter hard expire without soft expire first?! + * setting a new date could trigger this. + * workarbound: fix x->curflt.add_time by below: + */ + x->curlft.add_time = now - x->saved_tmo - 1; + tmo = x->lft.hard_add_expires_seconds - x->saved_tmo; + } else + goto expired; + } if (tmo < next) next = tmo; } @@ -433,10 +442,14 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) if (x->lft.soft_add_expires_seconds) { long tmo = x->lft.soft_add_expires_seconds + x->curlft.add_time - now; - if (tmo <= 0) + if (tmo <= 0) { warn = 1; - else if (tmo < next) + x->xflags &= ~XFRM_SOFT_EXPIRE; + } else if (tmo < next) { next = tmo; + x->xflags |= XFRM_SOFT_EXPIRE; + x->saved_tmo = tmo; + } } if (x->lft.soft_use_expires_seconds) { long tmo = x->lft.soft_use_expires_seconds + -- cgit v1.2.3 From 03f6b0843ad6512f27bc2e48f04c21065311e03e Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Wed, 1 Aug 2012 15:58:42 -0500 Subject: cfg80211: add channel flag to prohibit OFDM operation Currently the only way for wireless drivers to tell whether or not OFDM is allowed on the current channel is to check the regulatory information. However, this requires hodling cfg80211_mutex, which is not visible to the drivers. Other regulatory restrictions are provided as flags in the channel definition, so let's do similarly with OFDM. This patch adds a new flag, IEEE80211_CHAN_NO_OFDM, to tell drivers that OFDM on a channel is not allowed. This flag is set on any channels for which regulatory indicates that OFDM is prohibited. Signed-off-by: Seth Forshee Tested-by: Arend van Spriel Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/wireless/reg.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 493fa0c79005..3d254e10ff30 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -96,6 +96,7 @@ enum ieee80211_band { * is not permitted. * @IEEE80211_CHAN_NO_HT40MINUS: extension channel below this channel * is not permitted. + * @IEEE80211_CHAN_NO_OFDM: OFDM is not allowed on this channel. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, @@ -104,6 +105,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_RADAR = 1<<3, IEEE80211_CHAN_NO_HT40PLUS = 1<<4, IEEE80211_CHAN_NO_HT40MINUS = 1<<5, + IEEE80211_CHAN_NO_OFDM = 1<<6, }; #define IEEE80211_CHAN_NO_HT40 \ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index a9175fedeb59..cbf30de79c69 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -680,6 +680,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_IBSS; if (rd_flags & NL80211_RRF_DFS) channel_flags |= IEEE80211_CHAN_RADAR; + if (rd_flags & NL80211_RRF_NO_OFDM) + channel_flags |= IEEE80211_CHAN_NO_OFDM; return channel_flags; } -- cgit v1.2.3 From 5d299f3d3c8a2fbc732b1bf03af36333ccec3130 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Aug 2012 05:09:33 +0000 Subject: net: ipv6: fix TCP early demux IPv6 needs a cookie in dst_check() call. We need to add rx_dst_cookie and provide a family independent sk_rx_dst_set(sk, skb) method to properly support IPv6 TCP early demux. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/net/inet_connection_sock.h | 1 + include/net/inet_sock.h | 9 --------- net/ipv4/tcp_input.c | 4 +++- net/ipv4/tcp_ipv4.c | 13 ++++++++++--- net/ipv4/tcp_minisocks.c | 2 +- net/ipv6/tcp_ipv6.c | 27 +++++++++++++++++++++++++-- 7 files changed, 41 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 379e433e15e0..879db26ec401 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -369,6 +369,7 @@ struct ipv6_pinfo { __u8 rcv_tclass; __u32 dst_cookie; + __u32 rx_dst_cookie; struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 5ee66f517b4f..ba1d3615acbb 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -39,6 +39,7 @@ struct inet_connection_sock_af_ops { int (*queue_xmit)(struct sk_buff *skb, struct flowi *fl); void (*send_check)(struct sock *sk, struct sk_buff *skb); int (*rebuild_header)(struct sock *sk); + void (*sk_rx_dst_set)(struct sock *sk, const struct sk_buff *skb); int (*conn_request)(struct sock *sk, struct sk_buff *skb); struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, struct request_sock *req, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 83b567fe1941..613cfa401672 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -249,13 +249,4 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) return flags; } -static inline void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - - dst_hold(dst); - sk->sk_rx_dst = dst; - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; -} - #endif /* _INET_SOCK_H */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2fd2bc9e3c64..85308b90df80 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5392,6 +5392,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); + if (unlikely(sk->sk_rx_dst == NULL)) + inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* * Header prediction. * The code loosely follows the one in the famous @@ -5605,7 +5607,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_ESTABLISHED); if (skb != NULL) { - inet_sk_rx_dst_set(sk, skb); + icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); security_inet_conn_established(sk, skb); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 42b2a6a73092..272241f16fcb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1627,9 +1627,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_rx_dst = NULL; } } - if (unlikely(sk->sk_rx_dst == NULL)) - inet_sk_rx_dst_set(sk, skb); - if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; @@ -1872,10 +1869,20 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; +static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; +} + const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, + .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, .net_header_len = sizeof(struct iphdr), diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 232a90c3ec86..d9c9dcef2de3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -387,7 +387,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - inet_sk_rx_dst_set(newsk, skb); + newicsk->icsk_af_ops->sk_rx_dst_set(newsk, skb); /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c66b90f71c9b..5a439e9a4c01 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1447,7 +1447,17 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + struct dst_entry *dst = sk->sk_rx_dst; + sock_rps_save_rxhash(sk, skb); + if (dst) { + if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || + dst->ops->check(dst, np->rx_dst_cookie) == NULL) { + dst_release(dst); + sk->sk_rx_dst = NULL; + } + } + if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; if (opt_skb) @@ -1705,9 +1715,9 @@ static void tcp_v6_early_demux(struct sk_buff *skb) struct dst_entry *dst = sk->sk_rx_dst; struct inet_sock *icsk = inet_sk(sk); if (dst) - dst = dst_check(dst, 0); + dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); if (dst && - icsk->rx_dst_ifindex == inet6_iif(skb)) + icsk->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } @@ -1719,10 +1729,23 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; +static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + const struct rt6_info *rt = (const struct rt6_info *)dst; + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + if (rt->rt6i_node) + inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; +} + static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, + .sk_rx_dst_set = inet6_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct ipv6hdr), -- cgit v1.2.3 From a37e6e344910a43b9ebc2bbf29a029f5ea942598 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Aug 2012 10:55:45 +0000 Subject: net: force dst_default_metrics to const section While investigating on network performance problems, I found this little gem : $ nm -v vmlinux | grep -1 dst_default_metrics ffffffff82736540 b busy.46605 ffffffff82736560 B dst_default_metrics ffffffff82736598 b dst_busy_list Apparently, declaring a const array without initializer put it in (writeable) bss section, in middle of possibly often dirtied cache lines. Since we really want dst_default_metrics be const to avoid any possible false sharing and catch any buggy writes, I force a null initializer. ffffffff818a4c20 R dst_default_metrics Signed-off-by: Eric Dumazet Cc: Ben Hutchings Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- net/core/dst.c | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index baf597890064..621e3513ef5e 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -110,7 +110,7 @@ struct dst_entry { }; extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); -extern const u32 dst_default_metrics[RTAX_MAX]; +extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL #define __DST_METRICS_PTR(Y) \ diff --git a/net/core/dst.c b/net/core/dst.c index 069d51d29414..56d63612e1e4 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -149,7 +149,15 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); -const u32 dst_default_metrics[RTAX_MAX]; +const u32 dst_default_metrics[RTAX_MAX + 1] = { + /* This initializer is needed to force linker to place this variable + * into const section. Otherwise it might end into bss section. + * We really want to avoid false sharing on this variable, and catch + * any writes on it. + */ + [RTAX_MAX] = 0xdeadbeef, +}; + void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags) -- cgit v1.2.3 From 63d02d157ec4124990258d66517b6c11fd6df0cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Aug 2012 14:11:00 +0000 Subject: net: tcp: ipv6_mapped needs sk_rx_dst_set method commit 5d299f3d3c8a2fb (net: ipv6: fix TCP early demux) added a regression for ipv6_mapped case. [ 67.422369] SELinux: initialized (dev autofs, type autofs), uses genfs_contexts [ 67.449678] SELinux: initialized (dev autofs, type autofs), uses genfs_contexts [ 92.631060] BUG: unable to handle kernel NULL pointer dereference at (null) [ 92.631435] IP: [< (null)>] (null) [ 92.631645] PGD 0 [ 92.631846] Oops: 0010 [#1] SMP [ 92.632095] Modules linked in: autofs4 sunrpc ipv6 dm_mirror dm_region_hash dm_log dm_multipath dm_mod video sbs sbshc battery ac lp parport sg snd_hda_intel snd_hda_codec snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device pcspkr snd_pcm_oss snd_mixer_oss snd_pcm snd_timer serio_raw button floppy snd i2c_i801 i2c_core soundcore snd_page_alloc shpchp ide_cd_mod cdrom microcode ehci_hcd ohci_hcd uhci_hcd [ 92.634294] CPU 0 [ 92.634294] Pid: 4469, comm: sendmail Not tainted 3.6.0-rc1 #3 [ 92.634294] RIP: 0010:[<0000000000000000>] [< (null)>] (null) [ 92.634294] RSP: 0018:ffff880245fc7cb0 EFLAGS: 00010282 [ 92.634294] RAX: ffffffffa01985f0 RBX: ffff88024827ad00 RCX: 0000000000000000 [ 92.634294] RDX: 0000000000000218 RSI: ffff880254735380 RDI: ffff88024827ad00 [ 92.634294] RBP: ffff880245fc7cc8 R08: 0000000000000001 R09: 0000000000000000 [ 92.634294] R10: 0000000000000000 R11: ffff880245fc7bf8 R12: ffff880254735380 [ 92.634294] R13: ffff880254735380 R14: 0000000000000000 R15: 7fffffffffff0218 [ 92.634294] FS: 00007f4516ccd6f0(0000) GS:ffff880256600000(0000) knlGS:0000000000000000 [ 92.634294] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 92.634294] CR2: 0000000000000000 CR3: 0000000245ed1000 CR4: 00000000000007f0 [ 92.634294] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 92.634294] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 92.634294] Process sendmail (pid: 4469, threadinfo ffff880245fc6000, task ffff880254b8cac0) [ 92.634294] Stack: [ 92.634294] ffffffff813837a7 ffff88024827ad00 ffff880254b6b0e8 ffff880245fc7d68 [ 92.634294] ffffffff81385083 00000000001d2680 ffff8802547353a8 ffff880245fc7d18 [ 92.634294] ffffffff8105903a ffff88024827ad60 0000000000000002 00000000000000ff [ 92.634294] Call Trace: [ 92.634294] [] ? tcp_finish_connect+0x2c/0xfa [ 92.634294] [] tcp_rcv_state_process+0x2b6/0x9c6 [ 92.634294] [] ? sched_clock_cpu+0xc3/0xd1 [ 92.634294] [] ? local_clock+0x2b/0x3c [ 92.634294] [] tcp_v4_do_rcv+0x63a/0x670 [ 92.634294] [] release_sock+0x128/0x1bd [ 92.634294] [] __inet_stream_connect+0x1b1/0x352 [ 92.634294] [] ? lock_sock_nested+0x74/0x7f [ 92.634294] [] ? wake_up_bit+0x25/0x25 [ 92.634294] [] ? lock_sock_nested+0x74/0x7f [ 92.634294] [] ? inet_stream_connect+0x22/0x4b [ 92.634294] [] inet_stream_connect+0x33/0x4b [ 92.634294] [] sys_connect+0x78/0x9e [ 92.634294] [] ? sysret_check+0x1b/0x56 [ 92.634294] [] ? __audit_syscall_entry+0x195/0x1c8 [ 92.634294] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 92.634294] [] system_call_fastpath+0x16/0x1b [ 92.634294] Code: Bad RIP value. [ 92.634294] RIP [< (null)>] (null) [ 92.634294] RSP [ 92.634294] CR2: 0000000000000000 [ 92.648982] ---[ end trace 24e2bed94314c8d9 ]--- [ 92.649146] Kernel panic - not syncing: Fatal exception in interrupt Fix this using inet_sk_rx_dst_set(), and export this function in case IPv6 is modular. Reported-by: Andrew Morton Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index e19124b84cd2..1f000ffe7075 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -464,6 +464,7 @@ extern int tcp_disconnect(struct sock *sk, int flags); void tcp_connect_init(struct sock *sk); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size); +void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 272241f16fcb..767823764016 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1869,7 +1869,7 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; -static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -1877,6 +1877,7 @@ static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } +EXPORT_SYMBOL(inet_sk_rx_dst_set); const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a439e9a4c01..bb9ce2b2f377 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1777,6 +1777,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, + .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct iphdr), -- cgit v1.2.3 From b5ec8eeac46a99004c26791f70b15d001e970acf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Aug 2012 02:22:47 +0000 Subject: ipv4: fix ip_send_skb() ip_send_skb() can send orphaned skb, so we must pass the net pointer to avoid possible NULL dereference in error path. Bug added by commit 3a7c384ffd57 (ipv4: tcp: unicast_sock should not land outside of TCP stack) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/ip_output.c | 5 ++--- net/ipv4/udp.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index bd5e444a19ce..5a5d84d3d2c6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -120,7 +120,7 @@ extern struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork); -extern int ip_send_skb(struct sk_buff *skb); +extern int ip_send_skb(struct net *net, struct sk_buff *skb); extern int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4); extern void ip_flush_pending_frames(struct sock *sk); extern struct sk_buff *ip_make_skb(struct sock *sk, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ec410e08b4b9..147ccc3e93db 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1366,9 +1366,8 @@ out: return skb; } -int ip_send_skb(struct sk_buff *skb) +int ip_send_skb(struct net *net, struct sk_buff *skb) { - struct net *net = sock_net(skb->sk); int err; err = ip_local_out(skb); @@ -1391,7 +1390,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4) return 0; /* Netfilter gets whole the not fragmented skb. */ - return ip_send_skb(skb); + return ip_send_skb(sock_net(sk), skb); } /* diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b4c3582a991f..6f6d1aca3c3d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) uh->check = CSUM_MANGLED_0; send: - err = ip_send_skb(skb); + err = ip_send_skb(sock_net(sk), skb); if (err) { if (err == -ENOBUFS && !inet->recverr) { UDP_INC_STATS_USER(sock_net(sk), -- cgit v1.2.3 From 2359a47671fc4fb0fe5e9945f76c2cb10792c0f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Jul 2012 20:52:21 +0000 Subject: codel: refine one condition to avoid a nul rec_inv_sqrt One condition before codel_Newton_step() was not good if we never left the dropping state for a flow. As a result rec_inv_sqrt was 0, instead of the ~0 initial value. codel control law was then set to a very aggressive mode, dropping many packets before reaching 'target' and recovering from this problem. To keep codel_vars_init() as efficient as possible, refine the condition to make sure rec_inv_sqrt initial value is correct Many thanks to Anton Mich for discovering the issue and suggesting a fix. Reported-by: Anton Mich Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/codel.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/codel.h b/include/net/codel.h index 550debfc2403..389cf621161d 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -305,6 +305,8 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, } } } else if (drop) { + u32 delta; + if (params->ecn && INET_ECN_set_ce(skb)) { stats->ecn_mark++; } else { @@ -320,9 +322,11 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, * assume that the drop rate that controlled the queue on the * last cycle is a good starting point to control it now. */ - if (codel_time_before(now - vars->drop_next, + delta = vars->count - vars->lastcount; + if (delta > 1 && + codel_time_before(now - vars->drop_next, 16 * params->interval)) { - vars->count = (vars->count - vars->lastcount) | 1; + vars->count = delta; /* we dont care if rec_inv_sqrt approximation * is not very precise : * Next Newton steps will correct it quadratically. -- cgit v1.2.3