From e39e5b5e7206767a0f1be0e5cb7acbd0db87ae60 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sun, 30 Sep 2012 19:29:39 +0300 Subject: cfg80211: Allow user space to specify non-IEs to SAE Authentication SAE extends Authentication frames with fields that are not information elements. NL80211_ATTR_IE is not suitable for these, so introduce a new attribute that can be used to specify the fields needed for SAE in station mode. Signed-off-by: Jouni Malinen [change to verify that SAE is only used with authenticate command] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7df9b500c804..179a0c2e2f61 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1273,6 +1273,9 @@ enum nl80211_commands { * the connection request from a station. nl80211_connect_failed_reason * enum has different reasons of connection failure. * + * @NL80211_ATTR_SAE_DATA: SAE elements in Authentication frames. This starts + * with the Authentication transaction sequence number field. 
+ * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1530,6 +1533,8 @@ enum nl80211_attrs { NL80211_ATTR_CONN_FAILED_REASON, + NL80211_ATTR_SAE_DATA, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2489,6 +2494,7 @@ enum nl80211_bss_status { * @NL80211_AUTHTYPE_SHARED_KEY: Shared Key authentication (WEP only) * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r) * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP) + * @NL80211_AUTHTYPE_SAE: Simultaneous authentication of equals * @__NL80211_AUTHTYPE_NUM: internal * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by @@ -2500,6 +2506,7 @@ enum nl80211_auth_type { NL80211_AUTHTYPE_SHARED_KEY, NL80211_AUTHTYPE_FT, NL80211_AUTHTYPE_NETWORK_EAP, + NL80211_AUTHTYPE_SAE, /* keep last */ __NL80211_AUTHTYPE_NUM, @@ -3028,6 +3035,9 @@ enum nl80211_ap_sme_features { * in the interface combinations, even when it's only used for scan * and remain-on-channel. This could be due to, for example, the * remain-on-channel implementation requiring a channel context. 
+ * @NL80211_FEATURE_SAE: This driver supports simultaneous authentication of + * equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station + * mode */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3035,6 +3045,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_INACTIVITY_TIMER = 1 << 2, NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3, NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL = 1 << 4, + NL80211_FEATURE_SAE = 1 << 5, }; /** -- cgit v1.2.3 From f461be3eff662f01a177ecea8c1d7b040bb6bfbe Mon Sep 17 00:00:00 2001 From: Mahesh Palivela Date: Thu, 11 Oct 2012 08:04:52 +0000 Subject: {nl,cfg}80211: Peer STA VHT caps To save STAs VHT caps in AP mode Signed-off-by: Mahesh Palivela Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 6 ++++++ net/mac80211/cfg.c | 5 +++++ net/wireless/nl80211.c | 5 +++++ 4 files changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 60cebfac3e3c..607b5c02f740 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -498,6 +498,7 @@ enum station_parameters_apply_mask { * @plink_action: plink action to take * @plink_state: set the peer link state for a station * @ht_capa: HT capabilities of station + * @vht_capa: VHT capabilities of station * @uapsd_queues: bitmap of queues configured for uapsd. same format * as the AC bitmap in the QoS info field * @max_sp: max Service Period. same format as the MAX_SP in the @@ -517,6 +518,7 @@ struct station_parameters { u8 plink_action; u8 plink_state; struct ieee80211_ht_cap *ht_capa; + struct ieee80211_vht_cap *vht_capa; u8 uapsd_queues; u8 max_sp; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 179a0c2e2f61..71ab23b0356d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1276,6 +1276,9 @@ enum nl80211_commands { * @NL80211_ATTR_SAE_DATA: SAE elements in Authentication frames. 
This starts * with the Authentication transaction sequence number field. * + * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1535,6 +1538,8 @@ enum nl80211_attrs { NL80211_ATTR_SAE_DATA, + NL80211_ATTR_VHT_CAPABILITY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1578,6 +1583,7 @@ enum nl80211_attrs { #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 #define NL80211_HT_CAPABILITY_LEN 26 +#define NL80211_VHT_CAPABILITY_LEN 12 #define NL80211_MAX_NR_CIPHER_SUITES 5 #define NL80211_MAX_NR_AKM_SUITES 2 diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 03216b0408c7..ed27988f9d35 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1157,6 +1157,11 @@ static int sta_apply_parameters(struct ieee80211_local *local, params->ht_capa, &sta->sta.ht_cap); + if (params->vht_capa) + ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, + params->vht_capa, + &sta->sta.vht_cap); + if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 74d8123ada77..ef170e982f91 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -355,6 +355,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WDEV] = { .type = NLA_U64 }, [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_SAE_DATA] = { .type = NLA_BINARY, }, + [NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN }, }; /* policy for the key attributes */ @@ -3223,6 +3224,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if 
(info->attrs[NL80211_ATTR_VHT_CAPABILITY]) + params.vht_capa = + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); -- cgit v1.2.3 From ed47377154310fd2fd59d75fcdeb3d022344fb31 Mon Sep 17 00:00:00 2001 From: Sam Leffler Date: Thu, 11 Oct 2012 21:03:31 -0700 Subject: {nl,cfg}80211: add a flags word to scan requests Add a flags word to direct and scheduled scan requests; it will be used for control of optional behaviours such as flushing the bss cache prior to doing a scan. Signed-off-by: Sam Leffler Tested-by: Amitkumar Karwar Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 15 +++++++++++++++ net/wireless/nl80211.c | 12 ++++++++++++ 3 files changed, 31 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 607b5c02f740..d95da8f55f6e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1002,6 +1002,7 @@ struct cfg80211_ssid { * @n_channels: total number of channels to scan * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets + * @flags: bit field of flags controlling operation * @rates: bitmap of rates to advertise for each band * @wiphy: the wiphy this was for * @wdev: the wireless device to scan for @@ -1014,6 +1015,7 @@ struct cfg80211_scan_request { u32 n_channels; const u8 *ie; size_t ie_len; + u32 flags; u32 rates[IEEE80211_NUM_BANDS]; @@ -1046,6 +1048,7 @@ struct cfg80211_match_set { * @interval: interval between each scheduled scan cycle * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets + * @flags: bit field of flags controlling operation * @match_sets: sets of parameters to be matched for a scan result * entry to be considered valid and to be 
passed to the host * (others are filtered out). @@ -1063,6 +1066,7 @@ struct cfg80211_sched_scan_request { u32 interval; const u8 *ie; size_t ie_len; + u32 flags; struct cfg80211_match_set *match_sets; int n_match_sets; s32 rssi_thold; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 71ab23b0356d..4d0b49ee4c2c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1279,6 +1279,8 @@ enum nl80211_commands { * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) * + * @NL80211_ATTR_SCAN_FLAGS: scan request control flags (u32) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1540,6 +1542,8 @@ enum nl80211_attrs { NL80211_ATTR_VHT_CAPABILITY, + NL80211_ATTR_SCAN_FLAGS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3086,4 +3090,15 @@ enum nl80211_connect_failed_reason { NL80211_CONN_FAIL_BLOCKED_CLIENT, }; +/** + * enum nl80211_scan_flags - scan request control flags + * + * Scan request control flags are used to control the handling + * of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN + * requests. 
+ * (will be filled) +enum nl80211_scan_flags { +}; + */ + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ef170e982f91..dc08211c6c6b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -356,6 +356,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_SAE_DATA] = { .type = NLA_BINARY, }, [NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN }, + [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -4367,6 +4368,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) + request->flags = nla_get_u32( + info->attrs[NL80211_ATTR_SCAN_FLAGS]); + request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); @@ -4598,6 +4603,10 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request->ie_len); } + if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) + request->flags = nla_get_u32( + info->attrs[NL80211_ATTR_SCAN_FLAGS]); + request->dev = dev; request->wiphy = &rdev->wiphy; request->interval = interval; @@ -7663,6 +7672,9 @@ static int nl80211_add_scan_req(struct sk_buff *msg, nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie)) goto nla_put_failure; + if (req->flags) + nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags); + return 0; nla_put_failure: return -ENOBUFS; -- cgit v1.2.3 From 46856bbf0f0412c12e9674df68822cb531d49327 Mon Sep 17 00:00:00 2001 From: Sam Leffler Date: Thu, 11 Oct 2012 21:03:32 -0700 Subject: cfg80211: add scan flag to indicate its priority Add NL80211_SCAN_FLAG_LOW_PRIORITY flag support. It tells drivers that this is a low priority scan request, so that they can take necessary action. Drivers need to advertise low priority scan capability during registration. 
Signed-off-by: Sam Leffler Tested-by: Amitkumar Karwar Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 ++++++-- net/wireless/nl80211.c | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4d0b49ee4c2c..c68e15e41321 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3048,6 +3048,7 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_SAE: This driver supports simultaneous authentication of * equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station * mode + * @NL80211_FEATURE_LOW_PRIORITY_SCAN: This driver supports low priority scan */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3056,6 +3057,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3, NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL = 1 << 4, NL80211_FEATURE_SAE = 1 << 5, + NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6, }; /** @@ -3096,9 +3098,11 @@ enum nl80211_connect_failed_reason { * Scan request control flags are used to control the handling * of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN * requests. 
- * (will be filled) + * + * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority + */ enum nl80211_scan_flags { + NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, }; - */ #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dc08211c6c6b..aee252d65b8f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4368,9 +4368,15 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } } - if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) + if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { request->flags = nla_get_u32( info->attrs[NL80211_ATTR_SCAN_FLAGS]); + if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { + err = -EOPNOTSUPP; + goto out_free; + } + } request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); @@ -4603,9 +4609,15 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request->ie_len); } - if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) + if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { request->flags = nla_get_u32( info->attrs[NL80211_ATTR_SCAN_FLAGS]); + if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { + err = -EOPNOTSUPP; + goto out_free; + } + } request->dev = dev; request->wiphy = &rdev->wiphy; -- cgit v1.2.3 From 15d6030b4bec618742b8b9ccae9209c8f9e4a916 Mon Sep 17 00:00:00 2001 From: Sam Leffler Date: Thu, 11 Oct 2012 21:03:34 -0700 Subject: cfg80211: add support for flushing old scan results Add an NL80211_SCAN_FLAG_FLUSH flag that causes old bss cache entries to be flushed on scan completion. This is useful for collecting guaranteed fresh scan/survey result (e.g. on resume). For normal scan, flushing only happens on successful completion of a scan; i.e. it does not happen if the scan is aborted. For scheduled scan, previous scan results are flushed everytime when we get new scan results. This feature is enabled by default. 
Drivers can disable it by unsetting the NL80211_FEATURE_SCAN_FLUSH flag. Signed-off-by: Sam Leffler Tested-by: Amitkumar Karwar Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao [invert polarity of feature flag to account for old kernels] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 ++ include/uapi/linux/nl80211.h | 4 +++ net/wireless/core.c | 2 ++ net/wireless/nl80211.c | 14 +++++++--- net/wireless/scan.c | 66 ++++++++++++++++++++++++++++++-------------- net/wireless/sme.c | 1 + 6 files changed, 66 insertions(+), 24 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d95da8f55f6e..aa0e4a12308c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1005,6 +1005,7 @@ struct cfg80211_ssid { * @flags: bit field of flags controlling operation * @rates: bitmap of rates to advertise for each band * @wiphy: the wiphy this was for + * @scan_start: time (in jiffies) when the scan started * @wdev: the wireless device to scan for * @aborted: (internal) scan request was notified as aborted * @no_cck: used to send probe requests at non CCK rate in 2GHz band @@ -1023,6 +1024,7 @@ struct cfg80211_scan_request { /* internal */ struct wiphy *wiphy; + unsigned long scan_start; bool aborted; bool no_cck; @@ -1074,6 +1076,7 @@ struct cfg80211_sched_scan_request { /* internal */ struct wiphy *wiphy; struct net_device *dev; + unsigned long scan_start; /* keep last */ struct ieee80211_channel *channels[0]; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c68e15e41321..0e6277a06c29 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3049,6 +3049,7 @@ enum nl80211_ap_sme_features { * equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station * mode * @NL80211_FEATURE_LOW_PRIORITY_SCAN: This driver supports low priority scan + * @NL80211_FEATURE_SCAN_FLUSH: Scan flush is supported */ enum nl80211_feature_flags { 
NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3058,6 +3059,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL = 1 << 4, NL80211_FEATURE_SAE = 1 << 5, NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6, + NL80211_FEATURE_SCAN_FLUSH = 1 << 7, }; /** @@ -3100,9 +3102,11 @@ enum nl80211_connect_failed_reason { * requests. * * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority + * @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning */ enum nl80211_scan_flags { NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, + NL80211_SCAN_FLAG_FLUSH = 1<<1, }; #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 443d4d7deea2..48c2ea4712e9 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -370,6 +370,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; + rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH; + return &rdev->wiphy; } EXPORT_SYMBOL(wiphy_new); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index aee252d65b8f..9e5a7206b0b4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4371,8 +4371,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { request->flags = nla_get_u32( info->attrs[NL80211_ATTR_SCAN_FLAGS]); - if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { + if (((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) || + ((request->flags & NL80211_SCAN_FLAG_FLUSH) && + !(wiphy->features & NL80211_FEATURE_SCAN_FLUSH))) { err = -EOPNOTSUPP; goto out_free; } @@ -4383,6 +4385,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->wdev = wdev; request->wiphy = &rdev->wiphy; + request->scan_start = jiffies; rdev->scan_req = request; err = 
rdev->ops->scan(&rdev->wiphy, request); @@ -4612,8 +4615,10 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { request->flags = nla_get_u32( info->attrs[NL80211_ATTR_SCAN_FLAGS]); - if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { + if (((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) || + ((request->flags & NL80211_SCAN_FLAG_FLUSH) && + !(wiphy->features & NL80211_FEATURE_SCAN_FLUSH))) { err = -EOPNOTSUPP; goto out_free; } @@ -4622,6 +4627,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request->dev = dev; request->wiphy = &rdev->wiphy; request->interval = interval; + request->scan_start = jiffies; err = rdev->ops->sched_scan_start(&rdev->wiphy, dev, request); if (!err) { diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 20050965abca..a8d5a9a07e49 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -47,6 +47,27 @@ static void __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, kref_put(&bss->ref, bss_release); } +/* must hold dev->bss_lock! 
*/ +static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev, + unsigned long expire_time) +{ + struct cfg80211_internal_bss *bss, *tmp; + bool expired = false; + + list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { + if (atomic_read(&bss->hold)) + continue; + if (!time_after(expire_time, bss->ts)) + continue; + + __cfg80211_unlink_bss(dev, bss); + expired = true; + } + + if (expired) + dev->bss_generation++; +} + void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) { struct cfg80211_scan_request *request; @@ -72,10 +93,17 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) if (wdev->netdev) cfg80211_sme_scan_done(wdev->netdev); - if (request->aborted) + if (request->aborted) { nl80211_send_scan_aborted(rdev, wdev); - else + } else { + if (request->flags & NL80211_SCAN_FLAG_FLUSH) { + /* flush entries from previous scans */ + spin_lock_bh(&rdev->bss_lock); + __cfg80211_bss_expire(rdev, request->scan_start); + spin_unlock_bh(&rdev->bss_lock); + } nl80211_send_scan_done(rdev, wdev); + } #ifdef CONFIG_CFG80211_WEXT if (wdev->netdev && !request->aborted) { @@ -126,16 +154,27 @@ EXPORT_SYMBOL(cfg80211_scan_done); void __cfg80211_sched_scan_results(struct work_struct *wk) { struct cfg80211_registered_device *rdev; + struct cfg80211_sched_scan_request *request; rdev = container_of(wk, struct cfg80211_registered_device, sched_scan_results_wk); + request = rdev->sched_scan_req; + mutex_lock(&rdev->sched_scan_mtx); /* we don't have sched_scan_req anymore if the scan is stopping */ - if (rdev->sched_scan_req) - nl80211_send_sched_scan_results(rdev, - rdev->sched_scan_req->dev); + if (request) { + if (request->flags & NL80211_SCAN_FLAG_FLUSH) { + /* flush entries from previous scans */ + spin_lock_bh(&rdev->bss_lock); + __cfg80211_bss_expire(rdev, request->scan_start); + spin_unlock_bh(&rdev->bss_lock); + request->scan_start = + jiffies + msecs_to_jiffies(request->interval); + } + 
nl80211_send_sched_scan_results(rdev, request->dev); + } mutex_unlock(&rdev->sched_scan_mtx); } @@ -197,23 +236,9 @@ void cfg80211_bss_age(struct cfg80211_registered_device *dev, } } -/* must hold dev->bss_lock! */ void cfg80211_bss_expire(struct cfg80211_registered_device *dev) { - struct cfg80211_internal_bss *bss, *tmp; - bool expired = false; - - list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { - if (atomic_read(&bss->hold)) - continue; - if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE)) - continue; - __cfg80211_unlink_bss(dev, bss); - expired = true; - } - - if (expired) - dev->bss_generation++; + __cfg80211_bss_expire(dev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); } const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) @@ -962,6 +987,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, creq->ssids = (void *)&creq->channels[n_channels]; creq->n_channels = n_channels; creq->n_ssids = 1; + creq->scan_start = jiffies; /* translate "Scan on frequencies" request */ i = 0; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 055d59643616..07d717eb9e2a 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -138,6 +138,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) request->wdev = wdev; request->wiphy = &rdev->wiphy; + request->scan_start = jiffies; rdev->scan_req = request; -- cgit v1.2.3 From 5c95b940bd97e744267249e3b0780e6ef04b029c Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 16 Oct 2012 08:39:22 +0200 Subject: nl/cfg80211: force scan using an AP vif if requested If the user wants to scan using a vif configured as AP, cfg80211 must give him a chance to do it, even if this will disrupt the stations performance due to off-channel scanning. To do so, this patch adds a 'force' flag to the SCAN_TRIGGER command which tells cfg80211 to perform the scanning operation even if the vif is an AP and the beaconing has already started. 
Signed-off-by: Antonio Quartulli Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 ++++++++ net/mac80211/cfg.c | 11 ++++++++++- net/mac80211/main.c | 4 +++- 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0e6277a06c29..617d0fbfc96f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3050,6 +3050,7 @@ enum nl80211_ap_sme_features { * mode * @NL80211_FEATURE_LOW_PRIORITY_SCAN: This driver supports low priority scan * @NL80211_FEATURE_SCAN_FLUSH: Scan flush is supported + * @NL80211_FEATURE_AP_SCAN: Support scanning using an AP vif */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3060,6 +3061,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_SAE = 1 << 5, NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6, NL80211_FEATURE_SCAN_FLUSH = 1 << 7, + NL80211_FEATURE_AP_SCAN = 1 << 8, }; /** @@ -3103,10 +3105,16 @@ enum nl80211_connect_failed_reason { * * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority * @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning + * @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured + * as AP and the beaconing has already been configured. 
This flag is + * dangerous because it will degrade the performance of associated stations, as a lot of frames + * will be lost while scanning off-channel; therefore it must be used only + * when really needed */ enum nl80211_scan_flags { NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, NL80211_SCAN_FLAG_FLUSH = 1<<1, + NL80211_SCAN_FLAG_AP = 1<<2, }; #endif /* __LINUX_NL80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5739bfbf2999..5eab1325a0f6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1855,7 +1855,16 @@ static int ieee80211_scan(struct wiphy *wiphy, * beaconing hasn't been configured yet */ case NL80211_IFTYPE_AP: - if (sdata->u.ap.beacon) + /* + * If the scan has been forced (and the driver supports + * forcing), don't care about being beaconing already. + * This will create problems to the attached stations (e.g. all + * the frames sent while scanning on other channel will be + * lost) + */ + if (sdata->u.ap.beacon && + (!(wiphy->features & NL80211_FEATURE_AP_SCAN) || + !(req->flags & NL80211_SCAN_FLAG_AP))) return -EOPNOTSUPP; break; default: diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ba5a23249771..c42094be2f0b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -603,7 +603,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, NL80211_FEATURE_HT_IBSS; if (!ops->hw_scan) - wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN; + wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | + NL80211_FEATURE_AP_SCAN; + if (!ops->set_key) wiphy->flags |= WIPHY_FLAG_IBSS_RSN; -- cgit v1.2.3 From e4e541a84863b6a41f2427f59cc9156c644491a8 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 23 Oct 2012 22:29:56 +0400 Subject: sock-diag: Report shutdown for inet and unix sockets (v2) Make it simple -- just put new nlattr with just sk->sk_shutdown bits. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- include/uapi/linux/inet_diag.h | 3 ++- include/uapi/linux/unix_diag.h | 1 + net/ipv4/inet_diag.c | 3 +++ net/unix/diag.c | 3 +++ 4 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 8c469af939aa..bbde90fa5838 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -109,9 +109,10 @@ enum { INET_DIAG_TOS, INET_DIAG_TCLASS, INET_DIAG_SKMEMINFO, + INET_DIAG_SHUTDOWN, }; -#define INET_DIAG_MAX INET_DIAG_SKMEMINFO +#define INET_DIAG_MAX INET_DIAG_SHUTDOWN /* INET_DIAG_MEM */ diff --git a/include/uapi/linux/unix_diag.h b/include/uapi/linux/unix_diag.h index b1d2bf16b33c..b8a24941db21 100644 --- a/include/uapi/linux/unix_diag.h +++ b/include/uapi/linux/unix_diag.h @@ -37,6 +37,7 @@ enum { UNIX_DIAG_ICONS, UNIX_DIAG_RQLEN, UNIX_DIAG_MEMINFO, + UNIX_DIAG_SHUTDOWN, UNIX_DIAG_MAX, }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 535584c00f91..e5bad82d3584 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -105,6 +105,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; + if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) + goto errout; + /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, * hence this needs to be included regardless of socket family. 
*/ diff --git a/net/unix/diag.c b/net/unix/diag.c index 06748f108a57..5ac19dc1d5e4 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -151,6 +151,9 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) goto out_nlmsg_trim; + if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown)) + goto out_nlmsg_trim; + return nlmsg_end(skb, nlh); out_nlmsg_trim: -- cgit v1.2.3 From 2e74598d7f4c6d1b34da84037d9a7f8b1c8e04ae Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 22 Oct 2012 14:14:01 -0300 Subject: [media] v4l2: Fix typo in struct v4l2_captureparm description Judging from what drivers do and from my experience timeperframe fraction is set in seconds - look e.g. here static int bttv_g_parm(struct file *file, void *f, struct v4l2_streamparm *parm) { struct bttv_fh *fh = f; struct bttv *btv = fh->btv; v4l2_video_std_frame_period(bttv_tvnorms[btv->tvnorm].v4l2_id, &parm->parm.capture.timeperframe); ... void v4l2_video_std_frame_period(int id, struct v4l2_fract *frameperiod) { if (id & V4L2_STD_525_60) { frameperiod->numerator = 1001; frameperiod->denominator = 30000; } else { frameperiod->numerator = 1; frameperiod->denominator = 25; } and also v4l2-ctl in userspace decodes this as seconds: if (doioctl(fd, VIDIOC_G_PARM, &parm, "VIDIOC_G_PARM") == 0) { const struct v4l2_fract &tf = parm.parm.capture.timeperframe; ... 
printf("\tFrames per second: %.3f (%d/%d)\n", (1.0 * tf.denominator) / tf.numerator, tf.denominator, tf.numerator); The typo was there from day 1 - added in 2002 in e028b61b ([PATCH] add v4l2 api)(*) (*) found in history tree git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Signed-off-by: Kirill Smelkov Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 57bfa59cda74..2fff7ff3e05b 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -737,7 +737,7 @@ struct v4l2_window { struct v4l2_captureparm { __u32 capability; /* Supported modes */ __u32 capturemode; /* Current mode */ - struct v4l2_fract timeperframe; /* Time per frame in .1us units */ + struct v4l2_fract timeperframe; /* Time per frame in seconds */ __u32 extendedmode; /* Driver-specific extensions */ __u32 readbuffers; /* # of buffers for read */ __u32 reserved[4]; -- cgit v1.2.3 From d900082bd9060dc955b181dae2f2adf86e27d747 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Oct 2012 22:28:49 +0000 Subject: rtnl: add a new type of msg to advertise protocol configuration A new type is added to allow userland to monitor protocol configuration, like IPv4 or IPv6. For example, monitoring the state of the forwarding status of an interface of the system. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/uapi/linux/netconf.h | 22 ++++++++++++++++++++++ include/uapi/linux/rtnetlink.h | 5 +++++ 2 files changed, 27 insertions(+) create mode 100644 include/uapi/linux/netconf.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h new file mode 100644 index 000000000000..d0513726711f --- /dev/null +++ b/include/uapi/linux/netconf.h @@ -0,0 +1,22 @@ +#ifndef _UAPI_LINUX_NETCONF_H_ +#define _UAPI_LINUX_NETCONF_H_ + +#include +#include + +struct netconfmsg { + __u8 ncm_family; +}; + +enum { + NETCONFA_UNSPEC, + NETCONFA_IFINDEX, + NETCONFA_FORWARDING, + __NETCONFA_MAX +}; +#define NETCONFA_MAX (__NETCONFA_MAX - 1) + +#define NETCONFA_IFINDEX_ALL -1 +#define NETCONFA_IFINDEX_DEFAULT -2 + +#endif /* _UAPI_LINUX_NETCONF_H_ */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index fcd768b09f6e..0043b413b8bc 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -120,6 +120,11 @@ enum { RTM_SETDCB, #define RTM_SETDCB RTM_SETDCB + RTM_NEWNETCONF = 80, +#define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_GETNETCONF = 82, +#define RTM_GETNETCONF RTM_GETNETCONF + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; -- cgit v1.2.3 From f3a1bfb11ccbc72d44f0b58c92115a40128979c3 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Oct 2012 22:28:50 +0000 Subject: rtnl/ipv6: use netconf msg to advertise forwarding status Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/uapi/linux/rtnetlink.h | 2 ++ net/ipv6/addrconf.c | 77 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 0043b413b8bc..a4d75ea868ed 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -592,6 +592,8 @@ enum rtnetlink_groups { #define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE RTNLGRP_DCB, #define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV6_NETCONF, +#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0424e4e27414..0c57a8f67715 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -81,6 +81,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6_PRIVACY #include @@ -460,6 +461,72 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev) return idev; } +static int inet6_netconf_msgsize_devconf(int type) +{ + int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + + nla_total_size(4); /* NETCONFA_IFINDEX */ + + if (type == NETCONFA_FORWARDING) + size += nla_total_size(4); + + return size; +} + +static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, + struct ipv6_devconf *devconf, u32 portid, + u32 seq, int event, unsigned int flags, + int type) +{ + struct nlmsghdr *nlh; + struct netconfmsg *ncm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), + flags); + if (nlh == NULL) + return -EMSGSIZE; + + ncm = nlmsg_data(nlh); + ncm->ncm_family = AF_INET6; + + if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) + goto nla_put_failure; + + if (type == NETCONFA_FORWARDING && + nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static void inet6_netconf_notify_devconf(struct net *net, 
int type, int ifindex, + struct ipv6_devconf *devconf) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, + RTM_NEWNETCONF, 0, type); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err); +} + #ifdef CONFIG_SYSCTL static void dev_forward_change(struct inet6_dev *idev) { @@ -486,6 +553,8 @@ static void dev_forward_change(struct inet6_dev *idev) else addrconf_leave_anycast(ifa); } + inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING, + dev->ifindex, &idev->cnf); } @@ -518,6 +587,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) *p = newf; if (p == &net->ipv6.devconf_dflt->forwarding) { + if ((!newf) ^ (!old)) + inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); rtnl_unlock(); return 0; } @@ -525,6 +598,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_all->forwarding) { net->ipv6.devconf_dflt->forwarding = newf; addrconf_forward_change(net, newf); + if ((!newf) ^ (!old)) + inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); } else if ((!newf) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); rtnl_unlock(); -- cgit v1.2.3 From edc9e748934cf406cab708ca5dda7bd3c0f0a1db Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Oct 2012 22:28:52 +0000 Subject: rtnl/ipv4: use netconf msg to advertise forwarding status Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/uapi/linux/rtnetlink.h | 2 + net/ipv4/devinet.c | 93 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 91 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index a4d75ea868ed..3dee071770d5 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -592,6 +592,8 @@ enum rtnetlink_groups { #define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE RTNLGRP_DCB, #define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV4_NETCONF, +#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF RTNLGRP_IPV6_NETCONF, #define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF __RTNLGRP_MAX diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2a6abc163ed2..020fdd2e6e19 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -55,6 +55,7 @@ #include #endif #include +#include #include #include @@ -1442,6 +1443,73 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) return 0; } +static int inet_netconf_msgsize_devconf(int type) +{ + int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + + nla_total_size(4); /* NETCONFA_IFINDEX */ + + if (type == NETCONFA_FORWARDING) + size += nla_total_size(4); + + return size; +} + +static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, + struct ipv4_devconf *devconf, u32 portid, + u32 seq, int event, unsigned int flags, + int type) +{ + struct nlmsghdr *nlh; + struct netconfmsg *ncm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), + flags); + if (nlh == NULL) + return -EMSGSIZE; + + ncm = nlmsg_data(nlh); + ncm->ncm_family = AF_INET; + + if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) + goto nla_put_failure; + + if (type == NETCONFA_FORWARDING && + nla_put_s32(skb, NETCONFA_FORWARDING, + IPV4_DEVCONF(*devconf, FORWARDING)) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static void 
inet_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv4_devconf *devconf) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, + RTM_NEWNETCONF, 0, type); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err); +} + #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) @@ -1467,6 +1535,12 @@ static void inet_forward_change(struct net *net) IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all); + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt); for_each_netdev(net, dev) { struct in_device *in_dev; @@ -1474,8 +1548,11 @@ static void inet_forward_change(struct net *net) dev_disable_lro(dev); rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); - if (in_dev) + if (in_dev) { IN_DEV_CONF_SET(in_dev, FORWARDING, on); + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + dev->ifindex, &in_dev->cnf); + } rcu_read_unlock(); } } @@ -1527,15 +1604,23 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, } if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { inet_forward_change(net); - } else if (*valp) { + } else { struct ipv4_devconf *cnf = ctl->extra1; struct in_device *idev = container_of(cnf, struct in_device, cnf); - dev_disable_lro(idev->dev); + if (*valp) + dev_disable_lro(idev->dev); + inet_netconf_notify_devconf(net, + NETCONFA_FORWARDING, + idev->dev->ifindex, + cnf); } rtnl_unlock(); rt_cache_flush(net); - } + } 
else + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt); } return ret; -- cgit v1.2.3 From 52feb444a90304eb13c03115bb9758101dbb9254 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Wed, 17 Oct 2012 14:43:39 +0200 Subject: NFC: Extend netlink interface for LTO, RW, and MIUX parameters support NFC_CMD_LLC_GET_PARAMS: request LTO, RW, and MIUX parameters for a device NFC_CMD_LLC_SET_PARAMS: set one or more of LTO, RW, and MIUX parameters for a device. LTO must be set before the link is up otherwise -EINPROGRESS is returned. RW and MIUX can be set at anytime and will be passed in subsequent CONNECT and CC messages. If one of the passed parameters is wrong none is set and -EINVAL is returned. Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 15 +++++ net/nfc/llcp/commands.c | 18 ++---- net/nfc/llcp/llcp.c | 14 ++--- net/nfc/llcp/llcp.h | 3 + net/nfc/netlink.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 6 ++ 6 files changed, 189 insertions(+), 19 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index d908d17da56d..0e63cee8d810 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -60,6 +60,13 @@ * target mode. * @NFC_EVENT_DEVICE_DEACTIVATED: event emitted when the adapter is deactivated * from target mode. + * @NFC_CMD_LLC_GET_PARAMS: request LTO, RW, and MIUX parameters for a device + * @NFC_CMD_LLC_SET_PARAMS: set one or more of LTO, RW, and MIUX parameters for + * a device. LTO must be set before the link is up otherwise -EINPROGRESS + * is returned. RW and MIUX can be set at anytime and will be passed in + * subsequent CONNECT and CC messages. + * If one of the passed parameters is wrong none is set and -EINVAL is + * returned. 
*/ enum nfc_commands { NFC_CMD_UNSPEC, @@ -77,6 +84,8 @@ enum nfc_commands { NFC_EVENT_TARGET_LOST, NFC_EVENT_TM_ACTIVATED, NFC_EVENT_TM_DEACTIVATED, + NFC_CMD_LLC_GET_PARAMS, + NFC_CMD_LLC_SET_PARAMS, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -102,6 +111,9 @@ enum nfc_commands { * @NFC_ATTR_RF_MODE: Initiator or target * @NFC_ATTR_IM_PROTOCOLS: Initiator mode protocols to poll for * @NFC_ATTR_TM_PROTOCOLS: Target mode protocols to listen for + * @NFC_ATTR_LLC_PARAM_LTO: Link TimeOut parameter + * @NFC_ATTR_LLC_PARAM_RW: Receive Window size parameter + * @NFC_ATTR_LLC_PARAM_MIUX: MIU eXtension parameter */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -119,6 +131,9 @@ enum nfc_attrs { NFC_ATTR_DEVICE_POWERED, NFC_ATTR_IM_PROTOCOLS, NFC_ATTR_TM_PROTOCOLS, + NFC_ATTR_LLC_PARAM_LTO, + NFC_ATTR_LLC_PARAM_RW, + NFC_ATTR_LLC_PARAM_MIUX, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c index 79415353cc28..ed2d17312d61 100644 --- a/net/nfc/llcp/commands.c +++ b/net/nfc/llcp/commands.c @@ -316,8 +316,7 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) struct sk_buff *skb; u8 *service_name_tlv = NULL, service_name_tlv_length; u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length, rw; - __be16 miux; + u8 *rw_tlv = NULL, rw_tlv_length; int err; u16 size = 0; @@ -335,13 +334,11 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) size += service_name_tlv_length; } - miux = cpu_to_be16(LLCP_MAX_MIUX); - miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, + miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, &miux_tlv_length); size += miux_tlv_length; - rw = LLCP_MAX_RW; - rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); + rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); size += rw_tlv_length; pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); @@ -378,8 +375,7 @@ int 
nfc_llcp_send_cc(struct nfc_llcp_sock *sock) struct nfc_llcp_local *local; struct sk_buff *skb; u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length, rw; - __be16 miux; + u8 *rw_tlv = NULL, rw_tlv_length; int err; u16 size = 0; @@ -389,13 +385,11 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) if (local == NULL) return -ENODEV; - miux = cpu_to_be16(LLCP_MAX_MIUX); - miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, + miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, &miux_tlv_length); size += miux_tlv_length; - rw = LLCP_MAX_RW; - rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); + rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); size += rw_tlv_length; skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size); diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 2e23bd348ebd..f6804532047a 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -467,10 +467,9 @@ static u8 nfc_llcp_reserve_sdp_ssap(struct nfc_llcp_local *local) static int nfc_llcp_build_gb(struct nfc_llcp_local *local) { u8 *gb_cur, *version_tlv, version, version_length; - u8 *lto_tlv, lto, lto_length; + u8 *lto_tlv, lto_length; u8 *wks_tlv, wks_length; u8 *miux_tlv, miux_length; - __be16 miux; u8 gb_len = 0; int ret = 0; @@ -479,9 +478,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) 1, &version_length); gb_len += version_length; - /* 1500 ms */ - lto = 150; - lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, &lto, 1, &lto_length); + lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, &local->lto, 1, &lto_length); gb_len += lto_length; pr_debug("Local wks 0x%lx\n", local->local_wks); @@ -489,8 +486,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) &wks_length); gb_len += wks_length; - miux = cpu_to_be16(LLCP_MAX_MIUX); - miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, + miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, &miux_length); gb_len += miux_length; @@ 
-1383,6 +1379,10 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) rwlock_init(&local->connecting_sockets.lock); rwlock_init(&local->raw_sockets.lock); + local->lto = 150; /* 1500 ms */ + local->rw = LLCP_MAX_RW; + local->miux = cpu_to_be16(LLCP_MAX_MIUX); + nfc_llcp_build_gb(local); local->remote_miu = LLCP_DEFAULT_MIU; diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h index 276da3a6a589..0d62366f8cc3 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp/llcp.h @@ -64,6 +64,9 @@ struct nfc_llcp_local { u32 target_idx; u8 rf_mode; u8 comm_mode; + u8 lto; + u8 rw; + __be16 miux; unsigned long local_wks; /* Well known services */ unsigned long local_sdp; /* Local services */ unsigned long local_sap; /* Local SAPs, not available for discovery */ diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 614cfd0470b7..3568ae16786d 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -29,6 +29,8 @@ #include "nfc.h" +#include "llcp/llcp.h" + static struct genl_multicast_group nfc_genl_event_mcgrp = { .name = NFC_GENL_MCAST_EVENT_NAME, }; @@ -716,6 +718,146 @@ static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info) return rc; } +static int nfc_genl_send_params(struct sk_buff *msg, + struct nfc_llcp_local *local, + u32 portid, u32 seq) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, 0, + NFC_CMD_LLC_GET_PARAMS); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, local->dev->idx) || + nla_put_u8(msg, NFC_ATTR_LLC_PARAM_LTO, local->lto) || + nla_put_u8(msg, NFC_ATTR_LLC_PARAM_RW, local->rw) || + nla_put_u16(msg, NFC_ATTR_LLC_PARAM_MIUX, be16_to_cpu(local->miux))) + goto nla_put_failure; + + return genlmsg_end(msg, hdr); + +nla_put_failure: + + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + struct nfc_llcp_local *local; + int rc = 0; + struct sk_buff *msg = NULL; + u32 idx; + + 
if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + device_lock(&dev->dev); + + local = nfc_llcp_find_local(dev); + if (!local) { + rc = -ENODEV; + goto exit; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + rc = -ENOMEM; + goto exit; + } + + rc = nfc_genl_send_params(msg, local, info->snd_portid, info->snd_seq); + +exit: + device_unlock(&dev->dev); + + nfc_put_device(dev); + + if (rc < 0) { + if (msg) + nlmsg_free(msg); + + return rc; + } + + return genlmsg_reply(msg, info); +} + +static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + struct nfc_llcp_local *local; + u8 rw = 0; + u16 miux = 0; + u32 idx; + int rc = 0; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + (!info->attrs[NFC_ATTR_LLC_PARAM_LTO] && + !info->attrs[NFC_ATTR_LLC_PARAM_RW] && + !info->attrs[NFC_ATTR_LLC_PARAM_MIUX])) + return -EINVAL; + + if (info->attrs[NFC_ATTR_LLC_PARAM_RW]) { + rw = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_RW]); + + if (rw > LLCP_MAX_RW) + return -EINVAL; + } + + if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX]) { + miux = nla_get_u16(info->attrs[NFC_ATTR_LLC_PARAM_MIUX]); + + if (miux > LLCP_MAX_MIUX) + return -EINVAL; + } + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + device_lock(&dev->dev); + + local = nfc_llcp_find_local(dev); + if (!local) { + nfc_put_device(dev); + rc = -ENODEV; + goto exit; + } + + if (info->attrs[NFC_ATTR_LLC_PARAM_LTO]) { + if (dev->dep_link_up) { + rc = -EINPROGRESS; + goto exit; + } + + local->lto = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_LTO]); + } + + if (info->attrs[NFC_ATTR_LLC_PARAM_RW]) + local->rw = rw; + + if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX]) + local->miux = cpu_to_be16(miux); + +exit: + device_unlock(&dev->dev); + + nfc_put_device(dev); + + return rc; +} + 
static struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -760,6 +902,16 @@ static struct genl_ops nfc_genl_ops[] = { .done = nfc_genl_dump_targets_done, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_LLC_GET_PARAMS, + .doit = nfc_genl_llc_get_params, + .policy = nfc_genl_policy, + }, + { + .cmd = NFC_CMD_LLC_SET_PARAMS, + .doit = nfc_genl_llc_set_params, + .policy = nfc_genl_policy, + }, }; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index c5e42b79a418..87d914d2876a 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -56,6 +56,7 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev); int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len); u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len); int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); +struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); int __init nfc_llcp_init(void); void nfc_llcp_exit(void); @@ -97,6 +98,11 @@ static inline int nfc_llcp_data_received(struct nfc_dev *dev, return 0; } +static inline struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) +{ + return NULL; +} + static inline int nfc_llcp_init(void) { return 0; -- cgit v1.2.3 From c8442118ad9cd05cfe3b993f058e70ab25b1009a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Oct 2012 10:17:18 +0200 Subject: cfg80211: allow per interface TX power setting The TX power setting is currently per wiphy (hardware device) but with multi-channel capabilities that doesn't make much sense any more. Allow drivers (and mac80211) to advertise support for per-interface TX power configuration. When the TX power is configured for the wiphy, the wdev will be NULL and the driver can still handle that, but when a wdev is given the TX power can be set only for that wdev now. 
Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 9 ++++---- .../net/wireless/brcm80211/brcmfmac/wl_cfg80211.c | 6 ++++-- drivers/net/wireless/mwifiex/cfg80211.c | 1 + drivers/net/wireless/rndis_wlan.c | 10 +++++++-- include/net/cfg80211.h | 10 ++++++--- include/uapi/linux/nl80211.h | 2 ++ net/mac80211/cfg.c | 5 ++++- net/wireless/nl80211.c | 6 +++++- net/wireless/rdev-ops.h | 11 +++++----- net/wireless/trace.h | 24 ++++++++++++---------- net/wireless/wext-compat.c | 4 ++-- 11 files changed, 56 insertions(+), 32 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 277089963eb4..d615f9f7506a 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1384,11 +1384,8 @@ static int ath6kl_cfg80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) return 0; } -/* - * The type nl80211_tx_power_setting replaces the following - * data type from 2.6.36 onwards -*/ static int ath6kl_cfg80211_set_txpower(struct wiphy *wiphy, + struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { @@ -1423,7 +1420,9 @@ static int ath6kl_cfg80211_set_txpower(struct wiphy *wiphy, return 0; } -static int ath6kl_cfg80211_get_txpower(struct wiphy *wiphy, int *dbm) +static int ath6kl_cfg80211_get_txpower(struct wiphy *wiphy, + struct wireless_dev *wdev, + int *dbm) { struct ath6kl *ar = (struct ath6kl *)wiphy_priv(wiphy); struct ath6kl_vif *vif; diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c index cb30feaa565b..904c94121c13 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c @@ -1721,7 +1721,7 @@ brcmf_cfg80211_disconnect(struct wiphy *wiphy, struct net_device *ndev, } static s32 -brcmf_cfg80211_set_tx_power(struct wiphy *wiphy, 
+brcmf_cfg80211_set_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, s32 mbm) { @@ -1770,7 +1770,9 @@ done: return err; } -static s32 brcmf_cfg80211_get_tx_power(struct wiphy *wiphy, s32 *dbm) +static s32 brcmf_cfg80211_get_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, + s32 *dbm) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct brcmf_if *ifp = netdev_priv(cfg_to_ndev(cfg)); diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index fdb1eb861021..8e829b251d83 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -324,6 +324,7 @@ mwifiex_cfg80211_cancel_remain_on_channel(struct wiphy *wiphy, */ static int mwifiex_cfg80211_set_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index bd1f0cb56085..5390af36c064 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -490,9 +490,12 @@ static int rndis_scan(struct wiphy *wiphy, static int rndis_set_wiphy_params(struct wiphy *wiphy, u32 changed); static int rndis_set_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm); -static int rndis_get_tx_power(struct wiphy *wiphy, int *dbm); +static int rndis_get_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, + int *dbm); static int rndis_connect(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme); @@ -1903,6 +1906,7 @@ static int rndis_set_wiphy_params(struct wiphy *wiphy, u32 changed) } static int rndis_set_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { @@ -1930,7 +1934,9 @@ static int rndis_set_tx_power(struct wiphy *wiphy, return -ENOTSUPP; } -static int rndis_get_tx_power(struct wiphy *wiphy, int 
*dbm) +static int rndis_get_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, + int *dbm) { struct rndis_wlan_private *priv = wiphy_priv(wiphy); struct usbnet *usbdev = priv->usbdev; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c6964572890f..8034a4268fcb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1551,7 +1551,10 @@ struct cfg80211_gtk_rekey_data { * struct wiphy. If returning an error, no value should be changed. * * @set_tx_power: set the transmit power according to the parameters, - * the power passed is in mBm, to get dBm use MBM_TO_DBM(). + * the power passed is in mBm, to get dBm use MBM_TO_DBM(). The + * wdev may be %NULL if power was set for the wiphy, and will + * always be %NULL unless the driver supports per-vif TX power + * (as advertised by the nl80211 feature flag.) * @get_tx_power: store the current TX power into the dbm variable; * return 0 if successful * @@ -1748,9 +1751,10 @@ struct cfg80211_ops { int (*set_wiphy_params)(struct wiphy *wiphy, u32 changed); - int (*set_tx_power)(struct wiphy *wiphy, + int (*set_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm); - int (*get_tx_power)(struct wiphy *wiphy, int *dbm); + int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, + int *dbm); int (*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev, const u8 *addr); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 617d0fbfc96f..4c5f6748ed7d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3051,6 +3051,7 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_LOW_PRIORITY_SCAN: This driver supports low priority scan * @NL80211_FEATURE_SCAN_FLUSH: Scan flush is supported * @NL80211_FEATURE_AP_SCAN: Support scanning using an AP vif + * @NL80211_FEATURE_VIF_TXPOWER: The driver supports per-vif TX power setting */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 
0, @@ -3062,6 +3063,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6, NL80211_FEATURE_SCAN_FLUSH = 1 << 7, NL80211_FEATURE_AP_SCAN = 1 << 8, + NL80211_FEATURE_VIF_TXPOWER = 1 << 9, }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 34fd3eba3090..a352e4d22dd9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1992,6 +1992,7 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) } static int ieee80211_set_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { struct ieee80211_local *local = wiphy_priv(wiphy); @@ -2026,7 +2027,9 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, return 0; } -static int ieee80211_get_tx_power(struct wiphy *wiphy, int *dbm) +static int ieee80211_get_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, + int *dbm) { struct ieee80211_local *local = wiphy_priv(wiphy); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 879ca620fd6f..87d4670ee53a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1585,9 +1585,13 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) { + struct wireless_dev *txp_wdev = wdev; enum nl80211_tx_power_setting type; int idx, mbm = 0; + if (!(rdev->wiphy.features & NL80211_FEATURE_VIF_TXPOWER)) + txp_wdev = NULL; + if (!rdev->ops->set_tx_power) { result = -EOPNOTSUPP; goto bad_res; @@ -1607,7 +1611,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) mbm = nla_get_u32(info->attrs[idx]); } - result = rdev_set_tx_power(rdev, type, mbm); + result = rdev_set_tx_power(rdev, txp_wdev, type, mbm); if (result) goto bad_res; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index eb5f8974e148..6e5fa659068d 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -476,21 +476,22 @@ rdev_set_wiphy_params(struct 
cfg80211_registered_device *rdev, u32 changed) } static inline int rdev_set_tx_power(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { int ret; - trace_rdev_set_tx_power(&rdev->wiphy, type, mbm); - ret = rdev->ops->set_tx_power(&rdev->wiphy, type, mbm); + trace_rdev_set_tx_power(&rdev->wiphy, wdev, type, mbm); + ret = rdev->ops->set_tx_power(&rdev->wiphy, wdev, type, mbm); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_tx_power(struct cfg80211_registered_device *rdev, - int *dbm) + struct wireless_dev *wdev, int *dbm) { int ret; - trace_rdev_get_tx_power(&rdev->wiphy); - ret = rdev->ops->get_tx_power(&rdev->wiphy, dbm); + trace_rdev_get_tx_power(&rdev->wiphy, wdev); + ret = rdev->ops->get_tx_power(&rdev->wiphy, wdev, dbm); trace_rdev_return_int_int(&rdev->wiphy, ret, *dbm); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 0ca71caf85fb..8e03c6382a8a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -26,7 +26,7 @@ #define WIPHY_PR_ARG MAC_PR_ARG(wiphy_mac) #define WDEV_ENTRY __field(u32, id) -#define WDEV_ASSIGN (__entry->id) = (wdev->identifier) +#define WDEV_ASSIGN (__entry->id) = (wdev ? 
wdev->identifier : 0) #define WDEV_PR_FMT ", wdev id: %u" #define WDEV_PR_ARG (__entry->id) @@ -260,11 +260,6 @@ DEFINE_EVENT(wiphy_only_evt, rdev_get_antenna, TP_ARGS(wiphy) ); -DEFINE_EVENT(wiphy_only_evt, rdev_get_tx_power, - TP_PROTO(struct wiphy *wiphy), - TP_ARGS(wiphy) -); - DEFINE_EVENT(wiphy_only_evt, rdev_rfkill_poll, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy) @@ -1230,22 +1225,29 @@ TRACE_EVENT(rdev_set_wiphy_params, WIPHY_PR_ARG, __entry->changed) ); +DEFINE_EVENT(wiphy_wdev_evt, rdev_get_tx_power, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev) +); + TRACE_EVENT(rdev_set_tx_power, - TP_PROTO(struct wiphy *wiphy, enum nl80211_tx_power_setting type, - int mbm), - TP_ARGS(wiphy, type, mbm), + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + enum nl80211_tx_power_setting type, int mbm), + TP_ARGS(wiphy, wdev, type, mbm), TP_STRUCT__entry( WIPHY_ENTRY + WDEV_ENTRY __field(enum nl80211_tx_power_setting, type) __field(int, mbm) ), TP_fast_assign( WIPHY_ASSIGN; + WDEV_ASSIGN; __entry->type = type; __entry->mbm = mbm; ), - TP_printk(WIPHY_PR_FMT ", type: %d, mbm: %d", - WIPHY_PR_ARG, __entry->type, __entry->mbm) + TP_printk(WIPHY_PR_FMT WDEV_PR_FMT ", type: %d, mbm: %d", + WIPHY_PR_ARG, WDEV_PR_ARG,__entry->type, __entry->mbm) ); TRACE_EVENT(rdev_return_int_int, diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 6488d2dbc1d7..742ab6ec4c9d 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -895,7 +895,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, return 0; } - return rdev_set_tx_power(rdev, type, DBM_TO_MBM(dbm)); + return rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm)); } static int cfg80211_wext_giwtxpower(struct net_device *dev, @@ -914,7 +914,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev, if (!rdev->ops->get_tx_power) return -EOPNOTSUPP; - err = rdev_get_tx_power(rdev, &val); + err = rdev_get_tx_power(rdev, wdev, &val); 
if (err) return err; -- cgit v1.2.3 From 91716322d834cba34f4a7ed5e4a39673eb90862b Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 22 Oct 2012 15:51:45 +0100 Subject: efivarfs: Add unique magic number Using pstore's superblock magic number is no doubt going to cause problems in the future. Give efivarfs its own magic number. Acked-by: Jeremy Kerr Signed-off-by: Matt Fleming --- drivers/firmware/efivars.c | 2 +- include/uapi/linux/magic.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 2c044343c99b..3b0cf9acb504 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -991,7 +991,7 @@ int efivarfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = PSTOREFS_MAGIC; + sb->s_magic = EFIVARFS_MAGIC; sb->s_op = &efivarfs_ops; sb->s_time_gran = 1; diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index e15192cb9cf4..12f68c7ceba6 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -27,6 +27,7 @@ #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 #define PSTOREFS_MAGIC 0x6165676C +#define EFIVARFS_MAGIC 0xde5e81e4 #define MINIX_SUPER_MAGIC 0x137F /* minix v1 fs, 14 char names */ #define MINIX_SUPER_MAGIC2 0x138F /* minix v1 fs, 30 char names */ -- cgit v1.2.3 From 2ac4ad2a1468123f6bb439a547880a9c0d302e0a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sat, 27 Oct 2012 12:47:12 +0530 Subject: serial/arc-uart: Add new driver Driver for non-standard on-chip UART, instantiated in the ARC (Synopsys) FPGA Boards such as ARCAngel4/ML50x Signed-off-by: Vineet Gupta Reviewed-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 23 ++ drivers/tty/serial/Makefile | 1 + drivers/tty/serial/arc_uart.c | 746 
+++++++++++++++++++++++++++++++++++++++ include/uapi/linux/serial_core.h | 2 + 4 files changed, 772 insertions(+) create mode 100644 drivers/tty/serial/arc_uart.c (limited to 'include/uapi/linux') diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 2a53be5f010d..b1768012ed21 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1423,4 +1423,27 @@ config SERIAL_EFM32_UART_CONSOLE depends on SERIAL_EFM32_UART=y select SERIAL_CORE_CONSOLE +config SERIAL_ARC + tristate "ARC UART driver support" + select SERIAL_CORE + help + Driver for on-chip UART for ARC(Synopsys) for the legacy + FPGA Boards (ML50x/ARCAngel4) + +config SERIAL_ARC_CONSOLE + bool "Console on ARC UART" + depends on SERIAL_ARC=y + select SERIAL_CORE_CONSOLE + help + Enable system Console on ARC UART + +config SERIAL_ARC_NR_PORTS + int "Number of ARC UART ports" + depends on SERIAL_ARC + range 1 3 + default "1" + help + Set this to the number of serial ports you want the driver + to support. + endmenu diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index 4f694dafa719..df1b998c436b 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -82,3 +82,4 @@ obj-$(CONFIG_SERIAL_XILINX_PS_UART) += xilinx_uartps.o obj-$(CONFIG_SERIAL_SIRFSOC) += sirfsoc_uart.o obj-$(CONFIG_SERIAL_AR933X) += ar933x_uart.o obj-$(CONFIG_SERIAL_EFM32_UART) += efm32-uart.o +obj-$(CONFIG_SERIAL_ARC) += arc_uart.o diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c new file mode 100644 index 000000000000..e9c61d1b1c79 --- /dev/null +++ b/drivers/tty/serial/arc_uart.c @@ -0,0 +1,746 @@ +/* + * ARC On-Chip(fpga) UART Driver + * + * Copyright (C) 2010-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * vineetg: July 10th 2012 + * -Decoupled the driver from arch/arc + * +Using platform_get_resource() for irq/membase (thx to bfin_uart.c) + * +Using early_platform_xxx() for early console (thx to mach-shmobile/xxx) + * + * Vineetg: Aug 21st 2010 + * -Is uart_tx_stopped() not done in tty write path as it has already been + * taken care of, in serial core + * + * Vineetg: Aug 18th 2010 + * -New Serial Core based ARC UART driver + * -Derived largely from blackfin driver albeit with some major tweaks + * + * TODO: + * -check if sysrq works + */ + +#if defined(CONFIG_SERIAL_ARC_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) +#define SUPPORT_SYSRQ +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/************************************* + * ARC UART Hardware Specs + ************************************/ +#define ARC_UART_TX_FIFO_SIZE 1 + +/* + * UART Register set (this is not a Standards Compliant IP) + * Also each reg is Word aligned, but only 8 bits wide + */ +#define R_ID0 0 +#define R_ID1 4 +#define R_ID2 8 +#define R_ID3 12 +#define R_DATA 16 +#define R_STS 20 +#define R_BAUDL 24 +#define R_BAUDH 28 + +/* Bits for UART Status Reg (R/W) */ +#define RXIENB 0x04 /* Receive Interrupt Enable */ +#define TXIENB 0x40 /* Transmit Interrupt Enable */ + +#define RXEMPTY 0x20 /* Receive FIFO Empty: No char received */ +#define TXEMPTY 0x80 /* Transmit FIFO Empty, thus char can be written into */ + +#define RXFULL 0x08 /* Receive FIFO full */ +#define RXFULL1 0x10 /* Receive FIFO has space for 1 char (tot space=4) */ + +#define RXFERR 0x01 /* Frame Error: Stop Bit not detected */ +#define RXOERR 0x02 /* OverFlow Err: Char recv but RXFULL still set */ + +/* Uart bit fiddling helpers: lowest level */ +#define RBASE(uart, reg) (uart->port.membase + reg) +#define UART_REG_SET(u, r, v) writeb((v), RBASE(u, r)) +#define UART_REG_GET(u, r) readb(RBASE(u, r)) + +#define UART_REG_OR(u, r, v) UART_REG_SET(u, r, UART_REG_GET(u, r) | (v)) 
+#define UART_REG_CLR(u, r, v) UART_REG_SET(u, r, UART_REG_GET(u, r) & ~(v)) + +/* Uart bit fiddling helpers: API level */ +#define UART_SET_DATA(uart, val) UART_REG_SET(uart, R_DATA, val) +#define UART_GET_DATA(uart) UART_REG_GET(uart, R_DATA) + +#define UART_SET_BAUDH(uart, val) UART_REG_SET(uart, R_BAUDH, val) +#define UART_SET_BAUDL(uart, val) UART_REG_SET(uart, R_BAUDL, val) + +#define UART_CLR_STATUS(uart, val) UART_REG_CLR(uart, R_STS, val) +#define UART_GET_STATUS(uart) UART_REG_GET(uart, R_STS) + +#define UART_ALL_IRQ_DISABLE(uart) UART_REG_CLR(uart, R_STS, RXIENB|TXIENB) +#define UART_RX_IRQ_DISABLE(uart) UART_REG_CLR(uart, R_STS, RXIENB) +#define UART_TX_IRQ_DISABLE(uart) UART_REG_CLR(uart, R_STS, TXIENB) + +#define UART_ALL_IRQ_ENABLE(uart) UART_REG_OR(uart, R_STS, RXIENB|TXIENB) +#define UART_RX_IRQ_ENABLE(uart) UART_REG_OR(uart, R_STS, RXIENB) +#define UART_TX_IRQ_ENABLE(uart) UART_REG_OR(uart, R_STS, TXIENB) + +#define ARC_SERIAL_DEV_NAME "ttyARC" + +struct arc_uart_port { + struct uart_port port; + unsigned long baud; + int is_emulated; /* H/w vs. 
Instruction Set Simulator */ +}; + +#define to_arc_port(uport) container_of(uport, struct arc_uart_port, port) + +static struct arc_uart_port arc_uart_ports[CONFIG_SERIAL_ARC_NR_PORTS]; + +#ifdef CONFIG_SERIAL_ARC_CONSOLE +static struct console arc_console; +#endif + +#define DRIVER_NAME "arc-uart" + +static struct uart_driver arc_uart_driver = { + .owner = THIS_MODULE, + .driver_name = DRIVER_NAME, + .dev_name = ARC_SERIAL_DEV_NAME, + .major = 0, + .minor = 0, + .nr = CONFIG_SERIAL_ARC_NR_PORTS, +#ifdef CONFIG_SERIAL_ARC_CONSOLE + .cons = &arc_console, +#endif +}; + +static void arc_serial_stop_rx(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + + UART_RX_IRQ_DISABLE(uart); +} + +static void arc_serial_stop_tx(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + + while (!(UART_GET_STATUS(uart) & TXEMPTY)) + cpu_relax(); + + UART_TX_IRQ_DISABLE(uart); +} + +/* + * Return TIOCSER_TEMT when transmitter is not busy. + */ +static unsigned int arc_serial_tx_empty(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + unsigned int stat; + + stat = UART_GET_STATUS(uart); + if (stat & TXEMPTY) + return TIOCSER_TEMT; + + return 0; +} + +/* + * Driver internal routine, used by both tty(serial core) as well as tx-isr + * -Called under spinlock in either cases + * -also tty->stopped / tty->hw_stopped has already been checked + * = by uart_start( ) before calling us + * = tx_ist checks that too before calling + */ +static void arc_serial_tx_chars(struct arc_uart_port *uart) +{ + struct circ_buf *xmit = &uart->port.state->xmit; + int sent = 0; + unsigned char ch; + + if (unlikely(uart->port.x_char)) { + UART_SET_DATA(uart, uart->port.x_char); + uart->port.icount.tx++; + uart->port.x_char = 0; + sent = 1; + } else if (xmit->tail != xmit->head) { /* TODO: uart_circ_empty */ + ch = xmit->buf[xmit->tail]; + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + uart->port.icount.tx++; + while 
(!(UART_GET_STATUS(uart) & TXEMPTY)) + cpu_relax(); + UART_SET_DATA(uart, ch); + sent = 1; + } + + /* + * If num chars in xmit buffer are too few, ask tty layer for more. + * By Hard ISR to schedule processing in software interrupt part + */ + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(&uart->port); + + if (sent) + UART_TX_IRQ_ENABLE(uart); +} + +/* + * port is locked and interrupts are disabled + * uart_start( ) calls us under the port spinlock irqsave + */ +static void arc_serial_start_tx(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + + arc_serial_tx_chars(uart); +} + +static void arc_serial_rx_chars(struct arc_uart_port *uart) +{ + struct tty_struct *tty = tty_port_tty_get(&uart->port.state->port); + unsigned int status, ch, flg = 0; + + if (!tty) + return; + + /* + * UART has 4 deep RX-FIFO. Driver's recongnition of this fact + * is very subtle. Here's how ... + * Upon getting a RX-Intr, such that RX-EMPTY=0, meaning data available, + * driver reads the DATA Reg and keeps doing that in a loop, until + * RX-EMPTY=1. Multiple chars being avail, with a single Interrupt, + * before RX-EMPTY=0, implies some sort of buffering going on in the + * controller, which is indeed the Rx-FIFO. 
+ */ + while (!((status = UART_GET_STATUS(uart)) & RXEMPTY)) { + + ch = UART_GET_DATA(uart); + uart->port.icount.rx++; + + if (unlikely(status & (RXOERR | RXFERR))) { + if (status & RXOERR) { + uart->port.icount.overrun++; + flg = TTY_OVERRUN; + UART_CLR_STATUS(uart, RXOERR); + } + + if (status & RXFERR) { + uart->port.icount.frame++; + flg = TTY_FRAME; + UART_CLR_STATUS(uart, RXFERR); + } + } else + flg = TTY_NORMAL; + + if (unlikely(uart_handle_sysrq_char(&uart->port, ch))) + goto done; + + uart_insert_char(&uart->port, status, RXOERR, ch, flg); + +done: + tty_flip_buffer_push(tty); + } + + tty_kref_put(tty); +} + +/* + * A note on the Interrupt handling state machine of this driver + * + * kernel printk writes funnel thru the console driver framework and in order + * to keep things simple as well as efficient, it writes to UART in polled + * mode, in one shot, and exits. + * + * OTOH, Userland output (via tty layer), uses interrupt based writes as there + * can be undeterministic delay between char writes. + * + * Thus Rx-interrupts are always enabled, while tx-interrupts are by default + * disabled. + * + * When tty has some data to send out, serial core calls driver's start_tx + * which + * -checks-if-tty-buffer-has-char-to-send + * -writes-data-to-uart + * -enable-tx-intr + * + * Once data bits are pushed out, controller raises the Tx-room-avail-Interrupt. + * The first thing Tx ISR does is disable further Tx interrupts (as this could + * be the last char to send, before settling down into the quiet polled mode). + * It then calls the exact routine used by tty layer write to send out any + * more char in tty buffer. In case of sending, it re-enables Tx-intr. In case + * of no data, it remains disabled. 
+ * This is how the transmit state machine is dynamically switched on/off + */ + +static irqreturn_t arc_serial_isr(int irq, void *dev_id) +{ + struct arc_uart_port *uart = dev_id; + unsigned int status; + + status = UART_GET_STATUS(uart); + + /* + * Single IRQ for both Rx (data available) Tx (room available) Interrupt + * notifications from the UART Controller. + * To demultiplex between the two, we check the relevant bits + */ + if ((status & RXIENB) && !(status & RXEMPTY)) { + + /* already in ISR, no need of xx_irqsave */ + spin_lock(&uart->port.lock); + arc_serial_rx_chars(uart); + spin_unlock(&uart->port.lock); + } + + if ((status & TXIENB) && (status & TXEMPTY)) { + + /* Unconditionally disable further Tx-Interrupts. + * will be enabled by tx_chars() if needed. + */ + UART_TX_IRQ_DISABLE(uart); + + spin_lock(&uart->port.lock); + + if (!uart_tx_stopped(&uart->port)) + arc_serial_tx_chars(uart); + + spin_unlock(&uart->port.lock); + } + + return IRQ_HANDLED; +} + +static unsigned int arc_serial_get_mctrl(struct uart_port *port) +{ + /* + * Pretend we have a Modem status reg and following bits are + * always set, to satify the serial core state machine + * (DSR) Data Set Ready + * (CTS) Clear To Send + * (CAR) Carrier Detect + */ + return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR; +} + +static void arc_serial_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + /* MCR not present */ +} + +/* Enable Modem Status Interrupts */ + +static void arc_serial_enable_ms(struct uart_port *port) +{ + /* MSR not present */ +} + +static void arc_serial_break_ctl(struct uart_port *port, int break_state) +{ + /* ARC UART doesn't support sending Break signal */ +} + +static int arc_serial_startup(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + + /* Before we hook up the ISR, Disable all UART Interrupts */ + UART_ALL_IRQ_DISABLE(uart); + + if (request_irq(uart->port.irq, arc_serial_isr, 0, "arc uart rx-tx", + uart)) { + dev_warn(uart->port.dev, "Unable 
to attach ARC UART intr\n"); + return -EBUSY; + } + + UART_RX_IRQ_ENABLE(uart); /* Only Rx IRQ enabled to begin with */ + + return 0; +} + +/* This is not really needed */ +static void arc_serial_shutdown(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + free_irq(uart->port.irq, uart); +} + +static void +arc_serial_set_termios(struct uart_port *port, struct ktermios *new, + struct ktermios *old) +{ + struct arc_uart_port *uart = to_arc_port(port); + unsigned int baud, uartl, uarth, hw_val; + unsigned long flags; + + /* + * Use the generic handler so that any specially encoded baud rates + * such as SPD_xx flags or "%B0" can be handled + * Max Baud I suppose will not be more than current 115K * 4 + * Formula for ARC UART is: hw-val = ((CLK/(BAUD*4)) -1) + * spread over two 8-bit registers + */ + baud = uart_get_baud_rate(port, new, old, 0, 460800); + + hw_val = port->uartclk / (uart->baud * 4) - 1; + uartl = hw_val & 0xFF; + uarth = (hw_val >> 8) & 0xFF; + + /* + * UART ISS(Instruction Set simulator) emulation has a subtle bug: + * A existing value of Baudh = 0 is used as a indication to startup + * it's internal state machine. + * Thus if baudh is set to 0, 2 times, it chokes. 
+ * This happens with BAUD=115200 and the formaula above + * Until that is fixed, when running on ISS, we will set baudh to !0 + */ + if (uart->is_emulated) + uarth = 1; + + spin_lock_irqsave(&port->lock, flags); + + UART_ALL_IRQ_DISABLE(uart); + + UART_SET_BAUDL(uart, uartl); + UART_SET_BAUDH(uart, uarth); + + UART_RX_IRQ_ENABLE(uart); + + /* + * UART doesn't support Parity/Hardware Flow Control; + * Only supports 8N1 character size + */ + new->c_cflag &= ~(CMSPAR|CRTSCTS|CSIZE); + new->c_cflag |= CS8; + + if (old) + tty_termios_copy_hw(new, old); + + /* Don't rewrite B0 */ + if (tty_termios_baud_rate(new)) + tty_termios_encode_baud_rate(new, baud, baud); + + uart_update_timeout(port, new->c_cflag, baud); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *arc_serial_type(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + + return uart->port.type == PORT_ARC ? DRIVER_NAME : NULL; +} + +static void arc_serial_release_port(struct uart_port *port) +{ +} + +static int arc_serial_request_port(struct uart_port *port) +{ + return 0; +} + +/* + * Verify the new serial_struct (for TIOCSSERIAL). + */ +static int +arc_serial_verify_port(struct uart_port *port, struct serial_struct *ser) +{ + if (port->type != PORT_UNKNOWN && ser->type != PORT_ARC) + return -EINVAL; + + return 0; +} + +/* + * Configure/autoconfigure the port. 
+ */ +static void arc_serial_config_port(struct uart_port *port, int flags) +{ + struct arc_uart_port *uart = to_arc_port(port); + + if (flags & UART_CONFIG_TYPE) + uart->port.type = PORT_ARC; +} + +#if defined(CONFIG_CONSOLE_POLL) || defined(CONFIG_SERIAL_ARC_CONSOLE) + +static void arc_serial_poll_putchar(struct uart_port *port, unsigned char chr) +{ + struct arc_uart_port *uart = to_arc_port(port); + + while (!(UART_GET_STATUS(uart) & TXEMPTY)) + cpu_relax(); + + UART_SET_DATA(uart, chr); +} +#endif + +#ifdef CONFIG_CONSOLE_POLL +static int arc_serial_poll_getchar(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + unsigned char chr; + + while (!(UART_GET_STATUS(uart) & RXEMPTY)) + cpu_relax(); + + chr = UART_GET_DATA(uart); + return chr; +} +#endif + +static struct uart_ops arc_serial_pops = { + .tx_empty = arc_serial_tx_empty, + .set_mctrl = arc_serial_set_mctrl, + .get_mctrl = arc_serial_get_mctrl, + .stop_tx = arc_serial_stop_tx, + .start_tx = arc_serial_start_tx, + .stop_rx = arc_serial_stop_rx, + .enable_ms = arc_serial_enable_ms, + .break_ctl = arc_serial_break_ctl, + .startup = arc_serial_startup, + .shutdown = arc_serial_shutdown, + .set_termios = arc_serial_set_termios, + .type = arc_serial_type, + .release_port = arc_serial_release_port, + .request_port = arc_serial_request_port, + .config_port = arc_serial_config_port, + .verify_port = arc_serial_verify_port, +#ifdef CONFIG_CONSOLE_POLL + .poll_put_char = arc_serial_poll_putchar, + .poll_get_char = arc_serial_poll_getchar, +#endif +}; + +static int __devinit +arc_uart_init_one(struct platform_device *pdev, struct arc_uart_port *uart) +{ + struct resource *res, *res2; + unsigned long *plat_data; + + if (pdev->id < 0 || pdev->id >= CONFIG_SERIAL_ARC_NR_PORTS) { + dev_err(&pdev->dev, "Wrong uart platform device id.\n"); + return -ENOENT; + } + + plat_data = ((unsigned long *)(pdev->dev.platform_data)); + uart->baud = plat_data[0]; + + res = platform_get_resource(pdev, 
IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + res2 = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res2) + return -ENODEV; + + uart->port.mapbase = res->start; + uart->port.membase = ioremap_nocache(res->start, resource_size(res)); + if (!uart->port.membase) + /* No point of dev_err since UART itself is hosed here */ + return -ENXIO; + + uart->port.irq = res2->start; + uart->port.dev = &pdev->dev; + uart->port.iotype = UPIO_MEM; + uart->port.flags = UPF_BOOT_AUTOCONF; + uart->port.line = pdev->id; + uart->port.ops = &arc_serial_pops; + + uart->port.uartclk = plat_data[1]; + uart->port.fifosize = ARC_UART_TX_FIFO_SIZE; + + /* + * uart_insert_char( ) uses it in decideding whether to ignore a + * char or not. Explicitly setting it here, removes the subtelty + */ + uart->port.ignore_status_mask = 0; + + /* Real Hardware vs. emulated to work around a bug */ + uart->is_emulated = !!plat_data[2]; + + return 0; +} + +#ifdef CONFIG_SERIAL_ARC_CONSOLE + +static int __devinit arc_serial_console_setup(struct console *co, char *options) +{ + struct uart_port *port; + int baud = 115200; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + if (co->index < 0 || co->index >= CONFIG_SERIAL_ARC_NR_PORTS) + return -ENODEV; + + /* + * The uart port backing the console (e.g. ttyARC1) might not have been + * init yet. If so, defer the console setup to after the port. 
+ */ + port = &arc_uart_ports[co->index].port; + if (!port->membase) + return -ENODEV; + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + + /* + * Serial core will call port->ops->set_termios( ) + * which will set the baud reg + */ + return uart_set_options(port, co, baud, parity, bits, flow); +} + +static void arc_serial_console_putchar(struct uart_port *port, int ch) +{ + arc_serial_poll_putchar(port, (unsigned char)ch); +} + +/* + * Interrupts are disabled on entering + */ +static void arc_serial_console_write(struct console *co, const char *s, + unsigned int count) +{ + struct uart_port *port = &arc_uart_ports[co->index].port; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + uart_console_write(port, s, count, arc_serial_console_putchar); + spin_unlock_irqrestore(&port->lock, flags); +} + +static struct console arc_console = { + .name = ARC_SERIAL_DEV_NAME, + .write = arc_serial_console_write, + .device = uart_console_device, + .setup = arc_serial_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, + .data = &arc_uart_driver +}; + +static __init void early_serial_write(struct console *con, const char *s, + unsigned int n) +{ + struct uart_port *port = &arc_uart_ports[con->index].port; + unsigned int i; + + for (i = 0; i < n; i++, s++) { + if (*s == '\n') + arc_serial_poll_putchar(port, '\r'); + arc_serial_poll_putchar(port, *s); + } +} + +static struct __initdata console arc_early_serial_console = { + .name = "early_ARCuart", + .write = early_serial_write, + .flags = CON_PRINTBUFFER | CON_BOOT, + .index = -1 +}; + +static int __devinit arc_serial_probe_earlyprintk(struct platform_device *pdev) +{ + arc_early_serial_console.index = pdev->id; + + arc_uart_init_one(pdev, &arc_uart_ports[pdev->id]); + + arc_serial_console_setup(&arc_early_serial_console, NULL); + + register_console(&arc_early_serial_console); + return 0; +} +#else +static int __devinit arc_serial_probe_earlyprintk(struct platform_device *pdev) +{ 
+ return -ENODEV; +} +#endif /* CONFIG_SERIAL_ARC_CONSOLE */ + +static int __devinit arc_serial_probe(struct platform_device *pdev) +{ + struct arc_uart_port *uart; + int rc; + + if (is_early_platform_device(pdev)) + return arc_serial_probe_earlyprintk(pdev); + + uart = &arc_uart_ports[pdev->id]; + rc = arc_uart_init_one(pdev, uart); + if (rc) + return rc; + + return uart_add_one_port(&arc_uart_driver, &uart->port); +} + +static int __devexit arc_serial_remove(struct platform_device *pdev) +{ + /* This will never be called */ + return 0; +} + +static struct platform_driver arc_platform_driver = { + .probe = arc_serial_probe, + .remove = __devexit_p(arc_serial_remove), + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, +}; + +#ifdef CONFIG_SERIAL_ARC_CONSOLE +/* + * Register an early platform driver of "earlyprintk" class. + * ARCH platform code installs the driver and probes the early devices + * The installation could rely on user specifying earlyprintk=xyx in cmd line + * or it could be done independently, for all "earlyprintk" class drivers. 
+ * [see arch/arc/plat-arcfpga/platform.c] + */ +early_platform_init("earlyprintk", &arc_platform_driver); + +#endif /* CONFIG_SERIAL_ARC_CONSOLE */ + +static int __init arc_serial_init(void) +{ + int ret; + + ret = uart_register_driver(&arc_uart_driver); + if (ret) + return ret; + + ret = platform_driver_register(&arc_platform_driver); + if (ret) + uart_unregister_driver(&arc_uart_driver); + + return ret; +} + +static void __exit arc_serial_exit(void) +{ + platform_driver_unregister(&arc_platform_driver); + uart_unregister_driver(&arc_uart_driver); +} + +module_init(arc_serial_init); +module_exit(arc_serial_exit); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("plat-arcfpga/uart"); +MODULE_AUTHOR("Vineet Gupta"); +MODULE_DESCRIPTION("ARC(Synopsys) On-Chip(fpga) serial driver"); diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 7e1ab20adc03..ebcc73f0418a 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -215,5 +215,7 @@ /* Energy Micro efm32 SoC */ #define PORT_EFMUART 100 +/* ARC (Synopsys) on-chip UART */ +#define PORT_ARC 101 #endif /* _UAPILINUX_SERIAL_CORE_H */ -- cgit v1.2.3 From 2469ffd723f76ac2d3ce3d4f31ee31ee0a06cd38 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 24 Oct 2012 08:13:03 +0000 Subject: net: set and query VEB/VEPA bridge mode via PF_BRIDGE Hardware switches may support enabling and disabling the loopback switch which puts the device in a VEPA mode defined in the IEEE 802.1Qbg specification. In this mode frames are not switched in the hardware but sent directly to the switch. SR-IOV capable NICs will likely support this mode I am aware of at least two such devices. Also I am told (but don't have any of this hardware available) that there are devices that only support VEPA modes. In these cases it is important at a minimum to be able to query these attributes. 
This patch adds an additional IFLA_BRIDGE_MODE attribute that can be set and dumped via the PF_BRIDGE:{SET|GET}LINK operations. Also anticipating bridge attributes that may be common for both embedded bridges and software bridges this adds a flags attribute IFLA_BRIDGE_FLAGS currently used to determine if the command or event is being generated to/from an embedded bridge or software bridge. Finally, the event generation is pulled out of the bridge module and into rtnetlink proper. For example using the macvlan driver in VEPA mode on top of an embedded switch requires putting the embedded switch into a VEPA mode to get the expected results. -------- -------- | VEPA | | VEPA | <-- macvlan vepa edge relays -------- -------- | | | | ------------------ | VEPA | <-- embedded switch in NIC ------------------ | | ------------------- | external switch | <-- shiny new physical ------------------- switch with VEPA support A packet sent from the macvlan VEPA at the top could be loopbacked on the embedded switch and never seen by the external switch. So in order for this to work the embedded switch needs to be set in the VEPA state via the above described commands. By making these attributes nested in IFLA_AF_SPEC we allow future extensions to be made as needed. CC: Lennert Buytenhek CC: Stephen Hemminger Signed-off-by: John Fastabend Signed-off-by: David S. 
Miller --- include/uapi/linux/if_bridge.h | 18 +++++++++ net/bridge/br_netlink.c | 2 - net/bridge/br_private.h | 4 +- net/core/rtnetlink.c | 85 ++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 102 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index a8fe9549ddbc..b3885791e11e 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -97,5 +97,23 @@ struct __fdb_entry { __u16 unused; }; +/* Bridge Flags */ +#define BRIDGE_FLAGS_MASTER 1 /* Bridge command to/from master */ +#define BRIDGE_FLAGS_SELF 2 /* Bridge command to/from lowerdev */ +#define BRIDGE_MODE_VEB 0 /* Default loopback mode */ +#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */ + +/* Bridge management nested attributes + * [IFLA_AF_SPEC] = { + * [IFLA_BRIDGE_FLAGS] + * [IFLA_BRIDGE_MODE] + * } + */ +enum { + IFLA_BRIDGE_FLAGS, + IFLA_BRIDGE_MODE, + __IFLA_BRIDGE_MAX, +}; +#define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 743511bb7319..14b065cbd214 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -166,8 +166,6 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) br_port_state_selection(p->br); spin_unlock_bh(&p->br->lock); - br_ifinfo_notify(RTM_NEWLINK, p); - return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index fdcd5f626ca6..6f40c14a2a65 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -158,7 +158,9 @@ struct net_bridge_port static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) { - struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data); + struct net_bridge_port *port = + rcu_dereference_rtnl(dev->rx_handler_data); + return br_port_exists(dev) ? 
port : NULL; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a068666b322f..8d2af0f77d36 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2295,13 +2295,60 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static inline size_t bridge_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(sizeof(u32)) /* IFLA_MASTER */ + + nla_total_size(sizeof(u32)) /* IFLA_MTU */ + + nla_total_size(sizeof(u32)) /* IFLA_LINK */ + + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */ + + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */ + + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */ + + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */ + + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ +} + +static int rtnl_bridge_notify(struct net_device *dev, u16 flags) +{ + struct net *net = dev_net(dev); + struct net_device *master = dev->master; + struct sk_buff *skb; + int err = -EOPNOTSUPP; + + skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); + if (!skb) { + err = -ENOMEM; + goto errout; + } + + if (!flags && master && master->netdev_ops->ndo_bridge_getlink) + err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + else if (dev->netdev_ops->ndo_bridge_getlink) + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + + if (err < 0) + goto errout; + + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + return 0; +errout: + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); + return err; +} + static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; - int err = -EINVAL; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 flags = 0; if (nlmsg_len(nlh) < sizeof(*ifm)) return 
-EINVAL; @@ -2316,15 +2363,45 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; } - if (dev->master && dev->master->netdev_ops->ndo_bridge_setlink) { + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + flags = nla_get_u16(attr); + break; + } + } + } + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + if (!dev->master || + !dev->master->netdev_ops->ndo_bridge_setlink) { + err = -EOPNOTSUPP; + goto out; + } + err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); if (err) goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; } - if (dev->netdev_ops->ndo_bridge_setlink) - err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_setlink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + if (attr && nla_type(attr) == IFLA_BRIDGE_FLAGS) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, flags); out: return err; } -- cgit v1.2.3 From f3335031b9452baebfe49b8b5e55d3fe0c4677d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 27 Oct 2012 02:26:17 +0000 Subject: net: filter: add vlan tag access BPF filters lack ability to access skb->vlan_tci This patch adds two new ancillary accessors : SKF_AD_VLAN_TAG (44) mapped to vlan_tx_tag_get(skb) SKF_AD_VLAN_TAG_PRESENT (48) mapped to vlan_tx_tag_present(skb) This allows libpcap/tcpdump to use a kernel filter instead of having to fallback to accept all packets, then filter them in user space. Signed-off-by: Eric Dumazet Suggested-by: Ani Sinha Suggested-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/filter.h | 2 ++ include/uapi/linux/filter.h | 4 +++- net/core/filter.c | 9 +++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 24d251f3bab0..c9f0005c35e2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -123,6 +123,8 @@ enum { BPF_S_ANC_CPU, BPF_S_ANC_ALU_XOR_X, BPF_S_ANC_SECCOMP_LD_W, + BPF_S_ANC_VLAN_TAG, + BPF_S_ANC_VLAN_TAG_PRESENT, }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 3d7922433aba..9cfde6941099 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -127,7 +127,9 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_RXHASH 32 #define SKF_AD_CPU 36 #define SKF_AD_ALU_XOR_X 40 -#define SKF_AD_MAX 44 +#define SKF_AD_VLAN_TAG 44 +#define SKF_AD_VLAN_TAG_PRESENT 48 +#define SKF_AD_MAX 52 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index 3d92ebb7fbcf..5a114d41bf11 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -39,6 +39,7 @@ #include #include #include +#include /* No hurry in this branch * @@ -341,6 +342,12 @@ load_b: case BPF_S_ANC_CPU: A = raw_smp_processor_id(); continue; + case BPF_S_ANC_VLAN_TAG: + A = vlan_tx_tag_get(skb); + continue; + case BPF_S_ANC_VLAN_TAG_PRESENT: + A = !!vlan_tx_tag_present(skb); + continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(RXHASH); ANCILLARY(CPU); ANCILLARY(ALU_XOR_X); + ANCILLARY(VLAN_TAG); + ANCILLARY(VLAN_TAG_PRESENT); } } ftest->code = code; -- cgit v1.2.3 From bbb009941efaece3898910a862f6d23aa55d6ba8 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 31 Oct 2012 19:45:59 +0000 Subject: tuntap: introduce multiqueue flags Add flags to be used by creating multiqueue tuntap device. 
Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_tun.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 25a585ce23e6..8ef3a87b58a0 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -34,6 +34,7 @@ #define TUN_ONE_QUEUE 0x0080 #define TUN_PERSIST 0x0100 #define TUN_VNET_HDR 0x0200 +#define TUN_TAP_MQ 0x0400 /* Ioctl defines */ #define TUNSETNOCSUM _IOW('T', 200, int) @@ -61,6 +62,7 @@ #define IFF_ONE_QUEUE 0x2000 #define IFF_VNET_HDR 0x4000 #define IFF_TUN_EXCL 0x8000 +#define IFF_MULTI_QUEUE 0x0100 /* Features for GSO (TUNSETOFFLOAD). */ #define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */ -- cgit v1.2.3 From cde8b15f1aabe327038ee4e0e11dd6b798572f69 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 31 Oct 2012 19:46:01 +0000 Subject: tuntap: add ioctl to attach or detach a file form tuntap device Sometimes userspace may need to activate/deactivate a queue; this can be done by detaching and attaching a file from the tuntap device. This patch introduces a new ioctl - TUNSETQUEUE - which can be used to do this. Flag IFF_ATTACH_QUEUE was introduced to do the attaching while IFF_DETACH_QUEUE was introduced to do the detaching. Signed-off-by: Jason Wang Signed-off-by: David S.
Miller --- drivers/net/tun.c | 56 ++++++++++++++++++++++++++++++++++++++------- include/uapi/linux/if_tun.h | 3 +++ 2 files changed, 51 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2762c55aeb66..79b6f9ecc12c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -195,6 +195,15 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) return txq; } +static inline bool tun_not_capable(struct tun_struct *tun) +{ + const struct cred *cred = current_cred(); + + return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || + (gid_valid(tun->group) && !in_egroup_p(tun->group))) && + !capable(CAP_NET_ADMIN); +} + static void tun_set_real_num_queues(struct tun_struct *tun) { netif_set_real_num_tx_queues(tun->dev, tun->numqueues); @@ -1310,8 +1319,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) dev = __dev_get_by_name(net, ifr->ifr_name); if (dev) { - const struct cred *cred = current_cred(); - if (ifr->ifr_flags & IFF_TUN_EXCL) return -EBUSY; if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) @@ -1321,9 +1328,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else return -EINVAL; - if (((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || - (gid_valid(tun->group) && !in_egroup_p(tun->group))) && - !capable(CAP_NET_ADMIN)) + if (tun_not_capable(tun)) return -EPERM; err = security_tun_dev_attach(tfile->socket.sk); if (err < 0) @@ -1530,6 +1535,40 @@ static void tun_set_sndbuf(struct tun_struct *tun) } } +static int tun_set_queue(struct file *file, struct ifreq *ifr) +{ + struct tun_file *tfile = file->private_data; + struct tun_struct *tun; + struct net_device *dev; + int ret = 0; + + rtnl_lock(); + + if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { + dev = __dev_get_by_name(tfile->net, ifr->ifr_name); + if (!dev) { + ret = -EINVAL; + goto unlock; + } + + tun = netdev_priv(dev); + if 
(dev->netdev_ops != &tap_netdev_ops && + dev->netdev_ops != &tun_netdev_ops) + ret = -EINVAL; + else if (tun_not_capable(tun)) + ret = -EPERM; + else + ret = tun_attach(tun, file); + } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) + __tun_detach(tfile, false); + else + ret = -EINVAL; + +unlock: + rtnl_unlock(); + return ret; +} + static long __tun_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg, int ifreq_len) { @@ -1543,7 +1582,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, int vnet_hdr_sz; int ret; - if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) { + if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) { if (copy_from_user(&ifr, argp, ifreq_len)) return -EFAULT; } else { @@ -1554,9 +1593,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, * This is needed because we never checked for invalid flags on * TUNSETIFF. */ return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | - IFF_VNET_HDR, + IFF_VNET_HDR | IFF_MULTI_QUEUE, (unsigned int __user*)argp); - } + } else if (cmd == TUNSETQUEUE) + return tun_set_queue(file, &ifr); ret = 0; rtnl_lock(); diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 8ef3a87b58a0..958497ad5bb5 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -54,6 +54,7 @@ #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog) #define TUNGETVNETHDRSZ _IOR('T', 215, int) #define TUNSETVNETHDRSZ _IOW('T', 216, int) +#define TUNSETQUEUE _IOW('T', 217, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 @@ -63,6 +64,8 @@ #define IFF_VNET_HDR 0x4000 #define IFF_TUN_EXCL 0x8000 #define IFF_MULTI_QUEUE 0x0100 +#define IFF_ATTACH_QUEUE 0x0200 +#define IFF_DETACH_QUEUE 0x0400 /* Features for GSO (TUNSETOFFLOAD). */ #define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. 
*/ -- cgit v1.2.3 From 215b13dd288c2e1e4461c1530a801f5f83e8cd90 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 31 Oct 2012 06:19:07 +0000 Subject: ptp: add an ioctl to compare PHC time with system time This patch adds an ioctl for PTP Hardware Clock (PHC) devices that allows user space to measure the time offset between the PHC and the system clock. Rather than hard coding any kind of estimation algorithm into the kernel, this patch takes the more flexible approach of just delivering an array of raw clock readings. In that way, the user space clock servo may be adapted to new and different hardware clocks. Signed-off-by: Richard Cochran Acked-by: Jacob Keller Signed-off-by: David S. Miller --- drivers/ptp/ptp_chardev.c | 32 ++++++++++++++++++++++++++++++++ include/uapi/linux/ptp_clock.h | 14 ++++++++++++++ 2 files changed, 46 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index e7f301da2902..4f8ae8057a7e 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -33,9 +33,13 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) { struct ptp_clock_caps caps; struct ptp_clock_request req; + struct ptp_sys_offset sysoff; struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); struct ptp_clock_info *ops = ptp->info; + struct ptp_clock_time *pct; + struct timespec ts; int enable, err = 0; + unsigned int i; switch (cmd) { @@ -88,6 +92,34 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = ops->enable(ops, &req, enable); break; + case PTP_SYS_OFFSET: + if (copy_from_user(&sysoff, (void __user *)arg, + sizeof(sysoff))) { + err = -EFAULT; + break; + } + if (sysoff.n_samples > PTP_MAX_SAMPLES) { + err = -EINVAL; + break; + } + pct = &sysoff.ts[0]; + for (i = 0; i < sysoff.n_samples; i++) { + getnstimeofday(&ts); + pct->sec = ts.tv_sec; + pct->nsec = ts.tv_nsec; + pct++; + ptp->info->gettime(ptp->info, &ts); + 
pct->sec = ts.tv_sec; + pct->nsec = ts.tv_nsec; + pct++; + } + getnstimeofday(&ts); + pct->sec = ts.tv_sec; + pct->nsec = ts.tv_nsec; + if (copy_to_user((void __user *)arg, &sysoff, sizeof(sysoff))) + err = -EFAULT; + break; + default: err = -ENOTTY; break; diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 94e981f810a2..b65c834f83e9 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -67,12 +67,26 @@ struct ptp_perout_request { unsigned int rsv[4]; /* Reserved for future use. */ }; +#define PTP_MAX_SAMPLES 25 /* Maximum allowed offset measurement samples. */ + +struct ptp_sys_offset { + unsigned int n_samples; /* Desired number of measurements. */ + unsigned int rsv[3]; /* Reserved for future use. */ + /* + * Array of interleaved system/phc time stamps. The kernel + * will provide 2*n_samples + 1 time stamps, with the last + * one as a system time stamp. + */ + struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1]; +}; + #define PTP_CLK_MAGIC '=' #define PTP_CLOCK_GETCAPS _IOR(PTP_CLK_MAGIC, 1, struct ptp_clock_caps) #define PTP_EXTTS_REQUEST _IOW(PTP_CLK_MAGIC, 2, struct ptp_extts_request) #define PTP_PEROUT_REQUEST _IOW(PTP_CLK_MAGIC, 3, struct ptp_perout_request) #define PTP_ENABLE_PPS _IOW(PTP_CLK_MAGIC, 4, int) +#define PTP_SYS_OFFSET _IOW(PTP_CLK_MAGIC, 5, struct ptp_sys_offset) struct ptp_extts_event { struct ptp_clock_time t; /* Time event occured. */ -- cgit v1.2.3 From cc535dfb6a85b42218307c43f60668d7bd6f4318 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 29 Oct 2012 04:53:27 +0000 Subject: rtnl/ipv4: use netconf msg to advertise rp_filter status Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/uapi/linux/netconf.h | 1 + net/ipv4/devinet.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index d0513726711f..75dcbc587fb5 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -12,6 +12,7 @@ enum { NETCONFA_UNSPEC, NETCONFA_IFINDEX, NETCONFA_FORWARDING, + NETCONFA_RP_FILTER, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f8b1e0494d75..f6db227c1fd9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1451,6 +1451,8 @@ static int inet_netconf_msgsize_devconf(int type) /* type -1 is used for ALL */ if (type == -1 || type == NETCONFA_FORWARDING) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_RP_FILTER) + size += nla_total_size(4); return size; } @@ -1479,6 +1481,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_FORWARDING, IPV4_DEVCONF(*devconf, FORWARDING)) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_RP_FILTER) && + nla_put_s32(skb, NETCONFA_RP_FILTER, + IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1515,6 +1521,7 @@ errout: static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, + [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, @@ -1647,6 +1654,23 @@ static int devinet_conf_proc(ctl_table *ctl, int write, i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) rt_cache_flush(net); + if (i == IPV4_DEVCONF_RP_FILTER - 1 && + new_value != old_value) { + int ifindex; + + if (cnf == net->ipv4.devconf_dflt) + ifindex = NETCONFA_IFINDEX_DEFAULT; + else if (cnf == net->ipv4.devconf_all) + ifindex = 
NETCONFA_IFINDEX_ALL; + else { + struct in_device *idev = + container_of(cnf, struct in_device, + cnf); + ifindex = idev->dev->ifindex; + } + inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER, + ifindex, cnf); + } } return ret; -- cgit v1.2.3 From 121d1e0941e05c64ee4223064dd83eb24e871739 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Oct 2012 01:08:49 +0000 Subject: netfilter: ipv6: add getsockopt to retrieve origdst userspace can query the original ipv4 destination address of a REDIRECTed connection via getsockopt(m_sock, SOL_IP, SO_ORIGINAL_DST, &m_server_addr, &addrsize) but for ipv6 no such option existed. This adds getsockopt(..., IPPROTO_IPV6, IP6T_SO_ORIGINAL_DST, ...). Without this, userspace needs to parse /proc or use ctnetlink, which appears to be overkill. This uses option number 80 for IP6T_SO_ORIGINAL_DST, which is spare, to use the same number we use in the IPv4 socket option SO_ORIGINAL_DST. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/in6.h | 1 + include/uapi/linux/netfilter_ipv6/ip6_tables.h | 3 ++ net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 61 ++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 1e3159989958..f79c3721da6e 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -240,6 +240,7 @@ struct in6_flowlabel_req { * * IP6T_SO_GET_REVISION_MATCH 68 * IP6T_SO_GET_REVISION_TARGET 69 + * IP6T_SO_ORIGINAL_DST 80 */ /* RFC5014: Source address selection */ diff --git a/include/uapi/linux/netfilter_ipv6/ip6_tables.h b/include/uapi/linux/netfilter_ipv6/ip6_tables.h index bf1ef65cc582..649c68062dca 100644 --- a/include/uapi/linux/netfilter_ipv6/ip6_tables.h +++ b/include/uapi/linux/netfilter_ipv6/ip6_tables.h @@ -178,6 +178,9 @@ struct ip6t_error { #define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5) #define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET +/* 
obtain original address if REDIRECT'd connection */ +#define IP6T_SO_ORIGINAL_DST 80 + /* ICMP matching stuff */ struct ip6t_icmp { __u8 type; /* type to match */ diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 8860d23e61cf..02dcafdc7a95 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -295,6 +296,50 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { }, }; +static int +ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *inet6 = inet6_sk(sk); + const struct nf_conntrack_tuple_hash *h; + struct sockaddr_in6 sin6; + struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; + struct nf_conn *ct; + + tuple.src.u3.in6 = inet6->rcv_saddr; + tuple.src.u.tcp.port = inet->inet_sport; + tuple.dst.u3.in6 = inet6->daddr; + tuple.dst.u.tcp.port = inet->inet_dport; + tuple.dst.protonum = sk->sk_protocol; + + if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) + return -ENOPROTOOPT; + + if (*len < 0 || (unsigned int) *len < sizeof(sin6)) + return -EINVAL; + + h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); + if (!h) { + pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n", + &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port), + &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port)); + return -ENOENT; + } + + ct = nf_ct_tuplehash_to_ctrack(h); + + sin6.sin6_family = AF_INET6; + sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; + sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK; + memcpy(&sin6.sin6_addr, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, + sizeof(sin6.sin6_addr)); + sin6.sin6_scope_id = sk->sk_bound_dev_if; + + nf_ct_put(ct); + return copy_to_user(user, &sin6, 
sizeof(sin6)) ? -EFAULT : 0; +} + #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include @@ -359,6 +404,14 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI "); +static struct nf_sockopt_ops so_getorigdst6 = { + .pf = NFPROTO_IPV6, + .get_optmin = IP6T_SO_ORIGINAL_DST, + .get_optmax = IP6T_SO_ORIGINAL_DST + 1, + .get = ipv6_getorigdst, + .owner = THIS_MODULE, +}; + static int ipv6_net_init(struct net *net) { int ret = 0; @@ -425,6 +478,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) need_conntrack(); nf_defrag_ipv6_enable(); + ret = nf_register_sockopt(&so_getorigdst6); + if (ret < 0) { + pr_err("Unable to register netfilter socket option\n"); + return ret; + } + ret = register_pernet_subsys(&ipv6_net_ops); if (ret < 0) goto cleanup_pernet; @@ -440,6 +499,7 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) cleanup_ipv6: unregister_pernet_subsys(&ipv6_net_ops); cleanup_pernet: + nf_unregister_sockopt(&so_getorigdst6); return ret; } @@ -448,6 +508,7 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) synchronize_net(); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); unregister_pernet_subsys(&ipv6_net_ops); + nf_unregister_sockopt(&so_getorigdst6); } module_init(nf_conntrack_l3proto_ipv6_init); -- cgit v1.2.3 From 4f99ad51292078cc47343c17d3870764588cff73 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 30 Oct 2012 04:08:41 +0000 Subject: if_ether.h: add B.A.T.M.A.N.-Advanced Ethertype Add Ethertype 0x4305 (not an officially registered id). This Ethertype is used by every frame generated by B.A.T.M.A.N.-Advanced. Its definition is currently batman-adv local only and since it is not officially registered it is better to make its definition kernel-wide so that we avoid collisions given by future unofficial uses of the same Ethertype. Signed-off-by: Antonio Quartulli Signed-off-by: David S. 
Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 0343e1f0582c..67fb87ca1094 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -48,6 +48,7 @@ #define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */ #define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */ +#define ETH_P_BATMAN 0x4305 /* B.A.T.M.A.N.-Advanced packet [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_DEC 0x6000 /* DEC Assigned proto */ #define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */ #define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */ -- cgit v1.2.3 From f4e583c8935c6f52f9385ee7cfbea8f65c66a737 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Fri, 2 Nov 2012 13:27:48 +0100 Subject: nl/cfg80211: add the NL80211_CMD_SET_MCAST_RATE command This command triggers a new callback: set_mcast_rate(). It enables the user to change the rate used to send multicast frames for vif configured as IBSS or MESH_POINT Signed-off-by: Antonio Quartulli Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 5 +++++ net/wireless/nl80211.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8034a4268fcb..cee791fd4cff 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1545,6 +1545,9 @@ struct cfg80211_gtk_rekey_data { * to a merge. * @leave_ibss: Leave the IBSS. * + * @set_mcast_rate: Set the specified multicast rate (only if vif is in ADHOC or + * MESH mode) + * * @set_wiphy_params: Notify that wiphy parameters have changed; * @changed bitfield (see &enum wiphy_params_flags) describes which values * have changed. 
The actual parameter values are available in @@ -1749,6 +1752,9 @@ struct cfg80211_ops { struct cfg80211_ibss_params *params); int (*leave_ibss)(struct wiphy *wiphy, struct net_device *dev); + int (*set_mcast_rate)(struct wiphy *wiphy, struct net_device *dev, + int rate[IEEE80211_NUM_BANDS]); + int (*set_wiphy_params)(struct wiphy *wiphy, u32 changed); int (*set_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4c5f6748ed7d..cbd2d6bb907a 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -578,6 +578,9 @@ * station, due to particular reason. %NL80211_ATTR_CONN_FAILED_REASON * is used for this. * + * @NL80211_CMD_SET_MCAST_RATE: Change the rate used to send multicast frames + * for IBSS or MESH vif. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -726,6 +729,8 @@ enum nl80211_commands { NL80211_CMD_CONN_FAILED, + NL80211_CMD_SET_MCAST_RATE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 87d4670ee53a..9b0a3b8fd20a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1110,6 +1110,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } CMD(start_p2p_device, START_P2P_DEVICE); + CMD(set_mcast_rate, SET_MCAST_RATE); #ifdef CONFIG_NL80211_TESTMODE CMD(testmode_cmd, TESTMODE); @@ -5448,6 +5449,36 @@ static int nl80211_leave_ibss(struct sk_buff *skb, struct genl_info *info) return cfg80211_leave_ibss(rdev, dev, false); } +static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + int mcast_rate[IEEE80211_NUM_BANDS]; + u32 nla_rate; + int err; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC && + 
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + + if (!rdev->ops->set_mcast_rate) + return -EOPNOTSUPP; + + memset(mcast_rate, 0, sizeof(mcast_rate)); + + if (!info->attrs[NL80211_ATTR_MCAST_RATE]) + return -EINVAL; + + nla_rate = nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]); + if (!nl80211_parse_mcast_rate(rdev, mcast_rate, nla_rate)) + return -EINVAL; + + err = rdev->ops->set_mcast_rate(&rdev->wiphy, dev, mcast_rate); + + return err; +} + + #ifdef CONFIG_NL80211_TESTMODE static struct genl_multicast_group nl80211_testmode_mcgrp = { .name = "testmode", @@ -7629,6 +7660,14 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_MCAST_RATE, + .doit = nl80211_set_mcast_rate, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { -- cgit v1.2.3 From 5920cd3a41f1aefc30e9ce86384fc2fe9f5fe0c0 Mon Sep 17 00:00:00 2001 From: Paul Chavent Date: Tue, 6 Nov 2012 23:10:47 +0000 Subject: packet: tx_ring: allow the user to choose tx data offset The tx data offset of packet mmap tx ring used to be : (TPACKET2_HDRLEN - sizeof(struct sockaddr_ll)) The problem is that, with SOCK_RAW socket, the payload (14 bytes after the beginning of the user data) is misaligned. This patch allows to let the user gives an offset for it's tx data if he desires. Set sock option PACKET_TX_HAS_OFF to 1, then specify in each frame of your tx ring tp_net for SOCK_DGRAM, or tp_mac for SOCK_RAW. Signed-off-by: Paul Chavent Signed-off-by: David S. 
Miller --- Documentation/networking/packet_mmap.txt | 13 +++++++++ include/uapi/linux/if_packet.h | 1 + net/packet/af_packet.c | 46 +++++++++++++++++++++++++++++++- net/packet/internal.h | 1 + 4 files changed, 60 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 1c08a4b0981f..7cd879eba5dc 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -163,6 +163,19 @@ As capture, each frame contains two parts: A complete tutorial is available at: http://wiki.gnu-log.net/ +By default, the user should put data at : + frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll) + +So, whatever you choose for the socket mode (SOCK_DGRAM or SOCK_RAW), +the beginning of the user data will be at : + frame base + TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + +If you wish to put user data at a custom offset from the beginning of +the frame (for payload alignment with SOCK_RAW mode for instance) you +can set tp_net (with SOCK_DGRAM) or tp_mac (with SOCK_RAW). In order +to make this work it must be enabled previously with setsockopt() +and the PACKET_TX_HAS_OFF option. 
+ -------------------------------------------------------------------------------- + PACKET_MMAP settings -------------------------------------------------------------------------------- diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index f3799295d231..f9a60375f0d0 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -50,6 +50,7 @@ struct sockaddr_ll { #define PACKET_TX_TIMESTAMP 16 #define PACKET_TIMESTAMP 17 #define PACKET_FANOUT 18 +#define PACKET_TX_HAS_OFF 19 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9034f52659b5..f262dbfc7f06 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1881,7 +1881,35 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); - data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); + if (po->tp_tx_has_off) { + int off_min, off_max, off; + off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); + off_max = po->tx_ring.frame_size - tp_len; + if (sock->type == SOCK_DGRAM) { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_net; + break; + default: + off = ph.h1->tp_net; + break; + } + } else { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_mac; + break; + default: + off = ph.h1->tp_mac; + break; + } + } + if (unlikely((off < off_min) || (off_max < off))) + return -EINVAL; + data = ph.raw + off; + } else { + data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); + } to_write = tp_len; if (sock->type == SOCK_DGRAM) { @@ -3109,6 +3137,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return fanout_add(sk, val & 0xffff, val >> 16); } + case PACKET_TX_HAS_OFF: + { + unsigned int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) + return -EBUSY; + if (copy_from_user(&val, optval, 
sizeof(val))) + return -EFAULT; + po->tp_tx_has_off = !!val; + return 0; + } default: return -ENOPROTOOPT; } @@ -3200,6 +3241,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, ((u32)po->fanout->type << 16)) : 0); break; + case PACKET_TX_HAS_OFF: + val = po->tp_tx_has_off; + break; default: return -ENOPROTOOPT; } diff --git a/net/packet/internal.h b/net/packet/internal.h index 44945f6b7252..e84cab8cb7a9 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -109,6 +109,7 @@ struct packet_sock { unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_loss:1; + unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; -- cgit v1.2.3 From a80a6b85b428e6ce12a8363bb1f08d44c50f3252 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 8 Nov 2012 15:53:35 -0800 Subject: revert "epoll: support for disabling items, and a self-test app" Revert commit 03a7beb55b9f ("epoll: support for disabling items, and a self-test app") pending resolution of the issues identified by Michael Kerrisk, copied below. We'll revisit this for 3.8. : I've taken a look at this patch as it currently stands in 3.7-rc1, and : done a bit of testing. (By the way, the test program : tools/testing/selftests/epoll/test_epoll.c does not compile...) : : There are one or two places where the behavior seems a little strange, : so I have a question or two at the end of this mail. But other than : that, I want to check my understanding so that the interface can be : correctly documented. : : Just to go though my understanding, the problem is the following : scenario in a multithreaded application: : : 1. Multiple threads are performing epoll_wait() operations, : and maintaining a user-space cache that contains information : corresponding to each file descriptor being monitored by : epoll_wait(). : : 2. 
At some point, a thread wants to delete (EPOLL_CTL_DEL) : a file descriptor from the epoll interest list, and : delete the corresponding record from the user-space cache. : : 3. The problem with (2) is that some other thread may have : previously done an epoll_wait() that retrieved information : about the fd in question, and may be in the middle of using : information in the cache that relates to that fd. Thus, : there is a potential race. : : 4. The race can't be solved purely in user space, because doing : so would require applying a mutex across the epoll_wait() : call, which would of course blow thread concurrency. : : Right? : : Your solution is the EPOLL_CTL_DISABLE operation. I want to : confirm my understanding about how to use this flag, since : the description that has accompanied the patches so far : has been a bit sparse. : : 0. In the scenario you're concerned about, deleting a file : descriptor means (safely) doing the following: : (a) Deleting the file descriptor from the epoll interest list : using EPOLL_CTL_DEL : (b) Deleting the corresponding record in the user-space cache : : 1. It's only meaningful to use this EPOLL_CTL_DISABLE in : conjunction with EPOLLONESHOT. : : 2. Using EPOLL_CTL_DISABLE without using EPOLLONESHOT in : conjunction is a logical error. : : 3. The correct way to code multithreaded applications using : EPOLL_CTL_DISABLE and EPOLLONESHOT is as follows: : : a. All EPOLL_CTL_ADD and EPOLL_CTL_MOD operations should : specify EPOLLONESHOT. : : b. When a thread wants to delete a file descriptor, it : should do the following: : : [1] Call epoll_ctl(EPOLL_CTL_DISABLE) : [2] If the return status from epoll_ctl(EPOLL_CTL_DISABLE) : was zero, then the file descriptor can be safely : deleted by the thread that made this call. : [3] If the epoll_ctl(EPOLL_CTL_DISABLE) fails with EBUSY, : then the descriptor is in use.
In this case, the calling : thread should set a flag in the user-space cache to : indicate that the thread that is using the descriptor : should perform the deletion operation. : : Is all of the above correct? : : The implementation depends on checking whether : (events & ~EP_PRIVATE_BITS) == 0 : This relies on the fact that EPOLL_CTL_ADD and EPOLL_CTL_MOD always : set EPOLLHUP and EPOLLERR in the 'events' mask, and EPOLLONESHOT : causes those flags (as well as all others in ~EP_PRIVATE_BITS) to be : cleared. : : A corollary to the previous paragraph is that using EPOLL_CTL_DISABLE : is only useful in conjunction with EPOLLONESHOT. However, as things : stand, one can use EPOLL_CTL_DISABLE on a file descriptor that does : not have EPOLLONESHOT set in 'events'. This results in the following : (slightly surprising) behavior: : : (a) The first call to epoll_ctl(EPOLL_CTL_DISABLE) returns 0 : (the indicator that the file descriptor can be safely deleted). : (b) The next call to epoll_ctl(EPOLL_CTL_DISABLE) fails with EBUSY. : : This doesn't seem particularly useful, and in fact is probably an : indication that the user made a logic error: they should only be using : epoll_ctl(EPOLL_CTL_DISABLE) on a file descriptor for which : EPOLLONESHOT was set in 'events'. If that is correct, then would it : not make sense to return an error to user space for this case? Cc: Michael Kerrisk Cc: "Paton J.
Lewis" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 38 +--- include/uapi/linux/eventpoll.h | 1 - tools/testing/selftests/Makefile | 2 +- tools/testing/selftests/epoll/Makefile | 11 - tools/testing/selftests/epoll/test_epoll.c | 344 ----------------------------- 5 files changed, 4 insertions(+), 392 deletions(-) delete mode 100644 tools/testing/selftests/epoll/Makefile delete mode 100644 tools/testing/selftests/epoll/test_epoll.c (limited to 'include/uapi/linux') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index da72250ddc1c..cd96649bfe62 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p) /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ static inline int ep_op_has_event(int op) { - return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD; + return op != EPOLL_CTL_DEL; } /* Initialize the poll safe wake up structure */ @@ -676,34 +676,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) return 0; } -/* - * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item - * had no event flags set, indicating that another thread may be currently - * handling that item's events (in the case that EPOLLONESHOT was being - * used). Otherwise a zero result indicates that the item has been disabled - * from receiving events. A disabled item may be re-enabled via - * EPOLL_CTL_MOD. Must be called with "mtx" held. 
- */ -static int ep_disable(struct eventpoll *ep, struct epitem *epi) -{ - int result = 0; - unsigned long flags; - - spin_lock_irqsave(&ep->lock, flags); - if (epi->event.events & ~EP_PRIVATE_BITS) { - if (ep_is_linked(&epi->rdllink)) - list_del_init(&epi->rdllink); - /* Ensure ep_poll_callback will not add epi back onto ready - list: */ - epi->event.events &= EP_PRIVATE_BITS; - } - else - result = -EBUSY; - spin_unlock_irqrestore(&ep->lock, flags); - - return result; -} - static void ep_free(struct eventpoll *ep) { struct rb_node *rbp; @@ -1048,6 +1020,8 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) rb_insert_color(&epi->rbn, &ep->rbr); } + + #define PATH_ARR_SIZE 5 /* * These are the number paths of length 1 to 5, that we are allowing to emanate @@ -1813,12 +1787,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, } else error = -ENOENT; break; - case EPOLL_CTL_DISABLE: - if (epi) - error = ep_disable(ep, epi); - else - error = -ENOENT; - break; } mutex_unlock(&ep->mtx); diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 8c99ce7202c5..2c267bcbb85c 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -25,7 +25,6 @@ #define EPOLL_CTL_ADD 1 #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 -#define EPOLL_CTL_DISABLE 4 /* * Request the handling of system wakeup events so as to prevent system suspends diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 43480149119e..85baf11e2acd 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,4 +1,4 @@ -TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug epoll +TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/epoll/Makefile b/tools/testing/selftests/epoll/Makefile deleted file mode 100644 index 19806ed62f50..000000000000 --- 
a/tools/testing/selftests/epoll/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# Makefile for epoll selftests - -all: test_epoll -%: %.c - gcc -pthread -g -o $@ $^ - -run_tests: all - ./test_epoll - -clean: - $(RM) test_epoll diff --git a/tools/testing/selftests/epoll/test_epoll.c b/tools/testing/selftests/epoll/test_epoll.c deleted file mode 100644 index f7525392ce84..000000000000 --- a/tools/testing/selftests/epoll/test_epoll.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * tools/testing/selftests/epoll/test_epoll.c - * - * Copyright 2012 Adobe Systems Incorporated - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Paton J. Lewis - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * A pointer to an epoll_item_private structure will be stored in the epoll - * item's event structure so that we can get access to the epoll_item_private - * data after calling epoll_wait: - */ -struct epoll_item_private { - int index; /* Position of this struct within the epoll_items array. */ - int fd; - uint32_t events; - pthread_mutex_t mutex; /* Guards the following variables... */ - int stop; - int status; /* Stores any error encountered while handling item. */ - /* The following variable allows us to test whether we have encountered - a problem while attempting to cancel and delete the associated - event. When the test program exits, 'deleted' should be exactly - one. If it is greater than one, then the failed test reflects a real - world situation where we would have tried to access the epoll item's - private data after deleting it: */ - int deleted; -}; - -struct epoll_item_private *epoll_items; - -/* - * Delete the specified item from the epoll set. 
In a real-world secneario this - * is where we would free the associated data structure, but in this testing - * environment we retain the structure so that we can test for double-deletion: - */ -void delete_item(int index) -{ - __sync_fetch_and_add(&epoll_items[index].deleted, 1); -} - -/* - * A pointer to a read_thread_data structure will be passed as the argument to - * each read thread: - */ -struct read_thread_data { - int stop; - int status; /* Indicates any error encountered by the read thread. */ - int epoll_set; -}; - -/* - * The function executed by the read threads: - */ -void *read_thread_function(void *function_data) -{ - struct read_thread_data *thread_data = - (struct read_thread_data *)function_data; - struct epoll_event event_data; - struct epoll_item_private *item_data; - char socket_data; - - /* Handle events until we encounter an error or this thread's 'stop' - condition is set: */ - while (1) { - int result = epoll_wait(thread_data->epoll_set, - &event_data, - 1, /* Number of desired events */ - 1000); /* Timeout in ms */ - if (result < 0) { - /* Breakpoints signal all threads. 
Ignore that while - debugging: */ - if (errno == EINTR) - continue; - thread_data->status = errno; - return 0; - } else if (thread_data->stop) - return 0; - else if (result == 0) /* Timeout */ - continue; - - /* We need the mutex here because checking for the stop - condition and re-enabling the epoll item need to be done - together as one atomic operation when EPOLL_CTL_DISABLE is - available: */ - item_data = (struct epoll_item_private *)event_data.data.ptr; - pthread_mutex_lock(&item_data->mutex); - - /* Remove the item from the epoll set if we want to stop - handling that event: */ - if (item_data->stop) - delete_item(item_data->index); - else { - /* Clear the data that was written to the other end of - our non-blocking socket: */ - do { - if (read(item_data->fd, &socket_data, 1) < 1) { - if ((errno == EAGAIN) || - (errno == EWOULDBLOCK)) - break; - else - goto error_unlock; - } - } while (item_data->events & EPOLLET); - - /* The item was one-shot, so re-enable it: */ - event_data.events = item_data->events; - if (epoll_ctl(thread_data->epoll_set, - EPOLL_CTL_MOD, - item_data->fd, - &event_data) < 0) - goto error_unlock; - } - - pthread_mutex_unlock(&item_data->mutex); - } - -error_unlock: - thread_data->status = item_data->status = errno; - pthread_mutex_unlock(&item_data->mutex); - return 0; -} - -/* - * A pointer to a write_thread_data structure will be passed as the argument to - * the write thread: - */ -struct write_thread_data { - int stop; - int status; /* Indicates any error encountered by the write thread. */ - int n_fds; - int *fds; -}; - -/* - * The function executed by the write thread. It writes a single byte to each - * socket in turn until the stop condition for this thread is set. If writing to - * a socket would block (i.e. errno was EAGAIN), we leave that socket alone for - * the moment and just move on to the next socket in the list. We don't care - * about the order in which we deliver events to the epoll set. 
In fact we don't - * care about the data we're writing to the pipes at all; we just want to - * trigger epoll events: - */ -void *write_thread_function(void *function_data) -{ - const char data = 'X'; - int index; - struct write_thread_data *thread_data = - (struct write_thread_data *)function_data; - while (!thread_data->stop) - for (index = 0; - !thread_data->stop && (index < thread_data->n_fds); - ++index) - if ((write(thread_data->fds[index], &data, 1) < 1) && - (errno != EAGAIN) && - (errno != EWOULDBLOCK)) { - thread_data->status = errno; - return; - } -} - -/* - * Arguments are currently ignored: - */ -int main(int argc, char **argv) -{ - const int n_read_threads = 100; - const int n_epoll_items = 500; - int index; - int epoll_set = epoll_create1(0); - struct write_thread_data write_thread_data = { - 0, 0, n_epoll_items, malloc(n_epoll_items * sizeof(int)) - }; - struct read_thread_data *read_thread_data = - malloc(n_read_threads * sizeof(struct read_thread_data)); - pthread_t *read_threads = malloc(n_read_threads * sizeof(pthread_t)); - pthread_t write_thread; - - printf("-----------------\n"); - printf("Runing test_epoll\n"); - printf("-----------------\n"); - - epoll_items = malloc(n_epoll_items * sizeof(struct epoll_item_private)); - - if (epoll_set < 0 || epoll_items == 0 || write_thread_data.fds == 0 || - read_thread_data == 0 || read_threads == 0) - goto error; - - if (sysconf(_SC_NPROCESSORS_ONLN) < 2) { - printf("Error: please run this test on a multi-core system.\n"); - goto error; - } - - /* Create the socket pairs and epoll items: */ - for (index = 0; index < n_epoll_items; ++index) { - int socket_pair[2]; - struct epoll_event event_data; - if (socketpair(AF_UNIX, - SOCK_STREAM | SOCK_NONBLOCK, - 0, - socket_pair) < 0) - goto error; - write_thread_data.fds[index] = socket_pair[0]; - epoll_items[index].index = index; - epoll_items[index].fd = socket_pair[1]; - if (pthread_mutex_init(&epoll_items[index].mutex, NULL) != 0) - goto error; - /* We 
always use EPOLLONESHOT because this test is currently - structured to demonstrate the need for EPOLL_CTL_DISABLE, - which only produces useful information in the EPOLLONESHOT - case (without EPOLLONESHOT, calling epoll_ctl with - EPOLL_CTL_DISABLE will never return EBUSY). If support for - testing events without EPOLLONESHOT is desired, it should - probably be implemented in a separate unit test. */ - epoll_items[index].events = EPOLLIN | EPOLLONESHOT; - if (index < n_epoll_items / 2) - epoll_items[index].events |= EPOLLET; - epoll_items[index].stop = 0; - epoll_items[index].status = 0; - epoll_items[index].deleted = 0; - event_data.events = epoll_items[index].events; - event_data.data.ptr = &epoll_items[index]; - if (epoll_ctl(epoll_set, - EPOLL_CTL_ADD, - epoll_items[index].fd, - &event_data) < 0) - goto error; - } - - /* Create and start the read threads: */ - for (index = 0; index < n_read_threads; ++index) { - read_thread_data[index].stop = 0; - read_thread_data[index].status = 0; - read_thread_data[index].epoll_set = epoll_set; - if (pthread_create(&read_threads[index], - NULL, - read_thread_function, - &read_thread_data[index]) != 0) - goto error; - } - - if (pthread_create(&write_thread, - NULL, - write_thread_function, - &write_thread_data) != 0) - goto error; - - /* Cancel all event pollers: */ -#ifdef EPOLL_CTL_DISABLE - for (index = 0; index < n_epoll_items; ++index) { - pthread_mutex_lock(&epoll_items[index].mutex); - ++epoll_items[index].stop; - if (epoll_ctl(epoll_set, - EPOLL_CTL_DISABLE, - epoll_items[index].fd, - NULL) == 0) - delete_item(index); - else if (errno != EBUSY) { - pthread_mutex_unlock(&epoll_items[index].mutex); - goto error; - } - /* EBUSY means events were being handled; allow the other thread - to delete the item. 
*/ - pthread_mutex_unlock(&epoll_items[index].mutex); - } -#else - for (index = 0; index < n_epoll_items; ++index) { - pthread_mutex_lock(&epoll_items[index].mutex); - ++epoll_items[index].stop; - pthread_mutex_unlock(&epoll_items[index].mutex); - /* Wait in case a thread running read_thread_function is - currently executing code between epoll_wait and - pthread_mutex_lock with this item. Note that a longer delay - would make double-deletion less likely (at the expense of - performance), but there is no guarantee that any delay would - ever be sufficient. Note also that we delete all event - pollers at once for testing purposes, but in a real-world - environment we are likely to want to be able to cancel event - pollers at arbitrary times. Therefore we can't improve this - situation by just splitting this loop into two loops - (i.e. signal 'stop' for all items, sleep, and then delete all - items). We also can't fix the problem via EPOLL_CTL_DEL - because that command can't prevent the case where some other - thread is executing read_thread_function within the region - mentioned above: */ - usleep(1); - pthread_mutex_lock(&epoll_items[index].mutex); - if (!epoll_items[index].deleted) - delete_item(index); - pthread_mutex_unlock(&epoll_items[index].mutex); - } -#endif - - /* Shut down the read threads: */ - for (index = 0; index < n_read_threads; ++index) - __sync_fetch_and_add(&read_thread_data[index].stop, 1); - for (index = 0; index < n_read_threads; ++index) { - if (pthread_join(read_threads[index], NULL) != 0) - goto error; - if (read_thread_data[index].status) - goto error; - } - - /* Shut down the write thread: */ - __sync_fetch_and_add(&write_thread_data.stop, 1); - if ((pthread_join(write_thread, NULL) != 0) || write_thread_data.status) - goto error; - - /* Check for final error conditions: */ - for (index = 0; index < n_epoll_items; ++index) { - if (epoll_items[index].status != 0) - goto error; - if (pthread_mutex_destroy(&epoll_items[index].mutex) < 0) - 
goto error; - } - for (index = 0; index < n_epoll_items; ++index) - if (epoll_items[index].deleted != 1) { - printf("Error: item data deleted %1d times.\n", - epoll_items[index].deleted); - goto error; - } - - printf("[PASS]\n"); - return 0; - - error: - printf("[FAIL]\n"); - return errno; -} -- cgit v1.2.3 From b891b4dc1eed33543c5818dae43ce8bb55f2080c Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 9 Nov 2012 15:56:03 +0900 Subject: PCI: Fix bit definitions of PCI_EXP_LNKCAP2 register According to the PCIe 3.0 spec, PCI_EXP_LNKCAP2_SLS_2_5GB is 1st bit of PCI_EXP_LNKCAP2 register, not 0th bit. So, the bit definition of supported link speed vector should be fixed. [bhelgaas: change "Current" to "Supported"] Signed-off-by: Jingoo Han Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 20ae747ddf34..259763d2df71 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -544,9 +544,9 @@ #define PCI_EXP_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints end here */ #define PCI_EXP_LNKCAP2 44 /* Link Capability 2 */ -#define PCI_EXP_LNKCAP2_SLS_2_5GB 0x01 /* Current Link Speed 2.5GT/s */ -#define PCI_EXP_LNKCAP2_SLS_5_0GB 0x02 /* Current Link Speed 5.0GT/s */ -#define PCI_EXP_LNKCAP2_SLS_8_0GB 0x04 /* Current Link Speed 8.0GT/s */ +#define PCI_EXP_LNKCAP2_SLS_2_5GB 0x02 /* Supported Link Speed 2.5GT/s */ +#define PCI_EXP_LNKCAP2_SLS_5_0GB 0x04 /* Supported Link Speed 5.0GT/s */ +#define PCI_EXP_LNKCAP2_SLS_8_0GB 0x08 /* Supported Link Speed 8.0GT/s */ #define PCI_EXP_LNKCAP2_CROSSLINK 0x100 /* Crosslink supported */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ -- cgit v1.2.3 From d77807230e1ef30dbdee85aa24d27073a14dd168 Mon Sep 17 00:00:00 2001 From: 
David Howells Date: Wed, 7 Nov 2012 02:37:17 +0000 Subject: UAPI: (Scripted) Disintegrate include/linux/hdlc Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones Acked-by: Krzysztof Halasa Signed-off-by: David S. Miller --- include/linux/hdlc/Kbuild | 1 - include/linux/hdlc/ioctl.h | 81 ----------------------------------------- include/uapi/linux/hdlc/Kbuild | 1 + include/uapi/linux/hdlc/ioctl.h | 81 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 82 deletions(-) delete mode 100644 include/linux/hdlc/ioctl.h create mode 100644 include/uapi/linux/hdlc/ioctl.h (limited to 'include/uapi/linux') diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild index 1fb26448faa9..e69de29bb2d1 100644 --- a/include/linux/hdlc/Kbuild +++ b/include/linux/hdlc/Kbuild @@ -1 +0,0 @@ -header-y += ioctl.h diff --git a/include/linux/hdlc/ioctl.h b/include/linux/hdlc/ioctl.h deleted file mode 100644 index 583972364357..000000000000 --- a/include/linux/hdlc/ioctl.h +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef __HDLC_IOCTL_H__ -#define __HDLC_IOCTL_H__ - - -#define GENERIC_HDLC_VERSION 4 /* For synchronization with sethdlc utility */ - -#define CLOCK_DEFAULT 0 /* Default setting */ -#define CLOCK_EXT 1 /* External TX and RX clock - DTE */ -#define CLOCK_INT 2 /* Internal TX and RX clock - DCE */ -#define CLOCK_TXINT 3 /* Internal TX and external RX clock */ -#define CLOCK_TXFROMRX 4 /* TX clock derived from external RX clock */ - - -#define ENCODING_DEFAULT 0 /* Default setting */ -#define ENCODING_NRZ 1 -#define ENCODING_NRZI 2 -#define ENCODING_FM_MARK 3 -#define ENCODING_FM_SPACE 4 -#define ENCODING_MANCHESTER 5 - - -#define PARITY_DEFAULT 0 /* Default setting */ -#define PARITY_NONE 1 /* No parity */ -#define PARITY_CRC16_PR0 2 /* CRC16, initial value 0x0000 */ -#define PARITY_CRC16_PR1 3 /* CRC16, initial value 0xFFFF */ -#define PARITY_CRC16_PR0_CCITT 4 
/* CRC16, initial 0x0000, ITU-T version */ -#define PARITY_CRC16_PR1_CCITT 5 /* CRC16, initial 0xFFFF, ITU-T version */ -#define PARITY_CRC32_PR0_CCITT 6 /* CRC32, initial value 0x00000000 */ -#define PARITY_CRC32_PR1_CCITT 7 /* CRC32, initial value 0xFFFFFFFF */ - -#define LMI_DEFAULT 0 /* Default setting */ -#define LMI_NONE 1 /* No LMI, all PVCs are static */ -#define LMI_ANSI 2 /* ANSI Annex D */ -#define LMI_CCITT 3 /* ITU-T Annex A */ -#define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */ - -typedef struct { - unsigned int clock_rate; /* bits per second */ - unsigned int clock_type; /* internal, external, TX-internal etc. */ - unsigned short loopback; -} sync_serial_settings; /* V.35, V.24, X.21 */ - -typedef struct { - unsigned int clock_rate; /* bits per second */ - unsigned int clock_type; /* internal, external, TX-internal etc. */ - unsigned short loopback; - unsigned int slot_map; -} te1_settings; /* T1, E1 */ - -typedef struct { - unsigned short encoding; - unsigned short parity; -} raw_hdlc_proto; - -typedef struct { - unsigned int t391; - unsigned int t392; - unsigned int n391; - unsigned int n392; - unsigned int n393; - unsigned short lmi; - unsigned short dce; /* 1 for DCE (network side) operation */ -} fr_proto; - -typedef struct { - unsigned int dlci; -} fr_proto_pvc; /* for creating/deleting FR PVCs */ - -typedef struct { - unsigned int dlci; - char master[IFNAMSIZ]; /* Name of master FRAD device */ -}fr_proto_pvc_info; /* for returning PVC information only */ - -typedef struct { - unsigned int interval; - unsigned int timeout; -} cisco_proto; - -/* PPP doesn't need any info now - supply length = 0 to ioctl */ - -#endif /* __HDLC_IOCTL_H__ */ diff --git a/include/uapi/linux/hdlc/Kbuild b/include/uapi/linux/hdlc/Kbuild index aafaa5aa54d4..8c1d2cb75e33 100644 --- a/include/uapi/linux/hdlc/Kbuild +++ b/include/uapi/linux/hdlc/Kbuild @@ -1 +1,2 @@ # UAPI Header export list +header-y += ioctl.h diff --git a/include/uapi/linux/hdlc/ioctl.h 
b/include/uapi/linux/hdlc/ioctl.h new file mode 100644 index 000000000000..46939b24d612 --- /dev/null +++ b/include/uapi/linux/hdlc/ioctl.h @@ -0,0 +1,81 @@ +#ifndef __HDLC_IOCTL_H__ +#define __HDLC_IOCTL_H__ + + +#define GENERIC_HDLC_VERSION 4 /* For synchronization with sethdlc utility */ + +#define CLOCK_DEFAULT 0 /* Default setting */ +#define CLOCK_EXT 1 /* External TX and RX clock - DTE */ +#define CLOCK_INT 2 /* Internal TX and RX clock - DCE */ +#define CLOCK_TXINT 3 /* Internal TX and external RX clock */ +#define CLOCK_TXFROMRX 4 /* TX clock derived from external RX clock */ + + +#define ENCODING_DEFAULT 0 /* Default setting */ +#define ENCODING_NRZ 1 +#define ENCODING_NRZI 2 +#define ENCODING_FM_MARK 3 +#define ENCODING_FM_SPACE 4 +#define ENCODING_MANCHESTER 5 + + +#define PARITY_DEFAULT 0 /* Default setting */ +#define PARITY_NONE 1 /* No parity */ +#define PARITY_CRC16_PR0 2 /* CRC16, initial value 0x0000 */ +#define PARITY_CRC16_PR1 3 /* CRC16, initial value 0xFFFF */ +#define PARITY_CRC16_PR0_CCITT 4 /* CRC16, initial 0x0000, ITU-T version */ +#define PARITY_CRC16_PR1_CCITT 5 /* CRC16, initial 0xFFFF, ITU-T version */ +#define PARITY_CRC32_PR0_CCITT 6 /* CRC32, initial value 0x00000000 */ +#define PARITY_CRC32_PR1_CCITT 7 /* CRC32, initial value 0xFFFFFFFF */ + +#define LMI_DEFAULT 0 /* Default setting */ +#define LMI_NONE 1 /* No LMI, all PVCs are static */ +#define LMI_ANSI 2 /* ANSI Annex D */ +#define LMI_CCITT 3 /* ITU-T Annex A */ +#define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */ + +typedef struct { + unsigned int clock_rate; /* bits per second */ + unsigned int clock_type; /* internal, external, TX-internal etc. */ + unsigned short loopback; +} sync_serial_settings; /* V.35, V.24, X.21 */ + +typedef struct { + unsigned int clock_rate; /* bits per second */ + unsigned int clock_type; /* internal, external, TX-internal etc. 
*/ + unsigned short loopback; + unsigned int slot_map; +} te1_settings; /* T1, E1 */ + +typedef struct { + unsigned short encoding; + unsigned short parity; +} raw_hdlc_proto; + +typedef struct { + unsigned int t391; + unsigned int t392; + unsigned int n391; + unsigned int n392; + unsigned int n393; + unsigned short lmi; + unsigned short dce; /* 1 for DCE (network side) operation */ +} fr_proto; + +typedef struct { + unsigned int dlci; +} fr_proto_pvc; /* for creating/deleting FR PVCs */ + +typedef struct { + unsigned int dlci; + char master[IFNAMSIZ]; /* Name of master FRAD device */ +}fr_proto_pvc_info; /* for returning PVC information only */ + +typedef struct { + unsigned int interval; + unsigned int timeout; +} cisco_proto; + +/* PPP doesn't need any info now - supply length = 0 to ioctl */ + +#endif /* __HDLC_IOCTL_H__ */ -- cgit v1.2.3 From c48c8d51c29efba160a1b27555d97f6ee0d049a6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Nov 2012 02:37:24 +0000 Subject: Fix the wanxl firmware to include missing constants Fix the wanxl firmware to include missing constants such as PARITY_NONE. It should be #including the linux/hdlc/ioctl.h header. To make this work, we also have to guard parts of ioctl.h with !__ASSEMBLY__. Signed-off-by: David Howells Signed-off-by: David S. 
Miller --- drivers/net/wan/Makefile | 2 +- drivers/net/wan/wanxlfw.S | 1 + include/uapi/linux/hdlc/ioctl.h | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index 4dac96b3db2a..b0a61636fc94 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -52,7 +52,7 @@ endif quiet_cmd_build_wanxlfw = BLD FW $@ cmd_build_wanxlfw = \ - $(CPP) -Wp,-MD,$(depfile) -I$(srctree)/include/uapi $< | $(AS68K) -m68360 -o $(obj)/wanxlfw.o; \ + $(CPP) -D__ASSEMBLY__ -Wp,-MD,$(depfile) -I$(srctree)/include/uapi $< | $(AS68K) -m68360 -o $(obj)/wanxlfw.o; \ $(LD68K) --oformat binary -Ttext 0x1000 $(obj)/wanxlfw.o -o $(obj)/wanxlfw.bin; \ hexdump -ve '"\n" 16/1 "0x%02X,"' $(obj)/wanxlfw.bin | sed 's/0x ,//g;1s/^/static u8 firmware[]={/;$$s/,$$/\n};\n/' >$(obj)/wanxlfw.inc; \ rm -f $(obj)/wanxlfw.bin $(obj)/wanxlfw.o diff --git a/drivers/net/wan/wanxlfw.S b/drivers/net/wan/wanxlfw.S index 73aae2bf2f1c..21565d59ec7b 100644 --- a/drivers/net/wan/wanxlfw.S +++ b/drivers/net/wan/wanxlfw.S @@ -35,6 +35,7 @@ */ #include +#include #include "wanxl.h" /* memory addresses and offsets */ diff --git a/include/uapi/linux/hdlc/ioctl.h b/include/uapi/linux/hdlc/ioctl.h index 46939b24d612..04bc0274a189 100644 --- a/include/uapi/linux/hdlc/ioctl.h +++ b/include/uapi/linux/hdlc/ioctl.h @@ -34,6 +34,8 @@ #define LMI_CCITT 3 /* ITU-T Annex A */ #define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */ +#ifndef __ASSEMBLY__ + typedef struct { unsigned int clock_rate; /* bits per second */ unsigned int clock_type; /* internal, external, TX-internal etc. 
*/ @@ -78,4 +80,5 @@ typedef struct { /* PPP doesn't need any info now - supply length = 0 to ioctl */ +#endif /* __ASSEMBLY__ */ #endif /* __HDLC_IOCTL_H__ */ -- cgit v1.2.3 From 0974658da47cb399b76794057823bf3cd22acf37 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 9 Nov 2012 06:09:59 +0000 Subject: ipip: advertise tunnel param via rtnl It is useful for daemons that monitor link events to have the full parameters of these interfaces when an rtnl message is sent. It also allows dumping them via rtnetlink. It is based on what is done for GRE tunnels. Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 11 ++++++++ net/ipv4/ipip.c | 57 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 5db5942575fe..ccb21d585bf4 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -37,6 +37,17 @@ struct ip_tunnel_parm { struct iphdr iph; }; +enum { + IFLA_IPTUN_UNSPEC, + IFLA_IPTUN_LINK, + IFLA_IPTUN_LOCAL, + IFLA_IPTUN_REMOTE, + IFLA_IPTUN_TTL, + IFLA_IPTUN_TOS, + __IFLA_IPTUN_MAX, +}; +#define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) + /* SIT-mode i_flags */ #define SIT_ISATAP 0x0001 diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index cc49cc1ff3b9..720855e41100 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -138,6 +138,7 @@ struct ipip_net { static int ipip_tunnel_init(struct net_device *dev); static void ipip_tunnel_setup(struct net_device *dev); static void ipip_dev_free(struct net_device *dev); +static struct rtnl_link_ops ipip_link_ops __read_mostly; /* * Locking : hash tables are protected by RCU and RTNL @@ -305,6 +306,7 @@ static struct ip_tunnel *ipip_tunnel_locate(struct net *net, goto failed_free; strcpy(nt->parms.name, dev->name); + dev->rtnl_link_ops = &ipip_link_ops; dev_hold(dev);
ipip_tunnel_link(ipn, nt); @@ -841,6 +843,47 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev) return 0; } +static size_t ipip_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ + nla_total_size(4) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(4) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_TOS */ + nla_total_size(1) + + 0; +} + +static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || + nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops ipip_link_ops __read_mostly = { + .kind = "ipip", + .maxtype = IFLA_IPTUN_MAX, + .priv_size = sizeof(struct ip_tunnel), + .get_size = ipip_get_size, + .fill_info = ipip_fill_info, +}; + static struct xfrm_tunnel ipip_handler __read_mostly = { .handler = ipip_rcv, .err_handler = ipip_err, @@ -937,14 +980,26 @@ static int __init ipip_init(void) return err; err = xfrm4_tunnel_register(&ipip_handler, AF_INET); if (err < 0) { - unregister_pernet_device(&ipip_net_ops); pr_info("%s: can't register tunnel\n", __func__); + goto xfrm_tunnel_failed; } + err = rtnl_link_register(&ipip_link_ops); + if (err < 0) + goto rtnl_link_failed; + +out: return err; + +rtnl_link_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel_failed: + unregister_pernet_device(&ipip_net_ops); + goto out; } static void __exit ipip_fini(void) { + rtnl_link_unregister(&ipip_link_ops); if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) pr_info("%s: can't deregister tunnel\n", __func__); -- cgit v1.2.3 
From c075b13098b399dc565b4d53f42047a8d40ed3ba Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 9 Nov 2012 06:10:01 +0000 Subject: ip6tnl: advertise tunnel param via rtnl It is useful for daemons that monitor link events to have the full parameters of these interfaces when an rtnl message is sent. It also allows dumping them via rtnetlink. It is based on what is done for GRE tunnels. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 3 +++ net/ipv6/ip6_tunnel.c | 57 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index ccb21d585bf4..c1bf0b5a8da1 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -44,6 +44,9 @@ enum { IFLA_IPTUN_REMOTE, IFLA_IPTUN_TTL, IFLA_IPTUN_TOS, + IFLA_IPTUN_ENCAP_LIMIT, + IFLA_IPTUN_FLOWINFO, + IFLA_IPTUN_FLAGS, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 09482f723064..424ed45ef122 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -83,6 +83,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); +static struct rtnl_link_ops ip6_link_ops __read_mostly; static int ip6_tnl_net_id __read_mostly; struct ip6_tnl_net { @@ -299,6 +300,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) goto failed_free; strcpy(t->parms.name, dev->name); + dev->rtnl_link_ops = &ip6_link_ops; dev_hold(dev); ip6_tnl_link(ip6n, t); @@ -1504,6 +1506,55 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) return 0; } +static size_t ip6_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ +
nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_ENCAP_LIMIT */ + nla_total_size(1) + + /* IFLA_IPTUN_FLOWINFO */ + nla_total_size(4) + + /* IFLA_IPTUN_FLAGS */ + nla_total_size(4) + + 0; +} + +static int ip6_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + struct __ip6_tnl_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), + &parm->laddr) || + nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), + &parm->raddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || + nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || + nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || + nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops ip6_link_ops __read_mostly = { + .kind = "ip6tnl", + .maxtype = IFLA_IPTUN_MAX, + .priv_size = sizeof(struct ip6_tnl), + .get_size = ip6_get_size, + .fill_info = ip6_fill_info, +}; + static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { .handler = ip4ip6_rcv, .err_handler = ip4ip6_err, @@ -1612,9 +1663,14 @@ static int __init ip6_tunnel_init(void) pr_err("%s: can't register ip6ip6\n", __func__); goto out_ip6ip6; } + err = rtnl_link_register(&ip6_link_ops); + if (err < 0) + goto rtnl_link_failed; return 0; +rtnl_link_failed: + xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); out_ip6ip6: xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); out_ip4ip6: @@ -1629,6 +1685,7 @@ out_pernet: static void __exit ip6_tunnel_cleanup(void) { + rtnl_link_unregister(&ip6_link_ops); if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) pr_info("%s: can't deregister ip4ip6\n", __func__); -- cgit v1.2.3 From 2a91c9f781de209d420d751e43eb43ffe6934803 Mon
Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 9 Nov 2012 17:51:30 -0800 Subject: nl/cfg80211: advertise OBSS scan requirement wpa_supplicant will do OBSS scan for drivers that implement auth/assoc API. Drivers that implement nl80211 connect API (rather than auth/assoc) may need wpa_supplicant to do this as well. Add a new feature flag to inform it (wpa_s) that a driver needs wpa_supplicant to do OBSS scans. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index cbd2d6bb907a..06ddc89f026c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3057,6 +3057,9 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_SCAN_FLUSH: Scan flush is supported * @NL80211_FEATURE_AP_SCAN: Support scanning using an AP vif * @NL80211_FEATURE_VIF_TXPOWER: The driver supports per-vif TX power setting + * @NL80211_FEATURE_NEED_OBSS_SCAN: The driver expects userspace to perform + * OBSS scans and generate 20/40 BSS coex reports. This flag is used only + * for drivers implementing the CONNECT API, for AUTH/ASSOC it is implied. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3069,6 +3072,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_SCAN_FLUSH = 1 << 7, NL80211_FEATURE_AP_SCAN = 1 << 8, NL80211_FEATURE_VIF_TXPOWER = 1 << 9, + NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10, }; /** -- cgit v1.2.3 From 5cb04436eef62aa8f5c482f8ec8deba391dea465 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 6 Nov 2012 16:46:20 +0000 Subject: ipv6: add knob to send unsolicited ND on link-layer address change This patch introduces a new knob ndisc_notify. 
If enabled, the kernel will transmit an unsolicited neighbour advertisement on link-layer address change to update the neighbour tables of the corresponding hosts more quickly. This is the equivalent to arp_notify in ipv4 world. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 8 ++++++++ net/ipv6/ndisc.c | 7 +++++++ 4 files changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bcba48a97868..5e11905a4f01 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -47,6 +47,7 @@ struct ipv6_devconf { __s32 disable_ipv6; __s32 accept_dad; __s32 force_tllao; + __s32 ndisc_notify; void *sysctl; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index a6d7d1c536c3..5a2991cf0251 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -157,6 +157,7 @@ enum { DEVCONF_DISABLE_IPV6, DEVCONF_ACCEPT_DAD, DEVCONF_FORCE_TLLAO, + DEVCONF_NDISC_NOTIFY, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fab23db8ee73..cb803b7bb0d8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4037,6 +4037,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; + array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; } static inline size_t inet6_ifla6_size(void) @@ -4704,6 +4705,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "ndisc_notify", + .data = &ipv6_devconf.ndisc_notify, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 6ba4b54a550a..f41853bca428 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1572,11 +1572,18 @@ static int 
ndisc_netdev_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; struct net *net = dev_net(dev); + struct inet6_dev *idev; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); fib6_run_gc(~0UL, net); + idev = in6_dev_get(dev); + if (!idev) + break; + if (idev->cnf.ndisc_notify) + ndisc_send_unsol_na(dev); + in6_dev_put(idev); break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); -- cgit v1.2.3 From 25c71c75ac87508528db053b818944f3650dd7a6 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 13 Nov 2012 07:53:05 +0000 Subject: bridge: bridge port parameters over netlink Expose bridge port parameter over netlink. By switching to a nested message, this can be used for other bridge parameters. This changes IFLA_PROTINFO attribute from one byte to a full nested set of attributes. This is safe for application interface because the old message used IFLA_PROTINFO and new one uses IFLA_PROTINFO | NLA_F_NESTED. The code adapts to old format requests, and therefore stays compatible with user mode RSTP daemon. Since the type field for nested and unnested attributes are different, and the old code in libnetlink doesn't do the mask, it is also safe to use with old versions of bridge monitor command. Note: although mode is only a boolean, treating it as a full byte since in the future someone will probably want to add more values (like macvlan has). Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/uapi/linux/if_link.h | 15 ++++ net/bridge/br_netlink.c | 165 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 147 insertions(+), 33 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5c80cb11518b..96f7cf49367b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -205,6 +205,21 @@ enum { #define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) +enum { + BRIDGE_MODE_UNSPEC, + BRIDGE_MODE_HAIRPIN, +}; + +enum { + IFLA_BRPORT_UNSPEC, + IFLA_BRPORT_STATE, /* Spanning tree state */ + IFLA_BRPORT_PRIORITY, /* " priority */ + IFLA_BRPORT_COST, /* " cost */ + IFLA_BRPORT_MODE, /* mode (hairpin) */ + __IFLA_BRPORT_MAX +}; +#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) + struct ifla_cacheinfo { __u32 max_reasm_len; __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 14b065cbd214..0188a2f706c4 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -20,16 +20,39 @@ #include "br_private.h" #include "br_private_stp.h" +static inline size_t br_port_info_size(void) +{ + return nla_total_size(1) /* IFLA_BRPORT_STATE */ + + nla_total_size(2) /* IFLA_BRPORT_PRIORITY */ + + nla_total_size(4) /* IFLA_BRPORT_COST */ + + nla_total_size(1) /* IFLA_BRPORT_MODE */ + + 0; +} + static inline size_t br_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) - + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ - + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ - + nla_total_size(4) /* IFLA_MASTER */ - + nla_total_size(4) /* IFLA_MTU */ - + nla_total_size(4) /* IFLA_LINK */ - + nla_total_size(1) /* IFLA_OPERSTATE */ - + nla_total_size(1); /* IFLA_PROTINFO */ + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(4) /* IFLA_MTU */ + + nla_total_size(4) /* IFLA_LINK */ + + 
nla_total_size(1) /* IFLA_OPERSTATE */ + + nla_total_size(br_port_info_size()); /* IFLA_PROTINFO */ +} + +static int br_port_fill_attrs(struct sk_buff *skb, + const struct net_bridge_port *p) +{ + u8 mode = !!(p->flags & BR_HAIRPIN_MODE); + + if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) || + nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) || + nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || + nla_put_u8(skb, IFLA_BRPORT_MODE, mode)) + return -EMSGSIZE; + + return 0; } /* @@ -67,10 +90,18 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink)) || - (event == RTM_NEWLINK && - nla_put_u8(skb, IFLA_PROTINFO, port->state))) + nla_put_u32(skb, IFLA_LINK, dev->iflink))) goto nla_put_failure; + + if (event == RTM_NEWLINK) { + struct nlattr *nest + = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); + + if (nest == NULL || br_port_fill_attrs(skb, port) < 0) + goto nla_put_failure; + nla_nest_end(skb, nest); + } + return nlmsg_end(skb, nlh); nla_put_failure: @@ -126,47 +157,115 @@ out: return err; } -/* - * Change state of port (ie from forwarding to blocking etc) - * Used by spanning tree in user space. 
- */ +static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { + [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, + [IFLA_BRPORT_COST] = { .type = NLA_U32 }, + [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, + [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, +}; + +/* Change the state of the port and notify spanning tree */ +static int br_set_port_state(struct net_bridge_port *p, u8 state) +{ + if (state > BR_STATE_BLOCKING) + return -EINVAL; + + /* if kernel STP is running, don't allow changes */ + if (p->br->stp_enabled == BR_KERNEL_STP) + return -EBUSY; + + if (!netif_running(p->dev) || + (!netif_carrier_ok(p->dev) && state != BR_STATE_DISABLED)) + return -ENETDOWN; + + p->state = state; + br_log_state(p); + br_port_state_selection(p->br); + return 0; +} + +/* Set/clear or port flags based on attribute */ +static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[], + int attrtype, unsigned long mask) +{ + if (tb[attrtype]) { + u8 flag = nla_get_u8(tb[attrtype]); + if (flag) + p->flags |= mask; + else + p->flags &= ~mask; + } +} + +/* Process bridge protocol info on port */ +static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) +{ + int err; + + br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); + + if (tb[IFLA_BRPORT_COST]) { + err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); + if (err) + return err; + } + + if (tb[IFLA_BRPORT_PRIORITY]) { + err = br_stp_set_port_priority(p, nla_get_u16(tb[IFLA_BRPORT_PRIORITY])); + if (err) + return err; + } + + if (tb[IFLA_BRPORT_STATE]) { + err = br_set_port_state(p, nla_get_u8(tb[IFLA_BRPORT_STATE])); + if (err) + return err; + } + return 0; +} + +/* Change state and parameters on port. 
*/ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { struct ifinfomsg *ifm; struct nlattr *protinfo; struct net_bridge_port *p; - u8 new_state; + struct nlattr *tb[IFLA_BRPORT_MAX]; + int err; ifm = nlmsg_data(nlh); protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - if (!protinfo || nla_len(protinfo) < sizeof(u8)) - return -EINVAL; - - new_state = nla_get_u8(protinfo); - if (new_state > BR_STATE_BLOCKING) - return -EINVAL; + if (!protinfo) + return 0; p = br_port_get_rtnl(dev); if (!p) return -EINVAL; - /* if kernel STP is running, don't allow changes */ - if (p->br->stp_enabled == BR_KERNEL_STP) - return -EBUSY; + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, + protinfo, ifla_brport_policy); + if (err) + return err; - if (!netif_running(dev) || - (!netif_carrier_ok(dev) && new_state != BR_STATE_DISABLED)) - return -ENETDOWN; + spin_lock_bh(&p->br->lock); + err = br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatability with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; - p->state = new_state; - br_log_state(p); + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } - spin_lock_bh(&p->br->lock); - br_port_state_selection(p->br); - spin_unlock_bh(&p->br->lock); + if (err == 0) + br_ifinfo_notify(RTM_NEWLINK, p); - return 0; + return err; } static int br_validate(struct nlattr *tb[], struct nlattr *data[]) -- cgit v1.2.3 From a2e01a65cd7135dab26d27d4b589b2e5358bec99 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 13 Nov 2012 07:53:07 +0000 Subject: bridge: implement BPDU blocking This is Linux bridge implementation of STP protection (Cisco BPDU guard/Juniper BPDU block). BPDU block disables the bridge port if a STP BPDU packet is received. Why would you want to do this? If running Spanning Tree on bridge, hostile devices on the network may send BPDU and cause network failure. 
Enabling bpdu block will detect and stop this. How to recover the port? The port will be restarted if link is brought down, or removed and reattached. For example: # ip li set dev eth0 down; ip li set dev eth0 up Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/bridge/br_netlink.c | 6 +++++- net/bridge/br_private.h | 1 + net/bridge/br_stp_bpdu.c | 7 +++++++ net/bridge/br_sysfs_if.c | 2 ++ 5 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 96f7cf49367b..5e871e4d923f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -216,6 +216,7 @@ enum { IFLA_BRPORT_PRIORITY, /* " priority */ IFLA_BRPORT_COST, /* " cost */ IFLA_BRPORT_MODE, /* mode (hairpin) */ + IFLA_BRPORT_GUARD, /* bpdu guard */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0188a2f706c4..c331e28c7880 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -26,6 +26,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(2) /* IFLA_BRPORT_PRIORITY */ + nla_total_size(4) /* IFLA_BRPORT_COST */ + nla_total_size(1) /* IFLA_BRPORT_MODE */ + + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + 0; } @@ -49,7 +50,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) || nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) || nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || - nla_put_u8(skb, IFLA_BRPORT_MODE, mode)) + nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || + nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD))) return -EMSGSIZE; return 0; @@ -162,6 +164,7 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_COST] = { .type = NLA_U32 }, [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, [IFLA_BRPORT_MODE] = { .type = 
NLA_U8 }, + [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -203,6 +206,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) int err; br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); + br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 22111ffd68df..c92b0804ff2d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -135,6 +135,7 @@ struct net_bridge_port unsigned long flags; #define BR_HAIRPIN_MODE 0x00000001 +#define BR_BPDU_GUARD 0x00000002 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index fd30a6022dea..7f884e3fb955 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -170,6 +170,13 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, if (!ether_addr_equal(dest, br->group_addr)) goto out; + if (p->flags & BR_BPDU_GUARD) { + br_notice(br, "BPDU received on blocked port %u(%s)\n", + (unsigned int) p->port_no, p->dev->name); + br_stp_disable_port(p); + goto out; + } + buf = skb_pull(skb, 3); if (buf[0] == BPDU_TYPE_CONFIG) { diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index f26173d33d8d..d1dfa4026185 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -156,6 +156,7 @@ static int store_flush(struct net_bridge_port *p, unsigned long v) static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); +BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -189,6 +190,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_hold_timer, &brport_attr_flush, 
&brport_attr_hairpin_mode, + &brport_attr_bpdu_guard, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, #endif -- cgit v1.2.3 From 1007dd1aa50b0403df370834f647abef1722925c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 13 Nov 2012 07:53:08 +0000 Subject: bridge: add root port blocking This is Linux bridge implementation of root port guard. If BPDU is received from a leaf (edge) port, it should not be elected as root port. Why would you want to do this? If using STP on a bridge and the downstream bridges are not fully trusted; this prevents a hostile guest from rerouting traffic. Why not just use netfilter? Netfilter does not track or follow spanning tree decisions. It would be difficult and error prone to try and mirror STP resolution in netfilter module. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/bridge/br_netlink.c | 5 ++++- net/bridge/br_private.h | 1 + net/bridge/br_stp.c | 22 +++++++++++++++++++++- net/bridge/br_sysfs_if.c | 2 ++ 5 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5e871e4d923f..7aae0179ae44 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -217,6 +217,7 @@ enum { IFLA_BRPORT_COST, /* " cost */ IFLA_BRPORT_MODE, /* mode (hairpin) */ IFLA_BRPORT_GUARD, /* bpdu guard */ + IFLA_BRPORT_PROTECT, /* root port protection */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index c331e28c7880..65429b99a2a3 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -27,6 +27,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(4) /* IFLA_BRPORT_COST */ + nla_total_size(1) /* IFLA_BRPORT_MODE */ + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + 0; } @@ -51,7 +52,8 @@ static int 
br_port_fill_attrs(struct sk_buff *skb, nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) || nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || - nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD))) + nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || + nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK))) return -EMSGSIZE; return 0; @@ -165,6 +167,7 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c92b0804ff2d..eb9cd42146a5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -136,6 +136,7 @@ struct net_bridge_port unsigned long flags; #define BR_HAIRPIN_MODE 0x00000001 #define BR_BPDU_GUARD 0x00000002 +#define BR_ROOT_BLOCK 0x00000004 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index af9a12099ba4..b01849a74310 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -100,6 +100,21 @@ static int br_should_become_root_port(const struct net_bridge_port *p, return 0; } +static void br_root_port_block(const struct net_bridge *br, + struct net_bridge_port *p) +{ + + br_notice(br, "port %u(%s) tried to become root port (blocked)", + (unsigned int) p->port_no, p->dev->name); + + p->state = BR_STATE_LISTENING; + br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); + + if (br->forward_delay > 0) + mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); +} + /* called under bridge lock */ static void br_root_selection(struct net_bridge *br) { @@ -107,7 +122,12 @@ static void br_root_selection(struct net_bridge *br) u16 root_port = 0; 
list_for_each_entry(p, &br->port_list, list) { - if (br_should_become_root_port(p, root_port)) + if (!br_should_become_root_port(p, root_port)) + continue; + + if (p->flags & BR_ROOT_BLOCK) + br_root_port_block(br, p); + else root_port = p->port_no; } diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index d1dfa4026185..80a4fc5d96ab 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -157,6 +157,7 @@ static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); +BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -191,6 +192,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_flush, &brport_attr_hairpin_mode, &brport_attr_bpdu_guard, + &brport_attr_root_block, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, #endif -- cgit v1.2.3 From cfa323b6b98f44ddf46cc987f74a23dcab697134 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Nov 2012 05:13:58 +0000 Subject: ip6tnl/rtnl: add IFLA_IPTUN_PROTO on dump IPv6 tunnels can have three modes: 4in6, 6in6 and xin6. This information was missing in the netlink message. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/uapi/linux/if_tunnel.h | 1 + net/ipv6/ip6_tunnel.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index c1bf0b5a8da1..f5ea6b7b651f 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -47,6 +47,7 @@ enum { IFLA_IPTUN_ENCAP_LIMIT, IFLA_IPTUN_FLOWINFO, IFLA_IPTUN_FLAGS, + IFLA_IPTUN_PROTO, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8db4d9b7ab14..929ba0b5cc9b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1515,6 +1515,8 @@ static size_t ip6_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_IPTUN_FLAGS */ nla_total_size(4) + + /* IFLA_IPTUN_PROTO */ + nla_total_size(1) + 0; } @@ -1531,7 +1533,8 @@ static int ip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || - nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags)) + nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || + nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) goto nla_put_failure; return 0; -- cgit v1.2.3 From befe2aa1b2c7b9b7e20e97906f99b58475608867 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Nov 2012 05:14:02 +0000 Subject: ipip/rtnl: add IFLA_IPTUN_PMTUDISC on dump This parameter was missing in the dump. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/uapi/linux/if_tunnel.h | 1 + net/ipv4/ipip.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index f5ea6b7b651f..5ab0c8ddc2bc 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -48,6 +48,7 @@ enum { IFLA_IPTUN_FLOWINFO, IFLA_IPTUN_FLAGS, IFLA_IPTUN_PROTO, + IFLA_IPTUN_PMTUDISC, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 4be88cc98957..1fc0ea4786b9 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -835,6 +835,8 @@ static size_t ipip_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_IPTUN_TOS */ nla_total_size(1) + + /* IFLA_IPTUN_PMTUDISC */ + nla_total_size(1) + 0; } @@ -847,7 +849,9 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || - nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos)) + nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || + nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, + !!(parm->iph.frag_off & htons(IP_DF)))) goto nla_put_failure; return 0; -- cgit v1.2.3 From af8036dd749fbf4e732161ff0f3874759b73be40 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 14 Nov 2012 16:59:21 +0100 Subject: Input: introduce EV_MSC Timestamp Some devices provide the actual timestamp (hid_dg_scan_time in win8 ones) computed by the hardware itself. This value is global to the frame and is not specific to the multitouch protocol. 
Signed-off-by: Benjamin Tissoires Reviewed-by: Henrik Rydberg Acked-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- Documentation/input/event-codes.txt | 11 +++++++++++ include/uapi/linux/input.h | 1 + 2 files changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt index 53305bd08182..f1ea2c69648d 100644 --- a/Documentation/input/event-codes.txt +++ b/Documentation/input/event-codes.txt @@ -196,6 +196,17 @@ EV_MSC: EV_MSC events are used for input and output events that do not fall under other categories. +A few EV_MSC codes have special meaning: + +* MSC_TIMESTAMP: + - Used to report the number of microseconds since the last reset. This event + should be coded as an uint32 value, which is allowed to wrap around with + no special consequence. It is assumed that the time difference between two + consecutive events is reliable on a reasonable time scale (hours). + A reset to zero can happen, in which case the time since the last event is + unknown. If the device does not provide this information, the driver must + not provide it to user space. + EV_LED: ---------- EV_LED events are used for input and output to set and query the state of diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index 558828590a69..935119c698ac 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -851,6 +851,7 @@ struct input_keymap_entry { #define MSC_GESTURE 0x02 #define MSC_RAW 0x03 #define MSC_SCAN 0x04 +#define MSC_TIMESTAMP 0x05 #define MSC_MAX 0x07 #define MSC_CNT (MSC_MAX+1) -- cgit v1.2.3 From 130cd273d4a46a3011b1cc739f5d2af78779d666 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 5 Nov 2012 05:28:18 +0000 Subject: ipv6: export IP6_RT_PRIO_* to userland The kernel uses some default metric when routes are managed. For example, a static route added with a metric set to 0 is inserted in the kernel with metric 1024 (IP6_RT_PRIO_USER). 
It is useful for routing daemons to know these values, to be able to set routes without interfering with what the kernel does. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/ip6_route.h | 3 --- include/uapi/linux/ipv6_route.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5fa2af00634a..27d83183e615 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -1,9 +1,6 @@ #ifndef _NET_IP6_ROUTE_H #define _NET_IP6_ROUTE_H -#define IP6_RT_PRIO_USER 1024 -#define IP6_RT_PRIO_ADDRCONF 256 - struct route_info { __u8 type; __u8 length; diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h index 0459664c2636..2be7bd174751 100644 --- a/include/uapi/linux/ipv6_route.h +++ b/include/uapi/linux/ipv6_route.h @@ -55,4 +55,7 @@ struct in6_rtmsg { #define RTMSG_NEWROUTE 0x21 #define RTMSG_DELROUTE 0x22 +#define IP6_RT_PRIO_USER 1024 +#define IP6_RT_PRIO_ADDRCONF 256 + #endif /* _UAPI_LINUX_IPV6_ROUTE_H */ -- cgit v1.2.3 From fa0cbbf145aabbf29c6f28f8a11935c0b0fd86fc Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 12 Nov 2012 17:53:04 -0800 Subject: mm, oom: reintroduce /proc/pid/oom_adj This is mostly a revert of 01dc52ebdf47 ("oom: remove deprecated oom_adj") from Davidlohr Bueso. It reintroduces /proc/pid/oom_adj for backwards compatibility with earlier kernels. It simply scales the value linearly when /proc/pid/oom_score_adj is written. The major difference is that its scheduled removal is no longer included in Documentation/feature-removal-schedule.txt. We do warn users with a single printk, though, to suggest the more powerful and supported /proc/pid/oom_score_adj interface. Reported-by: Artem S. 
Tashkinov Signed-off-by: David Rientjes Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 16 ++++-- fs/proc/base.c | 109 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/oom.h | 9 +++ 3 files changed, 130 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a1793d670cd0..3844d21d6ca3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -33,7 +33,7 @@ Table of Contents 2 Modifying System Parameters 3 Per-Process Parameters - 3.1 /proc//oom_score_adj - Adjust the oom-killer + 3.1 /proc//oom_adj & /proc//oom_score_adj - Adjust the oom-killer score 3.2 /proc//oom_score - Display current oom-killer score 3.3 /proc//io - Display the IO accounting fields @@ -1320,10 +1320,10 @@ of the kernel. CHAPTER 3: PER-PROCESS PARAMETERS ------------------------------------------------------------------------------ -3.1 /proc//oom_score_adj- Adjust the oom-killer score +3.1 /proc//oom_adj & /proc//oom_score_adj- Adjust the oom-killer score -------------------------------------------------------------------------------- -This file can be used to adjust the badness heuristic used to select which +These file can be used to adjust the badness heuristic used to select which process gets killed in out of memory conditions. The badness heuristic assigns a value to each candidate task ranging from 0 @@ -1361,6 +1361,12 @@ same system, cpuset, mempolicy, or memory controller resources to use at least equivalent to discounting 50% of the task's allowed memory from being considered as scoring against the task. +For backwards compatibility with previous kernels, /proc//oom_adj may also +be used to tune the badness score. Its acceptable values range from -16 +(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17 +(OOM_DISABLE) to disable oom killing entirely for that task. 
Its value is +scaled linearly with /proc//oom_score_adj. + The value of /proc//oom_score_adj may be reduced no lower than the last value set by a CAP_SYS_RESOURCE process. To reduce the value any lower requires CAP_SYS_RESOURCE. @@ -1375,7 +1381,9 @@ minimal amount of work. ------------------------------------------------------------- This file can be used to check the current score used by the oom-killer is for -any given . +any given . Use it together with /proc//oom_score_adj to tune which +process should be killed in an out-of-memory situation. + 3.3 /proc//io - Display the IO accounting fields ------------------------------------------------------- diff --git a/fs/proc/base.c b/fs/proc/base.c index 144a96732dd7..3c231adf8450 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -873,6 +873,113 @@ static const struct file_operations proc_environ_operations = { .release = mem_release, }; +static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + char buffer[PROC_NUMBUF]; + int oom_adj = OOM_ADJUST_MIN; + size_t len; + unsigned long flags; + + if (!task) + return -ESRCH; + if (lock_task_sighand(task, &flags)) { + if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) + oom_adj = OOM_ADJUST_MAX; + else + oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / + OOM_SCORE_ADJ_MAX; + unlock_task_sighand(task, &flags); + } + put_task_struct(task); + len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); + return simple_read_from_buffer(buf, count, ppos, buffer, len); +} + +static ssize_t oom_adj_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + char buffer[PROC_NUMBUF]; + int oom_adj; + unsigned long flags; + int err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + 
} + + err = kstrtoint(strstrip(buffer), 0, &oom_adj); + if (err) + goto out; + if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && + oom_adj != OOM_DISABLE) { + err = -EINVAL; + goto out; + } + + task = get_proc_task(file->f_path.dentry->d_inode); + if (!task) { + err = -ESRCH; + goto out; + } + + task_lock(task); + if (!task->mm) { + err = -EINVAL; + goto err_task_lock; + } + + if (!lock_task_sighand(task, &flags)) { + err = -ESRCH; + goto err_task_lock; + } + + /* + * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum + * value is always attainable. + */ + if (oom_adj == OOM_ADJUST_MAX) + oom_adj = OOM_SCORE_ADJ_MAX; + else + oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; + + if (oom_adj < task->signal->oom_score_adj && + !capable(CAP_SYS_RESOURCE)) { + err = -EACCES; + goto err_sighand; + } + + /* + * /proc/pid/oom_adj is provided for legacy purposes, ask users to use + * /proc/pid/oom_score_adj instead. + */ + printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + current->comm, task_pid_nr(current), task_pid_nr(task), + task_pid_nr(task)); + + task->signal->oom_score_adj = oom_adj; + trace_oom_score_adj_update(task); +err_sighand: + unlock_task_sighand(task, &flags); +err_task_lock: + task_unlock(task); + put_task_struct(task); +out: + return err < 0 ? 
err : count; +} + +static const struct file_operations proc_oom_adj_operations = { + .read = oom_adj_read, + .write = oom_adj_write, + .llseek = generic_file_llseek, +}; + static ssize_t oom_score_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -2598,6 +2705,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), @@ -2964,6 +3072,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), diff --git a/include/uapi/linux/oom.h b/include/uapi/linux/oom.h index a49c4afc7060..b29272d621ce 100644 --- a/include/uapi/linux/oom.h +++ b/include/uapi/linux/oom.h @@ -8,4 +8,13 @@ #define OOM_SCORE_ADJ_MIN (-1000) #define OOM_SCORE_ADJ_MAX 1000 +/* + * /proc//oom_adj set to -17 protects from the oom killer for legacy + * purposes. + */ +#define OOM_DISABLE (-17) +/* inclusive */ +#define OOM_ADJUST_MIN (-16) +#define OOM_ADJUST_MAX 15 + #endif /* _UAPI__INCLUDE_LINUX_OOM_H */ -- cgit v1.2.3 From 3475b0946bd2057497628790d4b4fce4bfdcc304 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 16 Nov 2012 22:49:57 +0200 Subject: cfg80211: Add TDLS event to allow drivers to request operations The NL80211_CMD_TDLS_OPER command was previously used only for userspace request for the kernel code to perform TDLS operations. 
However, there are also cases where the driver may need to request operations from userspace, e.g., when using security on the AP path. Add a new cfg80211 function for generating a TDLS operation event for drivers to request a new link to be set up (NL80211_TDLS_SETUP) or an existing link to be torn down (NL80211_TDLS_TEARDOWN). Drivers can optionally use these events, e.g., based on noticing data traffic being sent to a peer station that is seen with good signal strength. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 19 ++++++++++++++++++ include/uapi/linux/nl80211.h | 6 ++++++ net/wireless/nl80211.c | 47 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/trace.h | 23 ++++++++++++++++++++++ 4 files changed, 95 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 81d725038f97..8a1aec54e68f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3593,6 +3593,25 @@ bool cfg80211_can_beacon_sec_chan(struct wiphy *wiphy, void cfg80211_ch_switch_notify(struct net_device *dev, int freq, enum nl80211_channel_type type); +/* + * cfg80211_tdls_oper_request - request userspace to perform TDLS operation + * @dev: the device on which the operation is requested + * @peer: the MAC address of the peer device + * @oper: the requested TDLS operation (NL80211_TDLS_SETUP or + * NL80211_TDLS_TEARDOWN) + * @reason_code: the reason code for teardown request + * @gfp: allocation flags + * + * This function is used to request userspace to perform TDLS operation that + * requires knowledge of keys, i.e., link setup or teardown when the AP + * connection uses encryption. This is optional mechanism for the driver to use + * if it can automatically determine when a TDLS link could be useful (e.g., + * based on traffic and signal strength for a peer). 
+ */ +void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, + enum nl80211_tdls_operation oper, + u16 reason_code, gfp_t gfp); + /* * cfg80211_calculate_bitrate - calculate actual bitrate (in 100Kbps units) * @rate: given rate_info to calculate bitrate from diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 06ddc89f026c..1a9a819cfab0 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -526,6 +526,12 @@ * of PMKSA caching dandidates. * * @NL80211_CMD_TDLS_OPER: Perform a high-level TDLS command (e.g. link setup). + * In addition, this can be used as an event to request userspace to take + * actions on TDLS links (set up a new link or tear down an existing one). + * In such events, %NL80211_ATTR_TDLS_OPERATION indicates the requested + * operation, %NL80211_ATTR_MAC contains the peer MAC address, and + * %NL80211_ATTR_REASON_CODE the reason code to be used (only with + * %NL80211_TDLS_TEARDOWN). * @NL80211_CMD_TDLS_MGMT: Send a TDLS management frame. 
* * @NL80211_CMD_UNEXPECTED_FRAME: Used by an application controlling an AP diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c18b2fc9d492..4c427fa5c450 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9027,6 +9027,53 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_report_obss_beacon); +void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, + enum nl80211_tdls_operation oper, + u16 reason_code, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct sk_buff *msg; + void *hdr; + int err; + + trace_cfg80211_tdls_oper_request(wdev->wiphy, dev, peer, oper, + reason_code); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_TDLS_OPER); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || + nla_put_u8(msg, NL80211_ATTR_TDLS_OPERATION, oper) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer) || + (reason_code > 0 && + nla_put_u16(msg, NL80211_ATTR_REASON_CODE, reason_code))) + goto nla_put_failure; + + err = genlmsg_end(msg, hdr); + if (err < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_tdls_oper_request); + static int nl80211_netlink_notify(struct notifier_block * nb, unsigned long state, void *_notify) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 8e03c6382a8a..f264c20a7090 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2157,6 +2157,29 @@ TRACE_EVENT(cfg80211_report_obss_beacon, WIPHY_PR_ARG, __entry->freq, __entry->sig_dbm) ); +TRACE_EVENT(cfg80211_tdls_oper_request, + TP_PROTO(struct wiphy 
*wiphy, struct net_device *netdev, const u8 *peer, + enum nl80211_tdls_operation oper, u16 reason_code), + TP_ARGS(wiphy, netdev, peer, oper, reason_code), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + __field(enum nl80211_tdls_operation, oper) + __field(u16, reason_code) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, peer); + __entry->oper = oper; + __entry->reason_code = reason_code; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", oper: %d, reason_code %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->oper, + __entry->reason_code) + ); + TRACE_EVENT(cfg80211_scan_done, TP_PROTO(struct cfg80211_scan_request *request, bool aborted), TP_ARGS(request, aborted), -- cgit v1.2.3 From d2709c7ce4c513ab7f4ca9a106a930621811f2d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Nov 2012 22:21:03 +0000 Subject: perf: Make perf build for x86 with UAPI disintegration applied Make perf build for x86 once the UAPI disintegration patches for that arch have been applied by adding the appropriate -I flags - in the right order - and then converting some #includes that use ../.. notation to find main kernel headerfiles to use <asm/foo.h> and <linux/foo.h> instead. Note that -Iarch/foo/include/uapi is present _before_ -Iarch/foo/include. This makes sure we get the userspace version of the pt_regs struct. Ideally, we wouldn't have the latter -I flag at all, but unfortunately we want asm/svm.h and asm/vmx.h in builtin-kvm.c and these aren't part of the UAPI - at least not for x86. I wonder if the bits outside of the __KERNEL__ guards *should* be transferred there. I note also that perf seems to do its dependency handling manually by listing all the header files it might want to use in LIB_H in the Makefile. Can this be changed to use -MD? Note that to do make this work, we need to export and UAPI disintegrate linux/hw_breakpoint.h, which I think should've been exported previously so that perf can access the bits. 
We have to do this in the same patch to maintain bisectability. Signed-off-by: David Howells --- include/linux/hw_breakpoint.h | 31 +------------------------------ include/uapi/linux/Kbuild | 1 + include/uapi/linux/hw_breakpoint.h | 30 ++++++++++++++++++++++++++++++ tools/perf/Makefile | 29 ++++++++++++++++++++++++++++- tools/perf/arch/x86/include/perf_regs.h | 2 +- tools/perf/builtin-kvm.c | 6 +++--- tools/perf/builtin-test.c | 2 +- tools/perf/perf.h | 16 +++------------- tools/perf/util/evsel.c | 4 ++-- tools/perf/util/evsel.h | 3 ++- tools/perf/util/header.h | 2 +- tools/perf/util/parse-events-test.c | 2 +- tools/perf/util/parse-events.c | 2 +- tools/perf/util/parse-events.h | 2 +- tools/perf/util/pmu.h | 2 +- tools/perf/util/session.h | 2 +- 16 files changed, 78 insertions(+), 58 deletions(-) create mode 100644 include/uapi/linux/hw_breakpoint.h (limited to 'include/uapi/linux') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 6ae9c631a1be..0464c85e63fd 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -1,35 +1,8 @@ #ifndef _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H -enum { - HW_BREAKPOINT_LEN_1 = 1, - HW_BREAKPOINT_LEN_2 = 2, - HW_BREAKPOINT_LEN_4 = 4, - HW_BREAKPOINT_LEN_8 = 8, -}; - -enum { - HW_BREAKPOINT_EMPTY = 0, - HW_BREAKPOINT_R = 1, - HW_BREAKPOINT_W = 2, - HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W, - HW_BREAKPOINT_X = 4, - HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, -}; - -enum bp_type_idx { - TYPE_INST = 0, -#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS - TYPE_DATA = 0, -#else - TYPE_DATA = 1, -#endif - TYPE_MAX -}; - -#ifdef __KERNEL__ - #include <linux/perf_event.h> +#include <uapi/linux/hw_breakpoint.h> #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -151,6 +124,4 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif /* __KERNEL__ */ - #endif /* _LINUX_HW_BREAKPOINT_H */ diff --git a/include/uapi/linux/Kbuild 
b/include/uapi/linux/Kbuild index e194387ef784..19e765fbfef7 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -415,3 +415,4 @@ header-y += wireless.h header-y += x25.h header-y += xattr.h header-y += xfrm.h +header-y += hw_breakpoint.h diff --git a/include/uapi/linux/hw_breakpoint.h b/include/uapi/linux/hw_breakpoint.h new file mode 100644 index 000000000000..b04000a2296a --- /dev/null +++ b/include/uapi/linux/hw_breakpoint.h @@ -0,0 +1,30 @@ +#ifndef _UAPI_LINUX_HW_BREAKPOINT_H +#define _UAPI_LINUX_HW_BREAKPOINT_H + +enum { + HW_BREAKPOINT_LEN_1 = 1, + HW_BREAKPOINT_LEN_2 = 2, + HW_BREAKPOINT_LEN_4 = 4, + HW_BREAKPOINT_LEN_8 = 8, +}; + +enum { + HW_BREAKPOINT_EMPTY = 0, + HW_BREAKPOINT_R = 1, + HW_BREAKPOINT_W = 2, + HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W, + HW_BREAKPOINT_X = 4, + HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, +}; + +enum bp_type_idx { + TYPE_INST = 0, +#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS + TYPE_DATA = 0, +#else + TYPE_DATA = 1, +#endif + TYPE_MAX +}; + +#endif /* _UAPI_LINUX_HW_BREAKPOINT_H */ diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 00deed4d6159..0a619af5be43 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -169,7 +169,34 @@ endif ### --- END CONFIGURATION SECTION --- -BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +ifneq ($(objtree),) +#$(info Determined 'objtree' to be $(objtree)) +endif + +ifneq ($(OUTPUT),) +#$(info Determined 'OUTPUT' to be $(OUTPUT)) +endif + +BASIC_CFLAGS = \ + -Iutil/include \ + -Iarch/$(ARCH)/include \ + $(if $(objtree),-I$(objtree)/arch/$(ARCH)/include/generated/uapi) \ + -I$(srctree)/arch/$(ARCH)/include/uapi \ + -I$(srctree)/arch/$(ARCH)/include \ + $(if 
$(objtree),-I$(objtree)/include/generated/uapi) \ + -I$(srctree)/include/uapi \ + -I$(srctree)/include \ + -I$(OUTPUT)util \ + -Iutil \ + -I. \ + -I$(TRACE_EVENT_DIR) \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE BASIC_LDFLAGS = # Guard against environment variables diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index 46fc9f15c6b3..7fcdcdbee917 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -3,7 +3,7 @@ #include #include "../../util/types.h" -#include "../../../../../arch/x86/include/asm/perf_regs.h" +#include #ifndef ARCH_X86_64 #define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 260abc535b5b..e013bdb5e24a 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -22,9 +22,9 @@ #include #include -#include "../../arch/x86/include/asm/svm.h" -#include "../../arch/x86/include/asm/vmx.h" -#include "../../arch/x86/include/asm/kvm.h" +#include +#include +#include struct event_key { #define INVALID_KEY (~0ULL) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 484f26cc0c00..5acd6e8e658b 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -15,7 +15,7 @@ #include "util/thread_map.h" #include "util/pmu.h" #include "event-parse.h" -#include "../../include/linux/hw_breakpoint.h" +#include #include diff --git a/tools/perf/perf.h b/tools/perf/perf.h index e2ba8f004d32..238f923f2218 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -5,8 +5,9 @@ struct winsize; void get_term_dimensions(struct winsize *ws); +#include + #if defined(__i386__) -#include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC "model name" @@ -16,7 +17,6 @@ void get_term_dimensions(struct winsize *ws); #endif #if 
defined(__x86_64__) -#include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lfence" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC "model name" @@ -26,20 +26,17 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __powerpc__ -#include "../../arch/powerpc/include/uapi/asm/unistd.h" #define rmb() asm volatile ("sync" ::: "memory") #define cpu_relax() asm volatile ("" ::: "memory"); #define CPUINFO_PROC "cpu" #endif #ifdef __s390__ -#include "../../arch/s390/include/asm/unistd.h" #define rmb() asm volatile("bcr 15,0" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory"); #endif #ifdef __sh__ -#include "../../arch/sh/include/asm/unistd.h" #if defined(__SH4A__) || defined(__SH5__) # define rmb() asm volatile("synco" ::: "memory") #else @@ -50,35 +47,30 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __hppa__ -#include "../../arch/parisc/include/asm/unistd.h" #define rmb() asm volatile("" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory"); #define CPUINFO_PROC "cpu" #endif #ifdef __sparc__ -#include "../../arch/sparc/include/uapi/asm/unistd.h" #define rmb() asm volatile("":::"memory") #define cpu_relax() asm volatile("":::"memory") #define CPUINFO_PROC "cpu" #endif #ifdef __alpha__ -#include "../../arch/alpha/include/asm/unistd.h" #define rmb() asm volatile("mb" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory") #define CPUINFO_PROC "cpu model" #endif #ifdef __ia64__ -#include "../../arch/ia64/include/asm/unistd.h" #define rmb() asm volatile ("mf" ::: "memory") #define cpu_relax() asm volatile ("hint @pause" ::: "memory") #define CPUINFO_PROC "model name" #endif #ifdef __arm__ -#include "../../arch/arm/include/asm/unistd.h" /* * Use the __kuser_memory_barrier helper in the CPU helper page. See * arch/arm/kernel/entry-armv.S in the kernel source for details. 
@@ -89,13 +81,11 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __aarch64__ -#include "../../arch/arm64/include/asm/unistd.h" #define rmb() asm volatile("dmb ld" ::: "memory") #define cpu_relax() asm volatile("yield" ::: "memory") #endif #ifdef __mips__ -#include "../../arch/mips/include/asm/unistd.h" #define rmb() asm volatile( \ ".set mips2\n\t" \ "sync\n\t" \ @@ -112,7 +102,7 @@ void get_term_dimensions(struct winsize *ws); #include #include -#include "../../include/uapi/linux/perf_event.h" +#include #include "util/types.h" #include diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 618d41140abd..d144d464ce39 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -18,8 +18,8 @@ #include "cpumap.h" #include "thread_map.h" #include "target.h" -#include "../../../include/linux/hw_breakpoint.h" -#include "../../../include/uapi/linux/perf_event.h" +#include +#include #include "perf_regs.h" #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6f94d6dea00f..d99b476ef37c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -3,7 +3,8 @@ #include #include -#include "../../../include/uapi/linux/perf_event.h" +#include +#include #include "types.h" #include "xyarray.h" #include "cgroup.h" diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 879d215cdac9..9bc00783f24f 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -1,7 +1,7 @@ #ifndef __PERF_HEADER_H #define __PERF_HEADER_H -#include "../../../include/uapi/linux/perf_event.h" +#include #include #include #include "types.h" diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 516ecd9ddd6e..6ef213b35ecd 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -3,7 +3,7 @@ #include "evsel.h" #include "evlist.h" #include "sysfs.h" -#include 
"../../../include/linux/hw_breakpoint.h" +#include #define TEST_ASSERT_VAL(text, cond) \ do { \ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 75c7b0fca6d9..6b6d03e93c3d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,4 +1,4 @@ -#include "../../../include/linux/hw_breakpoint.h" +#include #include "util.h" #include "../perf.h" #include "evlist.h" diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 839230ceb18b..2820c407adb2 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -7,7 +7,7 @@ #include #include #include "types.h" -#include "../../../include/uapi/linux/perf_event.h" +#include #include "types.h" struct list_head; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 39f3abac7744..fdeb8ac7c5d2 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -2,7 +2,7 @@ #define __PMU_H #include -#include "../../../include/uapi/linux/perf_event.h" +#include enum { PERF_PMU_FORMAT_VALUE_CONFIG, diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index dd6426163ba6..0eae00ad5fe7 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -7,7 +7,7 @@ #include "symbol.h" #include "thread.h" #include -#include "../../../include/uapi/linux/perf_event.h" +#include struct sample_queue; struct ip_callchain; -- cgit v1.2.3 From e4f67addf158f98f8197e08974966b18480dc751 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Tue, 20 Nov 2012 02:50:14 +0000 Subject: add DOVE extensions for VXLAN This patch provides extensions to VXLAN for supporting Distributed Overlay Virtual Ethernet (DOVE) networks. The patch includes: + a dove flag per VXLAN device to enable DOVE extensions + ARP reduction, whereby a bridge-connected VXLAN tunnel endpoint answers ARP requests from the local bridge on behalf of remote DOVE clients + route short-circuiting (aka L3 switching). 
Known destination IP addresses use the corresponding destination MAC address for switching rather than going to a (possibly remote) router first. + netlink notification messages for forwarding table and L3 switching misses Changes since v2 - combined bools into "u32 flags" - replaced loop with !is_zero_ether_addr() Signed-off-by: David L Stevens Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 256 ++++++++++++++++++++++++++++++++++++++----- include/uapi/linux/if_link.h | 4 + 2 files changed, 235 insertions(+), 25 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a14df1ce99ff..ce77b8b693ae 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include #include #include @@ -110,7 +112,7 @@ struct vxlan_dev { __u16 port_max; __u8 tos; /* TOS override */ __u8 ttl; - bool learn; + u32 flags; /* VXLAN_F_* below */ unsigned long age_interval; struct timer_list age_timer; @@ -121,6 +123,12 @@ struct vxlan_dev { struct hlist_head fdb_head[FDB_HASH_SIZE]; }; +#define VXLAN_F_LEARN 0x01 +#define VXLAN_F_PROXY 0x02 +#define VXLAN_F_RSC 0x04 +#define VXLAN_F_L2MISS 0x08 +#define VXLAN_F_L3MISS 0x10 + /* salt for hash table */ static u32 vxlan_salt __read_mostly; @@ -154,6 +162,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, struct nda_cacheinfo ci; struct nlmsghdr *nlh; struct ndmsg *ndm; + bool send_ip, send_eth; nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) @@ -161,16 +170,24 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, ndm = nlmsg_data(nlh); memset(ndm, 0, sizeof(*ndm)); - ndm->ndm_family = AF_BRIDGE; + + send_eth = send_ip = true; + + if (type == RTM_GETNEIGH) { + ndm->ndm_family = AF_INET; + send_ip = fdb->remote_ip != 0; + send_eth = !is_zero_ether_addr(fdb->eth_addr); + } else + ndm->ndm_family = AF_BRIDGE; ndm->ndm_state = fdb->state; 
ndm->ndm_ifindex = vxlan->dev->ifindex; ndm->ndm_flags = NTF_SELF; ndm->ndm_type = NDA_DST; - if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) + if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) goto nla_put_failure; - if (nla_put_be32(skb, NDA_DST, fdb->remote_ip)) + if (send_ip && nla_put_be32(skb, NDA_DST, fdb->remote_ip)) goto nla_put_failure; ci.ndm_used = jiffies_to_clock_t(now - fdb->used); @@ -222,6 +239,29 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } +static void vxlan_ip_miss(struct net_device *dev, __be32 ipa) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb f; + + memset(&f, 0, sizeof f); + f.state = NUD_STALE; + f.remote_ip = ipa; /* goes to NDA_DST */ + + vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); +} + +static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN]) +{ + struct vxlan_fdb f; + + memset(&f, 0, sizeof f); + f.state = NUD_STALE; + memcpy(f.eth_addr, eth_addr, ETH_ALEN); + + vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); +} + /* Hash Ethernet address */ static u32 eth_hash(const unsigned char *addr) { @@ -551,6 +591,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto drop; } + skb_reset_mac_header(skb); + /* Re-examine inner Ethernet packet */ oip = ip_hdr(skb); skb->protocol = eth_type_trans(skb, vxlan->dev); @@ -560,7 +602,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) vxlan->dev->dev_addr) == 0) goto drop; - if (vxlan->learn) + if (vxlan->flags & VXLAN_F_LEARN) vxlan_snoop(skb->dev, oip->saddr, eth_hdr(skb)->h_source); __skb_tunnel_rx(skb, vxlan->dev); @@ -599,6 +641,117 @@ drop: return 0; } +static int arp_reduce(struct net_device *dev, struct sk_buff *skb) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct arphdr *parp; + u8 *arpptr, *sha; + __be32 sip, tip; + struct neighbour *n; + + if (dev->flags & IFF_NOARP) + goto out; + + if (!pskb_may_pull(skb, arp_hdr_len(dev))) { + dev->stats.tx_dropped++; + goto 
out; + } + parp = arp_hdr(skb); + + if ((parp->ar_hrd != htons(ARPHRD_ETHER) && + parp->ar_hrd != htons(ARPHRD_IEEE802)) || + parp->ar_pro != htons(ETH_P_IP) || + parp->ar_op != htons(ARPOP_REQUEST) || + parp->ar_hln != dev->addr_len || + parp->ar_pln != 4) + goto out; + arpptr = (u8 *)parp + sizeof(struct arphdr); + sha = arpptr; + arpptr += dev->addr_len; /* sha */ + memcpy(&sip, arpptr, sizeof(sip)); + arpptr += sizeof(sip); + arpptr += dev->addr_len; /* tha */ + memcpy(&tip, arpptr, sizeof(tip)); + + if (ipv4_is_loopback(tip) || + ipv4_is_multicast(tip)) + goto out; + + n = neigh_lookup(&arp_tbl, &tip, dev); + + if (n) { + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb *f; + struct sk_buff *reply; + + if (!(n->nud_state & NUD_CONNECTED)) { + neigh_release(n); + goto out; + } + + f = vxlan_find_mac(vxlan, n->ha); + if (f && f->remote_ip == 0) { + /* bridge-local neighbor */ + neigh_release(n); + goto out; + } + + reply = arp_create(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, + n->ha, sha); + + neigh_release(n); + + skb_reset_mac_header(reply); + __skb_pull(reply, skb_network_offset(reply)); + reply->ip_summed = CHECKSUM_UNNECESSARY; + reply->pkt_type = PACKET_HOST; + + if (netif_rx_ni(reply) == NET_RX_DROP) + dev->stats.rx_dropped++; + } else if (vxlan->flags & VXLAN_F_L3MISS) + vxlan_ip_miss(dev, tip); +out: + consume_skb(skb); + return NETDEV_TX_OK; +} + +static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct neighbour *n; + struct iphdr *pip; + + if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) + return false; + + n = NULL; + switch (ntohs(eth_hdr(skb)->h_proto)) { + case ETH_P_IP: + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return false; + pip = ip_hdr(skb); + n = neigh_lookup(&arp_tbl, &pip->daddr, dev); + break; + default: + return false; + } + + if (n) { + bool diff; + + diff = compare_ether_addr(eth_hdr(skb)->h_dest, n->ha) != 0; + if (diff) { + 
memcpy(eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + dev->addr_len); + memcpy(eth_hdr(skb)->h_dest, n->ha, dev->addr_len); + } + neigh_release(n); + return diff; + } else if (vxlan->flags & VXLAN_F_L3MISS) + vxlan_ip_miss(dev, pip->daddr); + return false; +} + /* Extract dsfield from inner protocol */ static inline u8 vxlan_get_dsfield(const struct iphdr *iph, const struct sk_buff *skb) @@ -621,22 +774,6 @@ static inline u8 vxlan_ecn_encap(u8 tos, return INET_ECN_encapsulate(tos, inner); } -static __be32 vxlan_find_dst(struct vxlan_dev *vxlan, struct sk_buff *skb) -{ - const struct ethhdr *eth = (struct ethhdr *) skb->data; - const struct vxlan_fdb *f; - - if (is_multicast_ether_addr(eth->h_dest)) - return vxlan->gaddr; - - f = vxlan_find_mac(vxlan, eth->h_dest); - if (f) - return f->remote_ip; - else - return vxlan->gaddr; - -} - static void vxlan_sock_free(struct sk_buff *skb) { sock_put(skb->sk); @@ -683,6 +820,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct rtable *rt; const struct iphdr *old_iph; + struct ethhdr *eth; struct iphdr *iph; struct vxlanhdr *vxh; struct udphdr *uh; @@ -693,10 +831,50 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) __be16 df = 0; __u8 tos, ttl; int err; + bool did_rsc = false; + const struct vxlan_fdb *f; + + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP) + return arp_reduce(dev, skb); + else if ((vxlan->flags&VXLAN_F_RSC) && ntohs(eth->h_proto) == ETH_P_IP) + did_rsc = route_shortcircuit(dev, skb); - dst = vxlan_find_dst(vxlan, skb); - if (!dst) + f = vxlan_find_mac(vxlan, eth->h_dest); + if (f == NULL) { + did_rsc = false; + dst = vxlan->gaddr; + if (!dst && (vxlan->flags & VXLAN_F_L2MISS) && + !is_multicast_ether_addr(eth->h_dest)) + vxlan_fdb_miss(vxlan, eth->h_dest); + } else + dst = f->remote_ip; + + if (!dst) { + if (did_rsc) { + 
__skb_pull(skb, skb_network_offset(skb)); + skb->ip_summed = CHECKSUM_NONE; + skb->pkt_type = PACKET_HOST; + + /* short-circuited back to local bridge */ + if (netif_rx(skb) == NET_RX_SUCCESS) { + struct vxlan_stats *stats = + this_cpu_ptr(vxlan->stats); + + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += pkt_len; + u64_stats_update_end(&stats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } + return NETDEV_TX_OK; + } goto drop; + } /* Need space for new headers (invalidates iph ptr) */ if (skb_cow_head(skb, VXLAN_HEADROOM)) @@ -1019,6 +1197,10 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_AGEING] = { .type = NLA_U32 }, [IFLA_VXLAN_LIMIT] = { .type = NLA_U32 }, [IFLA_VXLAN_PORT_RANGE] = { .len = sizeof(struct ifla_vxlan_port_range) }, + [IFLA_VXLAN_PROXY] = { .type = NLA_U8 }, + [IFLA_VXLAN_RSC] = { .type = NLA_U8 }, + [IFLA_VXLAN_L2MISS] = { .type = NLA_U8 }, + [IFLA_VXLAN_L3MISS] = { .type = NLA_U8 }, }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1114,13 +1296,25 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, vxlan->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]); if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING])) - vxlan->learn = true; + vxlan->flags |= VXLAN_F_LEARN; if (data[IFLA_VXLAN_AGEING]) vxlan->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]); else vxlan->age_interval = FDB_AGE_DEFAULT; + if (data[IFLA_VXLAN_PROXY] && nla_get_u8(data[IFLA_VXLAN_PROXY])) + vxlan->flags |= VXLAN_F_PROXY; + + if (data[IFLA_VXLAN_RSC] && nla_get_u8(data[IFLA_VXLAN_RSC])) + vxlan->flags |= VXLAN_F_RSC; + + if (data[IFLA_VXLAN_L2MISS] && nla_get_u8(data[IFLA_VXLAN_L2MISS])) + vxlan->flags |= VXLAN_F_L2MISS; + + if (data[IFLA_VXLAN_L3MISS] && nla_get_u8(data[IFLA_VXLAN_L3MISS])) + vxlan->flags |= VXLAN_F_L3MISS; + if (data[IFLA_VXLAN_LIMIT]) vxlan->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]); @@ 
-1157,6 +1351,10 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L2MISS */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L3MISS */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */ nla_total_size(sizeof(struct ifla_vxlan_port_range)) + @@ -1185,7 +1383,15 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->ttl) || nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->tos) || - nla_put_u8(skb, IFLA_VXLAN_LEARNING, vxlan->learn) || + nla_put_u8(skb, IFLA_VXLAN_LEARNING, + !!(vxlan->flags & VXLAN_F_LEARN)) || + nla_put_u8(skb, IFLA_VXLAN_PROXY, + !!(vxlan->flags & VXLAN_F_PROXY)) || + nla_put_u8(skb, IFLA_VXLAN_RSC, !!(vxlan->flags & VXLAN_F_RSC)) || + nla_put_u8(skb, IFLA_VXLAN_L2MISS, + !!(vxlan->flags & VXLAN_F_L2MISS)) || + nla_put_u8(skb, IFLA_VXLAN_L3MISS, + !!(vxlan->flags & VXLAN_F_L3MISS)) || nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) || nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax)) goto nla_put_failure; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 7aae0179ae44..bb58aeb7f34d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -302,6 +302,10 @@ enum { IFLA_VXLAN_AGEING, IFLA_VXLAN_LIMIT, IFLA_VXLAN_PORT_RANGE, + IFLA_VXLAN_PROXY, + IFLA_VXLAN_RSC, + IFLA_VXLAN_L2MISS, + IFLA_VXLAN_L3MISS, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit v1.2.3 From e2f1f072db8db81e6b5bcbfcf409bb5c91dc9329 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 19 Nov 2012 22:41:45 +0000 Subject: sit: allow to configure 6rd 
tunnels via netlink This patch add the support of 6RD tunnels management via netlink. Note that netdev_state_change() is now called when 6RD parameters are updated. 6RD parameters are updated only if there is at least one 6RD attribute. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 4 ++ net/ipv6/sit.c | 149 ++++++++++++++++++++++++++++++++++------- 2 files changed, 128 insertions(+), 25 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 5ab0c8ddc2bc..aee73d0611fb 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -49,6 +49,10 @@ enum { IFLA_IPTUN_FLAGS, IFLA_IPTUN_PROTO, IFLA_IPTUN_PMTUDISC, + IFLA_IPTUN_6RD_PREFIX, + IFLA_IPTUN_6RD_RELAY_PREFIX, + IFLA_IPTUN_6RD_PREFIXLEN, + IFLA_IPTUN_6RD_RELAY_PREFIXLEN, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fee21c6c3ebf..80cb3829831c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -936,6 +936,38 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) netdev_state_change(t->dev); } +#ifdef CONFIG_IPV6_SIT_6RD +static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, + struct ip_tunnel_6rd *ip6rd) +{ + struct in6_addr prefix; + __be32 relay_prefix; + + if (ip6rd->relay_prefixlen > 32 || + ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64) + return -EINVAL; + + ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen); + if (!ipv6_addr_equal(&prefix, &ip6rd->prefix)) + return -EINVAL; + if (ip6rd->relay_prefixlen) + relay_prefix = ip6rd->relay_prefix & + htonl(0xffffffffUL << + (32 - ip6rd->relay_prefixlen)); + else + relay_prefix = 0; + if (relay_prefix != ip6rd->relay_prefix) + return -EINVAL; + + t->ip6rd.prefix = prefix; + t->ip6rd.relay_prefix = relay_prefix; + t->ip6rd.prefixlen = ip6rd->prefixlen; + t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; 
+ netdev_state_change(t->dev); + return 0; +} +#endif + static int ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -1105,31 +1137,9 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t = netdev_priv(dev); if (cmd != SIOCDEL6RD) { - struct in6_addr prefix; - __be32 relay_prefix; - - err = -EINVAL; - if (ip6rd.relay_prefixlen > 32 || - ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64) - goto done; - - ipv6_addr_prefix(&prefix, &ip6rd.prefix, - ip6rd.prefixlen); - if (!ipv6_addr_equal(&prefix, &ip6rd.prefix)) + err = ipip6_tunnel_update_6rd(t, &ip6rd); + if (err < 0) goto done; - if (ip6rd.relay_prefixlen) - relay_prefix = ip6rd.relay_prefix & - htonl(0xffffffffUL << - (32 - ip6rd.relay_prefixlen)); - else - relay_prefix = 0; - if (relay_prefix != ip6rd.relay_prefix) - goto done; - - t->ip6rd.prefix = prefix; - t->ip6rd.relay_prefix = relay_prefix; - t->ip6rd.prefixlen = ip6rd.prefixlen; - t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen; } else ipip6_tunnel_clone_6rd(dev, sitn); @@ -1261,11 +1271,53 @@ static void ipip6_netlink_parms(struct nlattr *data[], parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); } +#ifdef CONFIG_IPV6_SIT_6RD +/* This function returns true when 6RD attributes are present in the nl msg */ +static bool ipip6_netlink_6rd_parms(struct nlattr *data[], + struct ip_tunnel_6rd *ip6rd) +{ + bool ret = false; + memset(ip6rd, 0, sizeof(*ip6rd)); + + if (!data) + return ret; + + if (data[IFLA_IPTUN_6RD_PREFIX]) { + ret = true; + nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX], + sizeof(struct in6_addr)); + } + + if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) { + ret = true; + ip6rd->relay_prefix = + nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]); + } + + if (data[IFLA_IPTUN_6RD_PREFIXLEN]) { + ret = true; + ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]); + } + + if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) { + ret = true; + ip6rd->relay_prefixlen = + 
nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]); + } + + return ret; +} +#endif + static int ipip6_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); struct ip_tunnel *nt; +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd ip6rd; +#endif + int err; nt = netdev_priv(dev); ipip6_netlink_parms(data, &nt->parms); @@ -1273,7 +1325,16 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, if (ipip6_tunnel_locate(net, &nt->parms, 0)) return -EEXIST; - return ipip6_tunnel_create(dev); + err = ipip6_tunnel_create(dev); + if (err < 0) + return err; + +#ifdef CONFIG_IPV6_SIT_6RD + if (ipip6_netlink_6rd_parms(data, &ip6rd)) + err = ipip6_tunnel_update_6rd(nt, &ip6rd); +#endif + + return err; } static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], @@ -1283,6 +1344,9 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm p; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd ip6rd; +#endif if (dev == sitn->fb_tunnel_dev) return -EINVAL; @@ -1302,6 +1366,12 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], t = netdev_priv(dev); ipip6_tunnel_update(t, &p); + +#ifdef CONFIG_IPV6_SIT_6RD + if (ipip6_netlink_6rd_parms(data, &ip6rd)) + return ipip6_tunnel_update_6rd(t, &ip6rd); +#endif + return 0; } @@ -1322,6 +1392,16 @@ static size_t ipip6_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_IPTUN_FLAGS */ nla_total_size(2) + +#ifdef CONFIG_IPV6_SIT_6RD + /* IFLA_IPTUN_6RD_PREFIX */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_6RD_RELAY_PREFIX */ + nla_total_size(4) + + /* IFLA_IPTUN_6RD_PREFIXLEN */ + nla_total_size(2) + + /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */ + nla_total_size(2) + +#endif 0; } @@ -1339,6 +1419,19 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device 
*dev) !!(parm->iph.frag_off & htons(IP_DF))) || nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) goto nla_put_failure; + +#ifdef CONFIG_IPV6_SIT_6RD + if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr), + &tunnel->ip6rd.prefix) || + nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, + tunnel->ip6rd.relay_prefix) || + nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN, + tunnel->ip6rd.prefixlen) || + nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN, + tunnel->ip6rd.relay_prefixlen)) + goto nla_put_failure; +#endif + return 0; nla_put_failure: @@ -1353,6 +1446,12 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, +#ifdef CONFIG_IPV6_SIT_6RD + [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) }, + [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 }, + [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 }, + [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 }, +#endif }; static struct rtnl_link_ops sit_link_ops __read_mostly = { -- cgit v1.2.3 From dc96efb72054985c0912f831da009a2da4e9f6dd Mon Sep 17 00:00:00 2001 From: Matt Schulte Date: Mon, 19 Nov 2012 09:12:04 -0600 Subject: Serial: Add support for new devices: Exar's XR17V35x family of multi-port PCIe UARTs Add support for new devices: Exar's XR17V35x family of multi-port PCIe UARTs. 
Signed-off-by: Matt Schulte Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.c | 71 ++++++++++++++++++++++++++++ drivers/tty/serial/8250/8250_pci.c | 96 ++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 3 ++ include/uapi/linux/serial_core.h | 3 +- include/uapi/linux/serial_reg.h | 6 +++ 5 files changed, 178 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c index 2af83a246499..3624df674a31 100644 --- a/drivers/tty/serial/8250/8250.c +++ b/drivers/tty/serial/8250/8250.c @@ -282,6 +282,15 @@ static const struct serial8250_config uart_config[] = { .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR, }, + [PORT_XR17V35X] = { + .name = "XR17V35X", + .fifo_size = 256, + .tx_loadsz = 256, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11 | + UART_FCR_T_TRIG_11, + .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR | + UART_CAP_SLEEP, + }, [PORT_LPC3220] = { .name = "LPC3220", .fifo_size = 64, @@ -455,6 +464,7 @@ static void io_serial_out(struct uart_port *p, int offset, int value) } static int serial8250_default_handle_irq(struct uart_port *port); +static int exar_handle_irq(struct uart_port *port); static void set_io_from_upio(struct uart_port *p) { @@ -574,6 +584,18 @@ EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos); */ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) { + /* + * Exar UARTs have a SLEEP register that enables or disables + * each UART to enter sleep mode separately. On the XR17V35x the + * register is accessible to each UART at the UART_EXAR_SLEEP + * offset but the UART channel may only write to the corresponding + * bit. 
+ */ + if (p->port.type == PORT_XR17V35X) { + serial_out(p, UART_EXAR_SLEEP, 0xff); + return; + } + if (p->capabilities & UART_CAP_SLEEP) { if (p->capabilities & UART_CAP_EFR) { serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); @@ -881,6 +903,27 @@ static void autoconfig_16550a(struct uart_8250_port *up) up->port.type = PORT_16550A; up->capabilities |= UART_CAP_FIFO; + /* + * XR17V35x UARTs have an extra divisor register, DLD + * that gets enabled when DLAB is set which will + * cause the device to incorrectly match and assign + * port type to PORT_16650. The EFR for this UART is + * found at offset 0x09. Instead check the Device ID (DVID) + * register for a 2, 4 or 8 port UART. + */ + status1 = serial_in(up, UART_EXAR_DVID); + if (status1 == 0x82 || status1 == 0x84 || status1 == 0x88) { + if (up->port.flags & UPF_EXAR_EFR) { + DEBUG_AUTOCONF("Exar XR17V35x "); + up->port.type = PORT_XR17V35X; + up->capabilities |= UART_CAP_AFE | UART_CAP_EFR | + UART_CAP_SLEEP; + + return; + } + + } + /* * Check for presence of the EFR when DLAB is set. * Only ST16C650V1 UARTs pass this test. @@ -1515,6 +1558,30 @@ static int serial8250_default_handle_irq(struct uart_port *port) return serial8250_handle_irq(port, iir); } +/* + * These Exar UARTs have an extra interrupt indicator that could + * fire for a few unimplemented interrupts. One of which is a + * wakeup event when coming out of sleep. Put this here just + * to be on the safe side that these interrupts don't go unhandled. + */ +static int exar_handle_irq(struct uart_port *port) +{ + unsigned char int0, int1, int2, int3; + unsigned int iir = serial_port_in(port, UART_IIR); + int ret; + + ret = serial8250_handle_irq(port, iir); + + if (port->type == PORT_XR17V35X) { + int0 = serial_port_in(port, 0x80); + int1 = serial_port_in(port, 0x81); + int2 = serial_port_in(port, 0x82); + int3 = serial_port_in(port, 0x83); + } + + return ret; +} + /* * This is the serial driver's interrupt routine. 
* @@ -2614,6 +2681,10 @@ static void serial8250_config_port(struct uart_port *port, int flags) serial8250_release_rsa_resource(up); if (port->type == PORT_UNKNOWN) serial8250_release_std_resource(up); + + /* Fixme: probably not the best place for this */ + if (port->type == PORT_XR17V35X) + port->handle_irq = exar_handle_irq; } static int diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 97058c1d7d45..2285d3283b3b 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1164,6 +1164,39 @@ pci_xr17c154_setup(struct serial_private *priv, return pci_default_setup(priv, board, port, idx); } +static int +pci_xr17v35x_setup(struct serial_private *priv, + const struct pciserial_board *board, + struct uart_8250_port *port, int idx) +{ + u8 __iomem *p; + + p = pci_ioremap_bar(priv->dev, 0); + + port->port.flags |= UPF_EXAR_EFR; + + /* + * Setup Multipurpose Input/Output pins. + */ + if (idx == 0) { + writeb(0x00, p + 0x8f); /*MPIOINT[7:0]*/ + writeb(0x00, p + 0x90); /*MPIOLVL[7:0]*/ + writeb(0x00, p + 0x91); /*MPIO3T[7:0]*/ + writeb(0x00, p + 0x92); /*MPIOINV[7:0]*/ + writeb(0x00, p + 0x93); /*MPIOSEL[7:0]*/ + writeb(0x00, p + 0x94); /*MPIOOD[7:0]*/ + writeb(0x00, p + 0x95); /*MPIOINT[15:8]*/ + writeb(0x00, p + 0x96); /*MPIOLVL[15:8]*/ + writeb(0x00, p + 0x97); /*MPIO3T[15:8]*/ + writeb(0x00, p + 0x98); /*MPIOINV[15:8]*/ + writeb(0x00, p + 0x99); /*MPIOSEL[15:8]*/ + writeb(0x00, p + 0x9a); /*MPIOOD[15:8]*/ + } + iounmap(p); + + return pci_default_setup(priv, board, port, idx); +} + static int pci_wch_ch353_setup(struct serial_private *priv, const struct pciserial_board *board, @@ -1622,6 +1655,27 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_xr17c154_setup, }, + { + .vendor = PCI_VENDOR_ID_EXAR, + .device = PCI_DEVICE_ID_EXAR_XR17V352, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_xr17v35x_setup, + }, + { + .vendor = 
PCI_VENDOR_ID_EXAR, + .device = PCI_DEVICE_ID_EXAR_XR17V354, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_xr17v35x_setup, + }, + { + .vendor = PCI_VENDOR_ID_EXAR, + .device = PCI_DEVICE_ID_EXAR_XR17V358, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_xr17v35x_setup, + }, /* * Xircom cards */ @@ -1962,6 +2016,9 @@ enum pci_board_num_t { pbn_exar_XR17C152, pbn_exar_XR17C154, pbn_exar_XR17C158, + pbn_exar_XR17V352, + pbn_exar_XR17V354, + pbn_exar_XR17V358, pbn_exar_ibm_saturn, pbn_pasemi_1682M, pbn_ni8430_2, @@ -2580,6 +2637,30 @@ static struct pciserial_board pci_boards[] = { .base_baud = 921600, .uart_offset = 0x200, }, + [pbn_exar_XR17V352] = { + .flags = FL_BASE0, + .num_ports = 2, + .base_baud = 7812500, + .uart_offset = 0x400, + .reg_shift = 0, + .first_offset = 0, + }, + [pbn_exar_XR17V354] = { + .flags = FL_BASE0, + .num_ports = 4, + .base_baud = 7812500, + .uart_offset = 0x400, + .reg_shift = 0, + .first_offset = 0, + }, + [pbn_exar_XR17V358] = { + .flags = FL_BASE0, + .num_ports = 8, + .base_baud = 7812500, + .uart_offset = 0x400, + .reg_shift = 0, + .first_offset = 0, + }, [pbn_exar_ibm_saturn] = { .flags = FL_BASE0, .num_ports = 1, @@ -3826,6 +3907,21 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_exar_XR17C158 }, + /* + * Exar Corp. 
XR17V35[248] Dual/Quad/Octal PCIe UARTs + */ + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V352, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_exar_XR17V352 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V354, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_exar_XR17V354 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V358, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_exar_XR17V358 }, /* * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 9d36b829533a..0199a7a76fcb 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1985,6 +1985,9 @@ #define PCI_DEVICE_ID_EXAR_XR17C152 0x0152 #define PCI_DEVICE_ID_EXAR_XR17C154 0x0154 #define PCI_DEVICE_ID_EXAR_XR17C158 0x0158 +#define PCI_DEVICE_ID_EXAR_XR17V352 0x0352 +#define PCI_DEVICE_ID_EXAR_XR17V354 0x0354 +#define PCI_DEVICE_ID_EXAR_XR17V358 0x0358 #define PCI_VENDOR_ID_MICROGATE 0x13c0 #define PCI_DEVICE_ID_MICROGATE_USC 0x0010 diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index ebcc73f0418a..78f99d97475b 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -49,7 +49,8 @@ #define PORT_XR17D15X 21 /* Exar XR17D15x UART */ #define PORT_LPC3220 22 /* NXP LPC32xx SoC "Standard" UART */ #define PORT_8250_CIR 23 /* CIR infrared port, has its own driver */ -#define PORT_MAX_8250 23 /* max port ID */ +#define PORT_XR17V35X 24 /* Exar XR17V35x UARTs */ +#define PORT_MAX_8250 24 /* max port ID */ /* * ARM specific type numbers. 
These are not currently guaranteed diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index 5ed325e88a81..d0b47607b90b 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -367,5 +367,11 @@ #define UART_OMAP_MDR1_CIR_MODE 0x06 /* CIR mode */ #define UART_OMAP_MDR1_DISABLE 0x07 /* Disable (default state) */ +/* + * These are definitions for the XR17V35X and XR17D15X + */ +#define UART_EXAR_SLEEP 0x8b /* Sleep mode */ +#define UART_EXAR_DVID 0x8d /* Device identification */ + #endif /* _LINUX_SERIAL_REG_H */ -- cgit v1.2.3 From d02f81555362e0032080af62154dca00d5ec99e0 Mon Sep 17 00:00:00 2001 From: Matt Schulte Date: Tue, 20 Nov 2012 11:21:17 -0600 Subject: Add register definitions used in several Exar PCI/PCIe UARTs Add register definitions used in several Exar PCI/PCIe UARTs Signed-off-by: Matt Schulte Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_reg.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index d0b47607b90b..e6322605b138 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -368,10 +368,22 @@ #define UART_OMAP_MDR1_DISABLE 0x07 /* Disable (default state) */ /* - * These are definitions for the XR17V35X and XR17D15X + * These are definitions for the Exar XR17V35X and XR17(C|D)15X */ +#define UART_EXAR_8XMODE 0x88 /* 8X sampling rate select */ #define UART_EXAR_SLEEP 0x8b /* Sleep mode */ #define UART_EXAR_DVID 0x8d /* Device identification */ +#define UART_EXAR_FCTR 0x08 /* Feature Control Register */ +#define UART_FCTR_EXAR_IRDA 0x08 /* IrDa data encode select */ +#define UART_FCTR_EXAR_485 0x10 /* Auto 485 half duplex dir ctl */ +#define UART_FCTR_EXAR_TRGA 0x00 /* FIFO trigger table A */ +#define UART_FCTR_EXAR_TRGB 0x60 /* FIFO trigger table B */ +#define UART_FCTR_EXAR_TRGC 0x80 /* FIFO trigger table C */ 
+#define UART_FCTR_EXAR_TRGD 0xc0 /* FIFO trigger table D programmable */ + +#define UART_EXAR_TXTRG 0x0a /* Tx FIFO trigger level write-only */ +#define UART_EXAR_RXTRG 0x0b /* Rx FIFO trigger level write-only */ + #endif /* _LINUX_SERIAL_REG_H */ -- cgit v1.2.3 From 051c7788bcb92f2e98ef86e86651e0420765b121 Mon Sep 17 00:00:00 2001 From: Sumit Semwal Date: Thu, 14 Jun 2012 10:37:35 -0300 Subject: [media] v4l: Add DMABUF as a memory type Adds DMABUF memory type to v4l framework. Also adds the related file descriptor in v4l2_plane and v4l2_buffer. [original work in the PoC for buffer sharing] Signed-off-by: Tomasz Stanislawski Signed-off-by: Sumit Semwal Signed-off-by: Sumit Semwal Acked-by: Laurent Pinchart Acked-by: Hans Verkuil Tested-by: Mauro Carvalho Chehab Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 18 ++++++++++++++++++ drivers/media/v4l2-core/v4l2-ioctl.c | 1 + include/uapi/linux/videodev2.h | 7 +++++++ 3 files changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 83ffb6436baf..cc5998b31463 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -297,6 +297,7 @@ struct v4l2_plane32 { union { __u32 mem_offset; compat_long_t userptr; + __s32 fd; } m; __u32 data_offset; __u32 reserved[11]; @@ -318,6 +319,7 @@ struct v4l2_buffer32 { __u32 offset; compat_long_t userptr; compat_caddr_t planes; + __s32 fd; } m; __u32 length; __u32 reserved2; @@ -341,6 +343,9 @@ static int get_v4l2_plane32(struct v4l2_plane *up, struct v4l2_plane32 *up32, up_pln = compat_ptr(p); if (put_user((unsigned long)up_pln, &up->m.userptr)) return -EFAULT; + } else if (memory == V4L2_MEMORY_DMABUF) { + if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(int))) + return -EFAULT; } else { if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, sizeof(__u32))) @@ -364,6 +369,11 
@@ static int put_v4l2_plane32(struct v4l2_plane *up, struct v4l2_plane32 *up32, if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset, sizeof(__u32))) return -EFAULT; + /* For DMABUF, driver might've set up the fd, so copy it back. */ + if (memory == V4L2_MEMORY_DMABUF) + if (copy_in_user(&up32->m.fd, &up->m.fd, + sizeof(int))) + return -EFAULT; return 0; } @@ -446,6 +456,10 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user if (get_user(kp->m.offset, &up->m.offset)) return -EFAULT; break; + case V4L2_MEMORY_DMABUF: + if (get_user(kp->m.fd, &up->m.fd)) + return -EFAULT; + break; } } @@ -510,6 +524,10 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user if (put_user(kp->m.offset, &up->m.offset)) return -EFAULT; break; + case V4L2_MEMORY_DMABUF: + if (put_user(kp->m.fd, &up->m.fd)) + return -EFAULT; + break; } } diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 8f388ff31ebb..530a67e3fe0e 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -155,6 +155,7 @@ static const char *v4l2_memory_names[] = { [V4L2_MEMORY_MMAP] = "mmap", [V4L2_MEMORY_USERPTR] = "userptr", [V4L2_MEMORY_OVERLAY] = "overlay", + [V4L2_MEMORY_DMABUF] = "dmabuf", }; #define prt_names(a, arr) (((unsigned)(a)) < ARRAY_SIZE(arr) ? 
arr[a] : "unknown") diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 2fff7ff3e05b..91ac83b21c20 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -186,6 +186,7 @@ enum v4l2_memory { V4L2_MEMORY_MMAP = 1, V4L2_MEMORY_USERPTR = 2, V4L2_MEMORY_OVERLAY = 3, + V4L2_MEMORY_DMABUF = 4, }; /* see also http://vektor.theorem.ca/graphics/ycbcr/ */ @@ -602,6 +603,8 @@ struct v4l2_requestbuffers { * should be passed to mmap() called on the video node) * @userptr: when memory is V4L2_MEMORY_USERPTR, a userspace pointer * pointing to this plane + * @fd: when memory is V4L2_MEMORY_DMABUF, a userspace file + * descriptor associated with this plane * @data_offset: offset in the plane to the start of data; usually 0, * unless there is a header in front of the data * @@ -616,6 +619,7 @@ struct v4l2_plane { union { __u32 mem_offset; unsigned long userptr; + __s32 fd; } m; __u32 data_offset; __u32 reserved[11]; @@ -640,6 +644,8 @@ struct v4l2_plane { * (or a "cookie" that should be passed to mmap() as offset) * @userptr: for non-multiplanar buffers with memory == V4L2_MEMORY_USERPTR; * a userspace pointer pointing to this buffer + * @fd: for non-multiplanar buffers with memory == V4L2_MEMORY_DMABUF; + * a userspace file descriptor associated with this buffer * @planes: for multiplanar buffers; userspace pointer to the array of plane * info structs for this buffer * @length: size in bytes of the buffer (NOT its payload) for single-plane @@ -666,6 +672,7 @@ struct v4l2_buffer { __u32 offset; unsigned long userptr; struct v4l2_plane *planes; + __s32 fd; } m; __u32 length; __u32 reserved2; -- cgit v1.2.3 From b799d09a157da71566e8013a62073435550cab6d Mon Sep 17 00:00:00 2001 From: Tomasz Stanislawski Date: Thu, 14 Jun 2012 11:32:23 -0300 Subject: [media] v4l: add buffer exporting via dmabuf This patch adds extension to V4L2 api. A new ioctl VIDIOC_EXPBUF is added. 
The ioctl is used to export an mmap buffer as a DMABUF file descriptor. Signed-off-by: Tomasz Stanislawski Signed-off-by: Kyungmin Park Acked-by: Hans Verkuil Tested-by: Mauro Carvalho Chehab Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 1 + drivers/media/v4l2-core/v4l2-dev.c | 1 + drivers/media/v4l2-core/v4l2-ioctl.c | 10 ++++++++++ include/media/v4l2-ioctl.h | 2 ++ include/uapi/linux/videodev2.h | 28 +++++++++++++++++++++++++++ 5 files changed, 42 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index cc5998b31463..7157af301b14 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -1018,6 +1018,7 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg) case VIDIOC_S_FBUF32: case VIDIOC_OVERLAY32: case VIDIOC_QBUF32: + case VIDIOC_EXPBUF: case VIDIOC_DQBUF32: case VIDIOC_STREAMON32: case VIDIOC_STREAMOFF32: diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c index a2df842e5100..98dcad9c8a3b 100644 --- a/drivers/media/v4l2-core/v4l2-dev.c +++ b/drivers/media/v4l2-core/v4l2-dev.c @@ -571,6 +571,7 @@ static void determine_valid_ioctls(struct video_device *vdev) SET_VALID_IOCTL(ops, VIDIOC_REQBUFS, vidioc_reqbufs); SET_VALID_IOCTL(ops, VIDIOC_QUERYBUF, vidioc_querybuf); SET_VALID_IOCTL(ops, VIDIOC_QBUF, vidioc_qbuf); + SET_VALID_IOCTL(ops, VIDIOC_EXPBUF, vidioc_expbuf); SET_VALID_IOCTL(ops, VIDIOC_DQBUF, vidioc_dqbuf); SET_VALID_IOCTL(ops, VIDIOC_STREAMON, vidioc_streamon); SET_VALID_IOCTL(ops, VIDIOC_STREAMOFF, vidioc_streamoff); diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 530a67e3fe0e..aa6e7c788db2 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -454,6 +454,15 @@ static void v4l_print_buffer(const void 
*arg, bool write_only) tc->type, tc->flags, tc->frames, *(__u32 *)tc->userbits); } +static void v4l_print_exportbuffer(const void *arg, bool write_only) +{ + const struct v4l2_exportbuffer *p = arg; + + pr_cont("fd=%d, type=%s, index=%u, plane=%u, flags=0x%08x\n", + p->fd, prt_names(p->type, v4l2_type_names), + p->index, p->plane, p->flags); +} + static void v4l_print_create_buffers(const void *arg, bool write_only) { const struct v4l2_create_buffers *p = arg; @@ -1961,6 +1970,7 @@ static struct v4l2_ioctl_info v4l2_ioctls[] = { IOCTL_INFO_STD(VIDIOC_S_FBUF, vidioc_s_fbuf, v4l_print_framebuffer, INFO_FL_PRIO), IOCTL_INFO_FNC(VIDIOC_OVERLAY, v4l_overlay, v4l_print_u32, INFO_FL_PRIO), IOCTL_INFO_FNC(VIDIOC_QBUF, v4l_qbuf, v4l_print_buffer, INFO_FL_QUEUE), + IOCTL_INFO_STD(VIDIOC_EXPBUF, vidioc_expbuf, v4l_print_exportbuffer, INFO_FL_QUEUE | INFO_FL_CLEAR(v4l2_exportbuffer, flags)), IOCTL_INFO_FNC(VIDIOC_DQBUF, v4l_dqbuf, v4l_print_buffer, INFO_FL_QUEUE), IOCTL_INFO_FNC(VIDIOC_STREAMON, v4l_streamon, v4l_print_buftype, INFO_FL_PRIO | INFO_FL_QUEUE), IOCTL_INFO_FNC(VIDIOC_STREAMOFF, v4l_streamoff, v4l_print_buftype, INFO_FL_PRIO | INFO_FL_QUEUE), diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h index e48b571ca37d..4118ad1324c9 100644 --- a/include/media/v4l2-ioctl.h +++ b/include/media/v4l2-ioctl.h @@ -111,6 +111,8 @@ struct v4l2_ioctl_ops { int (*vidioc_reqbufs) (struct file *file, void *fh, struct v4l2_requestbuffers *b); int (*vidioc_querybuf)(struct file *file, void *fh, struct v4l2_buffer *b); int (*vidioc_qbuf) (struct file *file, void *fh, struct v4l2_buffer *b); + int (*vidioc_expbuf) (struct file *file, void *fh, + struct v4l2_exportbuffer *e); int (*vidioc_dqbuf) (struct file *file, void *fh, struct v4l2_buffer *b); int (*vidioc_create_bufs)(struct file *file, void *fh, struct v4l2_create_buffers *b); diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 91ac83b21c20..3cf3e946e331 100644 --- 
a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -694,6 +694,33 @@ struct v4l2_buffer { #define V4L2_BUF_FLAG_NO_CACHE_INVALIDATE 0x0800 #define V4L2_BUF_FLAG_NO_CACHE_CLEAN 0x1000 +/** + * struct v4l2_exportbuffer - export of video buffer as DMABUF file descriptor + * + * @index: id number of the buffer + * @type: enum v4l2_buf_type; buffer type (type == *_MPLANE for + * multiplanar buffers); + * @plane: index of the plane to be exported, 0 for single plane queues + * @flags: flags for newly created file, currently only O_CLOEXEC is + * supported, refer to manual of open syscall for more details + * @fd: file descriptor associated with DMABUF (set by driver) + * + * Contains data used for exporting a video buffer as DMABUF file descriptor. + * The buffer is identified by a 'cookie' returned by VIDIOC_QUERYBUF + * (identical to the cookie used to mmap() the buffer to userspace). All + * reserved fields must be set to zero. The field reserved0 is expected to + * become a structure 'type' allowing an alternative layout of the structure + * content. Therefore this field should not be used for any other extensions. 
+ */ +struct v4l2_exportbuffer { + __u32 type; /* enum v4l2_buf_type */ + __u32 index; + __u32 plane; + __u32 flags; + __s32 fd; + __u32 reserved[11]; +}; + /* * O V E R L A Y P R E V I E W */ @@ -1895,6 +1922,7 @@ struct v4l2_create_buffers { #define VIDIOC_S_FBUF _IOW('V', 11, struct v4l2_framebuffer) #define VIDIOC_OVERLAY _IOW('V', 14, int) #define VIDIOC_QBUF _IOWR('V', 15, struct v4l2_buffer) +#define VIDIOC_EXPBUF _IOWR('V', 16, struct v4l2_exportbuffer) #define VIDIOC_DQBUF _IOWR('V', 17, struct v4l2_buffer) #define VIDIOC_STREAMON _IOW('V', 18, int) #define VIDIOC_STREAMOFF _IOW('V', 19, int) -- cgit v1.2.3 From 42d97a599eb6b2aab3a401b3e5799a399d6c7652 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Nov 2012 18:31:02 +0100 Subject: cfg80211: remove remain-on-channel channel type As mwifiex (and mac80211 in the software case) are the only drivers actually implementing remain-on-channel with channel type, userspace can't be relying on it. This is the case, as it's used only for P2P operations right now. Rather than adding a flag to tell userspace whether or not it can actually rely on it, simplify all the code by removing the ability to use different channel types. Leave only the validation of the attribute, so that if we extend it again later (with the needed capability flag), it can't break userspace sending invalid data. 
Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 7 ++-- drivers/net/wireless/ath/ath6kl/wmi.c | 5 ++- drivers/net/wireless/iwlwifi/dvm/dev.h | 1 - drivers/net/wireless/iwlwifi/dvm/mac80211.c | 2 -- drivers/net/wireless/mac80211_hwsim.c | 1 - drivers/net/wireless/mwifiex/cfg80211.c | 16 +++------ drivers/net/wireless/mwifiex/main.h | 2 -- drivers/net/wireless/mwifiex/sta_event.c | 1 - drivers/net/wireless/mwifiex/sta_ioctl.c | 3 +- include/net/cfg80211.h | 11 ++---- include/net/mac80211.h | 1 - include/uapi/linux/nl80211.h | 14 ++++---- net/mac80211/cfg.c | 27 +++++---------- net/mac80211/driver-ops.h | 5 ++- net/mac80211/ieee80211_i.h | 2 -- net/mac80211/main.c | 2 +- net/mac80211/offchannel.c | 8 ++--- net/mac80211/trace.h | 6 ++-- net/wireless/core.h | 6 ++-- net/wireless/mlme.c | 21 ++++-------- net/wireless/nl80211.c | 36 ++++++++------------ net/wireless/nl80211.h | 4 +-- net/wireless/rdev-ops.h | 20 +++++------ net/wireless/trace.h | 52 ++++++++++------------------- 24 files changed, 81 insertions(+), 172 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index d615f9f7506a..74091d33ed6c 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -2976,7 +2976,6 @@ static int ath6kl_change_station(struct wiphy *wiphy, struct net_device *dev, static int ath6kl_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { @@ -3135,10 +3134,8 @@ static bool ath6kl_is_p2p_go_ssid(const u8 *buf, size_t len) static int ath6kl_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, 
u64 *cookie) + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie) { struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); struct ath6kl *ar = ath6kl_priv(vif->ndev); diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index c30ab4b11d61..0e05c41cdcfc 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -474,7 +474,7 @@ static int ath6kl_wmi_remain_on_chnl_event_rx(struct wmi *wmi, u8 *datap, return -EINVAL; } id = vif->last_roc_id; - cfg80211_ready_on_channel(&vif->wdev, id, chan, NL80211_CHAN_NO_HT, + cfg80211_ready_on_channel(&vif->wdev, id, chan, dur, GFP_ATOMIC); return 0; @@ -513,8 +513,7 @@ static int ath6kl_wmi_cancel_remain_on_chnl_event_rx(struct wmi *wmi, else id = vif->last_roc_id; /* timeout on uncanceled r-o-c */ vif->last_cancel_roc_id = 0; - cfg80211_remain_on_channel_expired(&vif->wdev, id, chan, - NL80211_CHAN_NO_HT, GFP_ATOMIC); + cfg80211_remain_on_channel_expired(&vif->wdev, id, chan, GFP_ATOMIC); return 0; } diff --git a/drivers/net/wireless/iwlwifi/dvm/dev.h b/drivers/net/wireless/iwlwifi/dvm/dev.h index 8141f91c3725..29c571a56251 100644 --- a/drivers/net/wireless/iwlwifi/dvm/dev.h +++ b/drivers/net/wireless/iwlwifi/dvm/dev.h @@ -789,7 +789,6 @@ struct iwl_priv { /* remain-on-channel offload support */ struct ieee80211_channel *hw_roc_channel; struct delayed_work hw_roc_disable_work; - enum nl80211_channel_type hw_roc_chantype; int hw_roc_duration; bool hw_roc_setup, hw_roc_start_notified; diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index e75d80341f28..852edb02e5f6 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1034,7 +1034,6 @@ done: static int iwlagn_mac_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *channel, - enum nl80211_channel_type 
channel_type, int duration) { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); @@ -1066,7 +1065,6 @@ static int iwlagn_mac_remain_on_channel(struct ieee80211_hw *hw, } priv->hw_roc_channel = channel; - priv->hw_roc_chantype = channel_type; /* convert from ms to TU */ priv->hw_roc_duration = DIV_ROUND_UP(1000 * duration, 1024); priv->hw_roc_start_notified = false; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 3baa51f1bb83..b0338543547b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1455,7 +1455,6 @@ static void hw_roc_done(struct work_struct *work) static int mac80211_hwsim_roc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, int duration) { struct mac80211_hwsim_data *hwsim = hw->priv; diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 8e829b251d83..f69190b492aa 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -180,10 +180,8 @@ mwifiex_form_mgmt_frame(struct sk_buff *skb, const u8 *buf, size_t len) static int mwifiex_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie) + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie) { struct sk_buff *skb; u16 pkt_len; @@ -253,7 +251,6 @@ static int mwifiex_cfg80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); @@ -271,15 +268,14 @@ mwifiex_cfg80211_remain_on_channel(struct wiphy 
*wiphy, } ret = mwifiex_remain_on_chan_cfg(priv, HostCmd_ACT_GEN_SET, chan, - &channel_type, duration); + duration); if (!ret) { *cookie = random32() | 1; priv->roc_cfg.cookie = *cookie; priv->roc_cfg.chan = *chan; - priv->roc_cfg.chan_type = channel_type; - cfg80211_ready_on_channel(wdev, *cookie, chan, channel_type, + cfg80211_ready_on_channel(wdev, *cookie, chan, duration, GFP_ATOMIC); wiphy_dbg(wiphy, "info: ROC, cookie = 0x%llx\n", *cookie); @@ -302,13 +298,11 @@ mwifiex_cfg80211_cancel_remain_on_channel(struct wiphy *wiphy, return -ENOENT; ret = mwifiex_remain_on_chan_cfg(priv, HostCmd_ACT_GEN_REMOVE, - &priv->roc_cfg.chan, - &priv->roc_cfg.chan_type, 0); + &priv->roc_cfg.chan, 0); if (!ret) { cfg80211_remain_on_channel_expired(wdev, cookie, &priv->roc_cfg.chan, - priv->roc_cfg.chan_type, GFP_ATOMIC); memset(&priv->roc_cfg, 0, sizeof(struct mwifiex_roc_cfg)); diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h index 81f8772dcb07..771717df1c59 100644 --- a/drivers/net/wireless/mwifiex/main.h +++ b/drivers/net/wireless/mwifiex/main.h @@ -371,7 +371,6 @@ struct wps { struct mwifiex_roc_cfg { u64 cookie; struct ieee80211_channel chan; - enum nl80211_channel_type chan_type; }; struct mwifiex_adapter; @@ -1016,7 +1015,6 @@ int mwifiex_get_ver_ext(struct mwifiex_private *priv); int mwifiex_remain_on_chan_cfg(struct mwifiex_private *priv, u16 action, struct ieee80211_channel *chan, - enum nl80211_channel_type *channel_type, unsigned int duration); int mwifiex_set_bss_role(struct mwifiex_private *priv, u8 bss_role); diff --git a/drivers/net/wireless/mwifiex/sta_event.c b/drivers/net/wireless/mwifiex/sta_event.c index 8132119e1a21..78dfa31c908c 100644 --- a/drivers/net/wireless/mwifiex/sta_event.c +++ b/drivers/net/wireless/mwifiex/sta_event.c @@ -424,7 +424,6 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) cfg80211_remain_on_channel_expired(priv->wdev, priv->roc_cfg.cookie, &priv->roc_cfg.chan, - 
priv->roc_cfg.chan_type, GFP_ATOMIC); memset(&priv->roc_cfg, 0x00, sizeof(struct mwifiex_roc_cfg)); diff --git a/drivers/net/wireless/mwifiex/sta_ioctl.c b/drivers/net/wireless/mwifiex/sta_ioctl.c index 552d72ed055a..24af6ba7d8a1 100644 --- a/drivers/net/wireless/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/mwifiex/sta_ioctl.c @@ -1046,7 +1046,6 @@ mwifiex_get_ver_ext(struct mwifiex_private *priv) int mwifiex_remain_on_chan_cfg(struct mwifiex_private *priv, u16 action, struct ieee80211_channel *chan, - enum nl80211_channel_type *ct, unsigned int duration) { struct host_cmd_ds_remain_on_chan roc_cfg; @@ -1056,7 +1055,7 @@ mwifiex_remain_on_chan_cfg(struct mwifiex_private *priv, u16 action, roc_cfg.action = cpu_to_le16(action); if (action == HostCmd_ACT_GEN_SET) { roc_cfg.band_cfg = chan->band; - sc = mwifiex_chan_type_to_sec_chan_offset(*ct); + sc = mwifiex_chan_type_to_sec_chan_offset(NL80211_CHAN_NO_HT); roc_cfg.band_cfg |= (sc << 2); roc_cfg.channel = diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c2c185febb87..1effe0682d28 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1791,7 +1791,6 @@ struct cfg80211_ops { int (*remain_on_channel)(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie); int (*cancel_remain_on_channel)(struct wiphy *wiphy, @@ -1800,10 +1799,8 @@ struct cfg80211_ops { int (*mgmt_tx)(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie); + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie); int (*mgmt_tx_cancel_wait)(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); @@ -3350,14 +3347,12 @@ void cfg80211_disconnected(struct net_device *dev, 
u16 reason, * @wdev: wireless device * @cookie: the request cookie * @chan: The current channel (from remain_on_channel request) - * @channel_type: Channel type * @duration: Duration in milliseconds that the driver intents to remain on the * channel * @gfp: allocation flags */ void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp); /** @@ -3365,12 +3360,10 @@ void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, * @wdev: wireless device * @cookie: the request cookie * @chan: The current channel (from remain_on_channel request) - * @channel_type: Channel type * @gfp: allocation flags */ void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, gfp_t gfp); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e1293c7e4d2c..12093778b057 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2550,7 +2550,6 @@ struct ieee80211_ops { int (*remain_on_channel)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, int duration); int (*cancel_remain_on_channel)(struct ieee80211_hw *hw); int (*set_ringparam)(struct ieee80211_hw *hw, u32 tx, u32 rx); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1a9a819cfab0..43cd6fa084c5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -401,8 +401,7 @@ * a response while being associated to an AP on another channel. * %NL80211_ATTR_IFINDEX is used to specify which interface (and thus * radio) is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the - * frequency for the operation and %NL80211_ATTR_WIPHY_CHANNEL_TYPE may be - * optionally used to specify additional channel parameters. + * frequency for the operation. 
* %NL80211_ATTR_DURATION is used to specify the duration in milliseconds * to remain on the channel. This command is also used as an event to * notify when the requested duration starts (it may take a while for the @@ -440,12 +439,11 @@ * as an event indicating reception of a frame that was not processed in * kernel code, but is for us (i.e., which may need to be processed in a * user space application). %NL80211_ATTR_FRAME is used to specify the - * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and - * optionally %NL80211_ATTR_WIPHY_CHANNEL_TYPE) is used to indicate on - * which channel the frame is to be transmitted or was received. If this - * channel is not the current channel (remain-on-channel or the - * operational channel) the device will switch to the given channel and - * transmit the frame, optionally waiting for a response for the time + * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ is used + * to indicate on which channel the frame is to be transmitted or was + * received. If this channel is not the current channel (remain-on-channel + * or the operational channel) the device will switch to the given channel + * and transmit the frame, optionally waiting for a response for the time * specified using %NL80211_ATTR_DURATION. When called, this operation * returns a cookie (%NL80211_ATTR_COOKIE) that will be included with the * TX status event pertaining to the TX request. 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 18926aea480c..ac0241e3539b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2236,7 +2236,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, static int ieee80211_start_roc_work(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, - enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie, struct sk_buff *txskb) { @@ -2254,7 +2253,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, return -ENOMEM; roc->chan = channel; - roc->chan_type = channel_type; roc->duration = duration; roc->req_duration = duration; roc->frame = txskb; @@ -2287,8 +2285,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, if (!duration) duration = 10; - ret = drv_remain_on_channel(local, sdata, channel, channel_type, - duration); + ret = drv_remain_on_channel(local, sdata, channel, duration); if (ret) { kfree(roc); return ret; @@ -2299,8 +2296,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, out_check_combine: list_for_each_entry(tmp, &local->roc_list, list) { - if (tmp->chan != channel || tmp->chan_type != channel_type || - tmp->sdata != sdata) + if (tmp->chan != channel || tmp->sdata != sdata) continue; /* @@ -2417,7 +2413,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, static int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { @@ -2426,7 +2421,7 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy, int ret; mutex_lock(&local->mtx); - ret = ieee80211_start_roc_work(local, sdata, chan, channel_type, + ret = ieee80211_start_roc_work(local, sdata, chan, duration, cookie, NULL); mutex_unlock(&local->mtx); @@ -2519,10 +2514,8 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, static int 
ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie) + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; @@ -2591,14 +2584,10 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (chanctx_conf) { + if (chanctx_conf) need_offchan = chan != chanctx_conf->channel; - if (channel_type_valid && - channel_type != chanctx_conf->channel_type) - need_offchan = true; - } else { + else need_offchan = true; - } rcu_read_unlock(); } @@ -2633,7 +2622,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, local->hw.offchannel_tx_hw_queue; /* This will handle all kinds of coalescing and immediate TX */ - ret = ieee80211_start_roc_work(local, sdata, chan, channel_type, + ret = ieee80211_start_roc_work(local, sdata, chan, wait, cookie, skb); if (ret) kfree_skb(skb); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 68c27aaf5c93..c6560cc7a9d6 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -738,16 +738,15 @@ static inline int drv_get_antenna(struct ieee80211_local *local, static inline int drv_remain_on_channel(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, - enum nl80211_channel_type chantype, unsigned int duration) { int ret; might_sleep(); - trace_drv_remain_on_channel(local, sdata, chan, chantype, duration); + trace_drv_remain_on_channel(local, sdata, chan, duration); ret = local->ops->remain_on_channel(&local->hw, &sdata->vif, - chan, chantype, duration); + chan, 
duration); trace_drv_return_int(local, ret); return ret; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d5da0fe14318..fba4b1f425c1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -348,7 +348,6 @@ struct ieee80211_roc_work { struct ieee80211_sub_if_data *sdata; struct ieee80211_channel *chan; - enum nl80211_channel_type chan_type; bool started, abort, hw_begun, notified; @@ -1048,7 +1047,6 @@ struct ieee80211_local { /* Temporary remain-on-channel for off-channel operations */ struct ieee80211_channel *tmp_channel; - enum nl80211_channel_type tmp_channel_type; /* channel contexts */ struct list_head chanctx_list; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 70e87600cacc..b229cded4567 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -115,7 +115,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) channel_type = NL80211_CHAN_NO_HT; } else if (local->tmp_channel) { chan = local->tmp_channel; - channel_type = local->tmp_channel_type; + channel_type = NL80211_CHAN_NO_HT; } else { chan = local->_oper_channel; channel_type = local->_oper_channel_type; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 7f8a36510813..5abddfe3e101 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -205,8 +205,8 @@ void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) } } else { cfg80211_ready_on_channel(&roc->sdata->wdev, roc->cookie, - roc->chan, roc->chan_type, - roc->req_duration, GFP_KERNEL); + roc->chan, roc->req_duration, + GFP_KERNEL); } roc->notified = true; @@ -284,7 +284,6 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) duration = 10; ret = drv_remain_on_channel(local, roc->sdata, roc->chan, - roc->chan_type, duration); roc->started = true; @@ -321,7 +320,7 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) if (!roc->mgmt_tx_cookie) cfg80211_remain_on_channel_expired(&roc->sdata->wdev, 
roc->cookie, roc->chan, - roc->chan_type, GFP_KERNEL); + GFP_KERNEL); list_for_each_entry_safe(dep, tmp, &roc->dependents, list) ieee80211_roc_notify_destroy(dep); @@ -359,7 +358,6 @@ void ieee80211_sw_roc_work(struct work_struct *work) ieee80211_recalc_idle(local); local->tmp_channel = roc->chan; - local->tmp_channel_type = roc->chan_type; ieee80211_hw_config(local, 0); /* tell userspace or send frame */ diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index e9579b7a2cd0..bc28346ba207 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1022,15 +1022,14 @@ TRACE_EVENT(drv_remain_on_channel, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, - enum nl80211_channel_type chantype, unsigned int duration), + unsigned int duration), - TP_ARGS(local, sdata, chan, chantype, duration), + TP_ARGS(local, sdata, chan, duration), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(int, center_freq) - __field(int, channel_type) __field(unsigned int, duration) ), @@ -1038,7 +1037,6 @@ TRACE_EVENT(drv_remain_on_channel, LOCAL_ASSIGN; VIF_ASSIGN; __entry->center_freq = chan->center_freq; - __entry->channel_type = chantype; __entry->duration = duration; ), diff --git a/net/wireless/core.h b/net/wireless/core.h index e53831c876bb..b0a09cf56e06 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -378,10 +378,8 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie); + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie); void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask); diff --git 
a/net/wireless/mlme.c b/net/wireless/mlme.c index 4bfd14f7c592..a9646b53a095 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -579,31 +579,25 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - trace_cfg80211_ready_on_channel(wdev, cookie, chan, channel_type, - duration); - nl80211_send_remain_on_channel(rdev, wdev, cookie, chan, channel_type, - duration, gfp); + trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration); + nl80211_send_remain_on_channel(rdev, wdev, cookie, chan, duration, gfp); } EXPORT_SYMBOL(cfg80211_ready_on_channel); void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan, - channel_type); - nl80211_send_remain_on_channel_cancel(rdev, wdev, cookie, chan, - channel_type, gfp); + trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan); + nl80211_send_remain_on_channel_cancel(rdev, wdev, cookie, chan, gfp); } EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); @@ -758,10 +752,8 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie) + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie) { const struct 
ieee80211_mgmt *mgmt; u16 stype; @@ -855,7 +847,6 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, /* Transmit the Action frame as requested by user space */ return rdev_mgmt_tx(rdev, wdev, chan, offchan, - channel_type, channel_type_valid, wait, buf, len, no_cck, dont_wait_for_ack, cookie); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4c427fa5c450..e880f4494950 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5952,7 +5952,6 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, struct sk_buff *msg; void *hdr; u64 cookie; - enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; u32 freq, duration; int err; @@ -5975,11 +5974,11 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, return -EINVAL; if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE] && - !nl80211_valid_channel_type(info, &channel_type)) + !nl80211_valid_channel_type(info, NULL)) return -EINVAL; freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); - chan = rdev_freq_to_chan(rdev, freq, channel_type); + chan = rdev_freq_to_chan(rdev, freq, NL80211_CHAN_NO_HT); if (chan == NULL) return -EINVAL; @@ -5995,8 +5994,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, goto free_msg; } - err = rdev_remain_on_channel(rdev, wdev, chan, channel_type, duration, - &cookie); + err = rdev_remain_on_channel(rdev, wdev, chan, duration, &cookie); if (err) goto free_msg; @@ -6216,8 +6214,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; struct ieee80211_channel *chan; - enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; - bool channel_type_valid = false; u32 freq; int err; void *hdr = NULL; @@ -6264,11 +6260,9 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) } - if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { - if (!nl80211_valid_channel_type(info, 
&channel_type)) - return -EINVAL; - channel_type_valid = true; - } + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE] && + !nl80211_valid_channel_type(info, NULL)) + return -EINVAL; offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK]; @@ -6278,7 +6272,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); - chan = rdev_freq_to_chan(rdev, freq, channel_type); + chan = rdev_freq_to_chan(rdev, freq, NL80211_CHAN_NO_HT); if (chan == NULL) return -EINVAL; @@ -6296,8 +6290,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) } } - err = cfg80211_mlme_mgmt_tx(rdev, wdev, chan, offchan, channel_type, - channel_type_valid, wait, + err = cfg80211_mlme_mgmt_tx(rdev, wdev, chan, offchan, wait, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), no_cck, dont_wait_for_ack, &cookie); @@ -8395,7 +8388,6 @@ static void nl80211_send_remain_on_chan_event( int cmd, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp) { struct sk_buff *msg; @@ -8416,7 +8408,8 @@ static void nl80211_send_remain_on_chan_event( wdev->netdev->ifindex)) || nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, chan->center_freq) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, channel_type) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, + NL80211_CHAN_NO_HT) || nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie)) goto nla_put_failure; @@ -8438,23 +8431,20 @@ static void nl80211_send_remain_on_chan_event( void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp) { 
nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, rdev, wdev, cookie, chan, - channel_type, duration, gfp); + duration, gfp); } void nl80211_send_remain_on_channel_cancel( struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, gfp_t gfp) + u64 cookie, struct ieee80211_channel *chan, gfp_t gfp) { nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, - rdev, wdev, cookie, chan, - channel_type, 0, gfp); + rdev, wdev, cookie, chan, 0, gfp); } void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index f6153516068c..7adbd767dbfd 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -76,13 +76,11 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp); void nl80211_send_remain_on_channel_cancel( struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, gfp_t gfp); + u64 cookie, struct ieee80211_channel *chan, gfp_t gfp); void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 6e5fa659068d..ee54a5aa4381 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -600,14 +600,12 @@ static inline int rdev_remain_on_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { int ret; - trace_rdev_remain_on_channel(&rdev->wiphy, wdev, chan, channel_type, - 
duration); + trace_rdev_remain_on_channel(&rdev->wiphy, wdev, chan, duration); ret = rdev->ops->remain_on_channel(&rdev->wiphy, wdev, chan, - channel_type, duration, cookie); + duration, cookie); trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie); return ret; } @@ -626,17 +624,15 @@ rdev_cancel_remain_on_channel(struct cfg80211_registered_device *rdev, static inline int rdev_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie) + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie) { int ret; - trace_rdev_mgmt_tx(&rdev->wiphy, wdev, chan, offchan, channel_type, - channel_type_valid, wait, no_cck, dont_wait_for_ack); + trace_rdev_mgmt_tx(&rdev->wiphy, wdev, chan, offchan, + wait, no_cck, dont_wait_for_ack); ret = rdev->ops->mgmt_tx(&rdev->wiphy, wdev, chan, offchan, - channel_type, channel_type_valid, wait, buf, - len, no_cck, dont_wait_for_ack, cookie); + wait, buf, len, no_cck, + dont_wait_for_ack, cookie); trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index f264c20a7090..ed10833f9a3a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1573,25 +1573,22 @@ DEFINE_EVENT(rdev_pmksa, rdev_del_pmksa, TRACE_EVENT(rdev_remain_on_channel, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration), - TP_ARGS(wiphy, wdev, chan, channel_type, duration), + unsigned int duration), + TP_ARGS(wiphy, wdev, chan, duration), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY CHAN_ENTRY - __field(enum nl80211_channel_type, channel_type) __field(unsigned int, duration) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; 
CHAN_ASSIGN(chan); - __entry->channel_type = channel_type; __entry->duration = duration; ), - TP_printk(WIPHY_PR_FMT WDEV_PR_FMT CHAN_PR_FMT ", channel type: %d, duration: %u", - WIPHY_PR_ARG, WDEV_PR_ARG, CHAN_PR_ARG, __entry->channel_type, - __entry->duration) + TP_printk(WIPHY_PR_FMT WDEV_PR_FMT CHAN_PR_FMT ", duration: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, CHAN_PR_ARG, __entry->duration) ); TRACE_EVENT(rdev_return_int_cookie, @@ -1631,18 +1628,13 @@ TRACE_EVENT(rdev_cancel_remain_on_channel, TRACE_EVENT(rdev_mgmt_tx, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, bool no_cck, - bool dont_wait_for_ack), - TP_ARGS(wiphy, wdev, chan, offchan, channel_type, channel_type_valid, - wait, no_cck, dont_wait_for_ack), + unsigned int wait, bool no_cck, bool dont_wait_for_ack), + TP_ARGS(wiphy, wdev, chan, offchan, wait, no_cck, dont_wait_for_ack), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY CHAN_ENTRY __field(bool, offchan) - __field(enum nl80211_channel_type, channel_type) - __field(bool, channel_type_valid) __field(unsigned int, wait) __field(bool, no_cck) __field(bool, dont_wait_for_ack) @@ -1652,18 +1644,14 @@ TRACE_EVENT(rdev_mgmt_tx, WDEV_ASSIGN; CHAN_ASSIGN(chan); __entry->offchan = offchan; - __entry->channel_type = channel_type; - __entry->channel_type_valid = channel_type_valid; __entry->wait = wait; __entry->no_cck = no_cck; __entry->dont_wait_for_ack = dont_wait_for_ack; ), - TP_printk(WIPHY_PR_FMT WDEV_PR_FMT CHAN_PR_FMT ", offchan: %s, " - "channel type: %d, channel type valid: %s, wait: %u, " - "no cck: %s, dont wait for ack: %s", + TP_printk(WIPHY_PR_FMT WDEV_PR_FMT CHAN_PR_FMT ", offchan: %s," + " wait: %u, no cck: %s, dont wait for ack: %s", WIPHY_PR_ARG, WDEV_PR_ARG, CHAN_PR_ARG, - BOOL_TO_STR(__entry->offchan), __entry->channel_type, - BOOL_TO_STR(__entry->channel_type_valid), __entry->wait, + 
BOOL_TO_STR(__entry->offchan), __entry->wait, BOOL_TO_STR(__entry->no_cck), BOOL_TO_STR(__entry->dont_wait_for_ack)) ); @@ -1894,47 +1882,41 @@ TRACE_EVENT(cfg80211_michael_mic_failure, TRACE_EVENT(cfg80211_ready_on_channel, TP_PROTO(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration), - TP_ARGS(wdev, cookie, chan, channel_type, duration), + unsigned int duration), + TP_ARGS(wdev, cookie, chan, duration), TP_STRUCT__entry( WDEV_ENTRY __field(u64, cookie) CHAN_ENTRY - __field(enum nl80211_channel_type, channel_type) __field(unsigned int, duration) ), TP_fast_assign( WDEV_ASSIGN; __entry->cookie = cookie; CHAN_ASSIGN(chan); - __entry->channel_type = channel_type; __entry->duration = duration; ), - TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT ", channel type: %d, duration: %u", + TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT ", duration: %u", WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG, - __entry->channel_type, __entry->duration) + __entry->duration) ); TRACE_EVENT(cfg80211_ready_on_channel_expired, TP_PROTO(struct wireless_dev *wdev, u64 cookie, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type), - TP_ARGS(wdev, cookie, chan, channel_type), + struct ieee80211_channel *chan), + TP_ARGS(wdev, cookie, chan), TP_STRUCT__entry( WDEV_ENTRY __field(u64, cookie) CHAN_ENTRY - __field(enum nl80211_channel_type, channel_type) ), TP_fast_assign( WDEV_ASSIGN; __entry->cookie = cookie; CHAN_ASSIGN(chan); - __entry->channel_type = channel_type; ), - TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT ", channel type: %d", - WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG, - __entry->channel_type) + TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT, + WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG) ); TRACE_EVENT(cfg80211_new_sta, -- cgit v1.2.3 From fe4b31810c06cc6518fb193efb9b3c3289b55832 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Nov 2012 19:20:56 
+0100 Subject: nl80211: add documentation for channel type Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 43cd6fa084c5..82b5ad38435b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2438,6 +2438,15 @@ enum nl80211_ac { #define NL80211_TXQ_Q_BE NL80211_AC_BE #define NL80211_TXQ_Q_BK NL80211_AC_BK +/** + * enum nl80211_channel_type - channel type + * @NL80211_CHAN_NO_HT: 20 MHz, non-HT channel + * @NL80211_CHAN_HT20: 20 MHz HT channel + * @NL80211_CHAN_HT40MINUS: HT40 channel, secondary channel + * below the control channel + * @NL80211_CHAN_HT40PLUS: HT40 channel, secondary channel + * above the control channel + */ enum nl80211_channel_type { NL80211_CHAN_NO_HT, NL80211_CHAN_HT20, -- cgit v1.2.3 From 3d9d1d6656a73ea8407734cfb00b81d14ef62d4b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Nov 2012 23:14:50 +0100 Subject: nl80211/cfg80211: support VHT channel configuration Change nl80211 to support specifying a VHT (or HT) using the control channel frequency (as before) and new attributes for the channel width and first and second center frequency. The old channel type is of course still supported for HT. Also change the cfg80211 channel definition struct to support these by adding the relevant fields to it (and removing the _type field.) This also adds new helper functions: - cfg80211_chandef_create to create a channel def struct given the control channel and channel type, - cfg80211_chandef_identical to check if two channel definitions are identical - cfg80211_chandef_compatible to check if the given channel definitions are compatible, and return the wider of the two This isn't entirely complete, but that doesn't matter until we have a driver using it. 
In particular, it's missing - regulatory checks on the usable bandwidth (if that even makes sense) - regulatory TX power (database can't deal with it) - a proper channel compatibility calculation for the new channel types Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 10 +- include/net/cfg80211.h | 73 ++++++++- include/uapi/linux/nl80211.h | 61 +++++-- net/mac80211/cfg.c | 5 +- net/mac80211/ibss.c | 13 +- net/wireless/chan.c | 249 ++++++++++++++++++++++++++--- net/wireless/core.h | 6 + net/wireless/ibss.c | 4 +- net/wireless/mesh.c | 4 +- net/wireless/nl80211.c | 160 ++++++++++++------ net/wireless/trace.h | 28 ++-- net/wireless/wext-compat.c | 4 +- net/wireless/wext-sme.c | 3 +- 13 files changed, 507 insertions(+), 113 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index c0cc2e59fe6c..51bbe85c574c 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1099,12 +1099,10 @@ void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq, "channel switch notify nw_type %d freq %d mode %d\n", vif->nw_type, freq, mode); - chandef.chan = ieee80211_get_channel(vif->ar->wiphy, freq); - if (WARN_ON(!chandef.chan)) - return; - - chandef._type = (mode == WMI_11G_HT20) ? - NL80211_CHAN_HT20 : NL80211_CHAN_NO_HT; + cfg80211_chandef_create(&chandef, + ieee80211_get_channel(vif->ar->wiphy, freq), + (mode == WMI_11G_HT20) ? + NL80211_CHAN_HT20 : NL80211_CHAN_NO_HT); cfg80211_ch_switch_notify(vif->ndev, &chandef); } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 86f777af79e8..977da58fb7ea 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -308,20 +308,85 @@ struct key_params { /** * struct cfg80211_chan_def - channel definition * @chan: the (control) channel - * @_type: the channel type, don't use this field, - * use cfg80211_get_chandef_type() if needed. 
+ * @width: channel width + * @center_freq1: center frequency of first segment + * @center_freq2: center frequency of second segment + * (only with 80+80 MHz) */ struct cfg80211_chan_def { struct ieee80211_channel *chan; - enum nl80211_channel_type _type; + enum nl80211_chan_width width; + u32 center_freq1; + u32 center_freq2; }; +/** + * cfg80211_get_chandef_type - return old channel type from chandef + * @chandef: the channel definition + * + * Returns the old channel type (NOHT, HT20, HT40+/-) from a given + * chandef, which must have a bandwidth allowing this conversion. + */ static inline enum nl80211_channel_type cfg80211_get_chandef_type(const struct cfg80211_chan_def *chandef) { - return chandef->_type; + switch (chandef->width) { + case NL80211_CHAN_WIDTH_20_NOHT: + return NL80211_CHAN_NO_HT; + case NL80211_CHAN_WIDTH_20: + return NL80211_CHAN_HT20; + case NL80211_CHAN_WIDTH_40: + if (chandef->center_freq1 > chandef->chan->center_freq) + return NL80211_CHAN_HT40PLUS; + return NL80211_CHAN_HT40MINUS; + default: + WARN_ON(1); + return NL80211_CHAN_NO_HT; + } +} + +/** + * cfg80211_chandef_create - create channel definition using channel type + * @chandef: the channel definition struct to fill + * @channel: the control channel + * @chantype: the channel type + * + * Given a channel type, create a channel definition. + */ +void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, + struct ieee80211_channel *channel, + enum nl80211_channel_type chantype); + +/** + * cfg80211_chandef_identical - check if two channel definitions are identical + * @chandef1: first channel definition + * @chandef2: second channel definition + * + * Returns %true if the channels defined by the channel definitions are + * identical, %false otherwise. 
+ */ +static inline bool +cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1, + const struct cfg80211_chan_def *chandef2) +{ + return (chandef1->chan == chandef2->chan && + chandef1->width == chandef2->width && + chandef1->center_freq1 == chandef2->center_freq1 && + chandef1->center_freq2 == chandef2->center_freq2); } +/** + * cfg80211_chandef_compatible - check if two channel definitions are compatible + * @chandef1: first channel definition + * @chandef2: second channel definition + * + * Returns %NULL if the given channel definitions are incompatible, + * chandef1 or chandef2 otherwise. + */ +const struct cfg80211_chan_def * +cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1, + const struct cfg80211_chan_def *chandef2); + /** * enum survey_info_flags - survey information flags * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 82b5ad38435b..84f9c7d84c69 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -118,8 +118,9 @@ * to get a list of all present wiphys. * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME, - * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ, - * %NL80211_ATTR_WIPHY_CHANNEL_TYPE, %NL80211_ATTR_WIPHY_RETRY_SHORT, + * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ (and the + * attributes determining the channel width; this is used for setting + * monitor mode channel), %NL80211_ATTR_WIPHY_RETRY_SHORT, * %NL80211_ATTR_WIPHY_RETRY_LONG, %NL80211_ATTR_WIPHY_FRAG_THRESHOLD, * and/or %NL80211_ATTR_WIPHY_RTS_THRESHOLD. * However, for setting the channel, see %NL80211_CMD_SET_CHANNEL @@ -171,7 +172,7 @@ * %NL80211_ATTR_AKM_SUITES, %NL80211_ATTR_PRIVACY, * %NL80211_ATTR_AUTH_TYPE and %NL80211_ATTR_INACTIVITY_TIMEOUT. 
* The channel to use can be set on the interface or be given using the - * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_WIPHY_CHANNEL_TYPE attrs. + * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel width. * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP * @NL80211_CMD_STOP_AP: Stop AP operation on the given interface * @NL80211_CMD_DEL_BEACON: old alias for %NL80211_CMD_STOP_AP @@ -471,8 +472,8 @@ * command is used as an event to indicate the that a trigger level was * reached. * @NL80211_CMD_SET_CHANNEL: Set the channel (using %NL80211_ATTR_WIPHY_FREQ - * and %NL80211_ATTR_WIPHY_CHANNEL_TYPE) the given interface (identifed - * by %NL80211_ATTR_IFINDEX) shall operate on. + * and the attributes determining channel width) the given interface + * (identifed by %NL80211_ATTR_IFINDEX) shall operate on. * In case multiple channels are supported by the device, the mechanism * with which it switches channels is implementation-defined. * When a monitor interface is given, it can only switch channel while @@ -566,8 +567,8 @@ * * @NL80211_CMD_CH_SWITCH_NOTIFY: An AP or GO may decide to switch channels * independently of the userspace SME, send this event indicating - * %NL80211_ATTR_IFINDEX is now on %NL80211_ATTR_WIPHY_FREQ with - * %NL80211_ATTR_WIPHY_CHANNEL_TYPE. + * %NL80211_ATTR_IFINDEX is now on %NL80211_ATTR_WIPHY_FREQ and the + * attributes determining channel width. * * @NL80211_CMD_START_P2P_DEVICE: Start the given P2P Device, identified by * its %NL80211_ATTR_WDEV identifier. 
It must have been created with @@ -771,14 +772,26 @@ enum nl80211_commands { * /sys/class/ieee80211//index * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters - * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz + * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz, + * defines the channel together with the (deprecated) + * %NL80211_ATTR_WIPHY_CHANNEL_TYPE attribute or the attributes + * %NL80211_ATTR_CHANNEL_WIDTH and if needed %NL80211_ATTR_CENTER_FREQ1 + * and %NL80211_ATTR_CENTER_FREQ2 + * @NL80211_ATTR_CHANNEL_WIDTH: u32 attribute containing one of the values + * of &enum nl80211_chan_width, describing the channel width. See the + * documentation of the enum for more information. + * @NL80211_ATTR_CENTER_FREQ1: Center frequency of the first part of the + * channel, used for anything but 20 MHz bandwidth + * @NL80211_ATTR_CENTER_FREQ2: Center frequency of the second part of the + * channel, used only for 80+80 MHz bandwidth * @NL80211_ATTR_WIPHY_CHANNEL_TYPE: included with NL80211_ATTR_WIPHY_FREQ - * if HT20 or HT40 are allowed (i.e., 802.11n disabled if not included): + * if HT20 or HT40 are to be used (i.e., HT disabled if not included): * NL80211_CHAN_NO_HT = HT not allowed (i.e., same as not including * this attribute) * NL80211_CHAN_HT20 = HT20 only * NL80211_CHAN_HT40MINUS = secondary channel is below the primary channel * NL80211_CHAN_HT40PLUS = secondary channel is above the primary channel + * This attribute is now deprecated. 
* @NL80211_ATTR_WIPHY_RETRY_SHORT: TX retry limit for frames whose length is * less than or equal to the RTS threshold; allowed range: 1..255; * dot11ShortRetryLimit; u8 @@ -1553,6 +1566,10 @@ enum nl80211_attrs { NL80211_ATTR_SCAN_FLAGS, + NL80211_ATTR_CHANNEL_WIDTH, + NL80211_ATTR_CENTER_FREQ1, + NL80211_ATTR_CENTER_FREQ2, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2454,6 +2471,32 @@ enum nl80211_channel_type { NL80211_CHAN_HT40PLUS }; +/** + * enum nl80211_chan_width - channel width definitions + * + * These values are used with the %NL80211_ATTR_CHANNEL_WIDTH + * attribute. + * + * @NL80211_CHAN_WIDTH_20_NOHT: 20 MHz, non-HT channel + * @NL80211_CHAN_WIDTH_20: 20 MHz HT channel + * @NL80211_CHAN_WIDTH_40: 40 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + * @NL80211_CHAN_WIDTH_80: 80 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + * @NL80211_CHAN_WIDTH_80P80: 80+80 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * and %NL80211_ATTR_CENTER_FREQ2 attributes must be provided as well + * @NL80211_CHAN_WIDTH_160: 160 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + */ +enum nl80211_chan_width { + NL80211_CHAN_WIDTH_20_NOHT, + NL80211_CHAN_WIDTH_20, + NL80211_CHAN_WIDTH_40, + NL80211_CHAN_WIDTH_80, + NL80211_CHAN_WIDTH_80P80, + NL80211_CHAN_WIDTH_160, +}; + /** * enum nl80211_bss - netlink attributes for a BSS * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fbb2d072cb9e..7136b945798e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3125,8 +3125,9 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (chanctx_conf) { - chandef->chan = chanctx_conf->channel; - chandef->_type = chanctx_conf->channel_type; + cfg80211_chandef_create(chandef, + chanctx_conf->channel, + chanctx_conf->channel_type); ret = 0; } 
rcu_read_unlock(); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index bed616fd97e9..5648bbed240b 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -52,6 +52,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, u32 bss_change; u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; struct cfg80211_chan_def chandef; + enum nl80211_channel_type chan_type; lockdep_assert_held(&ifibss->mtx); @@ -79,13 +80,13 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; - chandef.chan = chan; - chandef._type = ifibss->channel_type; + chan_type = ifibss->channel_type; + cfg80211_chandef_create(&chandef, chan, chan_type); if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) - chandef._type = NL80211_CHAN_HT20; + chan_type = NL80211_CHAN_HT20; ieee80211_vif_release_channel(sdata); - if (ieee80211_vif_use_channel(sdata, chan, chandef._type, + if (ieee80211_vif_use_channel(sdata, chan, chan_type, ifibss->fixed_channel ? 
IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE)) { @@ -159,7 +160,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, ifibss->ie, ifibss->ie_len); /* add HT capability and information IEs */ - if (chandef._type != NL80211_CHAN_NO_HT && + if (chan_type != NL80211_CHAN_NO_HT && sband->ht_cap.ht_supported) { pos = skb_put(skb, 4 + sizeof(struct ieee80211_ht_cap) + @@ -172,7 +173,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, * keep them at 0 */ pos = ieee80211_ie_build_ht_oper(pos, &sband->ht_cap, - chan, chandef._type, 0); + chan, chan_type, 0); } if (local->hw.queues >= IEEE80211_NUM_ACS) { diff --git a/net/wireless/chan.c b/net/wireless/chan.c index e834422de40a..bf2dfd54ff3b 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -11,43 +11,252 @@ #include "core.h" #include "rdev-ops.h" -bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef) +void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, + struct ieee80211_channel *chan, + enum nl80211_channel_type chan_type) { - struct ieee80211_channel *sec_chan; - int diff; + if (WARN_ON(!chan)) + return; - trace_cfg80211_reg_can_beacon(wiphy, chandef); + chandef->chan = chan; + chandef->center_freq2 = 0; - switch (chandef->_type) { + switch (chan_type) { + case NL80211_CHAN_NO_HT: + chandef->width = NL80211_CHAN_WIDTH_20_NOHT; + chandef->center_freq1 = chan->center_freq; + break; + case NL80211_CHAN_HT20: + chandef->width = NL80211_CHAN_WIDTH_20; + chandef->center_freq1 = chan->center_freq; + break; case NL80211_CHAN_HT40PLUS: - diff = 20; + chandef->width = NL80211_CHAN_WIDTH_40; + chandef->center_freq1 = chan->center_freq + 10; break; case NL80211_CHAN_HT40MINUS: - diff = -20; + chandef->width = NL80211_CHAN_WIDTH_40; + chandef->center_freq1 = chan->center_freq - 10; + break; + default: + WARN_ON(1); + } +} +EXPORT_SYMBOL(cfg80211_chandef_create); + +bool cfg80211_chan_def_valid(const struct 
cfg80211_chan_def *chandef) +{ + u32 control_freq; + + if (!chandef->chan) + return false; + + control_freq = chandef->chan->center_freq; + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + if (chandef->center_freq1 != control_freq) + return false; + if (chandef->center_freq2) + return false; + break; + case NL80211_CHAN_WIDTH_40: + if (chandef->center_freq1 != control_freq + 10 && + chandef->center_freq1 != control_freq - 10) + return false; + if (chandef->center_freq2) + return false; + break; + case NL80211_CHAN_WIDTH_80P80: + if (chandef->center_freq1 != control_freq + 30 && + chandef->center_freq1 != control_freq + 10 && + chandef->center_freq1 != control_freq - 10 && + chandef->center_freq1 != control_freq - 30) + return false; + if (!chandef->center_freq2) + return false; + break; + case NL80211_CHAN_WIDTH_80: + if (chandef->center_freq1 != control_freq + 30 && + chandef->center_freq1 != control_freq + 10 && + chandef->center_freq1 != control_freq - 10 && + chandef->center_freq1 != control_freq - 30) + return false; + if (chandef->center_freq2) + return false; + break; + case NL80211_CHAN_WIDTH_160: + if (chandef->center_freq1 != control_freq + 70 && + chandef->center_freq1 != control_freq + 50 && + chandef->center_freq1 != control_freq + 30 && + chandef->center_freq1 != control_freq + 10 && + chandef->center_freq1 != control_freq - 10 && + chandef->center_freq1 != control_freq - 30 && + chandef->center_freq1 != control_freq - 50 && + chandef->center_freq1 != control_freq - 70) + return false; + if (chandef->center_freq2) + return false; + break; + default: + return false; + } + + return true; +} + +static void chandef_primary_freqs(const struct cfg80211_chan_def *c, + int *pri40, int *pri80) +{ + int tmp; + + switch (c->width) { + case NL80211_CHAN_WIDTH_40: + *pri40 = c->center_freq1; + *pri80 = 0; + break; + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + *pri80 = c->center_freq1; + /* n_P20 */ + 
tmp = (30 + c->chan->center_freq - c->center_freq1)/20; + /* n_P40 */ + tmp /= 2; + /* freq_P40 */ + *pri40 = c->center_freq1 - 20 + 40 * tmp; + break; + case NL80211_CHAN_WIDTH_160: + /* n_P20 */ + tmp = (70 + c->chan->center_freq - c->center_freq1)/20; + /* n_P40 */ + tmp /= 2; + /* freq_P40 */ + *pri40 = c->center_freq1 - 60 + 40 * tmp; + /* n_P80 */ + tmp /= 2; + *pri80 = c->center_freq1 - 40 + 80 * tmp; break; default: - trace_cfg80211_return_bool(true); - return true; + WARN_ON_ONCE(1); } +} + +const struct cfg80211_chan_def * +cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, + const struct cfg80211_chan_def *c2) +{ + u32 c1_pri40, c1_pri80, c2_pri40, c2_pri80; - sec_chan = ieee80211_get_channel(wiphy, - chandef->chan->center_freq + diff); - if (!sec_chan) { + /* If they are identical, return */ + if (cfg80211_chandef_identical(c1, c2)) + return c1; + + /* otherwise, must have same control channel */ + if (c1->chan != c2->chan) + return NULL; + + /* + * If they have the same width, but aren't identical, + * then they can't be compatible. 
+ */ + if (c1->width == c2->width) + return NULL; + + if (c1->width == NL80211_CHAN_WIDTH_20_NOHT || + c1->width == NL80211_CHAN_WIDTH_20) + return c2; + + if (c2->width == NL80211_CHAN_WIDTH_20_NOHT || + c2->width == NL80211_CHAN_WIDTH_20) + return c1; + + chandef_primary_freqs(c1, &c1_pri40, &c1_pri80); + chandef_primary_freqs(c2, &c2_pri40, &c2_pri80); + + if (c1_pri40 != c2_pri40) + return NULL; + + WARN_ON(!c1_pri80 && !c2_pri80); + if (c1_pri80 && c2_pri80 && c1_pri80 != c2_pri80) + return NULL; + + if (c1->width > c2->width) + return c1; + return c2; +} +EXPORT_SYMBOL(cfg80211_chandef_compatible); + +bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, + u32 center_freq, u32 bandwidth, + u32 prohibited_flags) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c || c->flags & prohibited_flags) + return false; + } + + return true; +} + +static bool cfg80211_check_beacon_chans(struct wiphy *wiphy, + u32 center_freq, u32 bw) +{ + return cfg80211_secondary_chans_ok(wiphy, center_freq, bw, + IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_PASSIVE_SCAN | + IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_RADAR); +} + +bool cfg80211_reg_can_beacon(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef) +{ + u32 width; + bool res; + + trace_cfg80211_reg_can_beacon(wiphy, chandef); + + if (WARN_ON(!cfg80211_chan_def_valid(chandef))) { trace_cfg80211_return_bool(false); return false; } - /* we'll need a DFS capability later */ - if (sec_chan->flags & (IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_PASSIVE_SCAN | - IEEE80211_CHAN_NO_IBSS | - IEEE80211_CHAN_RADAR)) { + switch (chandef->width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + width = 20; + break; + case NL80211_CHAN_WIDTH_40: + width = 40; + break; + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + width = 80; + break; + case 
NL80211_CHAN_WIDTH_160: + width = 160; + break; + default: + WARN_ON_ONCE(1); trace_cfg80211_return_bool(false); return false; } - trace_cfg80211_return_bool(true); - return true; + + res = cfg80211_check_beacon_chans(wiphy, chandef->center_freq1, width); + + if (res && chandef->center_freq2) + res = cfg80211_check_beacon_chans(wiphy, chandef->center_freq2, + width); + + trace_cfg80211_return_bool(res); + return res; } EXPORT_SYMBOL(cfg80211_reg_can_beacon); diff --git a/net/wireless/core.h b/net/wireless/core.h index 6183a0d25b8b..a0c8decf6a47 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -483,6 +483,12 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); +bool cfg80211_chan_def_valid(const struct cfg80211_chan_def *chandef); + +bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, + u32 center_freq, u32 bandwidth, + u32 prohibited_flags); + #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index ccc8865dfadb..9b9551e4a6f9 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -252,7 +252,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, /* try to find an IBSS channel if none requested ... 
*/ if (!wdev->wext.ibss.chandef.chan) { - wdev->wext.ibss.chandef._type = NL80211_CHAN_NO_HT; + wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; @@ -352,7 +352,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (chan) { wdev->wext.ibss.chandef.chan = chan; - wdev->wext.ibss.chandef._type = NL80211_CHAN_NO_HT; + wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 12b5a570a306..3ee5a7282283 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -146,7 +146,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!setup->chandef.chan) return -EINVAL; - setup->chandef._type = NL80211_CHAN_NO_HT; + setup->chandef.width = NL80211_CHAN_WIDTH_20_NOHT;; } if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) @@ -198,7 +198,7 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, * compatible with 802.11 mesh. 
*/ if (rdev->ops->libertas_set_mesh_channel) { - if (chandef->_type != NL80211_CHAN_NO_HT) + if (chandef->width != NL80211_CHAN_WIDTH_20_NOHT) return -EINVAL; if (!netif_running(wdev->netdev)) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 999108cd947c..15158a3d64a3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -223,8 +223,13 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = 20-1 }, [NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED }, + [NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 }, + [NL80211_ATTR_CHANNEL_WIDTH] = { .type = NLA_U32 }, + [NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 }, + [NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_RETRY_SHORT] = { .type = NLA_U8 }, [NL80211_ATTR_WIPHY_RETRY_LONG] = { .type = NLA_U8 }, [NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 }, @@ -1360,35 +1365,13 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) wdev->iftype == NL80211_IFTYPE_P2P_GO; } -static bool nl80211_valid_channel_type(struct genl_info *info, - enum nl80211_channel_type *channel_type) -{ - enum nl80211_channel_type tmp; - - if (!info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) - return false; - - tmp = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); - if (tmp != NL80211_CHAN_NO_HT && - tmp != NL80211_CHAN_HT20 && - tmp != NL80211_CHAN_HT40PLUS && - tmp != NL80211_CHAN_HT40MINUS) - return false; - - if (channel_type) - *channel_type = tmp; - - return true; -} - static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, struct genl_info *info, struct cfg80211_chan_def *chandef) { struct ieee80211_sta_ht_cap *ht_cap; - struct ieee80211_channel *sc; - u32 control_freq; - int offs; + struct ieee80211_sta_vht_cap *vht_cap; + u32 control_freq, width; if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) return 
-EINVAL; @@ -1396,47 +1379,105 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, control_freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); chandef->chan = ieee80211_get_channel(&rdev->wiphy, control_freq); - chandef->_type = NL80211_CHAN_NO_HT; - - if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE] && - !nl80211_valid_channel_type(info, &chandef->_type)) - return -EINVAL; + chandef->width = NL80211_CHAN_WIDTH_20_NOHT; + chandef->center_freq1 = control_freq; + chandef->center_freq2 = 0; /* Primary channel not allowed */ if (!chandef->chan || chandef->chan->flags & IEEE80211_CHAN_DISABLED) return -EINVAL; + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { + enum nl80211_channel_type chantype; + + chantype = nla_get_u32( + info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); + + switch (chantype) { + case NL80211_CHAN_NO_HT: + case NL80211_CHAN_HT20: + case NL80211_CHAN_HT40PLUS: + case NL80211_CHAN_HT40MINUS: + cfg80211_chandef_create(chandef, chandef->chan, + chantype); + break; + default: + return -EINVAL; + } + } else if (info->attrs[NL80211_ATTR_CHANNEL_WIDTH]) { + chandef->width = + nla_get_u32(info->attrs[NL80211_ATTR_CHANNEL_WIDTH]); + if (info->attrs[NL80211_ATTR_CENTER_FREQ1]) + chandef->center_freq1 = + nla_get_u32( + info->attrs[NL80211_ATTR_CENTER_FREQ1]); + if (info->attrs[NL80211_ATTR_CENTER_FREQ2]) + chandef->center_freq2 = + nla_get_u32( + info->attrs[NL80211_ATTR_CENTER_FREQ2]); + } + ht_cap = &rdev->wiphy.bands[chandef->chan->band]->ht_cap; + vht_cap = &rdev->wiphy.bands[chandef->chan->band]->vht_cap; - switch (chandef->_type) { - case NL80211_CHAN_NO_HT: + if (!cfg80211_chan_def_valid(chandef)) + return -EINVAL; + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_20: + if (!ht_cap->ht_supported) + return -EINVAL; + case NL80211_CHAN_WIDTH_20_NOHT: + width = 20; break; - case NL80211_CHAN_HT40MINUS: - if (chandef->chan->flags & IEEE80211_CHAN_NO_HT40MINUS) + case NL80211_CHAN_WIDTH_40: + width = 40; + /* quick early 
regulatory check */ + if (chandef->center_freq1 < control_freq && + chandef->chan->flags & IEEE80211_CHAN_NO_HT40MINUS) + return -EINVAL; + if (chandef->center_freq1 > control_freq && + chandef->chan->flags & IEEE80211_CHAN_NO_HT40PLUS) + return -EINVAL; + if (!ht_cap->ht_supported) return -EINVAL; - offs = -20; - /* fall through */ - case NL80211_CHAN_HT40PLUS: - if (chandef->_type == NL80211_CHAN_HT40PLUS) { - if (chandef->chan->flags & IEEE80211_CHAN_NO_HT40PLUS) - return -EINVAL; - offs = 20; - } if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT) return -EINVAL; - - sc = ieee80211_get_channel(&rdev->wiphy, - chandef->chan->center_freq + offs); - if (!sc || sc->flags & IEEE80211_CHAN_DISABLED) + break; + case NL80211_CHAN_WIDTH_80: + width = 80; + if (!vht_cap->vht_supported) return -EINVAL; - /* fall through */ - case NL80211_CHAN_HT20: - if (!ht_cap->ht_supported) + break; + case NL80211_CHAN_WIDTH_80P80: + width = 80; + if (!vht_cap->vht_supported) + return -EINVAL; + if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)) + return -EINVAL; + break; + case NL80211_CHAN_WIDTH_160: + width = 160; + if (!vht_cap->vht_supported) + return -EINVAL; + if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ)) return -EINVAL; break; + default: + return -EINVAL; } + if (!cfg80211_secondary_chans_ok(&rdev->wiphy, chandef->center_freq1, + width, IEEE80211_CHAN_DISABLED)) + return -EINVAL; + if (chandef->center_freq2 && + !cfg80211_secondary_chans_ok(&rdev->wiphy, chandef->center_freq2, + width, IEEE80211_CHAN_DISABLED)) + return -EINVAL; + + /* TODO: missing regulatory check on bandwidth */ + return 0; } @@ -1800,10 +1841,28 @@ static inline u64 wdev_id(struct wireless_dev *wdev) static int nl80211_send_chandef(struct sk_buff *msg, struct cfg80211_chan_def *chandef) { + WARN_ON(!cfg80211_chan_def_valid(chandef)); + if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, chandef->chan->center_freq)) 
return -ENOBUFS; - if (nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, chandef->_type)) + switch (chandef->width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + if (nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, + cfg80211_get_chandef_type(chandef))) + return -ENOBUFS; + break; + default: + break; + } + if (nla_put_u32(msg, NL80211_ATTR_CHANNEL_WIDTH, chandef->width)) + return -ENOBUFS; + if (nla_put_u32(msg, NL80211_ATTR_CENTER_FREQ1, chandef->center_freq1)) + return -ENOBUFS; + if (chandef->center_freq2 && + nla_put_u32(msg, NL80211_ATTR_CENTER_FREQ2, chandef->center_freq2)) return -ENOBUFS; return 0; } @@ -5447,7 +5506,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(connkeys)) return PTR_ERR(connkeys); - if ((ibss.chandef._type != NL80211_CHAN_NO_HT) && no_ht) { + if ((ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) && + no_ht) { kfree(connkeys); return -EINVAL; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 1370d52b1393..3c7aa1221563 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -126,25 +126,33 @@ #define CHAN_PR_FMT ", band: %d, freq: %u" #define CHAN_PR_ARG __entry->band, __entry->center_freq -#define CHAN_DEF_ENTRY __field(enum ieee80211_band, band) \ - __field(u16, center_freq) \ - __field(u32, channel_type) +#define CHAN_DEF_ENTRY __field(enum ieee80211_band, band) \ + __field(u32, control_freq) \ + __field(u32, width) \ + __field(u32, center_freq1) \ + __field(u32, center_freq2) #define CHAN_DEF_ASSIGN(chandef) \ do { \ if ((chandef) && (chandef)->chan) { \ __entry->band = (chandef)->chan->band; \ - __entry->center_freq = \ + __entry->control_freq = \ (chandef)->chan->center_freq; \ - __entry->channel_type = (chandef)->_type; \ + __entry->width = (chandef)->width; \ + __entry->center_freq1 = (chandef)->center_freq1;\ + __entry->center_freq2 = (chandef)->center_freq2;\ } else { \ __entry->band = 0; \ - 
__entry->center_freq = 0; \ - __entry->channel_type = 0; \ + __entry->control_freq = 0; \ + __entry->width = 0; \ + __entry->center_freq1 = 0; \ + __entry->center_freq2 = 0; \ } \ } while (0) -#define CHAN_DEF_PR_FMT ", band: %d, freq: %u, chantype: %d" -#define CHAN_DEF_PR_ARG __entry->band, __entry->center_freq, \ - __entry->channel_type +#define CHAN_DEF_PR_FMT \ + ", band: %d, control freq: %u, width: %d, cf1: %u, cf2: %u" +#define CHAN_DEF_PR_ARG __entry->band, __entry->control_freq, \ + __entry->width, __entry->center_freq1, \ + __entry->center_freq2 #define SINFO_ENTRY __field(int, generation) \ __field(u32, connected_time) \ diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index da3307f32362..f9680c9cf9b3 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -785,7 +785,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct cfg80211_chan_def chandef = { - ._type = NL80211_CHAN_NO_HT, + .width = NL80211_CHAN_WIDTH_20_NOHT, }; int freq, err; @@ -800,6 +800,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, return freq; if (freq == 0) return -EINVAL; + chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; @@ -813,6 +814,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, return freq; if (freq == 0) return -EINVAL; + chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index e6e5dbf2f616..873af63187c0 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -120,7 +120,8 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, */ if (chan && !wdev->wext.connect.ssid_len) { struct cfg80211_chan_def chandef = { - ._type = NL80211_CHAN_NO_HT, + .width = 
NL80211_CHAN_WIDTH_20_NOHT, + .center_freq1 = freq, }; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); -- cgit v1.2.3 From db9c64cf8d9d3fcbc34b09d037f266d1fc9f928c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Nov 2012 14:56:41 +0100 Subject: nl80211/cfg80211: add VHT MCS support Add support for reporting and calculating VHT MCSes. Note that I'm not completely sure that the bitrate calculations are correct, nor that they can't be simplified. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 24 +++++++++----- include/uapi/linux/nl80211.h | 12 ++++++- net/wireless/nl80211.c | 58 +++++++++++++++++++++++++--------- net/wireless/util.c | 74 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 144 insertions(+), 24 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 977da58fb7ea..e78db2cf3d1b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -662,16 +662,24 @@ enum station_info_flags { * Used by the driver to indicate the specific rate transmission * type for 802.11n transmissions. 
* - * @RATE_INFO_FLAGS_MCS: @tx_bitrate_mcs filled - * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 Mhz width transmission + * @RATE_INFO_FLAGS_MCS: mcs field filled with HT MCS + * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS + * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 MHz width transmission + * @RATE_INFO_FLAGS_80_MHZ_WIDTH: 80 MHz width transmission + * @RATE_INFO_FLAGS_80P80_MHZ_WIDTH: 80+80 MHz width transmission + * @RATE_INFO_FLAGS_160_MHZ_WIDTH: 160 MHz width transmission * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval - * @RATE_INFO_FLAGS_60G: 60gHz MCS + * @RATE_INFO_FLAGS_60G: 60GHz MCS */ enum rate_info_flags { - RATE_INFO_FLAGS_MCS = 1<<0, - RATE_INFO_FLAGS_40_MHZ_WIDTH = 1<<1, - RATE_INFO_FLAGS_SHORT_GI = 1<<2, - RATE_INFO_FLAGS_60G = 1<<3, + RATE_INFO_FLAGS_MCS = BIT(0), + RATE_INFO_FLAGS_VHT_MCS = BIT(1), + RATE_INFO_FLAGS_40_MHZ_WIDTH = BIT(2), + RATE_INFO_FLAGS_80_MHZ_WIDTH = BIT(3), + RATE_INFO_FLAGS_80P80_MHZ_WIDTH = BIT(4), + RATE_INFO_FLAGS_160_MHZ_WIDTH = BIT(5), + RATE_INFO_FLAGS_SHORT_GI = BIT(6), + RATE_INFO_FLAGS_60G = BIT(7), }; /** @@ -682,11 +690,13 @@ enum rate_info_flags { * @flags: bitflag of flags from &enum rate_info_flags * @mcs: mcs index if struct describes a 802.11n bitrate * @legacy: bitrate in 100kbit/s for 802.11abg + * @nss: number of streams (VHT only) */ struct rate_info { u8 flags; u8 mcs; u16 legacy; + u8 nss; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 84f9c7d84c69..33a417481ad8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1734,10 +1734,15 @@ struct nl80211_sta_flag_update { * @__NL80211_RATE_INFO_INVALID: attribute number 0 is reserved * @NL80211_RATE_INFO_BITRATE: total bitrate (u16, 100kbit/s) * @NL80211_RATE_INFO_MCS: mcs index for 802.11n (u8) - * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 Mhz dualchannel bitrate + * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 MHz dualchannel bitrate * @NL80211_RATE_INFO_SHORT_GI: 400ns guard interval * 
@NL80211_RATE_INFO_BITRATE32: total bitrate (u32, 100kbit/s) * @NL80211_RATE_INFO_MAX: highest rate_info number currently defined + * @NL80211_RATE_INFO_VHT_MCS: MCS index for VHT (u8) + * @NL80211_RATE_INFO_VHT_NSS: number of streams in VHT (u8) + * @NL80211_RATE_INFO_80_MHZ_WIDTH: 80 MHz VHT rate + * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: 80+80 MHz VHT rate + * @NL80211_RATE_INFO_160_MHZ_WIDTH: 160 MHz VHT rate * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ enum nl80211_rate_info { @@ -1747,6 +1752,11 @@ enum nl80211_rate_info { NL80211_RATE_INFO_40_MHZ_WIDTH, NL80211_RATE_INFO_SHORT_GI, NL80211_RATE_INFO_BITRATE32, + NL80211_RATE_INFO_VHT_MCS, + NL80211_RATE_INFO_VHT_NSS, + NL80211_RATE_INFO_80_MHZ_WIDTH, + NL80211_RATE_INFO_80P80_MHZ_WIDTH, + NL80211_RATE_INFO_160_MHZ_WIDTH, /* keep last */ __NL80211_RATE_INFO_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 15158a3d64a3..d038fa45ecd1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2890,29 +2890,52 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, rate = nla_nest_start(msg, attr); if (!rate) - goto nla_put_failure; + return false; /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */ bitrate = cfg80211_calculate_bitrate(info); /* report 16-bit bitrate only if we can */ bitrate_compat = bitrate < (1UL << 16) ? 
bitrate : 0; - if ((bitrate > 0 && - nla_put_u32(msg, NL80211_RATE_INFO_BITRATE32, bitrate)) || - (bitrate_compat > 0 && - nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat)) || - ((info->flags & RATE_INFO_FLAGS_MCS) && - nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs)) || - ((info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) && - nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) || - ((info->flags & RATE_INFO_FLAGS_SHORT_GI) && - nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI))) - goto nla_put_failure; + if (bitrate > 0 && + nla_put_u32(msg, NL80211_RATE_INFO_BITRATE32, bitrate)) + return false; + if (bitrate_compat > 0 && + nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat)) + return false; + + if (info->flags & RATE_INFO_FLAGS_MCS) { + if (nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs)) + return false; + if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH && + nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) + return false; + if (info->flags & RATE_INFO_FLAGS_SHORT_GI && + nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) + return false; + } else if (info->flags & RATE_INFO_FLAGS_VHT_MCS) { + if (nla_put_u8(msg, NL80211_RATE_INFO_VHT_MCS, info->mcs)) + return false; + if (nla_put_u8(msg, NL80211_RATE_INFO_VHT_NSS, info->nss)) + return false; + if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH && + nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) + return false; + if (info->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH && + nla_put_flag(msg, NL80211_RATE_INFO_80_MHZ_WIDTH)) + return false; + if (info->flags & RATE_INFO_FLAGS_80P80_MHZ_WIDTH && + nla_put_flag(msg, NL80211_RATE_INFO_80P80_MHZ_WIDTH)) + return false; + if (info->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH && + nla_put_flag(msg, NL80211_RATE_INFO_160_MHZ_WIDTH)) + return false; + if (info->flags & RATE_INFO_FLAGS_SHORT_GI && + nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) + return false; + } nla_nest_end(msg, rate); return true; - -nla_put_failure: - return false; } static int nl80211_send_station(struct 
sk_buff *msg, u32 portid, u32 seq, @@ -5475,6 +5498,11 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef)) return -EINVAL; + if (ibss.chandef.width > NL80211_CHAN_WIDTH_40) + return -EINVAL; + if (ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && + !(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)) + ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; diff --git a/net/wireless/util.c b/net/wireless/util.c index db61fe8a6b6d..3cce6e486219 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -944,14 +944,86 @@ static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate) return __mcs2bitrate[rate->mcs]; } +static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) +{ + static const u32 base[4][10] = { + { 6500000, + 13000000, + 19500000, + 26000000, + 39000000, + 52000000, + 58500000, + 65000000, + 78000000, + 0, + }, + { 13500000, + 27000000, + 40500000, + 54000000, + 81000000, + 108000000, + 121500000, + 135000000, + 162000000, + 180000000, + }, + { 29300000, + 58500000, + 87800000, + 117000000, + 175500000, + 234000000, + 263300000, + 292500000, + 351000000, + 390000000, + }, + { 58500000, + 117000000, + 175500000, + 234000000, + 351000000, + 468000000, + 526500000, + 585000000, + 702000000, + 780000000, + }, + }; + u32 bitrate; + int idx; + + if (WARN_ON_ONCE(rate->mcs > 9)) + return 0; + + idx = rate->flags & (RATE_INFO_FLAGS_160_MHZ_WIDTH | + RATE_INFO_FLAGS_80P80_MHZ_WIDTH) ? 3 : + rate->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH ? 2 : + rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH ? 
1 : 0; + + bitrate = base[idx][rate->mcs]; + bitrate *= rate->nss; + + if (rate->flags & RATE_INFO_FLAGS_SHORT_GI) + bitrate = (bitrate / 9) * 10; + + /* do NOT round down here */ + return (bitrate + 50000) / 100000; +} + u32 cfg80211_calculate_bitrate(struct rate_info *rate) { int modulation, streams, bitrate; - if (!(rate->flags & RATE_INFO_FLAGS_MCS)) + if (!(rate->flags & RATE_INFO_FLAGS_MCS) && + !(rate->flags & RATE_INFO_FLAGS_VHT_MCS)) return rate->legacy; if (rate->flags & RATE_INFO_FLAGS_60G) return cfg80211_calculate_bitrate_60g(rate); + if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) + return cfg80211_calculate_bitrate_vht(rate); /* the formula below does only work for MCS values smaller than 32 */ if (WARN_ON_ONCE(rate->mcs >= 32)) -- cgit v1.2.3 From 53cabad70ecf0c245b41285de64a74a6c3ee9933 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Nov 2012 15:17:28 +0100 Subject: nl80211: support P2P GO powersave configuration If a driver supports P2P GO powersave, allow it to set the new feature flags for it and allow userspace to configure the parameters for it. This can be done at GO startup and later changed with SET_BSS. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 ++++++ include/uapi/linux/nl80211.h | 16 +++++++++++++ net/wireless/nl80211.c | 56 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a238f41e55c2..731b48fa238b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -538,6 +538,8 @@ struct cfg80211_beacon_data { * @privacy: the BSS uses privacy * @auth_type: Authentication type (algorithm) * @inactivity_timeout: time in seconds to determine station's inactivity. 
+ * @p2p_ctwindow: P2P CT Window + * @p2p_opp_ps: P2P opportunistic PS */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -552,6 +554,8 @@ struct cfg80211_ap_settings { bool privacy; enum nl80211_auth_type auth_type; int inactivity_timeout; + u8 p2p_ctwindow; + bool p2p_opp_ps; }; /** @@ -913,6 +917,8 @@ struct mpath_info { * @ap_isolate: do not forward packets between connected stations * @ht_opmode: HT Operation mode * (u16 = opmode, -1 = do not change) + * @p2p_ctwindow: P2P CT Window (-1 = no change) + * @p2p_opp_ps: P2P opportunistic PS (-1 = no change) */ struct bss_parameters { int use_cts_prot; @@ -922,6 +928,7 @@ struct bss_parameters { u8 basic_rates_len; int ap_isolate; int ht_opmode; + s8 p2p_ctwindow, p2p_opp_ps; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 33a417481ad8..e3e19f8b16f2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1303,6 +1303,13 @@ enum nl80211_commands { * * @NL80211_ATTR_SCAN_FLAGS: scan request control flags (u32) * + * @NL80211_ATTR_P2P_CTWINDOW: P2P GO Client Traffic Window (u8), used with + * the START_AP and SET_BSS commands + * @NL80211_ATTR_P2P_OPPPS: P2P GO opportunistic PS (u8), used with the + * START_AP and SET_BSS commands. This can have the values 0 or 1; + * if not given in START_AP 0 is assumed, if not given in SET_BSS + * no change is made. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1570,6 +1577,9 @@ enum nl80211_attrs { NL80211_ATTR_CENTER_FREQ1, NL80211_ATTR_CENTER_FREQ2, + NL80211_ATTR_P2P_CTWINDOW, + NL80211_ATTR_P2P_OPPPS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3126,6 +3136,10 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_NEED_OBSS_SCAN: The driver expects userspace to perform * OBSS scans and generate 20/40 BSS coex reports. 
This flag is used only * for drivers implementing the CONNECT API, for AUTH/ASSOC it is implied. + * @NL80211_FEATURE_P2P_GO_CTWIN: P2P GO implementation supports CT Window + * setting + * @NL80211_FEATURE_P2P_GO_OPPPS: P2P GO implementation supports opportunistic + * powersave */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3139,6 +3153,8 @@ enum nl80211_feature_flags { NL80211_FEATURE_AP_SCAN = 1 << 8, NL80211_FEATURE_VIF_TXPOWER = 1 << 9, NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10, + NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11, + NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12, }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index eb0aa71a02b3..7f53aafd47f7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -363,6 +363,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_SAE_DATA] = { .type = NLA_BINARY, }, [NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN }, [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, + [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, + [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, }; /* policy for the key attributes */ @@ -2702,6 +2704,32 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]); } + if (info->attrs[NL80211_ATTR_P2P_CTWINDOW]) { + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; + params.p2p_ctwindow = + nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]); + if (params.p2p_ctwindow > 127) + return -EINVAL; + if (params.p2p_ctwindow != 0 && + !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN)) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_P2P_OPPPS]) { + u8 tmp; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; + tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]); + if (tmp > 1) + return -EINVAL; + params.p2p_opp_ps = tmp; + if (params.p2p_opp_ps != 0 && + !(rdev->wiphy.features & 
NL80211_FEATURE_P2P_GO_OPPPS)) + return -EINVAL; + } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { err = nl80211_parse_chandef(rdev, info, &params.chandef); if (err) @@ -3668,6 +3696,8 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) params.use_short_slot_time = -1; params.ap_isolate = -1; params.ht_opmode = -1; + params.p2p_ctwindow = -1; + params.p2p_opp_ps = -1; if (info->attrs[NL80211_ATTR_BSS_CTS_PROT]) params.use_cts_prot = @@ -3690,6 +3720,32 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) params.ht_opmode = nla_get_u16(info->attrs[NL80211_ATTR_BSS_HT_OPMODE]); + if (info->attrs[NL80211_ATTR_P2P_CTWINDOW]) { + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; + params.p2p_ctwindow = + nla_get_s8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]); + if (params.p2p_ctwindow < 0) + return -EINVAL; + if (params.p2p_ctwindow != 0 && + !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN)) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_P2P_OPPPS]) { + u8 tmp; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; + tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]); + if (tmp > 1) + return -EINVAL; + params.p2p_opp_ps = tmp; + if (params.p2p_opp_ps && + !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS)) + return -EINVAL; + } + if (!rdev->ops->change_bss) return -EOPNOTSUPP; -- cgit v1.2.3 From d871befe357ccc262edbb0a4f9aeea650012edf5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 27 Nov 2012 14:49:42 +0100 Subject: netfilter: ctnetlink: dump entries from the dying and unconfirmed lists This patch adds a new operation to dump the content of the dying and unconfirmed lists. Under some situations, the global conntrack counter can be inconsistent with the number of entries that we can dump from the conntrack table. 
The way to resolve this is to allow dumping the content of the unconfirmed and dying lists, so far it was not possible to look at its content. This provides some extra instrumentation to resolve problematic situations in which anyone suspects memory leaks. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 2 + net/netfilter/nf_conntrack_netlink.c | 108 +++++++++++++++++++++ 2 files changed, 110 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 43bfe3e1685b..86e930cf3dfb 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -9,6 +9,8 @@ enum cntl_msg_types { IPCTNL_MSG_CT_GET_CTRZERO, IPCTNL_MSG_CT_GET_STATS_CPU, IPCTNL_MSG_CT_GET_STATS, + IPCTNL_MSG_CT_GET_DYING, + IPCTNL_MSG_CT_GET_UNCONFIRMED, IPCTNL_MSG_MAX }; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 34370a928360..c24a00a73c7b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1089,6 +1089,112 @@ out: return err == -EAGAIN ? 
-ENOBUFS : err; } +static int ctnetlink_done_list(struct netlink_callback *cb) +{ + if (cb->args[1]) + nf_ct_put((struct nf_conn *)cb->args[1]); + return 0; +} + +static int +ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, + struct hlist_nulls_head *list) +{ + struct nf_conn *ct, *last; + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + u_int8_t l3proto = nfmsg->nfgen_family; + int res; + + if (cb->args[2]) + return 0; + + spin_lock_bh(&nf_conntrack_lock); + last = (struct nf_conn *)cb->args[1]; +restart: + hlist_nulls_for_each_entry(h, n, list, hnnode) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (l3proto && nf_ct_l3num(ct) != l3proto) + continue; + if (cb->args[1]) { + if (ct != last) + continue; + cb->args[1] = 0; + } + rcu_read_lock(); + res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + ct); + rcu_read_unlock(); + if (res < 0) { + nf_conntrack_get(&ct->ct_general); + cb->args[1] = (unsigned long)ct; + goto out; + } + } + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } else + cb->args[2] = 1; +out: + spin_unlock_bh(&nf_conntrack_lock); + if (last) + nf_ct_put(last); + + return skb->len; +} + +static int +ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + + return ctnetlink_dump_list(skb, cb, &net->ct.dying); +} + +static int +ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnetlink_dump_dying, + .done = ctnetlink_done_list, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } + + return -EOPNOTSUPP; +} + +static int +ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + + return 
ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed); +} + +static int +ctnetlink_get_ct_unconfirmed(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnetlink_dump_unconfirmed, + .done = ctnetlink_done_list, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } + + return -EOPNOTSUPP; +} + #ifdef CONFIG_NF_NAT_NEEDED static int ctnetlink_parse_nat_setup(struct nf_conn *ct, @@ -2712,6 +2818,8 @@ static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { .policy = ct_nla_policy }, [IPCTNL_MSG_CT_GET_STATS_CPU] = { .call = ctnetlink_stat_ct_cpu }, [IPCTNL_MSG_CT_GET_STATS] = { .call = ctnetlink_stat_ct }, + [IPCTNL_MSG_CT_GET_DYING] = { .call = ctnetlink_get_ct_dying }, + [IPCTNL_MSG_CT_GET_UNCONFIRMED] = { .call = ctnetlink_get_ct_unconfirmed }, }; static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { -- cgit v1.2.3 From 5d097109257c03a71845729f8db6b5770c4bbedc Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 3 Dec 2012 10:07:14 +0000 Subject: tun: only queue packets on device Historically tun supported two modes of operation: - in default mode, a small number of packets would get queued at the device, the rest would be queued in qdisc - in one queue mode, all packets would get queued at the device This might have made sense up to a point where we made the queue depth for both modes the same and set it to a huge value (500) so unless the consumer is stuck the chance of losing packets is small. 
Thus in practice both modes behave the same, but the default mode has some problems: - if packets are never consumed, fragments are never orphaned which causes a DoS for sender using zero copy transmit - overrun errors are hard to diagnose: fifo error is incremented only once so you can not distinguish between userspace that is stuck and a transient failure, tcpdump on the device does not show any traffic Userspace solves this simply by enabling IFF_ONE_QUEUE but there seems to be little point in not doing the right thing for everyone, by default. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/tun.c | 24 ++++++++---------------- include/uapi/linux/if_tun.h | 2 ++ 2 files changed, 10 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 71f6874d8048..a1b2389e6d7f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -690,21 +690,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) * number of queues. */ if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) - >= dev->tx_queue_len / tun->numqueues){ - if (!(tun->flags & TUN_ONE_QUEUE)) { - /* Normal queueing mode. */ - /* Packet scheduler handles dropping of further packets. */ - netif_stop_subqueue(dev, txq); - - /* We won't see all dropped packets individually, so overrun - * error is more appropriate. */ - dev->stats.tx_fifo_errors++; - } else { - /* Single queue mode. - * Driver handles dropping of all packets itself. */ - goto drop; - } - } + >= dev->tx_queue_len / tun->numqueues) + goto drop; /* Orphan the skb - required as we might hang on to it * for indefinite time. 
*/ @@ -1319,7 +1306,6 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, schedule(); continue; } - netif_wake_subqueue(tun->dev, tfile->queue_index); ret = tun_put_user(tun, tfile, skb, iv, len); kfree_skb(skb); @@ -1482,6 +1468,9 @@ static int tun_flags(struct tun_struct *tun) if (tun->flags & TUN_NO_PI) flags |= IFF_NO_PI; + /* This flag has no real effect. We track the value for backwards + * compatibility. + */ if (tun->flags & TUN_ONE_QUEUE) flags |= IFF_ONE_QUEUE; @@ -1632,6 +1621,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else tun->flags &= ~TUN_NO_PI; + /* This flag has no real effect. We track the value for backwards + * compatibility. + */ if (ifr->ifr_flags & IFF_ONE_QUEUE) tun->flags |= TUN_ONE_QUEUE; else diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 958497ad5bb5..2835b85fd46d 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -31,6 +31,7 @@ #define TUN_FASYNC 0x0010 #define TUN_NOCHECKSUM 0x0020 #define TUN_NO_PI 0x0040 +/* This flag has no real effect */ #define TUN_ONE_QUEUE 0x0080 #define TUN_PERSIST 0x0100 #define TUN_VNET_HDR 0x0200 @@ -60,6 +61,7 @@ #define IFF_TUN 0x0001 #define IFF_TAP 0x0002 #define IFF_NO_PI 0x1000 +/* This flag has no real effect */ #define IFF_ONE_QUEUE 0x2000 #define IFF_VNET_HDR 0x4000 #define IFF_TUN_EXCL 0x8000 -- cgit v1.2.3 From d67b8c616b48df30e2836d797795f2420d109bc9 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:13:35 +0000 Subject: netconf: advertise mc_forwarding status This patch advertise the MC_FORWARDING status for IPv4 and IPv6. This field is readonly, only multicast engine in the kernel updates it. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/linux/inetdevice.h | 3 +++ include/net/addrconf.h | 3 +++ include/uapi/linux/netconf.h | 1 + net/ipv4/devinet.c | 10 ++++++++-- net/ipv4/ipmr.c | 12 ++++++++++++ net/ipv6/addrconf.c | 10 ++++++++-- net/ipv6/ip6mr.c | 20 ++++++++++++++++++-- 7 files changed, 53 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index d032780d0ce5..a9d828976a77 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -171,6 +171,9 @@ struct in_ifaddr { extern int register_inetaddr_notifier(struct notifier_block *nb); extern int unregister_inetaddr_notifier(struct notifier_block *nb); +extern void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv4_devconf *devconf); + extern struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref); static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) { diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 9e63e76b20e7..df4ef9453384 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -172,6 +172,9 @@ extern bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, extern int register_inet6addr_notifier(struct notifier_block *nb); extern int unregister_inet6addr_notifier(struct notifier_block *nb); +extern void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv6_devconf *devconf); + /** * __in6_dev_get - get inet6_dev pointer from netdevice * @dev: network device diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index 75dcbc587fb5..64804a798b0c 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -13,6 +13,7 @@ enum { NETCONFA_IFINDEX, NETCONFA_FORWARDING, NETCONFA_RP_FILTER, + NETCONFA_MC_FORWARDING, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e13183abd7f6..cc06a47f1216 
100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1453,6 +1453,8 @@ static int inet_netconf_msgsize_devconf(int type) size += nla_total_size(4); if (type == -1 || type == NETCONFA_RP_FILTER) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_MC_FORWARDING) + size += nla_total_size(4); return size; } @@ -1485,6 +1487,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_RP_FILTER, IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_MC_FORWARDING) && + nla_put_s32(skb, NETCONFA_MC_FORWARDING, + IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1493,8 +1499,8 @@ nla_put_failure: return -EMSGSIZE; } -static void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv4_devconf *devconf) +void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv4_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 58e4160fdcee..0c452e3fdc1b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -65,6 +65,7 @@ #include #include #include +#include #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) #define CONFIG_IP_PIMSM 1 @@ -582,6 +583,9 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, in_dev = __in_dev_get_rtnl(dev); if (in_dev) { IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; + inet_netconf_notify_devconf(dev_net(dev), + NETCONFA_MC_FORWARDING, + dev->ifindex, &in_dev->cnf); ip_rt_multicast_event(in_dev); } @@ -772,6 +776,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, return -EADDRNOTAVAIL; } IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; + inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex, + &in_dev->cnf); ip_rt_multicast_event(in_dev); /* Fill in the VIF structures */ @@ -1185,6 +1191,9 @@ static void mrtsock_destruct(struct sock *sk) 
ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; + inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); mroute_clean_tables(mrt); } @@ -1236,6 +1245,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi if (ret == 0) { rcu_assign_pointer(mrt->mroute_sk, sk); IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; + inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all); } rtnl_unlock(); return ret; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 22ae75d54017..28e0e627229c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -469,6 +469,8 @@ static int inet6_netconf_msgsize_devconf(int type) /* type -1 is used for ALL */ if (type == -1 || type == NETCONFA_FORWARDING) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_MC_FORWARDING) + size += nla_total_size(4); return size; } @@ -496,6 +498,10 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if ((type == -1 || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_MC_FORWARDING) && + nla_put_s32(skb, NETCONFA_MC_FORWARDING, + devconf->mc_forwarding) < 0) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -504,8 +510,8 @@ nla_put_failure: return -EMSGSIZE; } -static void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv6_devconf *devconf) +void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv6_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 926ea544f499..1c05fe604d37 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -52,6 +52,7 @@ #include #include #include +#include struct mr6_table { struct 
list_head list; @@ -805,8 +806,12 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) dev_set_allmulti(dev, -1); in6_dev = __in6_dev_get(dev); - if (in6_dev) + if (in6_dev) { in6_dev->cnf.mc_forwarding--; + inet6_netconf_notify_devconf(dev_net(dev), + NETCONFA_MC_FORWARDING, + dev->ifindex, &in6_dev->cnf); + } if (v->flags & MIFF_REGISTER) unregister_netdevice_queue(dev, head); @@ -958,8 +963,12 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, } in6_dev = __in6_dev_get(dev); - if (in6_dev) + if (in6_dev) { in6_dev->cnf.mc_forwarding++; + inet6_netconf_notify_devconf(dev_net(dev), + NETCONFA_MC_FORWARDING, + dev->ifindex, &in6_dev->cnf); + } /* * Fill in the VIF structures @@ -1513,6 +1522,9 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) if (likely(mrt->mroute6_sk == NULL)) { mrt->mroute6_sk = sk; net->ipv6.devconf_all->mc_forwarding++; + inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); } else err = -EADDRINUSE; @@ -1535,6 +1547,10 @@ int ip6mr_sk_done(struct sock *sk) write_lock_bh(&mrt_lock); mrt->mroute6_sk = NULL; net->ipv6.devconf_all->mc_forwarding--; + inet6_netconf_notify_devconf(net, + NETCONFA_MC_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); write_unlock_bh(&mrt_lock); mroute_clean_tables(mrt); -- cgit v1.2.3 From adfa85e45dac616ff4f8bfceff1621ccafc0b1ff Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:13:37 +0000 Subject: ipmr/ip6mr: advertise mfc stats via rtnetlink These statistics can be checked only via /proc/net/ip_mr_cache or SIOCGETSGCNT[_IN6] and thus only for the table RT_TABLE_DEFAULT. Advertising them via rtnetlink allows to get statistics for all cache entries, whatever the table is. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/uapi/linux/rtnetlink.h | 7 +++++++ net/ipv4/ipmr.c | 7 +++++++ net/ipv6/ip6mr.c | 7 +++++++ 3 files changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 3dee071770d5..80abe27dc2a7 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -288,6 +288,7 @@ enum rtattr_type_t { RTA_MP_ALGO, /* no longer used */ RTA_TABLE, RTA_MARK, + RTA_MFC_STATS, __RTA_MAX }; @@ -408,6 +409,12 @@ struct rta_session { } u; }; +struct rta_mfc_stats { + __u64 mfcs_packets; + __u64 mfcs_bytes; + __u64 mfcs_wrong_if; +}; + /**** * General form of address family dependent message. ****/ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 0c452e3fdc1b..c5617d646b93 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2046,6 +2046,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int ct; struct rtnexthop *nhp; struct nlattr *mp_attr; + struct rta_mfc_stats mfcs; /* If cache is unresolved, don't try to parse IIF and OIF */ if (c->mfc_parent >= MAXVIFS) @@ -2074,6 +2075,12 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, nla_nest_end(skb, mp_attr); + mfcs.mfcs_packets = c->mfc_un.res.pkt; + mfcs.mfcs_bytes = c->mfc_un.res.bytes; + mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; + if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0) + return -EMSGSIZE; + rtm->rtm_type = RTN_MULTICAST; return 1; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 23f364a9efb5..4220a7b93386 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2120,6 +2120,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, int ct; struct rtnexthop *nhp; struct nlattr *mp_attr; + struct rta_mfc_stats mfcs; /* If cache is unresolved, don't try to parse IIF and OIF */ if (c->mf6c_parent >= MAXMIFS) @@ -2149,6 +2150,12 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, 
nla_nest_end(skb, mp_attr); + mfcs.mfcs_packets = c->mfc_un.res.pkt; + mfcs.mfcs_bytes = c->mfc_un.res.bytes; + mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; + if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0) + return -EMSGSIZE; + rtm->rtm_type = RTN_MULTICAST; return 1; } -- cgit v1.2.3 From 9a68ac72a44ecb6d4dc4a7cadf45e1a2cd183885 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:13:38 +0000 Subject: ipmr/ip6mr: report origin of mfc entry into rtnl msg A mfc entry can be static or not (added via the mroute_sk socket). The patch reports MFC_STATIC flag into rtm_protocol by setting rtm_protocol to RTPROT_STATIC or RTPROT_MROUTED. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + net/ipv4/ipmr.c | 5 ++++- net/ipv6/ip6mr.c | 5 ++++- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 80abe27dc2a7..33d29cea37ea 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -227,6 +227,7 @@ enum { #define RTPROT_XORP 14 /* XORP */ #define RTPROT_NTK 15 /* Netsukuku */ #define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ /* rtm_scope diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c5617d646b93..91782a7634c2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2169,7 +2169,10 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, goto nla_put_failure; rtm->rtm_type = RTN_MULTICAST; rtm->rtm_scope = RT_SCOPE_UNIVERSE; - rtm->rtm_protocol = RTPROT_UNSPEC; + if (c->mfc_flags & MFC_STATIC) + rtm->rtm_protocol = RTPROT_STATIC; + else + rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) || diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4220a7b93386..d51b91122866 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2249,7 +2249,10 @@ 
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (nla_put_u32(skb, RTA_TABLE, mrt->id)) goto nla_put_failure; rtm->rtm_scope = RT_SCOPE_UNIVERSE; - rtm->rtm_protocol = RTPROT_UNSPEC; + if (c->mfc_flags & MFC_STATIC) + rtm->rtm_protocol = RTPROT_STATIC; + else + rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) || -- cgit v1.2.3 From 7793eeabc89fd342b96fdadce5a50c46ab77f3f9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 5 Dec 2012 13:51:17 -0700 Subject: PCI: Add and use standard PCI-X Capability register names Add and use #defines for PCI-X Capability registers and fields. Note that the PCI-X Capability has a different layout for type 0 (endpoint) and type 1 (bridge) devices. Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 15 +++++++++------ include/uapi/linux/pci_regs.h | 15 ++++++++++++++- 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ec909afa90b6..81d06676ce34 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -579,14 +579,16 @@ static void pci_set_bus_speed(struct pci_bus *bus) if (pos) { u16 status; enum pci_bus_speed max; - pci_read_config_word(bridge, pos + 2, &status); - if (status & 0x8000) { + pci_read_config_word(bridge, pos + PCI_X_BRIDGE_SSTATUS, + &status); + + if (status & PCI_X_SSTATUS_533MHZ) { max = PCI_SPEED_133MHz_PCIX_533; - } else if (status & 0x4000) { + } else if (status & PCI_X_SSTATUS_266MHZ) { max = PCI_SPEED_133MHz_PCIX_266; - } else if (status & 0x0002) { - if (((status >> 12) & 0x3) == 2) { + } else if (status & PCI_X_SSTATUS_133MHZ) { + if ((status & PCI_X_SSTATUS_VERS) == PCI_X_SSTATUS_V2) { max = PCI_SPEED_133MHz_PCIX_ECC; } else { max = PCI_SPEED_133MHz_PCIX; @@ -596,7 +598,8 @@ static void pci_set_bus_speed(struct pci_bus *bus) } bus->max_bus_speed = max; - bus->cur_bus_speed = pcix_bus_speed[(status >> 6) & 0xf]; + 
bus->cur_bus_speed = pcix_bus_speed[ + (status & PCI_X_SSTATUS_FREQ) >> 6]; return; } diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 20ae747ddf34..4cca834f9abd 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -349,7 +349,7 @@ #define PCI_AF_STATUS_TP 0x01 #define PCI_CAP_AF_SIZEOF 6 /* size of AF registers */ -/* PCI-X registers */ +/* PCI-X registers (Type 0 (non-bridge) devices) */ #define PCI_X_CMD 2 /* Modes & Features */ #define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */ @@ -389,6 +389,19 @@ #define PCI_CAP_PCIX_SIZEOF_V1 24 /* size for Version 1 */ #define PCI_CAP_PCIX_SIZEOF_V2 PCI_CAP_PCIX_SIZEOF_V1 /* Same for v2 */ +/* PCI-X registers (Type 1 (bridge) devices) */ + +#define PCI_X_BRIDGE_SSTATUS 2 /* Secondary Status */ +#define PCI_X_SSTATUS_64BIT 0x0001 /* Secondary AD interface is 64 bits */ +#define PCI_X_SSTATUS_133MHZ 0x0002 /* 133 MHz capable */ +#define PCI_X_SSTATUS_FREQ 0x03c0 /* Secondary Bus Mode and Frequency */ +#define PCI_X_SSTATUS_VERS 0x3000 /* PCI-X Capability Version */ +#define PCI_X_SSTATUS_V1 0x1000 /* Mode 2, not Mode 1 */ +#define PCI_X_SSTATUS_V2 0x2000 /* Mode 1 or Modes 1 and 2 */ +#define PCI_X_SSTATUS_266MHZ 0x4000 /* 266 MHz capable */ +#define PCI_X_SSTATUS_533MHZ 0x8000 /* 533 MHz capable */ +#define PCI_X_BRIDGE_STATUS 4 /* Bridge Status */ + /* PCI Bridge Subsystem ID registers */ #define PCI_SSVID_VENDOR_ID 4 /* PCI-Bridge subsystem vendor id register */ -- cgit v1.2.3 From c2d3babfafbb9f6629cfb47139758e59a5eb0d80 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 5 Dec 2012 16:24:45 -0500 Subject: bridge: implement multicast fast leave V3: make it a flag V2: make the toggle per-port Fast leave allows the bridge to immediately stop the multicast traffic on a port that receives an IGMP Leave when IGMP snooping is enabled; no timeouts are observed. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. 
Miller" Signed-off-by: Cong Wang --- include/uapi/linux/if_link.h | 1 + net/bridge/br_multicast.c | 2 +- net/bridge/br_netlink.c | 4 +++- net/bridge/br_private.h | 2 +- net/bridge/br_sysfs_if.c | 19 +------------------ 5 files changed, 7 insertions(+), 21 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bb58aeb7f34d..60f3b6b90602 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -218,6 +218,7 @@ enum { IFLA_BRPORT_MODE, /* mode (hairpin) */ IFLA_BRPORT_GUARD, /* bpdu guard */ IFLA_BRPORT_PROTECT, /* root port protection */ + IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2391bae4f733..a2a7a1a79081 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1225,7 +1225,7 @@ static void br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; - if (port && port->multicast_fast_leave) { + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { struct net_bridge_port_group __rcu **pp; for (pp = &mp->ports; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 65429b99a2a3..850b7d1f3a41 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -53,7 +53,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || - nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK))) + nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || + nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE))) return -EMSGSIZE; return 0; @@ -210,6 +211,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); 
br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); + br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cdbf9047a659..cd86222cf5e3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -137,11 +137,11 @@ struct net_bridge_port #define BR_HAIRPIN_MODE 0x00000001 #define BR_BPDU_GUARD 0x00000002 #define BR_ROOT_BLOCK 0x00000004 +#define BR_MULTICAST_FAST_LEAVE 0x00000008 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; unsigned char multicast_router; - unsigned char multicast_fast_leave; struct timer_list multicast_router_timer; struct timer_list multicast_query_timer; struct hlist_head mglist; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index dc484ace0be3..a1ef1b6e14dc 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -173,24 +173,7 @@ static int store_multicast_router(struct net_bridge_port *p, static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, store_multicast_router); -static ssize_t show_multicast_fast_leave(struct net_bridge_port *p, - char *buf) -{ - return sprintf(buf, "%d\n", p->multicast_fast_leave); -} - -static int store_multicast_fast_leave(struct net_bridge_port *p, - unsigned long v) -{ - if (p->br->multicast_disabled) - return -EINVAL; - - p->multicast_fast_leave = !!v; - return 0; -} - -static BRPORT_ATTR(multicast_fast_leave, S_IRUGO | S_IWUSR, - show_multicast_fast_leave, store_multicast_fast_leave); +BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE); #endif static const struct brport_attribute *brport_attrs[] = { -- cgit v1.2.3 From a2932923ccf63c419c77aaa18ac09be98f2c94d8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 19 Nov 2012 22:57:20 +0000 Subject: KVM: PPC: Book3S HV: Provide a method for userspace to read and write the 
HPT A new ioctl, KVM_PPC_GET_HTAB_FD, returns a file descriptor. Reads on this fd return the contents of the HPT (hashed page table), writes create and/or remove entries in the HPT. There is a new capability, KVM_CAP_PPC_HTAB_FD, to indicate the presence of the ioctl. The ioctl takes an argument structure with the index of the first HPT entry to read out and a set of flags. The flags indicate whether the user is intending to read or write the HPT, and whether to return all entries or only the "bolted" entries (those with the bolted bit, 0x10, set in the first doubleword). This is intended for use in implementing qemu's savevm/loadvm and for live migration. Therefore, on reads, the first pass returns information about all HPTEs (or all bolted HPTEs). When the first pass reaches the end of the HPT, it returns from the read. Subsequent reads only return information about HPTEs that have changed since they were last read. A read that finds no changed HPTEs in the HPT following where the last read finished will return 0 bytes. The format of the data provides a simple run-length compression of the invalid entries. Each block of data starts with a header that indicates the index (position in the HPT, which is just an array), the number of valid entries starting at that index (may be zero), and the number of invalid entries following those valid entries. The valid entries, 16 bytes each, follow the header. The invalid entries are not explicitly represented. 
Signed-off-by: Paul Mackerras [agraf: fix documentation] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 54 +++++ arch/powerpc/include/asm/kvm_book3s_64.h | 22 ++ arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/include/uapi/asm/kvm.h | 25 +++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 344 +++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 12 -- arch/powerpc/kvm/powerpc.c | 17 ++ include/uapi/linux/kvm.h | 3 + 8 files changed, 467 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6671fdc0afb1..a5607c571cb3 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2071,6 +2071,60 @@ KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm Note that the vcpu ioctl is asynchronous to vcpu execution. +4.78 KVM_PPC_GET_HTAB_FD + +Capability: KVM_CAP_PPC_HTAB_FD +Architectures: powerpc +Type: vm ioctl +Parameters: Pointer to struct kvm_get_htab_fd (in) +Returns: file descriptor number (>= 0) on success, -1 on error + +This returns a file descriptor that can be used either to read out the +entries in the guest's hashed page table (HPT), or to write entries to +initialize the HPT. The returned fd can only be written to if the +KVM_GET_HTAB_WRITE bit is set in the flags field of the argument, and +can only be read if that bit is clear. The argument struct looks like +this: + +/* For KVM_PPC_GET_HTAB_FD */ +struct kvm_get_htab_fd { + __u64 flags; + __u64 start_index; + __u64 reserved[2]; +}; + +/* Values for kvm_get_htab_fd.flags */ +#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1) +#define KVM_GET_HTAB_WRITE ((__u64)0x2) + +The `start_index' field gives the index in the HPT of the entry at +which to start reading. It is ignored when writing. + +Reads on the fd will initially supply information about all +"interesting" HPT entries. 
Interesting entries are those with the +bolted bit set, if the KVM_GET_HTAB_BOLTED_ONLY bit is set, otherwise +all entries. When the end of the HPT is reached, the read() will +return. If read() is called again on the fd, it will start again from +the beginning of the HPT, but will only return HPT entries that have +changed since they were last read. + +Data read or written is structured as a header (8 bytes) followed by a +series of valid HPT entries (16 bytes) each. The header indicates how +many valid HPT entries there are and how many invalid entries follow +the valid entries. The invalid entries are not represented explicitly +in the stream. The header format is: + +struct kvm_get_htab_header { + __u32 index; + __u16 n_valid; + __u16 n_invalid; +}; + +Writes to the fd create HPT entries starting at the index given in the +header; first `n_valid' valid entries with contents from the data +written, then `n_invalid' invalid entries, invalidating any previously +valid entries found. + 5. The kvm_run structure ------------------------ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index b322e5bd6964..38bec1dc9928 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -246,4 +246,26 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot, return !(memslot->base_gfn & mask) && !(memslot->npages & mask); } +/* + * This works for 4k, 64k and 16M pages on POWER7, + * and 4k and 16M pages on PPC970. 
+ */ +static inline unsigned long slb_pgsize_encoding(unsigned long psize) +{ + unsigned long senc = 0; + + if (psize > 0x1000) { + senc = SLB_VSID_L; + if (psize == 0x10000) + senc |= SLB_VSID_LP_01; + } + return senc; +} + +static inline int is_vrma_hpte(unsigned long hpte_v) +{ + return (hpte_v & ~0xffffffUL) == + (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); +} + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 609cca3e9426..1ca31e92ee75 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -164,6 +164,8 @@ extern void kvmppc_bookehv_exit(void); extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); +extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index b89ae4db45ce..514883dd311e 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -331,6 +331,31 @@ struct kvm_book3e_206_tlb_params { __u32 reserved[8]; }; +/* For KVM_PPC_GET_HTAB_FD */ +struct kvm_get_htab_fd { + __u64 flags; + __u64 start_index; + __u64 reserved[2]; +}; + +/* Values for kvm_get_htab_fd.flags */ +#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1) +#define KVM_GET_HTAB_WRITE ((__u64)0x2) + +/* + * Data read on the file descriptor is formatted as a series of + * records, each consisting of a header followed by a series of + * `n_valid' HPTEs (16 bytes each), which are all valid. Following + * those valid HPTEs there are `n_invalid' invalid HPTEs, which + * are not represented explicitly in the stream. The same format + * is used for writing. 
+ */ +struct kvm_get_htab_header { + __u32 index; + __u16 n_valid; + __u16 n_invalid; +}; + #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) #define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) #define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 6ee6516a0bee..0aa40734c8f6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -1145,6 +1147,348 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) put_page(page); } +/* + * Functions for reading and writing the hash table via reads and + * writes on a file descriptor. + * + * Reads return the guest view of the hash table, which has to be + * pieced together from the real hash table and the guest_rpte + * values in the revmap array. + * + * On writes, each HPTE written is considered in turn, and if it + * is valid, it is written to the HPT as if an H_ENTER with the + * exact flag set was done. When the invalid count is non-zero + * in the header written to the stream, the kernel will make + * sure that that many HPTEs are invalid, and invalidate them + * if not. 
+ */ + +struct kvm_htab_ctx { + unsigned long index; + unsigned long flags; + struct kvm *kvm; + int first_pass; +}; + +#define HPTE_SIZE (2 * sizeof(unsigned long)) + +static long record_hpte(unsigned long flags, unsigned long *hptp, + unsigned long *hpte, struct revmap_entry *revp, + int want_valid, int first_pass) +{ + unsigned long v, r; + int ok = 1; + int valid, dirty; + + /* Unmodified entries are uninteresting except on the first pass */ + dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + if (!first_pass && !dirty) + return 0; + + valid = 0; + if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) { + valid = 1; + if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && + !(hptp[0] & HPTE_V_BOLTED)) + valid = 0; + } + if (valid != want_valid) + return 0; + + v = r = 0; + if (valid || dirty) { + /* lock the HPTE so it's stable and read it */ + preempt_disable(); + while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) + cpu_relax(); + v = hptp[0]; + if (v & HPTE_V_ABSENT) { + v &= ~HPTE_V_ABSENT; + v |= HPTE_V_VALID; + } + /* re-evaluate valid and dirty from synchronized HPTE value */ + valid = !!(v & HPTE_V_VALID); + if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) + valid = 0; + r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C)); + dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + /* only clear modified if this is the right sort of entry */ + if (valid == want_valid && dirty) { + r &= ~HPTE_GR_MODIFIED; + revp->guest_rpte = r; + } + asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); + hptp[0] &= ~HPTE_V_HVLOCK; + preempt_enable(); + if (!(valid == want_valid && (first_pass || dirty))) + ok = 0; + } + hpte[0] = v; + hpte[1] = r; + return ok; +} + +static ssize_t kvm_htab_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct kvm_htab_ctx *ctx = file->private_data; + struct kvm *kvm = ctx->kvm; + struct kvm_get_htab_header hdr; + unsigned long *hptp; + struct revmap_entry *revp; + unsigned long i, nb, nw; + unsigned long __user *lbuf; + 
struct kvm_get_htab_header __user *hptr; + unsigned long flags; + int first_pass; + unsigned long hpte[2]; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + first_pass = ctx->first_pass; + flags = ctx->flags; + + i = ctx->index; + hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + revp = kvm->arch.revmap + i; + lbuf = (unsigned long __user *)buf; + + nb = 0; + while (nb + sizeof(hdr) + HPTE_SIZE < count) { + /* Initialize header */ + hptr = (struct kvm_get_htab_header __user *)buf; + hdr.index = i; + hdr.n_valid = 0; + hdr.n_invalid = 0; + nw = nb; + nb += sizeof(hdr); + lbuf = (unsigned long __user *)(buf + sizeof(hdr)); + + /* Skip uninteresting entries, i.e. clean on not-first pass */ + if (!first_pass) { + while (i < kvm->arch.hpt_npte && + !(revp->guest_rpte & HPTE_GR_MODIFIED)) { + ++i; + hptp += 2; + ++revp; + } + } + + /* Grab a series of valid entries */ + while (i < kvm->arch.hpt_npte && + hdr.n_valid < 0xffff && + nb + HPTE_SIZE < count && + record_hpte(flags, hptp, hpte, revp, 1, first_pass)) { + /* valid entry, write it out */ + ++hdr.n_valid; + if (__put_user(hpte[0], lbuf) || + __put_user(hpte[1], lbuf + 1)) + return -EFAULT; + nb += HPTE_SIZE; + lbuf += 2; + ++i; + hptp += 2; + ++revp; + } + /* Now skip invalid entries while we can */ + while (i < kvm->arch.hpt_npte && + hdr.n_invalid < 0xffff && + record_hpte(flags, hptp, hpte, revp, 0, first_pass)) { + /* found an invalid entry */ + ++hdr.n_invalid; + ++i; + hptp += 2; + ++revp; + } + + if (hdr.n_valid || hdr.n_invalid) { + /* write back the header */ + if (__copy_to_user(hptr, &hdr, sizeof(hdr))) + return -EFAULT; + nw = nb; + buf = (char __user *)lbuf; + } else { + nb = nw; + } + + /* Check if we've wrapped around the hash table */ + if (i >= kvm->arch.hpt_npte) { + i = 0; + ctx->first_pass = 0; + break; + } + } + + ctx->index = i; + + return nb; +} + +static ssize_t kvm_htab_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + 
struct kvm_htab_ctx *ctx = file->private_data; + struct kvm *kvm = ctx->kvm; + struct kvm_get_htab_header hdr; + unsigned long i, j; + unsigned long v, r; + unsigned long __user *lbuf; + unsigned long *hptp; + unsigned long tmp[2]; + ssize_t nb; + long int err, ret; + int rma_setup; + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + + /* lock out vcpus from running while we're doing this */ + mutex_lock(&kvm->lock); + rma_setup = kvm->arch.rma_setup_done; + if (rma_setup) { + kvm->arch.rma_setup_done = 0; /* temporarily */ + /* order rma_setup_done vs. vcpus_running */ + smp_mb(); + if (atomic_read(&kvm->arch.vcpus_running)) { + kvm->arch.rma_setup_done = 1; + mutex_unlock(&kvm->lock); + return -EBUSY; + } + } + + err = 0; + for (nb = 0; nb + sizeof(hdr) <= count; ) { + err = -EFAULT; + if (__copy_from_user(&hdr, buf, sizeof(hdr))) + break; + + err = 0; + if (nb + hdr.n_valid * HPTE_SIZE > count) + break; + + nb += sizeof(hdr); + buf += sizeof(hdr); + + err = -EINVAL; + i = hdr.index; + if (i >= kvm->arch.hpt_npte || + i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) + break; + + hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + lbuf = (unsigned long __user *)buf; + for (j = 0; j < hdr.n_valid; ++j) { + err = -EFAULT; + if (__get_user(v, lbuf) || __get_user(r, lbuf + 1)) + goto out; + err = -EINVAL; + if (!(v & HPTE_V_VALID)) + goto out; + lbuf += 2; + nb += HPTE_SIZE; + + if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + kvmppc_do_h_remove(kvm, 0, i, 0, tmp); + err = -EIO; + ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, + tmp); + if (ret != H_SUCCESS) { + pr_err("kvm_htab_write ret %ld i=%ld v=%lx " + "r=%lx\n", ret, i, v, r); + goto out; + } + if (!rma_setup && is_vrma_hpte(v)) { + unsigned long psize = hpte_page_size(v, r); + unsigned long senc = slb_pgsize_encoding(psize); + unsigned long lpcr; + + kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; 
+ lpcr |= senc << (LPCR_VRMASD_SH - 4); + kvm->arch.lpcr = lpcr; + rma_setup = 1; + } + ++i; + hptp += 2; + } + + for (j = 0; j < hdr.n_invalid; ++j) { + if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + kvmppc_do_h_remove(kvm, 0, i, 0, tmp); + ++i; + hptp += 2; + } + err = 0; + } + + out: + /* Order HPTE updates vs. rma_setup_done */ + smp_wmb(); + kvm->arch.rma_setup_done = rma_setup; + mutex_unlock(&kvm->lock); + + if (err) + return err; + return nb; +} + +static int kvm_htab_release(struct inode *inode, struct file *filp) +{ + struct kvm_htab_ctx *ctx = filp->private_data; + + filp->private_data = NULL; + if (!(ctx->flags & KVM_GET_HTAB_WRITE)) + atomic_dec(&ctx->kvm->arch.hpte_mod_interest); + kvm_put_kvm(ctx->kvm); + kfree(ctx); + return 0; +} + +static struct file_operations kvm_htab_fops = { + .read = kvm_htab_read, + .write = kvm_htab_write, + .llseek = default_llseek, + .release = kvm_htab_release, +}; + +int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) +{ + int ret; + struct kvm_htab_ctx *ctx; + int rwflag; + + /* reject flags we don't recognize */ + if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE)) + return -EINVAL; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + kvm_get_kvm(kvm); + ctx->kvm = kvm; + ctx->index = ghf->start_index; + ctx->flags = ghf->flags; + ctx->first_pass = 1; + + rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; + ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag); + if (ret < 0) { + kvm_put_kvm(kvm); + return ret; + } + + if (rwflag == O_RDONLY) { + mutex_lock(&kvm->slots_lock); + atomic_inc(&kvm->arch.hpte_mod_interest); + /* make sure kvmppc_do_h_enter etc. 
see the increment */ + synchronize_srcu_expedited(&kvm->srcu); + mutex_unlock(&kvm->slots_lock); + } + + return ret; +} + void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) { struct kvmppc_mmu *mmu = &vcpu->arch.mmu; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 843eb754a1d5..a4f59dbcd800 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1563,18 +1563,6 @@ out: return r; } -static unsigned long slb_pgsize_encoding(unsigned long psize) -{ - unsigned long senc = 0; - - if (psize > 0x1000) { - senc = SLB_VSID_L; - if (psize == 0x10000) - senc |= SLB_VSID_LP_01; - } - return senc; -} - static void unpin_slot(struct kvm_memory_slot *memslot) { unsigned long *physp; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d583ea15e151..70739a089560 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -354,6 +354,12 @@ int kvm_dev_ioctl_check_extension(long ext) r = 1; #else r = 0; + break; +#endif +#ifdef CONFIG_KVM_BOOK3S_64_HV + case KVM_CAP_PPC_HTAB_FD: + r = 1; + break; #endif break; case KVM_CAP_NR_VCPUS: @@ -954,6 +960,17 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + + case KVM_PPC_GET_HTAB_FD: { + struct kvm *kvm = filp->private_data; + struct kvm_get_htab_fd ghf; + + r = -EFAULT; + if (copy_from_user(&ghf, argp, sizeof(ghf))) + break; + r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf); + break; + } #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 494a84c37c3e..e6e5d4b13708 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -634,6 +634,7 @@ struct kvm_ppc_smmu_info { #endif #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 +#define KVM_CAP_PPC_HTAB_FD 84 #ifdef KVM_CAP_IRQ_ROUTING @@ -859,6 +860,8 @@ struct kvm_s390_ucas_mapping { #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) /* Available with 
KVM_CAP_RMA */ #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) +/* Available with KVM_CAP_PPC_HTAB_FD */ +#define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) /* * ioctls for vcpu fds -- cgit v1.2.3 From cf66bb93e0f75e0a4ba1ec070692618fa028e994 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 3 Dec 2012 16:25:40 +0000 Subject: byteorder: allow arch to opt to use GCC intrinsics for byteswapping Since GCC 4.4, there have been __builtin_bswap32() and __builtin_bswap64() intrinsics. A __builtin_bswap16() came a little later (4.6 for PowerPC, 4.8 for other platforms). By using these instead of the inline assembler that most architectures have in their __arch_swabXX() macros, we let the compiler see what's actually happening. The resulting code should be at least as good, and much *better* in the cases where it can be combined with a nearby load or store, using a load-and-byteswap or store-and-byteswap instruction (e.g. lwbrx/stwbrx on PowerPC, movbe on Atom). When GCC is sufficiently recent *and* the architecture opts in to using the intrinsics by setting CONFIG_ARCH_USE_BUILTIN_BSWAP, they will be used in preference to the __arch_swabXX() macros. An architecture which does not set ARCH_USE_BUILTIN_BSWAP will continue to use its own hand-crafted macros. Signed-off-by: David Woodhouse Acked-by: H. Peter Anvin --- arch/Kconfig | 19 +++++++++++++++++++ include/linux/compiler-gcc4.h | 10 ++++++++++ include/linux/compiler-intel.h | 7 +++++++ include/uapi/linux/swab.h | 12 +++++++++--- 4 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 366ec06a5185..c31416b10586 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -112,6 +112,25 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS See Documentation/unaligned-memory-access.txt for more information on the topic of unaligned memory accesses. 
+config ARCH_USE_BUILTIN_BSWAP + bool + help + Modern versions of GCC (since 4.4) have builtin functions + for handling byte-swapping. Using these, instead of the old + inline assembler that the architecture code provides in the + __arch_swabXX() macros, allows the compiler to see what's + happening and offers more opportunity for optimisation. In + particular, the compiler will be able to combine the byteswap + with a nearby load or store and use load-and-swap or + store-and-swap instructions if the architecture has them. It + should almost *never* result in code which is worse than the + hand-coded assembler in <asm/swab.h>. But just in case it + does, the use of the builtins is optional. + + Any architecture with load-and-swap or store-and-swap + instructions should set this. And it shouldn't hurt to set it + on architectures that don't have such instructions. + config HAVE_SYSCALL_WRAPPERS bool diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 412bc6c2b023..dc16a858e77c 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -63,3 +63,13 @@ #define __compiletime_warning(message) __attribute__((warning(message))) #define __compiletime_error(message) __attribute__((error(message))) #endif + +#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP +#if __GNUC_MINOR__ >= 4 +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#endif +#if __GNUC_MINOR__ >= 8 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6) +#define __HAVE_BUILTIN_BSWAP16__ +#endif +#endif diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index d8e636e5607d..973ce10c40b6 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -29,3 +29,10 @@ #endif #define uninitialized_var(x) x + +#ifndef __HAVE_BUILTIN_BSWAP16__ +/* icc has this, but it's called _bswap16 */ +#define __HAVE_BUILTIN_BSWAP16__ +#define __builtin_bswap16 _bswap16 +#endif + diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h 
index e811474724c2..0e011eb91b5d 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -45,7 +45,9 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val) { -#ifdef __arch_swab16 +#ifdef __HAVE_BUILTIN_BSWAP16__ + return __builtin_bswap16(val); +#elif defined (__arch_swab16) return __arch_swab16(val); #else return ___constant_swab16(val); @@ -54,7 +56,9 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val) static inline __attribute_const__ __u32 __fswab32(__u32 val) { -#ifdef __arch_swab32 +#ifdef __HAVE_BUILTIN_BSWAP32__ + return __builtin_bswap32(val); +#elif defined(__arch_swab32) return __arch_swab32(val); #else return ___constant_swab32(val); @@ -63,7 +67,9 @@ static inline __attribute_const__ __u32 __fswab32(__u32 val) static inline __attribute_const__ __u64 __fswab64(__u64 val) { -#ifdef __arch_swab64 +#ifdef __HAVE_BUILTIN_BSWAP64__ + return __builtin_bswap64(val); +#elif defined (__arch_swab64) return __arch_swab64(val); #elif defined(__SWAB_64_THRU_32__) __u32 h = val >> 32; -- cgit v1.2.3 From 7508320678b7819ac6aeb89580b8622a424ce586 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 5 Dec 2012 13:51:19 -0700 Subject: PCI: Add standard PCIe Capability Link ASPM field names Add standard #defines for ASPM fields in PCI Express Link Capability and Link Control registers. Previously we used PCIE_LINK_STATE_L0S and PCIE_LINK_STATE_L1 directly, but these are defined for the Linux ASPM interfaces, e.g., pci_disable_link_state(), and only coincidentally match the actual register bits. PCIE_LINK_STATE_CLKPM, also part of that interface, does not match the register bit. 
Signed-off-by: Bjorn Helgaas Reviewed-by: Kenji Kaneshige Acked-by: Kenji Kaneshige --- drivers/pci/pcie/aspm.c | 11 ++++++----- include/uapi/linux/pci_regs.h | 2 ++ 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 213753b283a6..c2faf9d0ffde 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -427,7 +427,8 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val) { - pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL, 0x3, val); + pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, val); } static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state) @@ -442,12 +443,12 @@ static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state) return; /* Convert ASPM state to upstream/downstream ASPM register state */ if (state & ASPM_STATE_L0S_UP) - dwstream |= PCIE_LINK_STATE_L0S; + dwstream |= PCI_EXP_LNKCTL_ASPM_L0S; if (state & ASPM_STATE_L0S_DW) - upstream |= PCIE_LINK_STATE_L0S; + upstream |= PCI_EXP_LNKCTL_ASPM_L0S; if (state & ASPM_STATE_L1) { - upstream |= PCIE_LINK_STATE_L1; - dwstream |= PCIE_LINK_STATE_L1; + upstream |= PCI_EXP_LNKCTL_ASPM_L1; + dwstream |= PCI_EXP_LNKCTL_ASPM_L1; } /* * Spec 2.0 suggests all functions should be configured the diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 4cca834f9abd..0b6dbe49dc1e 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -469,6 +469,8 @@ #define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ #define PCI_EXP_LNKCTL 16 /* Link Control */ #define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ +#define PCI_EXP_LNKCTL_ASPM_L0S 0x01 /* L0s Enable */ +#define PCI_EXP_LNKCTL_ASPM_L1 0x02 /* L1 Enable */ #define PCI_EXP_LNKCTL_RCB 0x0008 /* Read Completion Boundary */ #define PCI_EXP_LNKCTL_LD 0x0010 
/* Link Disable */ #define PCI_EXP_LNKCTL_RL 0x0020 /* Retrain Link */ -- cgit v1.2.3 From ee07c6e7a6f8a25c18f0a6b18152fbd7499245f6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 7 Dec 2012 00:04:48 +0000 Subject: bridge: export multicast database via netlink V5: fix two bugs pointed out by Thomas remove seq check for now, mark it as TODO V4: remove some useless #include some coding style fix V3: drop debugging printk's update selinux perm table as well V2: drop patch 1/2, export ifindex directly Redesign netlink attributes Improve netlink seq check Handle IPv6 addr as well This patch exports bridge multicast database via netlink message type RTM_GETMDB. Similar to fdb, but currently bridge-specific. We may need to support modify multicast database too (RTM_{ADD,DEL}MDB). (Thanks to Thomas for patient reviews) Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf Cc: Jesper Dangaard Brouer Signed-off-by: Cong Wang Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 55 ++++++++++++++ include/uapi/linux/rtnetlink.h | 3 + net/bridge/Makefile | 2 +- net/bridge/br_mdb.c | 163 +++++++++++++++++++++++++++++++++++++++++ net/bridge/br_multicast.c | 1 + net/bridge/br_private.h | 1 + security/selinux/nlmsgtab.c | 1 + 7 files changed, 225 insertions(+), 1 deletion(-) create mode 100644 net/bridge/br_mdb.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index b3885791e11e..9a0f6ff0d7e7 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -116,4 +116,59 @@ enum { __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) + +/* Bridge multicast database attributes + * [MDBA_MDB] = { + * [MDBA_MDB_ENTRY] = { + * [MDBA_MDB_ENTRY_INFO] + * } + * } + * [MDBA_ROUTER] = { + * [MDBA_ROUTER_PORT] + * } + */ +enum { + MDBA_UNSPEC, + MDBA_MDB, + MDBA_ROUTER, + __MDBA_MAX, +}; +#define MDBA_MAX (__MDBA_MAX - 1) + +enum { + 
MDBA_MDB_UNSPEC, + MDBA_MDB_ENTRY, + __MDBA_MDB_MAX, +}; +#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1) + +enum { + MDBA_MDB_ENTRY_UNSPEC, + MDBA_MDB_ENTRY_INFO, + __MDBA_MDB_ENTRY_MAX, +}; +#define MDBA_MDB_ENTRY_MAX (__MDBA_MDB_ENTRY_MAX - 1) + +enum { + MDBA_ROUTER_UNSPEC, + MDBA_ROUTER_PORT, + __MDBA_ROUTER_MAX, +}; +#define MDBA_ROUTER_MAX (__MDBA_ROUTER_MAX - 1) + +struct br_port_msg { + __u32 ifindex; +}; + +struct br_mdb_entry { + __u32 ifindex; + struct { + union { + __be32 ip4; + struct in6_addr ip6; + } u; + __be16 proto; + } addr; +}; + #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 33d29cea37ea..354a1e7d32a3 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -125,6 +125,9 @@ enum { RTM_GETNETCONF = 82, #define RTM_GETNETCONF RTM_GETNETCONF + RTM_GETMDB = 86, +#define RTM_GETMDB RTM_GETMDB + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; diff --git a/net/bridge/Makefile b/net/bridge/Makefile index d0359ea8ee79..e859098f5ee9 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -12,6 +12,6 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o -bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o +bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c new file mode 100644 index 000000000000..edc0d731f6b2 --- /dev/null +++ b/net/bridge/br_mdb.c @@ -0,0 +1,163 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_IPV6) +#include +#endif + +#include "br_private.h" + +static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p; + struct hlist_node *n; + struct nlattr *nest; + + if 
(!br->multicast_router || hlist_empty(&br->router_list)) + return 0; + + nest = nla_nest_start(skb, MDBA_ROUTER); + if (nest == NULL) + return -EMSGSIZE; + + hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) { + if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex)) + goto fail; + } + + nla_nest_end(skb, nest); + return 0; +fail: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_mdb_htable *mdb; + struct nlattr *nest, *nest2; + int i, err = 0; + int idx = 0, s_idx = cb->args[1]; + + if (br->multicast_disabled) + return 0; + + mdb = rcu_dereference(br->mdb); + if (!mdb) + return 0; + + nest = nla_nest_start(skb, MDBA_MDB); + if (nest == NULL) + return -EMSGSIZE; + + for (i = 0; i < mdb->max; i++) { + struct hlist_node *h; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p, **pp; + struct net_bridge_port *port; + + hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) { + if (idx < s_idx) + goto skip; + + nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY); + if (nest2 == NULL) { + err = -EMSGSIZE; + goto out; + } + + for (pp = &mp->ports; + (p = rcu_dereference(*pp)) != NULL; + pp = &p->next) { + port = p->port; + if (port) { + struct br_mdb_entry e; + e.ifindex = port->dev->ifindex; + e.addr.u.ip4 = p->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + e.addr.u.ip6 = p->addr.u.ip6; +#endif + e.addr.proto = p->addr.proto; + if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) { + nla_nest_cancel(skb, nest2); + err = -EMSGSIZE; + goto out; + } + } + } + nla_nest_end(skb, nest2); + skip: + idx++; + } + } + +out: + cb->args[1] = idx; + nla_nest_end(skb, nest); + return err; +} + +static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net_device *dev; + struct net *net = sock_net(skb->sk); + struct nlmsghdr *nlh = NULL; + int idx = 0, 
s_idx; + + s_idx = cb->args[0]; + + rcu_read_lock(); + + /* TODO: in case of rehashing, we need to check + * consistency for dumping. + */ + cb->seq = net->dev_base_seq; + + for_each_netdev_rcu(net, dev) { + if (dev->priv_flags & IFF_EBRIDGE) { + struct br_port_msg *bpm; + + if (idx < s_idx) + goto skip; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_GETMDB, + sizeof(*bpm), NLM_F_MULTI); + if (nlh == NULL) + break; + + bpm = nlmsg_data(nlh); + bpm->ifindex = dev->ifindex; + if (br_mdb_fill_info(skb, cb, dev) < 0) + goto out; + if (br_rports_fill_info(skb, cb, dev) < 0) + goto out; + + cb->args[1] = 0; + nlmsg_end(skb, nlh); + skip: + idx++; + } + } + +out: + if (nlh) + nlmsg_end(skb, nlh); + rcu_read_unlock(); + cb->args[0] = idx; + return skb->len; +} + +void br_mdb_init(void) +{ + rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL); +} diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a2a7a1a79081..68e375ac93bd 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1605,6 +1605,7 @@ void br_multicast_init(struct net_bridge *br) br_multicast_querier_expired, (unsigned long)br); setup_timer(&br->multicast_query_timer, br_multicast_query_expired, (unsigned long)br); + br_mdb_init(); } void br_multicast_open(struct net_bridge *br) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cd86222cf5e3..ae0a6ec0a702 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -433,6 +433,7 @@ extern int br_multicast_set_port_router(struct net_bridge_port *p, extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val); extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); +extern void br_mdb_init(void); static inline bool br_multicast_is_router(struct net_bridge *br) { diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 
d309e7f472d8..163aaa77d5aa 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -67,6 +67,7 @@ static struct nlmsg_perm nlmsg_route_perms[] = { RTM_GETADDRLABEL, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETDCB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, + { RTM_GETMDB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, }; static struct nlmsg_perm nlmsg_tcpdiag_perms[] = -- cgit v1.2.3 From 986a4f4d452dec004697f667439d27c3fda9c928 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 7 Dec 2012 07:04:56 +0000 Subject: virtio_net: multiqueue support This patch adds multiqueue (VIRTIO_NET_F_MQ) support to the virtio_net driver. A VIRTIO_NET_F_MQ capable device allows the driver to do packet transmission and reception through multiple queue pairs and does the packet steering to get better performance. By default, only one queue pair is used; the user can change the number of queue pairs with ethtool in the next patch. When multiple queue pairs are used and the number of queue pairs is equal to the number of vcpus, the driver does the following optimizations to implement per-cpu virt queue pairs: - select the txq based on the smp processor id. - set the smp affinity hint to the cpu that owns the queue pairs. This can be used with the flow steering support of the device to guarantee that the packets of a single flow are handled by the same cpu. Signed-off-by: Krishna Kumar Signed-off-by: Jason Wang Signed-off-by: David S.
Miller --- drivers/net/virtio_net.c | 473 +++++++++++++++++++++++++++++++--------- include/uapi/linux/virtio_net.h | 27 +++ 2 files changed, 402 insertions(+), 98 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 02a71021565e..c0830488a390 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -58,6 +58,9 @@ struct send_queue { /* TX: fragments + linear part + virtio header */ struct scatterlist sg[MAX_SKB_FRAGS + 2]; + + /* Name of the send queue: output.$index */ + char name[40]; }; /* Internal representation of a receive virtqueue */ @@ -75,22 +78,34 @@ struct receive_queue { /* RX: fragments + linear part + virtio header */ struct scatterlist sg[MAX_SKB_FRAGS + 2]; + + /* Name of this receive queue: input.$index */ + char name[40]; }; struct virtnet_info { struct virtio_device *vdev; struct virtqueue *cvq; struct net_device *dev; - struct send_queue sq; - struct receive_queue rq; + struct send_queue *sq; + struct receive_queue *rq; unsigned int status; + /* Max # of queue pairs supported by the device */ + u16 max_queue_pairs; + + /* # of queue pairs currently used by the driver */ + u16 curr_queue_pairs; + /* I like... big packets and I cannot lie! */ bool big_packets; /* Host will merge rx buffers for big packets (shake it! shake it!) */ bool mergeable_rx_bufs; + /* Has control virtqueue */ + bool has_cvq; + /* enable config space updates */ bool config_enable; @@ -105,6 +120,9 @@ struct virtnet_info { /* Lock for config space updates */ struct mutex config_lock; + + /* Does the affinity hint is set for virtqueues? */ + bool affinity_hint_set; }; struct skb_vnet_hdr { @@ -125,6 +143,29 @@ struct padded_vnet_hdr { char padding[6]; }; +/* Converting between virtqueue no. and kernel tx/rx queue no. + * 0:rx0 1:tx0 2:rx1 3:tx1 ... 
2N:rxN 2N+1:txN 2N+2:cvq + */ +static int vq2txq(struct virtqueue *vq) +{ + return (virtqueue_get_queue_index(vq) - 1) / 2; +} + +static int txq2vq(int txq) +{ + return txq * 2 + 1; +} + +static int vq2rxq(struct virtqueue *vq) +{ + return virtqueue_get_queue_index(vq) / 2; +} + +static int rxq2vq(int rxq) +{ + return rxq * 2; +} + static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) { return (struct skb_vnet_hdr *)skb->cb; @@ -165,7 +206,7 @@ static void skb_xmit_done(struct virtqueue *vq) virtqueue_disable_cb(vq); /* We were probably waiting for more output buffers. */ - netif_wake_queue(vi->dev); + netif_wake_subqueue(vi->dev, vq2txq(vq)); } static void set_skb_frag(struct sk_buff *skb, struct page *page, @@ -502,7 +543,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp) static void skb_recv_done(struct virtqueue *rvq) { struct virtnet_info *vi = rvq->vdev->priv; - struct receive_queue *rq = &vi->rq; + struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; /* Schedule NAPI, Suppress further interrupts if successful. */ if (napi_schedule_prep(&rq->napi)) { @@ -532,15 +573,21 @@ static void refill_work(struct work_struct *work) struct virtnet_info *vi = container_of(work, struct virtnet_info, refill.work); bool still_empty; + int i; + + for (i = 0; i < vi->max_queue_pairs; i++) { + struct receive_queue *rq = &vi->rq[i]; - napi_disable(&vi->rq.napi); - still_empty = !try_fill_recv(&vi->rq, GFP_KERNEL); - virtnet_napi_enable(&vi->rq); + napi_disable(&rq->napi); + still_empty = !try_fill_recv(rq, GFP_KERNEL); + virtnet_napi_enable(rq); - /* In theory, this can happen: if we don't get any buffers in - * we will *never* try to fill again. */ - if (still_empty) - schedule_delayed_work(&vi->refill, HZ/2); + /* In theory, this can happen: if we don't get any buffers in + * we will *never* try to fill again. 
+ */ + if (still_empty) + schedule_delayed_work(&vi->refill, HZ/2); + } } static int virtnet_poll(struct napi_struct *napi, int budget) @@ -578,6 +625,21 @@ again: return received; } +static int virtnet_open(struct net_device *dev) +{ + struct virtnet_info *vi = netdev_priv(dev); + int i; + + for (i = 0; i < vi->max_queue_pairs; i++) { + /* Make sure we have some buffers: if oom use wq. */ + if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) + schedule_delayed_work(&vi->refill, 0); + virtnet_napi_enable(&vi->rq[i]); + } + + return 0; +} + static unsigned int free_old_xmit_skbs(struct send_queue *sq) { struct sk_buff *skb; @@ -650,7 +712,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); - struct send_queue *sq = &vi->sq; + int qnum = skb_get_queue_mapping(skb); + struct send_queue *sq = &vi->sq[qnum]; int capacity; /* Free up any pending old buffers before queueing new ones. */ @@ -664,13 +727,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) if (likely(capacity == -ENOMEM)) { if (net_ratelimit()) dev_warn(&dev->dev, - "TX queue failure: out of memory\n"); + "TXQ (%d) failure: out of memory\n", + qnum); } else { dev->stats.tx_fifo_errors++; if (net_ratelimit()) dev_warn(&dev->dev, - "Unexpected TX queue failure: %d\n", - capacity); + "Unexpected TXQ (%d) failure: %d\n", + qnum, capacity); } dev->stats.tx_dropped++; kfree_skb(skb); @@ -685,12 +749,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Apparently nice girls don't return TX_BUSY; stop the queue * before it gets out of hand. Naturally, this wastes entries. */ if (capacity < 2+MAX_SKB_FRAGS) { - netif_stop_queue(dev); + netif_stop_subqueue(dev, qnum); if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { /* More just got used, free them then recheck. 
*/ capacity += free_old_xmit_skbs(sq); if (capacity >= 2+MAX_SKB_FRAGS) { - netif_start_queue(dev); + netif_start_subqueue(dev, qnum); virtqueue_disable_cb(sq->vq); } } @@ -758,23 +822,13 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev, static void virtnet_netpoll(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); + int i; - napi_schedule(&vi->rq.napi); + for (i = 0; i < vi->curr_queue_pairs; i++) + napi_schedule(&vi->rq[i].napi); } #endif -static int virtnet_open(struct net_device *dev) -{ - struct virtnet_info *vi = netdev_priv(dev); - - /* Make sure we have some buffers: if oom use wq. */ - if (!try_fill_recv(&vi->rq, GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); - - virtnet_napi_enable(&vi->rq); - return 0; -} - /* * Send command via the control virtqueue and check status. Commands * supported by the hypervisor, as indicated by feature bits, should @@ -830,13 +884,39 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi) rtnl_unlock(); } +static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) +{ + struct scatterlist sg; + struct virtio_net_ctrl_mq s; + struct net_device *dev = vi->dev; + + if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) + return 0; + + s.virtqueue_pairs = queue_pairs; + sg_init_one(&sg, &s, sizeof(s)); + + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, 1, 0)){ + dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", + queue_pairs); + return -EINVAL; + } else + vi->curr_queue_pairs = queue_pairs; + + return 0; +} + static int virtnet_close(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); + int i; /* Make sure refill_work doesn't re-enable napi! 
*/ cancel_delayed_work_sync(&vi->refill); - napi_disable(&vi->rq.napi); + + for (i = 0; i < vi->max_queue_pairs; i++) + napi_disable(&vi->rq[i].napi); return 0; } @@ -943,13 +1023,41 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid) return 0; } +static void virtnet_set_affinity(struct virtnet_info *vi, bool set) +{ + int i; + + /* In multiqueue mode, when the number of cpu is equal to the number of + * queue pairs, we let the queue pairs to be private to one cpu by + * setting the affinity hint to eliminate the contention. + */ + if ((vi->curr_queue_pairs == 1 || + vi->max_queue_pairs != num_online_cpus()) && set) { + if (vi->affinity_hint_set) + set = false; + else + return; + } + + for (i = 0; i < vi->max_queue_pairs; i++) { + int cpu = set ? i : -1; + virtqueue_set_affinity(vi->rq[i].vq, cpu); + virtqueue_set_affinity(vi->sq[i].vq, cpu); + } + + if (set) + vi->affinity_hint_set = true; + else + vi->affinity_hint_set = false; +} + static void virtnet_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) { struct virtnet_info *vi = netdev_priv(dev); - ring->rx_max_pending = virtqueue_get_vring_size(vi->rq.vq); - ring->tx_max_pending = virtqueue_get_vring_size(vi->sq.vq); + ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq); + ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq); ring->rx_pending = ring->rx_max_pending; ring->tx_pending = ring->tx_max_pending; } @@ -984,6 +1092,21 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu) return 0; } +/* To avoid contending a lock hold by a vcpu who would exit to host, select the + * txq based on the processor id. + * TODO: handle cpu hotplug. + */ +static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb) +{ + int txq = skb_rx_queue_recorded(skb) ? 
skb_get_rx_queue(skb) : + smp_processor_id(); + + while (unlikely(txq >= dev->real_num_tx_queues)) + txq -= dev->real_num_tx_queues; + + return txq; +} + static const struct net_device_ops virtnet_netdev = { .ndo_open = virtnet_open, .ndo_stop = virtnet_close, @@ -995,6 +1118,7 @@ static const struct net_device_ops virtnet_netdev = { .ndo_get_stats64 = virtnet_stats, .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, + .ndo_select_queue = virtnet_select_queue, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = virtnet_netpoll, #endif @@ -1030,10 +1154,10 @@ static void virtnet_config_changed_work(struct work_struct *work) if (vi->status & VIRTIO_NET_S_LINK_UP) { netif_carrier_on(vi->dev); - netif_wake_queue(vi->dev); + netif_tx_wake_all_queues(vi->dev); } else { netif_carrier_off(vi->dev); - netif_stop_queue(vi->dev); + netif_tx_stop_all_queues(vi->dev); } done: mutex_unlock(&vi->config_lock); @@ -1046,48 +1170,203 @@ static void virtnet_config_changed(struct virtio_device *vdev) schedule_work(&vi->config_work); } +static void virtnet_free_queues(struct virtnet_info *vi) +{ + kfree(vi->rq); + kfree(vi->sq); +} + +static void free_receive_bufs(struct virtnet_info *vi) +{ + int i; + + for (i = 0; i < vi->max_queue_pairs; i++) { + while (vi->rq[i].pages) + __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); + } +} + +static void free_unused_bufs(struct virtnet_info *vi) +{ + void *buf; + int i; + + for (i = 0; i < vi->max_queue_pairs; i++) { + struct virtqueue *vq = vi->sq[i].vq; + while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) + dev_kfree_skb(buf); + } + + for (i = 0; i < vi->max_queue_pairs; i++) { + struct virtqueue *vq = vi->rq[i].vq; + + while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { + if (vi->mergeable_rx_bufs || vi->big_packets) + give_pages(&vi->rq[i], buf); + else + dev_kfree_skb(buf); + --vi->rq[i].num; + } + BUG_ON(vi->rq[i].num != 0); + } +} + static void virtnet_del_vqs(struct 
virtnet_info *vi) { struct virtio_device *vdev = vi->vdev; + virtnet_set_affinity(vi, false); + vdev->config->del_vqs(vdev); + + virtnet_free_queues(vi); } -static int init_vqs(struct virtnet_info *vi) +static int virtnet_find_vqs(struct virtnet_info *vi) { - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL}; - const char *names[] = { "input", "output", "control" }; - int nvqs, err; - - /* We expect two virtqueues, receive then send, - * and optionally control. */ - nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2; - - err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names); - if (err) - return err; + vq_callback_t **callbacks; + struct virtqueue **vqs; + int ret = -ENOMEM; + int i, total_vqs; + const char **names; + + /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by + * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by + * possible control vq. + */ + total_vqs = vi->max_queue_pairs * 2 + + virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); + + /* Allocate space for find_vqs parameters */ + vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL); + if (!vqs) + goto err_vq; + callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL); + if (!callbacks) + goto err_callback; + names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL); + if (!names) + goto err_names; + + /* Parameters for control virtqueue, if any */ + if (vi->has_cvq) { + callbacks[total_vqs - 1] = NULL; + names[total_vqs - 1] = "control"; + } - vi->rq.vq = vqs[0]; - vi->sq.vq = vqs[1]; + /* Allocate/initialize parameters for send/receive virtqueues */ + for (i = 0; i < vi->max_queue_pairs; i++) { + callbacks[rxq2vq(i)] = skb_recv_done; + callbacks[txq2vq(i)] = skb_xmit_done; + sprintf(vi->rq[i].name, "input.%d", i); + sprintf(vi->sq[i].name, "output.%d", i); + names[rxq2vq(i)] = vi->rq[i].name; + names[txq2vq(i)] = vi->sq[i].name; + } - if (virtio_has_feature(vi->vdev, 
VIRTIO_NET_F_CTRL_VQ)) { - vi->cvq = vqs[2]; + ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, + names); + if (ret) + goto err_find; + if (vi->has_cvq) { + vi->cvq = vqs[total_vqs - 1]; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) vi->dev->features |= NETIF_F_HW_VLAN_FILTER; } + + for (i = 0; i < vi->max_queue_pairs; i++) { + vi->rq[i].vq = vqs[rxq2vq(i)]; + vi->sq[i].vq = vqs[txq2vq(i)]; + } + + kfree(names); + kfree(callbacks); + kfree(vqs); + return 0; + +err_find: + kfree(names); +err_names: + kfree(callbacks); +err_callback: + kfree(vqs); +err_vq: + return ret; +} + +static int virtnet_alloc_queues(struct virtnet_info *vi) +{ + int i; + + vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL); + if (!vi->sq) + goto err_sq; + vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL); + if (!vi->sq) + goto err_rq; + + INIT_DELAYED_WORK(&vi->refill, refill_work); + for (i = 0; i < vi->max_queue_pairs; i++) { + vi->rq[i].pages = NULL; + netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, + napi_weight); + + sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); + sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); + } + + return 0; + +err_rq: + kfree(vi->sq); +err_sq: + return -ENOMEM; +} + +static int init_vqs(struct virtnet_info *vi) +{ + int ret; + + /* Allocate send & receive queues */ + ret = virtnet_alloc_queues(vi); + if (ret) + goto err; + + ret = virtnet_find_vqs(vi); + if (ret) + goto err_free; + + virtnet_set_affinity(vi, true); + return 0; + +err_free: + virtnet_free_queues(vi); +err: + return ret; } static int virtnet_probe(struct virtio_device *vdev) { - int err; + int i, err; struct net_device *dev; struct virtnet_info *vi; + u16 max_queue_pairs; + + /* Find if host supports multiqueue virtio_net device */ + err = virtio_config_val(vdev, VIRTIO_NET_F_MQ, + offsetof(struct virtio_net_config, + max_virtqueue_pairs), &max_queue_pairs); + + /* We need at least 2 queue's */ + if (err || 
max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || + max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || + !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) + max_queue_pairs = 1; /* Allocate ourselves a network device with room for our info */ - dev = alloc_etherdev(sizeof(struct virtnet_info)); + dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); if (!dev) return -ENOMEM; @@ -1133,22 +1412,17 @@ static int virtnet_probe(struct virtio_device *vdev) /* Set up our device-specific information */ vi = netdev_priv(dev); - netif_napi_add(dev, &vi->rq.napi, virtnet_poll, napi_weight); vi->dev = dev; vi->vdev = vdev; vdev->priv = vi; - vi->rq.pages = NULL; vi->stats = alloc_percpu(struct virtnet_stats); err = -ENOMEM; if (vi->stats == NULL) goto free; - INIT_DELAYED_WORK(&vi->refill, refill_work); mutex_init(&vi->config_lock); vi->config_enable = true; INIT_WORK(&vi->config_work, virtnet_config_changed_work); - sg_init_table(vi->rq.sg, ARRAY_SIZE(vi->rq.sg)); - sg_init_table(vi->sq.sg, ARRAY_SIZE(vi->sq.sg)); /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || @@ -1159,10 +1433,21 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; + if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) + vi->has_cvq = true; + + /* Use single tx/rx queue pair as default */ + vi->curr_queue_pairs = 1; + vi->max_queue_pairs = max_queue_pairs; + + /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ err = init_vqs(vi); if (err) goto free_stats; + netif_set_real_num_tx_queues(dev, 1); + netif_set_real_num_rx_queues(dev, 1); + err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); @@ -1170,12 +1455,15 @@ static int virtnet_probe(struct virtio_device *vdev) } /* Last of all, set up some receive buffers. 
*/ - try_fill_recv(&vi->rq, GFP_KERNEL); - - /* If we didn't even get one input buffer, we're useless. */ - if (vi->rq.num == 0) { - err = -ENOMEM; - goto unregister; + for (i = 0; i < vi->max_queue_pairs; i++) { + try_fill_recv(&vi->rq[i], GFP_KERNEL); + + /* If we didn't even get one input buffer, we're useless. */ + if (vi->rq[i].num == 0) { + free_unused_bufs(vi); + err = -ENOMEM; + goto free_recv_bufs; + } } /* Assume link up if device can't report link status, @@ -1188,12 +1476,16 @@ static int virtnet_probe(struct virtio_device *vdev) netif_carrier_on(dev); } - pr_debug("virtnet: registered device %s\n", dev->name); + pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", + dev->name, max_queue_pairs); + return 0; -unregister: +free_recv_bufs: + free_receive_bufs(vi); unregister_netdev(dev); free_vqs: + cancel_delayed_work_sync(&vi->refill); virtnet_del_vqs(vi); free_stats: free_percpu(vi->stats); @@ -1202,28 +1494,6 @@ free: return err; } -static void free_unused_bufs(struct virtnet_info *vi) -{ - void *buf; - while (1) { - buf = virtqueue_detach_unused_buf(vi->sq.vq); - if (!buf) - break; - dev_kfree_skb(buf); - } - while (1) { - buf = virtqueue_detach_unused_buf(vi->rq.vq); - if (!buf) - break; - if (vi->mergeable_rx_bufs || vi->big_packets) - give_pages(&vi->rq, buf); - else - dev_kfree_skb(buf); - --vi->rq.num; - } - BUG_ON(vi->rq.num != 0); -} - static void remove_vq_common(struct virtnet_info *vi) { vi->vdev->config->reset(vi->vdev); @@ -1231,10 +1501,9 @@ static void remove_vq_common(struct virtnet_info *vi) /* Free unused buffers in both send and recv, if any. 
*/ free_unused_bufs(vi); - virtnet_del_vqs(vi); + free_receive_bufs(vi); - while (vi->rq.pages) - __free_pages(get_a_page(&vi->rq, GFP_KERNEL), 0); + virtnet_del_vqs(vi); } static void virtnet_remove(struct virtio_device *vdev) @@ -1260,6 +1529,7 @@ static void virtnet_remove(struct virtio_device *vdev) static int virtnet_freeze(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; + int i; /* Prevent config work handler from accessing the device */ mutex_lock(&vi->config_lock); @@ -1270,7 +1540,10 @@ static int virtnet_freeze(struct virtio_device *vdev) cancel_delayed_work_sync(&vi->refill); if (netif_running(vi->dev)) - napi_disable(&vi->rq.napi); + for (i = 0; i < vi->max_queue_pairs; i++) { + napi_disable(&vi->rq[i].napi); + netif_napi_del(&vi->rq[i].napi); + } remove_vq_common(vi); @@ -1282,24 +1555,28 @@ static int virtnet_freeze(struct virtio_device *vdev) static int virtnet_restore(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - int err; + int err, i; err = init_vqs(vi); if (err) return err; if (netif_running(vi->dev)) - virtnet_napi_enable(&vi->rq); + for (i = 0; i < vi->max_queue_pairs; i++) + virtnet_napi_enable(&vi->rq[i]); netif_device_attach(vi->dev); - if (!try_fill_recv(&vi->rq, GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); + for (i = 0; i < vi->max_queue_pairs; i++) + if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) + schedule_delayed_work(&vi->refill, 0); mutex_lock(&vi->config_lock); vi->config_enable = true; mutex_unlock(&vi->config_lock); + virtnet_set_queues(vi, vi->curr_queue_pairs); + return 0; } #endif @@ -1317,7 +1594,7 @@ static unsigned int features[] = { VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_NET_F_GUEST_ANNOUNCE, + VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, }; static struct virtio_driver virtio_net_driver = { diff --git a/include/uapi/linux/virtio_net.h 
b/include/uapi/linux/virtio_net.h index 2470f541af50..848e3584d7c8 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -51,6 +51,8 @@ #define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */ #define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Guest can announce device on the * network */ +#define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow + * Steering */ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ #define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ @@ -60,6 +62,11 @@ struct virtio_net_config { __u8 mac[6]; /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ __u16 status; + /* Maximum number of each of transmit and receive queues; + * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ. + * Legal values are between 1 and 0x8000 + */ + __u16 max_virtqueue_pairs; } __attribute__((packed)); /* This is the first element of the scatter-gather list. If you don't @@ -166,4 +173,24 @@ struct virtio_net_ctrl_mac { #define VIRTIO_NET_CTRL_ANNOUNCE 3 #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 +/* + * Control Receive Flow Steering + * + * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET + * enables Receive Flow Steering, specifying the number of the transmit and + * receive queues that will be used. After the command is consumed and acked by + * the device, the device will not steer new packets on receive virtqueues + * other than specified nor read from transmit virtqueues other than specified. + * Accordingly, driver should not transmit new packets on virtqueues other than + * specified. 
+ */ +struct virtio_net_ctrl_mq { + u16 virtqueue_pairs; +}; + +#define VIRTIO_NET_CTRL_MQ 4 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 + #endif /* _LINUX_VIRTIO_NET_H */ -- cgit v1.2.3 From 39a53e0ce0df01b3cf4bb898c7ae2fd2189647d5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 28 Nov 2012 13:37:31 +0900 Subject: f2fs: add superblock and major in-memory structure This adds the following major in-memory structures in f2fs. - f2fs_sb_info: contains f2fs-specific information, two special inode pointers for node and meta address spaces, and orphan inode management. - f2fs_inode_info: contains vfs_inode and other fs-specific information. - f2fs_nm_info: contains node manager information such as NAT entry cache, free nid list, and NAT page management. - f2fs_node_info: represents a node as node id, inode number, block address, and its version. - f2fs_sm_info: contains segment manager information such as SIT entry cache, free segment map, current active logs, dirty segment management, and segment utilization. The specific structures are sit_info, free_segmap_info, dirty_seglist_info, curseg_info. In addition, add F2FS_SUPER_MAGIC in magic.h. Signed-off-by: Chul Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1062 ++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/node.h | 353 +++++++++++++++ fs/f2fs/segment.h | 615 +++++++++++++++++++++++++ include/uapi/linux/magic.h | 1 + 4 files changed, 2031 insertions(+) create mode 100644 fs/f2fs/f2fs.h create mode 100644 fs/f2fs/node.h create mode 100644 fs/f2fs/segment.h (limited to 'include/uapi/linux') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h new file mode 100644 index 000000000000..7aa70b54172d --- /dev/null +++ b/fs/f2fs/f2fs.h @@ -0,0 +1,1062 @@ +/** + * fs/f2fs/f2fs.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. 
+ * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _LINUX_F2FS_H +#define _LINUX_F2FS_H + +#include +#include +#include +#include +#include +#include +#include + +/* + * For mount options + */ +#define F2FS_MOUNT_BG_GC 0x00000001 +#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 +#define F2FS_MOUNT_DISCARD 0x00000004 +#define F2FS_MOUNT_NOHEAP 0x00000008 +#define F2FS_MOUNT_XATTR_USER 0x00000010 +#define F2FS_MOUNT_POSIX_ACL 0x00000020 +#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 + +#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option) + +#define ver_after(a, b) (typecheck(unsigned long long, a) && \ + typecheck(unsigned long long, b) && \ + ((long long)((a) - (b)) > 0)) + +typedef u64 block_t; +typedef u32 nid_t; + +struct f2fs_mount_info { + unsigned int opt; +}; + +static inline __u32 f2fs_crc32(void *buff, size_t len) +{ + return crc32_le(F2FS_SUPER_MAGIC, buff, len); +} + +static inline bool f2fs_crc_valid(__u32 blk_crc, void *buff, size_t buff_size) +{ + return f2fs_crc32(buff, buff_size) == blk_crc; +} + +/* + * For checkpoint manager + */ +enum { + NAT_BITMAP, + SIT_BITMAP +}; + +/* for the list of orphan inodes */ +struct orphan_inode_entry { + struct list_head list; /* list head */ + nid_t ino; /* inode number */ +}; + +/* for the list of directory inodes */ +struct dir_inode_entry { + struct list_head list; /* list head */ + struct inode *inode; /* vfs inode pointer */ +}; + +/* for the list of fsync inodes, used only during recovery */ +struct fsync_inode_entry { + struct list_head list; /* list head */ + struct inode *inode; /* vfs inode pointer */ + block_t blkaddr; /* block address locating 
the last inode */ +}; + +#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) +#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits)) + +#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne) +#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid) +#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) +#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) + +static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) +{ + int before = nats_in_cursum(rs); + rs->n_nats = cpu_to_le16(before + i); + return before; +} + +static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) +{ + int before = sits_in_cursum(rs); + rs->n_sits = cpu_to_le16(before + i); + return before; +} + +/* + * For INODE and NODE manager + */ +#define XATTR_NODE_OFFSET (-1) /* + * store xattrs to one node block per + * file keeping -1 as its node offset to + * distinguish from index node blocks. + */ +#define RDONLY_NODE 1 /* + * specify a read-only mode when getting + * a node block. 0 is read-write mode. + * used by get_dnode_of_data(). + */ +#define F2FS_LINK_MAX 32000 /* maximum link count per file */ + +/* for in-memory extent cache entry */ +struct extent_info { + rwlock_t ext_lock; /* rwlock for consistency */ + unsigned int fofs; /* start offset in a file */ + u32 blk_addr; /* start block address of the extent */ + unsigned int len; /* length of the extent */ +}; + +/* + * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. 
+ */ +#define FADVISE_COLD_BIT 0x01 + +struct f2fs_inode_info { + struct inode vfs_inode; /* serve a vfs inode */ + unsigned long i_flags; /* keep an inode flags for ioctl */ + unsigned char i_advise; /* use to give file attribute hints */ + unsigned int i_current_depth; /* use only in directory structure */ + umode_t i_acl_mode; /* keep file acl mode temporarily */ + + /* Use below internally in f2fs*/ + unsigned long flags; /* use to pass per-file flags */ + unsigned long long data_version;/* latest version of data for fsync */ + atomic_t dirty_dents; /* # of dirty dentry pages */ + f2fs_hash_t chash; /* hash value of given file name */ + unsigned int clevel; /* maximum level of given file name */ + nid_t i_xattr_nid; /* node id that contains xattrs */ + struct extent_info ext; /* in-memory extent cache entry */ +}; + +static inline void get_extent_info(struct extent_info *ext, + struct f2fs_extent i_ext) +{ + write_lock(&ext->ext_lock); + ext->fofs = le32_to_cpu(i_ext.fofs); + ext->blk_addr = le32_to_cpu(i_ext.blk_addr); + ext->len = le32_to_cpu(i_ext.len); + write_unlock(&ext->ext_lock); +} + +static inline void set_raw_extent(struct extent_info *ext, + struct f2fs_extent *i_ext) +{ + read_lock(&ext->ext_lock); + i_ext->fofs = cpu_to_le32(ext->fofs); + i_ext->blk_addr = cpu_to_le32(ext->blk_addr); + i_ext->len = cpu_to_le32(ext->len); + read_unlock(&ext->ext_lock); +} + +struct f2fs_nm_info { + block_t nat_blkaddr; /* base disk address of NAT */ + nid_t max_nid; /* maximum possible node ids */ + nid_t init_scan_nid; /* the first nid to be scanned */ + nid_t next_scan_nid; /* the next nid to be scanned */ + + /* NAT cache management */ + struct radix_tree_root nat_root;/* root of the nat entry cache */ + rwlock_t nat_tree_lock; /* protect nat_tree_lock */ + unsigned int nat_cnt; /* the # of cached nat entries */ + struct list_head nat_entries; /* cached nat entry list (clean) */ + struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ + + /* 
free node ids management */ + struct list_head free_nid_list; /* a list for free nids */ + spinlock_t free_nid_list_lock; /* protect free nid list */ + unsigned int fcnt; /* the number of free node id */ + struct mutex build_lock; /* lock for build free nids */ + + /* for checkpoint */ + char *nat_bitmap; /* NAT bitmap pointer */ + int bitmap_size; /* bitmap size */ +}; + +/* + * this structure is used as one of function parameters. + * all the information are dedicated to a given direct node block determined + * by the data offset in a file. + */ +struct dnode_of_data { + struct inode *inode; /* vfs inode pointer */ + struct page *inode_page; /* its inode page, NULL is possible */ + struct page *node_page; /* cached direct node page */ + nid_t nid; /* node id of the direct node block */ + unsigned int ofs_in_node; /* data offset in the node page */ + bool inode_page_locked; /* inode page is locked or not */ + block_t data_blkaddr; /* block address of the node block */ +}; + +static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, + struct page *ipage, struct page *npage, nid_t nid) +{ + dn->inode = inode; + dn->inode_page = ipage; + dn->node_page = npage; + dn->nid = nid; + dn->inode_page_locked = 0; +} + +/* + * For SIT manager + * + * By default, there are 6 active log areas across the whole main area. + * When considering hot and cold data separation to reduce cleaning overhead, + * we split 3 for data logs and 3 for node logs as hot, warm, and cold types, + * respectively. + * In the current design, you should not change the numbers intentionally. + * Instead, as a mount option such as active_logs=x, you can use 2, 4, and 6 + * logs individually according to the underlying devices. (default: 6) + * Just in case, on-disk layout covers maximum 16 logs that consist of 8 for + * data and 8 for node logs. 
+ */ +#define NR_CURSEG_DATA_TYPE (3) +#define NR_CURSEG_NODE_TYPE (3) +#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) + +enum { + CURSEG_HOT_DATA = 0, /* directory entry blocks */ + CURSEG_WARM_DATA, /* data blocks */ + CURSEG_COLD_DATA, /* multimedia or GCed data blocks */ + CURSEG_HOT_NODE, /* direct node blocks of directory files */ + CURSEG_WARM_NODE, /* direct node blocks of normal files */ + CURSEG_COLD_NODE, /* indirect node blocks */ + NO_CHECK_TYPE +}; + +struct f2fs_sm_info { + struct sit_info *sit_info; /* whole segment information */ + struct free_segmap_info *free_info; /* free segment information */ + struct dirty_seglist_info *dirty_info; /* dirty segment information */ + struct curseg_info *curseg_array; /* active segment information */ + + struct list_head wblist_head; /* list of under-writeback pages */ + spinlock_t wblist_lock; /* lock for checkpoint */ + + block_t seg0_blkaddr; /* block address of 0'th segment */ + block_t main_blkaddr; /* start block address of main area */ + block_t ssa_blkaddr; /* start block address of SSA area */ + + unsigned int segment_count; /* total # of segments */ + unsigned int main_segments; /* # of segments in main area */ + unsigned int reserved_segments; /* # of reserved segments */ + unsigned int ovp_segments; /* # of overprovision segments */ +}; + +/* + * For directory operation + */ +#define NODE_DIR1_BLOCK (ADDRS_PER_INODE + 1) +#define NODE_DIR2_BLOCK (ADDRS_PER_INODE + 2) +#define NODE_IND1_BLOCK (ADDRS_PER_INODE + 3) +#define NODE_IND2_BLOCK (ADDRS_PER_INODE + 4) +#define NODE_DIND_BLOCK (ADDRS_PER_INODE + 5) + +/* + * For superblock + */ +/* + * COUNT_TYPE for monitoring + * + * f2fs monitors the number of several block types such as on-writeback, + * dirty dentry blocks, dirty node blocks, and dirty meta blocks. 
+ */ +enum count_type { + F2FS_WRITEBACK, + F2FS_DIRTY_DENTS, + F2FS_DIRTY_NODES, + F2FS_DIRTY_META, + NR_COUNT_TYPE, +}; + +/* + * FS_LOCK nesting subclasses for the lock validator: + * + * The locking order between these classes is + * RENAME -> DENTRY_OPS -> DATA_WRITE -> DATA_NEW + * -> DATA_TRUNC -> NODE_WRITE -> NODE_NEW -> NODE_TRUNC + */ +enum lock_type { + RENAME, /* for renaming operations */ + DENTRY_OPS, /* for directory operations */ + DATA_WRITE, /* for data write */ + DATA_NEW, /* for data allocation */ + DATA_TRUNC, /* for data truncate */ + NODE_NEW, /* for node allocation */ + NODE_TRUNC, /* for node truncate */ + NODE_WRITE, /* for node write */ + NR_LOCK_TYPE, +}; + +/* + * The below are the page types of bios used in submit_bio(). + * The available types are: + * DATA User data pages. It operates as async mode. + * NODE Node pages. It operates as async mode. + * META FS metadata pages such as SIT, NAT, CP. + * NR_PAGE_TYPE The number of page types. + * META_FLUSH Make sure the previous pages are written + * with waiting the bio's completion + * ... Only can be used with META. 
+ */ +enum page_type { + DATA, + NODE, + META, + NR_PAGE_TYPE, + META_FLUSH, +}; + +struct f2fs_sb_info { + struct super_block *sb; /* pointer to VFS super block */ + struct buffer_head *raw_super_buf; /* buffer head of raw sb */ + struct f2fs_super_block *raw_super; /* raw super block pointer */ + int s_dirty; /* dirty flag for checkpoint */ + + /* for node-related operations */ + struct f2fs_nm_info *nm_info; /* node manager */ + struct inode *node_inode; /* cache node blocks */ + + /* for segment-related operations */ + struct f2fs_sm_info *sm_info; /* segment manager */ + struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */ + sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */ + struct rw_semaphore bio_sem; /* IO semaphore */ + + /* for checkpoint */ + struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ + struct inode *meta_inode; /* cache meta blocks */ + struct mutex cp_mutex; /* for checkpoint procedure */ + struct mutex fs_lock[NR_LOCK_TYPE]; /* for blocking FS operations */ + struct mutex write_inode; /* mutex for write inode */ + struct mutex writepages; /* mutex for writepages() */ + int por_doing; /* recovery is doing or not */ + + /* for orphan inode management */ + struct list_head orphan_inode_list; /* orphan inode list */ + struct mutex orphan_inode_mutex; /* for orphan inode list */ + unsigned int n_orphans; /* # of orphan inodes */ + + /* for directory inode management */ + struct list_head dir_inode_list; /* dir inode list */ + spinlock_t dir_inode_lock; /* for dir inode list lock */ + unsigned int n_dirty_dirs; /* # of dir inodes */ + + /* basic file system units */ + unsigned int log_sectors_per_block; /* log2 sectors per block */ + unsigned int log_blocksize; /* log2 block size */ + unsigned int blocksize; /* block size */ + unsigned int root_ino_num; /* root inode number*/ + unsigned int node_ino_num; /* node inode number*/ + unsigned int meta_ino_num; /* meta inode number*/ + unsigned int log_blocks_per_seg; /* log2 blocks 
per segment */ + unsigned int blocks_per_seg; /* blocks per segment */ + unsigned int segs_per_sec; /* segments per section */ + unsigned int secs_per_zone; /* sections per zone */ + unsigned int total_sections; /* total section count */ + unsigned int total_node_count; /* total node block count */ + unsigned int total_valid_node_count; /* valid node block count */ + unsigned int total_valid_inode_count; /* valid inode count */ + int active_logs; /* # of active logs */ + + block_t user_block_count; /* # of user blocks */ + block_t total_valid_block_count; /* # of valid blocks */ + block_t alloc_valid_block_count; /* # of allocated blocks */ + block_t last_valid_block_count; /* for recovery */ + u32 s_next_generation; /* for NFS support */ + atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ + + struct f2fs_mount_info mount_opt; /* mount options */ + + /* for cleaning operations */ + struct mutex gc_mutex; /* mutex for GC */ + struct f2fs_gc_kthread *gc_thread; /* GC thread */ + + /* + * for stat information. + * one is for the LFS mode, and the other is for the SSR mode. 
+ */ + struct f2fs_stat_info *stat_info; /* FS status information */ + unsigned int segment_count[2]; /* # of allocated segments */ + unsigned int block_count[2]; /* # of allocated blocks */ + unsigned int last_victim[2]; /* last victim segment # */ + int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ + int bg_gc; /* background gc calls */ + spinlock_t stat_lock; /* lock for stat operations */ +}; + +/* + * Inline functions + */ +static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) +{ + return container_of(inode, struct f2fs_inode_info, vfs_inode); +} + +static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_super_block *)(sbi->raw_super); +} + +static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_checkpoint *)(sbi->ckpt); +} + +static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_nm_info *)(sbi->nm_info); +} + +static inline struct f2fs_sm_info *SM_I(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_sm_info *)(sbi->sm_info); +} + +static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi) +{ + return (struct sit_info *)(SM_I(sbi)->sit_info); +} + +static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi) +{ + return (struct free_segmap_info *)(SM_I(sbi)->free_info); +} + +static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi) +{ + return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info); +} + +static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) +{ + sbi->s_dirty = 1; +} + +static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) +{ + sbi->s_dirty = 0; +} + +static inline void mutex_lock_op(struct f2fs_sb_info *sbi, enum lock_type t) +{ + mutex_lock_nested(&sbi->fs_lock[t], t); +} + +static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, enum 
lock_type t) +{ + mutex_unlock(&sbi->fs_lock[t]); +} + +/* + * Check whether the given nid is within node id range. + */ +static inline void check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) +{ + BUG_ON((nid >= NM_I(sbi)->max_nid)); +} + +#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 + +/* + * Check whether the inode has blocks or not + */ +static inline int F2FS_HAS_BLOCKS(struct inode *inode) +{ + if (F2FS_I(inode)->i_xattr_nid) + return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1); + else + return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS); +} + +static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, blkcnt_t count) +{ + block_t valid_block_count; + + spin_lock(&sbi->stat_lock); + valid_block_count = + sbi->total_valid_block_count + (block_t)count; + if (valid_block_count > sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + inode->i_blocks += count; + sbi->total_valid_block_count = valid_block_count; + sbi->alloc_valid_block_count += (block_t)count; + spin_unlock(&sbi->stat_lock); + return true; +} + +static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, + blkcnt_t count) +{ + spin_lock(&sbi->stat_lock); + BUG_ON(sbi->total_valid_block_count < (block_t) count); + BUG_ON(inode->i_blocks < count); + inode->i_blocks -= count; + sbi->total_valid_block_count -= (block_t)count; + spin_unlock(&sbi->stat_lock); + return 0; +} + +static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) +{ + atomic_inc(&sbi->nr_pages[count_type]); + F2FS_SET_SB_DIRT(sbi); +} + +static inline void inode_inc_dirty_dents(struct inode *inode) +{ + atomic_inc(&F2FS_I(inode)->dirty_dents); +} + +static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) +{ + atomic_dec(&sbi->nr_pages[count_type]); +} + +static inline void inode_dec_dirty_dents(struct inode *inode) +{ + atomic_dec(&F2FS_I(inode)->dirty_dents); +} + +static inline int get_pages(struct 
f2fs_sb_info *sbi, int count_type) +{ + return atomic_read(&sbi->nr_pages[count_type]); +} + +static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) +{ + block_t ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_block_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + + /* return NAT or SIT bitmap */ + if (flag == NAT_BITMAP) + return le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); + else if (flag == SIT_BITMAP) + return le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); + + return 0; +} + +static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + int offset = (flag == NAT_BITMAP) ? ckpt->sit_ver_bitmap_bytesize : 0; + return &ckpt->sit_nat_version_bitmap + offset; +} + +static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) +{ + block_t start_addr; + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver); + + start_addr = le64_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); + + /* + * odd numbered checkpoint should be at cp segment 0 + * and even numbered checkpoint must be at cp segment 1 + */ + if (!(ckpt_version & 1)) + start_addr += sbi->blocks_per_seg; + + return start_addr; +} + +static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) +{ + return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); +} + +static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, + unsigned int count) +{ + block_t valid_block_count; + unsigned int valid_node_count; + + spin_lock(&sbi->stat_lock); + + valid_block_count = sbi->total_valid_block_count + (block_t)count; + sbi->alloc_valid_block_count += (block_t)count; + valid_node_count = sbi->total_valid_node_count + count; + + if (valid_block_count > sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + + if 
(valid_node_count > sbi->total_node_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + + if (inode) + inode->i_blocks += count; + sbi->total_valid_node_count = valid_node_count; + sbi->total_valid_block_count = valid_block_count; + spin_unlock(&sbi->stat_lock); + + return true; +} + +static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, + unsigned int count) +{ + spin_lock(&sbi->stat_lock); + + BUG_ON(sbi->total_valid_block_count < count); + BUG_ON(sbi->total_valid_node_count < count); + BUG_ON(inode->i_blocks < count); + + inode->i_blocks -= count; + sbi->total_valid_node_count -= count; + sbi->total_valid_block_count -= (block_t)count; + + spin_unlock(&sbi->stat_lock); +} + +static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) +{ + unsigned int ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_node_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) +{ + spin_lock(&sbi->stat_lock); + BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count); + sbi->total_valid_inode_count++; + spin_unlock(&sbi->stat_lock); +} + +static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) +{ + spin_lock(&sbi->stat_lock); + BUG_ON(!sbi->total_valid_inode_count); + sbi->total_valid_inode_count--; + spin_unlock(&sbi->stat_lock); + return 0; +} + +static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) +{ + unsigned int ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_inode_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline void f2fs_put_page(struct page *page, int unlock) +{ + if (!page || IS_ERR(page)) + return; + + if (unlock) { + BUG_ON(!PageLocked(page)); + unlock_page(page); + } + page_cache_release(page); +} + +static inline void f2fs_put_dnode(struct dnode_of_data *dn) +{ + if (dn->node_page) + f2fs_put_page(dn->node_page, 1); + if (dn->inode_page && dn->node_page != 
dn->inode_page) + f2fs_put_page(dn->inode_page, 0); + dn->node_page = NULL; + dn->inode_page = NULL; +} + +static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, + size_t size, void (*ctor)(void *)) +{ + return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); +} + +#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) + +static inline bool IS_INODE(struct page *page) +{ + struct f2fs_node *p = (struct f2fs_node *)page_address(page); + return RAW_IS_INODE(p); +} + +static inline __le32 *blkaddr_in_node(struct f2fs_node *node) +{ + return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr; +} + +static inline block_t datablock_addr(struct page *node_page, + unsigned int offset) +{ + struct f2fs_node *raw_node; + __le32 *addr_array; + raw_node = (struct f2fs_node *)page_address(node_page); + addr_array = blkaddr_in_node(raw_node); + return le32_to_cpu(addr_array[offset]); +} + +static inline int f2fs_test_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + return mask & *addr; +} + +static inline int f2fs_set_bit(unsigned int nr, char *addr) +{ + int mask; + int ret; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + ret = mask & *addr; + *addr |= mask; + return ret; +} + +static inline int f2fs_clear_bit(unsigned int nr, char *addr) +{ + int mask; + int ret; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + ret = mask & *addr; + *addr &= ~mask; + return ret; +} + +/* used for f2fs_inode_info->flags */ +enum { + FI_NEW_INODE, /* indicate newly allocated inode */ + FI_NEED_CP, /* need to do checkpoint during fsync */ + FI_INC_LINK, /* need to increment i_nlink */ + FI_ACL_MODE, /* indicate acl mode */ + FI_NO_ALLOC, /* should not allocate any blocks */ +}; + +static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + set_bit(flag, &fi->flags); +} + +static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) +{ + return 
test_bit(flag, &fi->flags); +} + +static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + clear_bit(flag, &fi->flags); +} + +static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) +{ + fi->i_acl_mode = mode; + set_inode_flag(fi, FI_ACL_MODE); +} + +static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + if (is_inode_flag_set(fi, FI_ACL_MODE)) { + clear_inode_flag(fi, FI_ACL_MODE); + return 1; + } + return 0; +} + +/* + * file.c + */ +int f2fs_sync_file(struct file *, loff_t, loff_t, int); +void truncate_data_blocks(struct dnode_of_data *); +void f2fs_truncate(struct inode *); +int f2fs_setattr(struct dentry *, struct iattr *); +int truncate_hole(struct inode *, pgoff_t, pgoff_t); +long f2fs_ioctl(struct file *, unsigned int, unsigned long); + +/* + * inode.c + */ +void f2fs_set_inode_flags(struct inode *); +struct inode *f2fs_iget_nowait(struct super_block *, unsigned long); +struct inode *f2fs_iget(struct super_block *, unsigned long); +void update_inode(struct inode *, struct page *); +int f2fs_write_inode(struct inode *, struct writeback_control *); +void f2fs_evict_inode(struct inode *); + +/* + * namei.c + */ +struct dentry *f2fs_get_parent(struct dentry *child); + +/* + * dir.c + */ +struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, + struct page **); +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); +ino_t f2fs_inode_by_name(struct inode *, struct qstr *); +void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, + struct page *, struct inode *); +void init_dent_inode(struct dentry *, struct page *); +int f2fs_add_link(struct dentry *, struct inode *); +void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +int f2fs_make_empty(struct inode *, struct inode *); +bool f2fs_empty_dir(struct inode *); + +/* + * super.c + */ +int f2fs_sync_fs(struct super_block *, int); + +/* + * hash.c + */ +f2fs_hash_t 
f2fs_dentry_hash(const char *, int); + +/* + * node.c + */ +struct dnode_of_data; +struct node_info; + +int is_checkpointed_node(struct f2fs_sb_info *, nid_t); +void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); +int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); +int truncate_inode_blocks(struct inode *, pgoff_t); +int remove_inode_page(struct inode *); +int new_inode_page(struct inode *, struct dentry *); +struct page *new_node_page(struct dnode_of_data *, unsigned int); +void ra_node_page(struct f2fs_sb_info *, nid_t); +struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); +struct page *get_node_page_ra(struct page *, int); +void sync_inode_page(struct dnode_of_data *); +int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); +bool alloc_nid(struct f2fs_sb_info *, nid_t *); +void alloc_nid_done(struct f2fs_sb_info *, nid_t); +void alloc_nid_failed(struct f2fs_sb_info *, nid_t); +void recover_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, struct node_info *, block_t); +int recover_inode_page(struct f2fs_sb_info *, struct page *); +int restore_node_summary(struct f2fs_sb_info *, unsigned int, + struct f2fs_summary_block *); +void flush_nat_entries(struct f2fs_sb_info *); +int build_node_manager(struct f2fs_sb_info *); +void destroy_node_manager(struct f2fs_sb_info *); +int create_node_manager_caches(void); +void destroy_node_manager_caches(void); + +/* + * segment.c + */ +void f2fs_balance_fs(struct f2fs_sb_info *); +void invalidate_blocks(struct f2fs_sb_info *, block_t); +void locate_dirty_segment(struct f2fs_sb_info *, unsigned int); +void clear_prefree_segments(struct f2fs_sb_info *); +int npages_for_summary_flush(struct f2fs_sb_info *); +void allocate_new_segments(struct f2fs_sb_info *); +struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); +struct bio *f2fs_bio_alloc(struct block_device *, sector_t, int, gfp_t); +void f2fs_submit_bio(struct f2fs_sb_info *, enum 
page_type, bool sync); +int write_meta_page(struct f2fs_sb_info *, struct page *, + struct writeback_control *); +void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, + block_t, block_t *); +void write_data_page(struct inode *, struct page *, struct dnode_of_data*, + block_t, block_t *); +void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); +void recover_data_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, block_t, block_t); +void rewrite_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, block_t, block_t); +void write_data_summaries(struct f2fs_sb_info *, block_t); +void write_node_summaries(struct f2fs_sb_info *, block_t); +int lookup_journal_in_cursum(struct f2fs_summary_block *, + int, unsigned int, int); +void flush_sit_entries(struct f2fs_sb_info *); +int build_segment_manager(struct f2fs_sb_info *); +void reset_victim_segmap(struct f2fs_sb_info *); +void destroy_segment_manager(struct f2fs_sb_info *); + +/* + * checkpoint.c + */ +struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); +struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); +long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); +int check_orphan_space(struct f2fs_sb_info *); +void add_orphan_inode(struct f2fs_sb_info *, nid_t); +void remove_orphan_inode(struct f2fs_sb_info *, nid_t); +int recover_orphan_inodes(struct f2fs_sb_info *); +int get_valid_checkpoint(struct f2fs_sb_info *); +void set_dirty_dir_page(struct inode *, struct page *); +void remove_dirty_dir_inode(struct inode *); +void sync_dirty_dir_inodes(struct f2fs_sb_info *); +void block_operations(struct f2fs_sb_info *); +void write_checkpoint(struct f2fs_sb_info *, bool, bool); +void init_orphan_info(struct f2fs_sb_info *); +int create_checkpoint_caches(void); +void destroy_checkpoint_caches(void); + +/* + * data.c + */ +int reserve_new_block(struct dnode_of_data *); +void update_extent_cache(block_t, struct dnode_of_data *); 
+struct page *find_data_page(struct inode *, pgoff_t); +struct page *get_lock_data_page(struct inode *, pgoff_t); +struct page *get_new_data_page(struct inode *, pgoff_t, bool); +int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); +int do_write_data_page(struct page *); + +/* + * gc.c + */ +int start_gc_thread(struct f2fs_sb_info *); +void stop_gc_thread(struct f2fs_sb_info *); +block_t start_bidx_of_node(unsigned int); +int f2fs_gc(struct f2fs_sb_info *, int); +void build_gc_manager(struct f2fs_sb_info *); +int create_gc_caches(void); +void destroy_gc_caches(void); + +/* + * recovery.c + */ +void recover_fsync_data(struct f2fs_sb_info *); +bool space_for_roll_forward(struct f2fs_sb_info *); + +/* + * debug.c + */ +#ifdef CONFIG_F2FS_STAT_FS +struct f2fs_stat_info { + struct list_head stat_list; + struct f2fs_sb_info *sbi; + struct mutex stat_lock; + int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; + int main_area_segs, main_area_sections, main_area_zones; + int hit_ext, total_ext; + int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; + int nats, sits, fnids; + int total_count, utilization; + int bg_gc; + unsigned int valid_count, valid_node_count, valid_inode_count; + unsigned int bimodal, avg_vblocks; + int util_free, util_valid, util_invalid; + int rsvd_segs, overp_segs; + int dirty_count, node_pages, meta_pages; + int prefree_count, call_count; + int tot_segs, node_segs, data_segs, free_segs, free_secs; + int tot_blks, data_blks, node_blks; + int curseg[NR_CURSEG_TYPE]; + int cursec[NR_CURSEG_TYPE]; + int curzone[NR_CURSEG_TYPE]; + + unsigned int segment_count[2]; + unsigned int block_count[2]; + unsigned base_mem, cache_mem; +}; + +#define stat_inc_call_count(si) ((si)->call_count++) + +#define stat_inc_seg_count(sbi, type) \ + do { \ + struct f2fs_stat_info *si = sbi->stat_info; \ + (si)->tot_segs++; \ + if (type == SUM_TYPE_DATA) \ + si->data_segs++; \ + else \ + si->node_segs++; \ + } while (0) + +#define 
stat_inc_tot_blk_count(si, blks) \ + (si->tot_blks += (blks)) + +#define stat_inc_data_blk_count(sbi, blks) \ + do { \ + struct f2fs_stat_info *si = sbi->stat_info; \ + stat_inc_tot_blk_count(si, blks); \ + si->data_blks += (blks); \ + } while (0) + +#define stat_inc_node_blk_count(sbi, blks) \ + do { \ + struct f2fs_stat_info *si = sbi->stat_info; \ + stat_inc_tot_blk_count(si, blks); \ + si->node_blks += (blks); \ + } while (0) + +int f2fs_build_stats(struct f2fs_sb_info *); +void f2fs_destroy_stats(struct f2fs_sb_info *); +void destroy_root_stats(void); +#else +#define stat_inc_call_count(si) +#define stat_inc_seg_count(si, type) +#define stat_inc_tot_blk_count(si, blks) +#define stat_inc_data_blk_count(si, blks) +#define stat_inc_node_blk_count(sbi, blks) + +static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } +static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } +static inline void destroy_root_stats(void) { } +#endif + +extern const struct file_operations f2fs_dir_operations; +extern const struct file_operations f2fs_file_operations; +extern const struct inode_operations f2fs_file_inode_operations; +extern const struct address_space_operations f2fs_dblock_aops; +extern const struct address_space_operations f2fs_node_aops; +extern const struct address_space_operations f2fs_meta_aops; +extern const struct inode_operations f2fs_dir_inode_operations; +extern const struct inode_operations f2fs_symlink_inode_operations; +extern const struct inode_operations f2fs_special_inode_operations; +#endif diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h new file mode 100644 index 000000000000..5d525ed312ba --- /dev/null +++ b/fs/f2fs/node.h @@ -0,0 +1,353 @@ +/** + * fs/f2fs/node.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. 
+ * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/* start node id of a node block dedicated to the given node id */ +#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) + +/* node block offset on the NAT area dedicated to the given start node id */ +#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) + +/* # of pages to perform readahead before building free nids */ +#define FREE_NID_PAGES 4 + +/* maximum # of free node ids to produce during build_free_nids */ +#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) + +/* maximum readahead size for node during getting data blocks */ +#define MAX_RA_NODE 128 + +/* maximum cached nat entries to manage memory footprint */ +#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) + +/* vector size for gang look-up from nat cache that consists of radix tree */ +#define NATVEC_SIZE 64 + +/* + * For node information + */ +struct node_info { + nid_t nid; /* node id */ + nid_t ino; /* inode number of the node's owner */ + block_t blk_addr; /* block address of the node */ + unsigned char version; /* version of the node */ +}; + +struct nat_entry { + struct list_head list; /* for clean or dirty nat list */ + bool checkpointed; /* whether it is checkpointed or not */ + struct node_info ni; /* in-memory node information */ +}; + +#define nat_get_nid(nat) (nat->ni.nid) +#define nat_set_nid(nat, n) (nat->ni.nid = n) +#define nat_get_blkaddr(nat) (nat->ni.blk_addr) +#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b) +#define nat_get_ino(nat) (nat->ni.ino) +#define nat_set_ino(nat, i) (nat->ni.ino = i) +#define nat_get_version(nat) (nat->ni.version) +#define nat_set_version(nat, v) (nat->ni.version = v) + +#define __set_nat_cache_dirty(nm_i, ne) \ + list_move_tail(&ne->list, &nm_i->dirty_nat_entries); +#define 
__clear_nat_cache_dirty(nm_i, ne) \ + list_move_tail(&ne->list, &nm_i->nat_entries); +#define inc_node_version(version) (++version) + +static inline void node_info_from_raw_nat(struct node_info *ni, + struct f2fs_nat_entry *raw_ne) +{ + ni->ino = le32_to_cpu(raw_ne->ino); + ni->blk_addr = le32_to_cpu(raw_ne->block_addr); + ni->version = raw_ne->version; +} + +/* + * For free nid management + */ +enum nid_state { + NID_NEW, /* newly added to free nid list */ + NID_ALLOC /* it is allocated */ +}; + +struct free_nid { + struct list_head list; /* for free node id list */ + nid_t nid; /* node id */ + int state; /* in use or not: NID_NEW or NID_ALLOC */ +}; + +/* + * Peek at the first entry of the free nid list and store its id in @nid. + * Returns 0 on success, or -1 when fcnt reports that no free nid exists. + * The fcnt test is done with free_nid_list_lock held so the list cannot be + * emptied between the test and the list_entry() read (assuming every + * writer of fcnt and of the list takes the same lock). + */ +static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *fnid; + + spin_lock(&nm_i->free_nid_list_lock); + if (nm_i->fcnt <= 0) { + spin_unlock(&nm_i->free_nid_list_lock); + return -1; + } + fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); + *nid = fnid->nid; + spin_unlock(&nm_i->free_nid_list_lock); + return 0; +} + +/* + * inline functions + */ +static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); +} + +static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + pgoff_t block_off; + pgoff_t block_addr; + int seg_off; + + block_off = NAT_BLOCK_OFFSET(start); + seg_off = block_off >> sbi->log_blocks_per_seg; + + block_addr = (pgoff_t)(nm_i->nat_blkaddr + + (seg_off << sbi->log_blocks_per_seg << 1) + + (block_off & ((1 << sbi->log_blocks_per_seg) - 1))); + + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) + block_addr += sbi->blocks_per_seg; + + return block_addr; +} + +static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, + pgoff_t block_addr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + block_addr -= nm_i->nat_blkaddr; + if ((block_addr >> sbi->log_blocks_per_seg) % 2) + block_addr 
-= sbi->blocks_per_seg; + else + block_addr += sbi->blocks_per_seg; + + return block_addr + nm_i->nat_blkaddr; +} + +static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) +{ + unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); + + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) + f2fs_clear_bit(block_off, nm_i->nat_bitmap); + else + f2fs_set_bit(block_off, nm_i->nat_bitmap); +} + +static inline void fill_node_footer(struct page *page, nid_t nid, + nid_t ino, unsigned int ofs, bool reset) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + if (reset) + memset(rn, 0, sizeof(*rn)); + rn->footer.nid = cpu_to_le32(nid); + rn->footer.ino = cpu_to_le32(ino); + rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); +} + +static inline void copy_node_footer(struct page *dst, struct page *src) +{ + void *src_addr = page_address(src); + void *dst_addr = page_address(dst); + struct f2fs_node *src_rn = (struct f2fs_node *)src_addr; + struct f2fs_node *dst_rn = (struct f2fs_node *)dst_addr; + memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); +} + +/* + * Record the checkpoint version and the next block address in the footer + * of the node block held by @page. + */ +static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + /* NOTE(review): copied unconverted - presumably checkpoint_ver is already little-endian; confirm */ + rn->footer.cp_ver = ckpt->checkpoint_ver; + /* footer fields are stored little-endian; convert from CPU order */ + rn->footer.next_blkaddr = cpu_to_le32(blkaddr); +} + +static inline nid_t ino_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + return le32_to_cpu(rn->footer.ino); +} + +static inline nid_t nid_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + return le32_to_cpu(rn->footer.nid); +} + +static inline unsigned int ofs_of_node(struct page *node_page) +{ + void *kaddr = 
page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned flag = le32_to_cpu(rn->footer.flag); + return flag >> OFFSET_BIT_SHIFT; +} + +static inline unsigned long long cpver_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + return le64_to_cpu(rn->footer.cp_ver); +} + +static inline block_t next_blkaddr_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + return le32_to_cpu(rn->footer.next_blkaddr); +} + +/* + * f2fs assigns the following node offsets described as (num). + * N = NIDS_PER_BLOCK + * + * Inode block (0) + * |- direct node (1) + * |- direct node (2) + * |- indirect node (3) + * | `- direct node (4 => 4 + N - 1) + * |- indirect node (4 + N) + * | `- direct node (5 + N => 5 + 2N - 1) + * `- double indirect node (5 + 2N) + * `- indirect node (6 + 2N) + * `- direct node (x(N + 1)) + */ +static inline bool IS_DNODE(struct page *node_page) +{ + unsigned int ofs = ofs_of_node(node_page); + if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || + ofs == 5 + 2 * NIDS_PER_BLOCK) + return false; + if (ofs >= 6 + 2 * NIDS_PER_BLOCK) { + ofs -= 6 + 2 * NIDS_PER_BLOCK; + if ((long int)ofs % (NIDS_PER_BLOCK + 1)) + return false; + } + return true; +} + +static inline void set_nid(struct page *p, int off, nid_t nid, bool i) +{ + struct f2fs_node *rn = (struct f2fs_node *)page_address(p); + + wait_on_page_writeback(p); + + if (i) + rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); + else + rn->in.nid[off] = cpu_to_le32(nid); + set_page_dirty(p); +} + +static inline nid_t get_nid(struct page *p, int off, bool i) +{ + struct f2fs_node *rn = (struct f2fs_node *)page_address(p); + if (i) + return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); + return le32_to_cpu(rn->in.nid[off]); +} + +/* + * Coldness identification: + * - Mark cold files in f2fs_inode_info + * - Mark cold node blocks in 
their node footer + * - Mark cold data pages in page cache + */ +static inline int is_cold_file(struct inode *inode) +{ + return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT; +} + +static inline int is_cold_data(struct page *page) +{ + return PageChecked(page); +} + +static inline void set_cold_data(struct page *page) +{ + SetPageChecked(page); +} + +static inline void clear_cold_data(struct page *page) +{ + ClearPageChecked(page); +} + +static inline int is_cold_node(struct page *page) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + return flag & (0x1 << COLD_BIT_SHIFT); +} + +static inline unsigned char is_fsync_dnode(struct page *page) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + return flag & (0x1 << FSYNC_BIT_SHIFT); +} + +static inline unsigned char is_dent_dnode(struct page *page) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + return flag & (0x1 << DENT_BIT_SHIFT); +} + +static inline void set_cold_node(struct inode *inode, struct page *page) +{ + struct f2fs_node *rn = (struct f2fs_node *)page_address(page); + unsigned int flag = le32_to_cpu(rn->footer.flag); + + if (S_ISDIR(inode->i_mode)) + flag &= ~(0x1 << COLD_BIT_SHIFT); + else + flag |= (0x1 << COLD_BIT_SHIFT); + rn->footer.flag = cpu_to_le32(flag); +} + +static inline void set_fsync_mark(struct page *page, int mark) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + if (mark) + flag |= (0x1 << FSYNC_BIT_SHIFT); + else + flag &= ~(0x1 << FSYNC_BIT_SHIFT); + rn->footer.flag = cpu_to_le32(flag); +} + +static inline void set_dentry_mark(struct page *page, int mark) +{ + void *kaddr = page_address(page); + struct f2fs_node 
*rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + if (mark) + flag |= (0x1 << DENT_BIT_SHIFT); + else + flag &= ~(0x1 << DENT_BIT_SHIFT); + rn->footer.flag = cpu_to_le32(flag); +} diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h new file mode 100644 index 000000000000..e380a8ef13f5 --- /dev/null +++ b/fs/f2fs/segment.h @@ -0,0 +1,615 @@ +/** + * fs/f2fs/segment.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/* constant macro */ +#define NULL_SEGNO ((unsigned int)(~0)) + +/* V: Logical segment # in volume, R: Relative segment # in main area */ +#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) +#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) + +#define IS_DATASEG(t) \ + ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ + (t == CURSEG_WARM_DATA)) + +#define IS_NODESEG(t) \ + ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ + (t == CURSEG_WARM_NODE)) + +#define IS_CURSEG(sbi, segno) \ + ((segno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) + +#define IS_CURSEC(sbi, secno) \ + ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ + sbi->segs_per_sec) || \ 
+ (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ + sbi->segs_per_sec)) \ + +#define START_BLOCK(sbi, segno) \ + (SM_I(sbi)->seg0_blkaddr + \ + (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) +#define NEXT_FREE_BLKADDR(sbi, curseg) \ + (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) + +#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) + +#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ + ((blk_addr) - SM_I(sbi)->seg0_blkaddr) +#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) +#define GET_SEGNO(sbi, blk_addr) \ + (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ + NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ + GET_SEGNO_FROM_SEG0(sbi, blk_addr))) +#define GET_SECNO(sbi, segno) \ + ((segno) / sbi->segs_per_sec) +#define GET_ZONENO_FROM_SEGNO(sbi, segno) \ + ((segno / sbi->segs_per_sec) / sbi->secs_per_zone) + +#define GET_SUM_BLOCK(sbi, segno) \ + ((sbi->sm_info->ssa_blkaddr) + segno) + +#define GET_SUM_TYPE(footer) ((footer)->entry_type) +#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type) + +#define SIT_ENTRY_OFFSET(sit_i, segno) \ + (segno % sit_i->sents_per_block) +#define SIT_BLOCK_OFFSET(sit_i, segno) \ + (segno / SIT_ENTRY_PER_BLOCK) +#define START_SEGNO(sit_i, segno) \ + (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) +#define f2fs_bitmap_size(nr) \ + (BITS_TO_LONGS(nr) * sizeof(unsigned long)) +#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) + +/* during checkpoint, bio_private is used to synchronize the last bio */ +struct bio_private { + struct f2fs_sb_info *sbi; + bool is_sync; + void *wait; +}; + +/* + * indicate a block allocation direction: RIGHT and LEFT. + * RIGHT means allocating new sections towards the end of volume. + * LEFT means the opposite direction. + */ +enum { + ALLOC_RIGHT = 0, + ALLOC_LEFT +}; + +/* + * In the victim_sel_policy->alloc_mode, there are two block allocation modes. 
+ * LFS writes data sequentially with cleaning operations. + * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations. + */ +enum { + LFS = 0, + SSR +}; + +/* + * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes. + * GC_CB is based on cost-benefit algorithm. + * GC_GREEDY is based on greedy algorithm. + */ +enum { + GC_CB = 0, + GC_GREEDY +}; + +/* + * BG_GC means the background cleaning job. + * FG_GC means the on-demand cleaning job. + */ +enum { + BG_GC = 0, + FG_GC +}; + +/* for a function parameter to select a victim segment */ +struct victim_sel_policy { + int alloc_mode; /* LFS or SSR */ + int gc_mode; /* GC_CB or GC_GREEDY */ + unsigned long *dirty_segmap; /* dirty segment bitmap */ + unsigned int offset; /* last scanned bitmap offset */ + unsigned int ofs_unit; /* bitmap search unit */ + unsigned int min_cost; /* minimum cost */ + unsigned int min_segno; /* segment # having min. cost */ +}; + +struct seg_entry { + unsigned short valid_blocks; /* # of valid blocks */ + unsigned char *cur_valid_map; /* validity bitmap of blocks */ + /* + * # of valid blocks and the validity bitmap stored in the last + * checkpoint pack. This information is used by the SSR mode. 
+ */ + unsigned short ckpt_valid_blocks; + unsigned char *ckpt_valid_map; + unsigned char type; /* segment type like CURSEG_XXX_TYPE */ + unsigned long long mtime; /* modification time of the segment */ +}; + +struct sec_entry { + unsigned int valid_blocks; /* # of valid blocks in a section */ +}; + +struct segment_allocation { + void (*allocate_segment)(struct f2fs_sb_info *, int, bool); +}; + +struct sit_info { + const struct segment_allocation *s_ops; + + block_t sit_base_addr; /* start block address of SIT area */ + block_t sit_blocks; /* # of blocks used by SIT area */ + block_t written_valid_blocks; /* # of valid blocks in main area */ + char *sit_bitmap; /* SIT bitmap pointer */ + unsigned int bitmap_size; /* SIT bitmap size */ + + unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ + unsigned int dirty_sentries; /* # of dirty sentries */ + unsigned int sents_per_block; /* # of SIT entries per block */ + struct mutex sentry_lock; /* to protect SIT cache */ + struct seg_entry *sentries; /* SIT segment-level cache */ + struct sec_entry *sec_entries; /* SIT section-level cache */ + + /* for cost-benefit algorithm in cleaning procedure */ + unsigned long long elapsed_time; /* elapsed time after mount */ + unsigned long long mounted_time; /* mount time */ + unsigned long long min_mtime; /* min. modification time */ + unsigned long long max_mtime; /* max. 
modification time */ +}; + +struct free_segmap_info { + unsigned int start_segno; /* start segment number logically */ + unsigned int free_segments; /* # of free segments */ + unsigned int free_sections; /* # of free sections */ + rwlock_t segmap_lock; /* free segmap lock */ + unsigned long *free_segmap; /* free segment bitmap */ + unsigned long *free_secmap; /* free section bitmap */ +}; + +/* Notice: The order of dirty type is the same as CURSEG_XXX in f2fs.h */ +enum dirty_type { + DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */ + DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */ + DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */ + DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */ + DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */ + DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */ + DIRTY, /* to count # of dirty segments */ + PRE, /* to count # of entirely obsolete segments */ + NR_DIRTY_TYPE +}; + +struct dirty_seglist_info { + const struct victim_selection *v_ops; /* victim selection operation */ + unsigned long *dirty_segmap[NR_DIRTY_TYPE]; + struct mutex seglist_lock; /* lock for segment bitmaps */ + int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ + unsigned long *victim_segmap[2]; /* BG_GC, FG_GC */ +}; + +/* victim selection function for cleaning and SSR */ +struct victim_selection { + int (*get_victim)(struct f2fs_sb_info *, unsigned int *, + int, int, char); +}; + +/* for active log information */ +struct curseg_info { + struct mutex curseg_mutex; /* lock for consistency */ + struct f2fs_summary_block *sum_blk; /* cached summary block */ + unsigned char alloc_type; /* current allocation type */ + unsigned int segno; /* current segment number */ + unsigned short next_blkoff; /* next block offset to write */ + unsigned int zone; /* current zone number */ + unsigned int next_segno; /* preallocated segment */ +}; + +/* + * inline functions + */ +static inline 
struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) +{ + return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); +} + +static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + return &sit_i->sentries[segno]; +} + +static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + return &sit_i->sec_entries[GET_SECNO(sbi, segno)]; +} + +static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno, int section) +{ + /* + * In order to get # of valid blocks in a section instantly from many + * segments, f2fs manages two counting structures separately. + */ + if (section > 1) + return get_sec_entry(sbi, segno)->valid_blocks; + else + return get_seg_entry(sbi, segno)->valid_blocks; +} + +static inline void seg_info_from_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + se->valid_blocks = GET_SIT_VBLOCKS(rs); + se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); + memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + se->type = GET_SIT_TYPE(rs); + se->mtime = le64_to_cpu(rs->mtime); +} + +static inline void seg_info_to_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) | + se->valid_blocks; + rs->vblocks = cpu_to_le16(raw_vblocks); + memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + se->ckpt_valid_blocks = se->valid_blocks; + rs->mtime = cpu_to_le64(se->mtime); +} + +static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, + unsigned int max, unsigned int segno) +{ + unsigned int ret; + read_lock(&free_i->segmap_lock); + ret = find_next_bit(free_i->free_segmap, max, segno); + read_unlock(&free_i->segmap_lock); + return 
ret; +} + +static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int next; + + write_lock(&free_i->segmap_lock); + clear_bit(segno, free_i->free_segmap); + free_i->free_segments++; + + next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); + if (next >= start_segno + sbi->segs_per_sec) { + clear_bit(secno, free_i->free_secmap); + free_i->free_sections++; + } + write_unlock(&free_i->segmap_lock); +} + +static inline void __set_inuse(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + set_bit(segno, free_i->free_segmap); + free_i->free_segments--; + if (!test_and_set_bit(secno, free_i->free_secmap)) + free_i->free_sections--; +} + +static inline void __set_test_and_free(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int next; + + write_lock(&free_i->segmap_lock); + if (test_and_clear_bit(segno, free_i->free_segmap)) { + free_i->free_segments++; + + next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), + start_segno); + if (next >= start_segno + sbi->segs_per_sec) { + if (test_and_clear_bit(secno, free_i->free_secmap)) + free_i->free_sections++; + } + } + write_unlock(&free_i->segmap_lock); +} + +static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + write_lock(&free_i->segmap_lock); + if (!test_and_set_bit(segno, free_i->free_segmap)) { + free_i->free_segments--; + if (!test_and_set_bit(secno, free_i->free_secmap)) + free_i->free_sections--; + } + 
write_unlock(&free_i->segmap_lock); +} + +static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, + void *dst_addr) +{ + struct sit_info *sit_i = SIT_I(sbi); + memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); +} + +static inline block_t written_block_count(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + block_t vblocks; + + mutex_lock(&sit_i->sentry_lock); + vblocks = sit_i->written_valid_blocks; + mutex_unlock(&sit_i->sentry_lock); + + return vblocks; +} + +static inline unsigned int free_segments(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int free_segs; + + read_lock(&free_i->segmap_lock); + free_segs = free_i->free_segments; + read_unlock(&free_i->segmap_lock); + + return free_segs; +} + +static inline int reserved_segments(struct f2fs_sb_info *sbi) +{ + return SM_I(sbi)->reserved_segments; +} + +static inline unsigned int free_sections(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int free_secs; + + read_lock(&free_i->segmap_lock); + free_secs = free_i->free_sections; + read_unlock(&free_i->segmap_lock); + + return free_secs; +} + +static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) +{ + return DIRTY_I(sbi)->nr_dirty[PRE]; +} + +static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi) +{ + return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] + + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] + + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE]; +} + +static inline int overprovision_segments(struct f2fs_sb_info *sbi) +{ + return SM_I(sbi)->ovp_segments; +} + +static inline int overprovision_sections(struct f2fs_sb_info *sbi) +{ + return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec; +} + +static inline int reserved_sections(struct f2fs_sb_info *sbi) +{ + return ((unsigned int) 
reserved_segments(sbi)) / sbi->segs_per_sec; +} + +static inline bool need_SSR(struct f2fs_sb_info *sbi) +{ + return (free_sections(sbi) < overprovision_sections(sbi)); +} + +static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return DIRTY_I(sbi)->v_ops->get_victim(sbi, + &(curseg)->next_segno, BG_GC, type, SSR); +} + +static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi) +{ + return free_sections(sbi) <= reserved_sections(sbi); +} + +static inline int utilization(struct f2fs_sb_info *sbi) +{ + return (long int)valid_user_blocks(sbi) * 100 / + (long int)sbi->user_block_count; +} + +/* + * Sometimes f2fs may be better to drop out-of-place update policy. + * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write + * data in the original place likewise other traditional file systems. + * But, currently set 100 in percentage, which means it is disabled. + * See below need_inplace_update(). 
+ */ +#define MIN_IPU_UTIL 100 +static inline bool need_inplace_update(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (S_ISDIR(inode->i_mode)) + return false; + if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) + return true; + return false; +} + +static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, + int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->segno; +} + +static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi, + int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->alloc_type; +} + +static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->next_blkoff; +} + +static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) +{ + unsigned int end_segno = SM_I(sbi)->segment_count - 1; + BUG_ON(segno > end_segno); +} + +/* + * This function is used for only debugging. + * NOTE: In future, we have to remove this function. 
+ */ +static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; + block_t start_addr = sm_info->seg0_blkaddr; + block_t end_addr = start_addr + total_blks - 1; + BUG_ON(blk_addr < start_addr); + BUG_ON(blk_addr > end_addr); +} + +/* + * Summary block is always treated as invalid block + */ +static inline void check_block_count(struct f2fs_sb_info *sbi, + int segno, struct f2fs_sit_entry *raw_sit) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + unsigned int end_segno = sm_info->segment_count - 1; + int valid_blocks = 0; + int i; + + /* check segment usage */ + BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); + + /* check boundary of a given segment number */ + BUG_ON(segno > end_segno); + + /* check bitmap with valid block count */ + for (i = 0; i < sbi->blocks_per_seg; i++) + if (f2fs_test_bit(i, raw_sit->valid_map)) + valid_blocks++; + BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); +} + +static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, + unsigned int start) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); + block_t blk_addr = sit_i->sit_base_addr + offset; + + check_seg_range(sbi, start); + + /* calculate sit block address */ + if (f2fs_test_bit(offset, sit_i->sit_bitmap)) + blk_addr += sit_i->sit_blocks; + + return blk_addr; +} + +static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi, + pgoff_t block_addr) +{ + struct sit_info *sit_i = SIT_I(sbi); + block_addr -= sit_i->sit_base_addr; + if (block_addr < sit_i->sit_blocks) + block_addr += sit_i->sit_blocks; + else + block_addr -= sit_i->sit_blocks; + + return block_addr + sit_i->sit_base_addr; +} + +static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) +{ + unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); + + if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) + 
f2fs_clear_bit(block_off, sit_i->sit_bitmap); + else + f2fs_set_bit(block_off, sit_i->sit_bitmap); +} + +static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - + sit_i->mounted_time; +} + +static inline void set_summary(struct f2fs_summary *sum, nid_t nid, + unsigned int ofs_in_node, unsigned char version) +{ + sum->nid = cpu_to_le32(nid); + sum->ofs_in_node = cpu_to_le16(ofs_in_node); + sum->version = version; +} + +static inline block_t start_sum_block(struct f2fs_sb_info *sbi) +{ + return __start_cp_addr(sbi) + + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); +} + +static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) +{ + return __start_cp_addr(sbi) + + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) + - (base + 1) + type; +} diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index e15192cb9cf4..66353ffd06a7 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -23,6 +23,7 @@ #define EXT4_SUPER_MAGIC 0xEF53 #define BTRFS_SUPER_MAGIC 0x9123683E #define NILFS_SUPER_MAGIC 0x3434 +#define F2FS_SUPER_MAGIC 0xF2F52010 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 -- cgit v1.2.3 From 479e2802d09f1e18a97262c4c6f8f17ae5884bd8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Oct 2012 14:16:28 +0200 Subject: mm: mempolicy: Make MPOL_LOCAL a real policy Make MPOL_LOCAL a real and exposed policy such that applications that relied on the previous default behaviour can explicitly request it. 
Requested-by: Christoph Lameter Reviewed-by: Rik van Riel Cc: Lee Schermerhorn Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman --- include/uapi/linux/mempolicy.h | 1 + mm/mempolicy.c | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 23e62e0537e2..3e835c9d847b 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -20,6 +20,7 @@ enum { MPOL_PREFERRED, MPOL_BIND, MPOL_INTERLEAVE, + MPOL_LOCAL, MPOL_MAX, /* always last member of enum */ }; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 66e90ecc2350..54bd3e5ed776 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -269,6 +269,10 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, (flags & MPOL_F_RELATIVE_NODES))) return ERR_PTR(-EINVAL); } + } else if (mode == MPOL_LOCAL) { + if (!nodes_empty(*nodes)) + return ERR_PTR(-EINVAL); + mode = MPOL_PREFERRED; } else if (nodes_empty(*nodes)) return ERR_PTR(-EINVAL); policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); @@ -2399,7 +2403,6 @@ void numa_default_policy(void) * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag * Used only for mpol_parse_str() and mpol_to_str() */ -#define MPOL_LOCAL MPOL_MAX static const char * const policy_modes[] = { [MPOL_DEFAULT] = "default", @@ -2452,12 +2455,12 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) if (flags) *flags++ = '\0'; /* terminate mode string */ - for (mode = 0; mode <= MPOL_LOCAL; mode++) { + for (mode = 0; mode < MPOL_MAX; mode++) { if (!strcmp(str, policy_modes[mode])) { break; } } - if (mode > MPOL_LOCAL) + if (mode >= MPOL_MAX) goto out; switch (mode) { -- cgit v1.2.3 From d3a710337b0590f43fd236d5e6518439afc7410a Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Thu, 25 Oct 2012 14:16:29 +0200 Subject: mm: mempolicy: 
Add MPOL_NOOP This patch augments the MPOL_MF_LAZY feature by adding a "NOOP" policy to mbind(). When the NOOP policy is used with the 'MOVE and 'LAZY flags, mbind() will map the pages PROT_NONE so that they will be migrated on the next touch. This allows an application to prepare for a new phase of operation where different regions of shared storage will be assigned to worker threads, w/o changing policy. Note that we could just use "default" policy in this case. However, this also allows an application to request that pages be migrated, only if necessary, to follow any arbitrary policy that might currently apply to a range of pages, without knowing the policy, or without specifying multiple mbind()s for ranges with different policies. [ Bug in early version of mpol_parse_str() reported by Fengguang Wu. ] Bug-Reported-by: Reported-by: Fengguang Wu Signed-off-by: Lee Schermerhorn Reviewed-by: Rik van Riel Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman --- include/uapi/linux/mempolicy.h | 1 + mm/mempolicy.c | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 3e835c9d847b..d23dca8367cc 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -21,6 +21,7 @@ enum { MPOL_BIND, MPOL_INTERLEAVE, MPOL_LOCAL, + MPOL_NOOP, /* retain existing policy for range */ MPOL_MAX, /* always last member of enum */ }; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 54bd3e5ed776..c21e91477c4f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -251,10 +251,10 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, pr_debug("setting mode %d flags %d nodes[0] %lx\n", mode, flags, nodes ? 
nodes_addr(*nodes)[0] : -1); - if (mode == MPOL_DEFAULT) { + if (mode == MPOL_DEFAULT || mode == MPOL_NOOP) { if (nodes && !nodes_empty(*nodes)) return ERR_PTR(-EINVAL); - return NULL; /* simply delete any existing policy */ + return NULL; } VM_BUG_ON(!nodes); @@ -1147,7 +1147,7 @@ static long do_mbind(unsigned long start, unsigned long len, if (start & ~PAGE_MASK) return -EINVAL; - if (mode == MPOL_DEFAULT) + if (mode == MPOL_DEFAULT || mode == MPOL_NOOP) flags &= ~MPOL_MF_STRICT; len = (len + PAGE_SIZE - 1) & PAGE_MASK; @@ -2409,7 +2409,8 @@ static const char * const policy_modes[] = [MPOL_PREFERRED] = "prefer", [MPOL_BIND] = "bind", [MPOL_INTERLEAVE] = "interleave", - [MPOL_LOCAL] = "local" + [MPOL_LOCAL] = "local", + [MPOL_NOOP] = "noop", /* should not actually be used */ }; @@ -2460,7 +2461,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) break; } } - if (mode >= MPOL_MAX) + if (mode >= MPOL_MAX || mode == MPOL_NOOP) goto out; switch (mode) { -- cgit v1.2.3 From 771fb4d806a92bf6c988fcfbd286ae40a9374332 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Thu, 25 Oct 2012 14:16:30 +0200 Subject: mm: mempolicy: Check for misplaced page This patch provides a new function to test whether a page resides on a node that is appropriate for the mempolicy for the vma and address where the page is supposed to be mapped. This involves looking up the node where the page belongs. So, the function returns that node so that it may be used to allocate the page without consulting the policy again. A subsequent patch will call this function from the fault path. Because of this, I don't want to go ahead and allocate the page, e.g., via alloc_page_vma() only to have to free it if it has the correct policy. So, I just mimic the alloc_page_vma() node computation logic--sort of.
Note: we could use this function to implement a MPOL_MF_STRICT behavior when migrating pages to match mbind() mempolicy--e.g., to ensure that pages in an interleaved range are reinterleaved rather than left where they are when they reside on any page in the interleave nodemask. Signed-off-by: Lee Schermerhorn Reviewed-by: Rik van Riel Cc: Andrew Morton Cc: Linus Torvalds [ Added MPOL_F_LAZY to trigger migrate-on-fault; simplified code now that we don't have to bother with special crap for interleaved ] Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman --- include/linux/mempolicy.h | 8 +++++ include/uapi/linux/mempolicy.h | 1 + mm/mempolicy.c | 76 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index e5ccb9ddd90e..c511e2523560 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -198,6 +198,8 @@ static inline int vma_migratable(struct vm_area_struct *vma) return 1; } +extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); + #else struct mempolicy {}; @@ -323,5 +325,11 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, return 0; } +static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, + unsigned long address) +{ + return -1; /* no node preference */ +} + #endif /* CONFIG_NUMA */ #endif diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index d23dca8367cc..472de8a5d37e 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -61,6 +61,7 @@ enum mpol_rebind_step { #define MPOL_F_SHARED (1 << 0) /* identify shared policies */ #define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ #define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */ +#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ #endif /* 
_UAPI_LINUX_MEMPOLICY_H */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c21e91477c4f..df1466d3d2d8 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2181,6 +2181,82 @@ static void sp_free(struct sp_node *n) kmem_cache_free(sn_cache, n); } +/** + * mpol_misplaced - check whether current page node is valid in policy + * + * @page - page to be checked + * @vma - vm area where page mapped + * @addr - virtual address where page mapped + * + * Lookup current policy node id for vma,addr and "compare to" page's + * node id. + * + * Returns: + * -1 - not misplaced, page is in the right node + * node - node id where the page should be + * + * Policy determination "mimics" alloc_page_vma(). + * Called from fault path where we know the vma and faulting address. + */ +int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr) +{ + struct mempolicy *pol; + struct zone *zone; + int curnid = page_to_nid(page); + unsigned long pgoff; + int polnid = -1; + int ret = -1; + + BUG_ON(!vma); + + pol = get_vma_policy(current, vma, addr); + if (!(pol->flags & MPOL_F_MOF)) + goto out; + + switch (pol->mode) { + case MPOL_INTERLEAVE: + BUG_ON(addr >= vma->vm_end); + BUG_ON(addr < vma->vm_start); + + pgoff = vma->vm_pgoff; + pgoff += (addr - vma->vm_start) >> PAGE_SHIFT; + polnid = offset_il_node(pol, vma, pgoff); + break; + + case MPOL_PREFERRED: + if (pol->flags & MPOL_F_LOCAL) + polnid = numa_node_id(); + else + polnid = pol->v.preferred_node; + break; + + case MPOL_BIND: + /* + * allows binding to multiple nodes. + * use current page if in policy nodemask, + * else select nearest allowed node, if any. + * If no allowed nodes, use current [!misplaced]. 
+ */ + if (node_isset(curnid, pol->v.nodes)) + goto out; + (void)first_zones_zonelist( + node_zonelist(numa_node_id(), GFP_HIGHUSER), + gfp_zone(GFP_HIGHUSER), + &pol->v.nodes, &zone); + polnid = zone->node; + break; + + default: + BUG(); + } + if (curnid != polnid) + ret = polnid; +out: + mpol_cond_put(pol); + + return ret; +} + static void sp_delete(struct shared_policy *sp, struct sp_node *n) { pr_debug("deleting %lx-l%lx\n", n->start, n->end); -- cgit v1.2.3 From b24f53a0bea38b266d219ee651b22dba727c44ae Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Thu, 25 Oct 2012 14:16:32 +0200 Subject: mm: mempolicy: Add MPOL_MF_LAZY NOTE: Once again there is a lot of patch stealing and the end result is sufficiently different that I had to drop the signed-offs. Will re-add if the original authors are ok with that. This patch adds another mbind() flag to request "lazy migration". The flag, MPOL_MF_LAZY, modifies MPOL_MF_MOVE* such that the selected pages are marked PROT_NONE. The pages will be migrated in the fault path on "first touch", if the policy dictates at that time. "Lazy Migration" will allow testing of migrate-on-fault via mbind(). Also allows applications to specify that only subsequently touched pages be migrated to obey new policy, instead of all pages in range. This can be useful for multi-threaded applications working on a large shared data area that is initialized by an initial thread resulting in all pages on one [or a few, if overflowed] nodes. After PROT_NONE, the pages in regions assigned to the worker threads will be automatically migrated local to the threads on 1st touch. 
Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel --- include/linux/mm.h | 5 ++ include/uapi/linux/mempolicy.h | 13 ++- mm/mempolicy.c | 185 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 185 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index fa1615211159..471185e29bab 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1551,6 +1551,11 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) } #endif +#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE +void change_prot_numa(struct vm_area_struct *vma, + unsigned long start, unsigned long end); +#endif + struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 472de8a5d37e..6a1baae3775d 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -49,9 +49,16 @@ enum mpol_rebind_step { /* Flags for mbind */ #define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ -#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */ -#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ -#define MPOL_MF_INTERNAL (1<<3) /* Internal flags start here */ +#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform + to policy */ +#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to policy */ +#define MPOL_MF_LAZY (1<<3) /* Modifies '_MOVE: lazy migrate on fault */ +#define MPOL_MF_INTERNAL (1<<4) /* Internal flags start here */ + +#define MPOL_MF_VALID (MPOL_MF_STRICT | \ + MPOL_MF_MOVE | \ + MPOL_MF_MOVE_ALL | \ + MPOL_MF_LAZY) /* * Internal flags that share the struct mempolicy flags word with diff --git a/mm/mempolicy.c b/mm/mempolicy.c index df1466d3d2d8..51d3ebd8561e 100644 --- a/mm/mempolicy.c +++ 
b/mm/mempolicy.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include @@ -565,6 +566,145 @@ static inline int check_pgd_range(struct vm_area_struct *vma, return 0; } +#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE +/* + * Here we search for not shared page mappings (mapcount == 1) and we + * set up the pmd/pte_numa on those mappings so the very next access + * will fire a NUMA hinting page fault. + */ +static int +change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte, *_pte; + struct page *page; + unsigned long _address, end; + spinlock_t *ptl; + int ret = 0; + + VM_BUG_ON(address & ~PAGE_MASK); + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto out; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + goto out; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd)) + goto out; + + if (pmd_trans_huge_lock(pmd, vma) == 1) { + int page_nid; + ret = HPAGE_PMD_NR; + + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + + if (pmd_numa(*pmd)) { + spin_unlock(&mm->page_table_lock); + goto out; + } + + page = pmd_page(*pmd); + + /* only check non-shared pages */ + if (page_mapcount(page) != 1) { + spin_unlock(&mm->page_table_lock); + goto out; + } + + page_nid = page_to_nid(page); + + if (pmd_numa(*pmd)) { + spin_unlock(&mm->page_table_lock); + goto out; + } + + set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd)); + ret += HPAGE_PMD_NR; + /* defer TLB flush to lower the overhead */ + spin_unlock(&mm->page_table_lock); + goto out; + } + + if (pmd_trans_unstable(pmd)) + goto out; + VM_BUG_ON(!pmd_present(*pmd)); + + end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK); + pte = pte_offset_map_lock(mm, pmd, address, &ptl); + for (_address = address, _pte = pte; _address < end; + _pte++, _address += PAGE_SIZE) { + pte_t pteval = *_pte; + if (!pte_present(pteval)) + continue; + if (pte_numa(pteval)) + continue; + page = vm_normal_page(vma, _address, 
pteval); + if (unlikely(!page)) + continue; + /* only check non-shared pages */ + if (page_mapcount(page) != 1) + continue; + + set_pte_at(mm, _address, _pte, pte_mknuma(pteval)); + + /* defer TLB flush to lower the overhead */ + ret++; + } + pte_unmap_unlock(pte, ptl); + + if (ret && !pmd_numa(*pmd)) { + spin_lock(&mm->page_table_lock); + set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd)); + spin_unlock(&mm->page_table_lock); + /* defer TLB flush to lower the overhead */ + } + +out: + return ret; +} + +/* Assumes mmap_sem is held */ +void +change_prot_numa(struct vm_area_struct *vma, + unsigned long address, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + int progress = 0; + + while (address < end) { + VM_BUG_ON(address < vma->vm_start || + address + PAGE_SIZE > vma->vm_end); + + progress += change_prot_numa_range(mm, vma, address); + address = (address + PMD_SIZE) & PMD_MASK; + } + + /* + * Flush the TLB for the mm to start the NUMA hinting + * page faults after we finish scanning this vma part + * if there were any PTE updates + */ + if (progress) { + mmu_notifier_invalidate_range_start(vma->vm_mm, address, end); + flush_tlb_range(vma, address, end); + mmu_notifier_invalidate_range_end(vma->vm_mm, address, end); + } +} +#else +static unsigned long change_prot_numa(struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + return 0; +} +#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ + /* * Check if all pages in a range are on a set of nodes. 
* If pagelist != NULL then isolate pages from the LRU and @@ -583,22 +723,32 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, return ERR_PTR(-EFAULT); prev = NULL; for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { + unsigned long endvma = vma->vm_end; + + if (endvma > end) + endvma = end; + if (vma->vm_start > start) + start = vma->vm_start; + if (!(flags & MPOL_MF_DISCONTIG_OK)) { if (!vma->vm_next && vma->vm_end < end) return ERR_PTR(-EFAULT); if (prev && prev->vm_end < vma->vm_start) return ERR_PTR(-EFAULT); } - if (!is_vm_hugetlb_page(vma) && - ((flags & MPOL_MF_STRICT) || + + if (is_vm_hugetlb_page(vma)) + goto next; + + if (flags & MPOL_MF_LAZY) { + change_prot_numa(vma, start, endvma); + goto next; + } + + if ((flags & MPOL_MF_STRICT) || ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && - vma_migratable(vma)))) { - unsigned long endvma = vma->vm_end; + vma_migratable(vma))) { - if (endvma > end) - endvma = end; - if (vma->vm_start > start) - start = vma->vm_start; err = check_pgd_range(vma, start, endvma, nodes, flags, private); if (err) { @@ -606,6 +756,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, break; } } +next: prev = vma; } return first; @@ -1138,8 +1289,7 @@ static long do_mbind(unsigned long start, unsigned long len, int err; LIST_HEAD(pagelist); - if (flags & ~(unsigned long)(MPOL_MF_STRICT | - MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & ~(unsigned long)MPOL_MF_VALID) return -EINVAL; if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) return -EPERM; @@ -1162,6 +1312,9 @@ static long do_mbind(unsigned long start, unsigned long len, if (IS_ERR(new)) return PTR_ERR(new); + if (flags & MPOL_MF_LAZY) + new->flags |= MPOL_F_MOF; + /* * If we are using the default policy then operation * on discontinuous address spaces is okay after all @@ -1198,13 +1351,15 @@ static long do_mbind(unsigned long start, unsigned long len, vma = check_range(mm, start, end, nmask, flags | 
MPOL_MF_INVERT, &pagelist); - err = PTR_ERR(vma); - if (!IS_ERR(vma)) { - int nr_failed = 0; - + err = PTR_ERR(vma); /* maybe ... */ + if (!IS_ERR(vma) && mode != MPOL_NOOP) err = mbind_range(mm, start, end, new); + if (!err) { + int nr_failed = 0; + if (!list_empty(&pagelist)) { + WARN_ON_ONCE(flags & MPOL_MF_LAZY); nr_failed = migrate_pages(&pagelist, new_vma_page, (unsigned long)vma, false, MIGRATE_SYNC, @@ -1213,7 +1368,7 @@ static long do_mbind(unsigned long start, unsigned long len, putback_lru_pages(&pagelist); } - if (!err && nr_failed && (flags & MPOL_MF_STRICT)) + if (nr_failed && (flags & MPOL_MF_STRICT)) err = -EIO; } else putback_lru_pages(&pagelist); -- cgit v1.2.3 From a720094ded8cbb303111035be91858011d2eac71 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 16 Nov 2012 09:37:58 +0000 Subject: mm: mempolicy: Hide MPOL_NOOP and MPOL_MF_LAZY from userspace for now The use of MPOL_NOOP and MPOL_MF_LAZY to allow an application to explicitly request lazy migration is a good idea but the actual API has not been well reviewed and once released we have to support it. For now this patch prevents an application using the services. This will need to be revisited. 
Signed-off-by: Mel Gorman --- include/uapi/linux/mempolicy.h | 4 +--- mm/mempolicy.c | 9 ++++----- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 6a1baae3775d..16fb4e6efbc4 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -21,7 +21,6 @@ enum { MPOL_BIND, MPOL_INTERLEAVE, MPOL_LOCAL, - MPOL_NOOP, /* retain existing policy for range */ MPOL_MAX, /* always last member of enum */ }; @@ -57,8 +56,7 @@ enum mpol_rebind_step { #define MPOL_MF_VALID (MPOL_MF_STRICT | \ MPOL_MF_MOVE | \ - MPOL_MF_MOVE_ALL | \ - MPOL_MF_LAZY) + MPOL_MF_MOVE_ALL) /* * Internal flags that share the struct mempolicy flags word with diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 75d4600a5e92..a7a62fe7c280 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -252,7 +252,7 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, pr_debug("setting mode %d flags %d nodes[0] %lx\n", mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); - if (mode == MPOL_DEFAULT || mode == MPOL_NOOP) { + if (mode == MPOL_DEFAULT) { if (nodes && !nodes_empty(*nodes)) return ERR_PTR(-EINVAL); return NULL; @@ -1186,7 +1186,7 @@ static long do_mbind(unsigned long start, unsigned long len, if (start & ~PAGE_MASK) return -EINVAL; - if (mode == MPOL_DEFAULT || mode == MPOL_NOOP) + if (mode == MPOL_DEFAULT) flags &= ~MPOL_MF_STRICT; len = (len + PAGE_SIZE - 1) & PAGE_MASK; @@ -1241,7 +1241,7 @@ static long do_mbind(unsigned long start, unsigned long len, flags | MPOL_MF_INVERT, &pagelist); err = PTR_ERR(vma); /* maybe ... 
*/ - if (!IS_ERR(vma) && mode != MPOL_NOOP) + if (!IS_ERR(vma)) err = mbind_range(mm, start, end, new); if (!err) { @@ -2530,7 +2530,6 @@ static const char * const policy_modes[] = [MPOL_BIND] = "bind", [MPOL_INTERLEAVE] = "interleave", [MPOL_LOCAL] = "local", - [MPOL_NOOP] = "noop", /* should not actually be used */ }; @@ -2581,7 +2580,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) break; } } - if (mode >= MPOL_MAX || mode == MPOL_NOOP) + if (mode >= MPOL_MAX) goto out; switch (mode) { -- cgit v1.2.3 From 5606e3877ad8baea42f3a71ebde0a03622bbb551 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 2 Nov 2012 18:19:13 +0000 Subject: mm: numa: Migrate on reference policy This is the simplest possible policy that still does something of note. When a pte_numa is faulted, it is moved immediately. Any replacement policy must at least do better than this and in all likelihood this policy regresses normal workloads. Signed-off-by: Mel Gorman Acked-by: Rik van Riel --- include/uapi/linux/mempolicy.h | 1 + mm/mempolicy.c | 38 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 16fb4e6efbc4..0d11c3dcd3a1 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -67,6 +67,7 @@ enum mpol_rebind_step { #define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ #define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */ #define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ +#define MPOL_F_MORON (1 << 4) /* Migrate On pte_numa Reference On Node */ #endif /* _UAPI_LINUX_MEMPOLICY_H */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 516491fbfaa8..4c1c8d83ac6a 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -118,6 +118,26 @@ static struct mempolicy default_policy = { .flags = MPOL_F_LOCAL, }; +static struct mempolicy 
preferred_node_policy[MAX_NUMNODES]; + +static struct mempolicy *get_task_policy(struct task_struct *p) +{ + struct mempolicy *pol = p->mempolicy; + int node; + + if (!pol) { + node = numa_node_id(); + if (node != -1) + pol = &preferred_node_policy[node]; + + /* preferred_node_policy is not initialised early in boot */ + if (!pol->mode) + pol = NULL; + } + + return pol; +} + static const struct mempolicy_operations { int (*create)(struct mempolicy *pol, const nodemask_t *nodes); /* @@ -1598,7 +1618,7 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, struct mempolicy *get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = task->mempolicy; + struct mempolicy *pol = get_task_policy(task); if (vma) { if (vma->vm_ops && vma->vm_ops->get_policy) { @@ -2021,7 +2041,7 @@ retry_cpuset: */ struct page *alloc_pages_current(gfp_t gfp, unsigned order) { - struct mempolicy *pol = current->mempolicy; + struct mempolicy *pol = get_task_policy(current); struct page *page; unsigned int cpuset_mems_cookie; @@ -2295,6 +2315,11 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long default: BUG(); } + + /* Migrate the page towards the node whose CPU is referencing it */ + if (pol->flags & MPOL_F_MORON) + polnid = numa_node_id(); + if (curnid != polnid) ret = polnid; out: @@ -2483,6 +2508,15 @@ void __init numa_policy_init(void) sizeof(struct sp_node), 0, SLAB_PANIC, NULL); + for_each_node(nid) { + preferred_node_policy[nid] = (struct mempolicy) { + .refcnt = ATOMIC_INIT(1), + .mode = MPOL_PREFERRED, + .flags = MPOL_F_MOF | MPOL_F_MORON, + .v = { .preferred_node = nid, }, + }; + } + /* * Set interleaving policy for system init. 
Interleaving is only * enabled across suitably sized nodes (default is >= 16MB), or -- cgit v1.2.3 From 895464fa4b52f0e5a2ceffc173bad012be02b465 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 12 Dec 2012 06:58:52 +0000 Subject: uapi: add missing netconf.h to export list Add netconf.h for use by iproute2. Signed-off-by: Stephen Hemminger Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index e194387ef784..2564a968ca48 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -258,6 +258,7 @@ header-y += neighbour.h header-y += net.h header-y += net_dropmon.h header-y += net_tstamp.h +header-y += netconf.h header-y += netdevice.h header-y += netfilter.h header-y += netfilter_arp.h -- cgit v1.2.3 From 37a393bc4932d7bac360f40064aaafc01ab44901 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 11 Dec 2012 22:23:07 +0000 Subject: bridge: notify mdb changes via netlink As Stephen mentioned, we need to monitor the mdb changes in user-space, so add notifications via netlink too. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- include/uapi/linux/rtnetlink.h | 6 ++++ net/bridge/br_mdb.c | 80 ++++++++++++++++++++++++++++++++++++++++++ net/bridge/br_multicast.c | 2 ++ net/bridge/br_private.h | 2 ++ 4 files changed, 90 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 354a1e7d32a3..7a5eb196ade9 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -125,6 +125,10 @@ enum { RTM_GETNETCONF = 82, #define RTM_GETNETCONF RTM_GETNETCONF + RTM_NEWMDB = 84, +#define RTM_NEWMDB RTM_NEWMDB + RTM_DELMDB = 85, +#define RTM_DELMDB RTM_DELMDB RTM_GETMDB = 86, #define RTM_GETMDB RTM_GETMDB @@ -607,6 +611,8 @@ enum rtnetlink_groups { #define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF RTNLGRP_IPV6_NETCONF, #define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF + RTNLGRP_MDB, +#define RTNLGRP_MDB RTNLGRP_MDB __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index ccc43a9bff80..a8cfbf5f3c68 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -155,6 +155,86 @@ out: return skb->len; } +static int nlmsg_populate_mdb_fill(struct sk_buff *skb, + struct net_device *dev, + struct br_mdb_entry *entry, u32 pid, + u32 seq, int type, unsigned int flags) +{ + struct nlmsghdr *nlh; + struct br_port_msg *bpm; + struct nlattr *nest, *nest2; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + bpm = nlmsg_data(nlh); + bpm->family = AF_BRIDGE; + bpm->ifindex = dev->ifindex; + nest = nla_nest_start(skb, MDBA_MDB); + if (nest == NULL) + goto cancel; + nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY); + if (nest2 == NULL) + goto end; + + if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(*entry), entry)) + goto end; + + nla_nest_end(skb, nest2); + nla_nest_end(skb, nest); + return nlmsg_end(skb, nlh); + +end: + nla_nest_end(skb, nest); +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static 
inline size_t rtnl_mdb_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct br_port_msg)) + + nla_total_size(sizeof(struct br_mdb_entry)); +} + +static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, + int type) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); + if (!skb) + goto errout; + + err = nlmsg_populate_mdb_fill(skb, dev, entry, 0, 0, type, NTF_SELF); + if (err < 0) { + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC); + return; +errout: + rtnl_set_sk_err(net, RTNLGRP_MDB, err); +} + +void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type) +{ + struct br_mdb_entry entry; + + entry.ifindex = port->dev->ifindex; + entry.addr.proto = group->proto; + entry.addr.u.ip4 = group->u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + entry.addr.u.ip6 = group->u.ip6; +#endif + __br_mdb_notify(dev, &entry, type); +} + void br_mdb_init(void) { rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 847b98a1d5e0..d929586ce39e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -681,6 +681,7 @@ static int br_multicast_add_group(struct net_bridge *br, (unsigned long)p); rcu_assign_pointer(*pp, p); + br_mdb_notify(br->dev, port, group, RTM_NEWMDB); found: mod_timer(&p->timer, now + br->multicast_membership_interval); @@ -1240,6 +1241,7 @@ static void br_multicast_leave_group(struct net_bridge *br, hlist_del_init(&p->mglist); del_timer(&p->timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); + br_mdb_notify(br->dev, port, group, RTM_DELMDB); if (!mp->ports && !mp->mglist && netif_running(br->dev)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f95b766c7a98..2807c7680c38 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -435,6 +435,8 @@ 
extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val); extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); extern void br_mdb_init(void); +extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type); static inline bool br_multicast_is_router(struct net_bridge *br) { -- cgit v1.2.3 From cfd567543590f71ca0af397437e2554f9756d750 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 11 Dec 2012 22:23:08 +0000 Subject: bridge: add support of adding and deleting mdb entries This patch implements adding/deleting mdb entries via netlink. Currently all entries are temp, we probably need a flag to distinguish permanent entries too. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 8 ++ net/bridge/br_mdb.c | 240 +++++++++++++++++++++++++++++++++++++++++ net/bridge/br_multicast.c | 55 +++++----- net/bridge/br_private.h | 23 ++++ 4 files changed, 297 insertions(+), 29 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 9a0f6ff0d7e7..afbb18a0227c 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -157,6 +157,7 @@ enum { #define MDBA_ROUTER_MAX (__MDBA_ROUTER_MAX - 1) struct br_port_msg { + __u8 family; __u32 ifindex; }; @@ -171,4 +172,11 @@ struct br_mdb_entry { } addr; }; +enum { + MDBA_SET_ENTRY_UNSPEC, + MDBA_SET_ENTRY, + __MDBA_SET_ENTRY_MAX, +}; +#define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1) + #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index a8cfbf5f3c68..6f0a2eebcb27 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #if
IS_ENABLED(CONFIG_IPV6) @@ -235,7 +236,246 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, __br_mdb_notify(dev, &entry, type); } +static bool is_valid_mdb_entry(struct br_mdb_entry *entry) +{ + if (entry->ifindex == 0) + return false; + + if (entry->addr.proto == htons(ETH_P_IP)) { + if (!ipv4_is_multicast(entry->addr.u.ip4)) + return false; + if (ipv4_is_local_multicast(entry->addr.u.ip4)) + return false; +#if IS_ENABLED(CONFIG_IPV6) + } else if (entry->addr.proto == htons(ETH_P_IPV6)) { + if (!ipv6_is_transient_multicast(&entry->addr.u.ip6)) + return false; +#endif + } else + return false; + + return true; +} + +static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, + struct net_device **pdev, struct br_mdb_entry **pentry) +{ + struct net *net = sock_net(skb->sk); + struct br_mdb_entry *entry; + struct br_port_msg *bpm; + struct nlattr *tb[MDBA_SET_ENTRY_MAX+1]; + struct net_device *dev; + int err; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL); + if (err < 0) + return err; + + bpm = nlmsg_data(nlh); + if (bpm->ifindex == 0) { + pr_info("PF_BRIDGE: br_mdb_parse() with invalid ifindex\n"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (dev == NULL) { + pr_info("PF_BRIDGE: br_mdb_parse() with unknown ifindex\n"); + return -ENODEV; + } + + if (!(dev->priv_flags & IFF_EBRIDGE)) { + pr_info("PF_BRIDGE: br_mdb_parse() with non-bridge\n"); + return -EOPNOTSUPP; + } + + *pdev = dev; + + if (!tb[MDBA_SET_ENTRY] || + nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) { + pr_info("PF_BRIDGE: br_mdb_parse() with invalid attr\n"); + return -EINVAL; + } + + entry = nla_data(tb[MDBA_SET_ENTRY]); + if (!is_valid_mdb_entry(entry)) { + pr_info("PF_BRIDGE: br_mdb_parse() with invalid entry\n"); + return -EINVAL; + } + + *pentry = entry; + return 0; +} + +static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, + 
struct br_ip *group) +{ + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct net_bridge_mdb_htable *mdb; + int err; + + mdb = mlock_dereference(br->mdb, br); + mp = br_mdb_ip_get(mdb, group); + if (!mp) { + mp = br_multicast_new_group(br, port, group); + err = PTR_ERR(mp); + if (IS_ERR(mp)) + return err; + } + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port == port) + return -EEXIST; + if ((unsigned long)p->port < (unsigned long)port) + break; + } + + p = br_multicast_new_port_group(port, group, *pp); + if (unlikely(!p)) + return -ENOMEM; + rcu_assign_pointer(*pp, p); + + br_mdb_notify(br->dev, port, group, RTM_NEWMDB); + return 0; +} + +static int __br_mdb_add(struct net *net, struct net_bridge *br, + struct br_mdb_entry *entry) +{ + struct br_ip ip; + struct net_device *dev; + struct net_bridge_port *p; + int ret; + + if (!netif_running(br->dev) || br->multicast_disabled) + return -EINVAL; + + dev = __dev_get_by_index(net, entry->ifindex); + if (!dev) + return -ENODEV; + + p = br_port_get_rtnl(dev); + if (!p || p->br != br || p->state == BR_STATE_DISABLED) + return -EINVAL; + + ip.proto = entry->addr.proto; + if (ip.proto == htons(ETH_P_IP)) + ip.u.ip4 = entry->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + else + ip.u.ip6 = entry->addr.u.ip6; +#endif + + spin_lock_bh(&br->multicast_lock); + ret = br_mdb_add_group(br, p, &ip); + spin_unlock_bh(&br->multicast_lock); + return ret; +} + +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = sock_net(skb->sk); + struct br_mdb_entry *entry; + struct net_device *dev; + struct net_bridge *br; + int err; + + err = br_mdb_parse(skb, nlh, &dev, &entry); + if (err < 0) + return err; + + br = netdev_priv(dev); + + err = __br_mdb_add(net, br, entry); + if (!err) + __br_mdb_notify(dev, entry, RTM_NEWMDB); + return err; +} + +static int __br_mdb_del(struct net_bridge 
*br, struct br_mdb_entry *entry) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct br_ip ip; + int err = -EINVAL; + + if (!netif_running(br->dev) || br->multicast_disabled) + return -EINVAL; + + if (timer_pending(&br->multicast_querier_timer)) + return -EBUSY; + + ip.proto = entry->addr.proto; + if (ip.proto == htons(ETH_P_IP)) + ip.u.ip4 = entry->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + else + ip.u.ip6 = entry->addr.u.ip6; +#endif + + spin_lock_bh(&br->multicast_lock); + mdb = mlock_dereference(br->mdb, br); + + mp = br_mdb_ip_get(mdb, &ip); + if (!mp) + goto unlock; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (!p->port || p->port->dev->ifindex != entry->ifindex) + continue; + + if (p->port->state == BR_STATE_DISABLED) + goto unlock; + + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + err = 0; + + if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + break; + } + +unlock: + spin_unlock_bh(&br->multicast_lock); + return err; +} + +static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net_device *dev; + struct br_mdb_entry *entry; + struct net_bridge *br; + int err; + + err = br_mdb_parse(skb, nlh, &dev, &entry); + if (err < 0) + return err; + + br = netdev_priv(dev); + + err = __br_mdb_del(br, entry); + if (!err) + __br_mdb_notify(dev, entry, RTM_DELMDB); + return err; +} + void br_mdb_init(void) { rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL); + rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL); + rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL); } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d929586ce39e..977c3ee02e65 100644 --- a/net/bridge/br_multicast.c +++ 
b/net/bridge/br_multicast.c @@ -27,27 +27,14 @@ #if IS_ENABLED(CONFIG_IPV6) #include #include -#include #include #endif #include "br_private.h" -#define mlock_dereference(X, br) \ - rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) - static void br_multicast_start_querier(struct net_bridge *br); unsigned int br_mdb_rehash_seq; -#if IS_ENABLED(CONFIG_IPV6) -static inline int ipv6_is_transient_multicast(const struct in6_addr *addr) -{ - if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr)) - return 1; - return 0; -} -#endif - static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) { if (a->proto != b->proto) @@ -104,8 +91,8 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( return NULL; } -static struct net_bridge_mdb_entry *br_mdb_ip_get( - struct net_bridge_mdb_htable *mdb, struct br_ip *dst) +struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, + struct br_ip *dst) { if (!mdb) return NULL; @@ -208,7 +195,7 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, return maxlen > elasticity ? 
-EINVAL : 0; } -static void br_multicast_free_pg(struct rcu_head *head) +void br_multicast_free_pg(struct rcu_head *head) { struct net_bridge_port_group *p = container_of(head, struct net_bridge_port_group, rcu); @@ -584,9 +571,8 @@ err: return mp; } -static struct net_bridge_mdb_entry *br_multicast_new_group( - struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group) +struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, + struct net_bridge_port *port, struct br_ip *group) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; @@ -633,6 +619,26 @@ out: return mp; } +struct net_bridge_port_group *br_multicast_new_port_group( + struct net_bridge_port *port, + struct br_ip *group, + struct net_bridge_port_group *next) +{ + struct net_bridge_port_group *p; + + p = kzalloc(sizeof(*p), GFP_ATOMIC); + if (unlikely(!p)) + return NULL; + + p->addr = *group; + p->port = port; + p->next = next; + hlist_add_head(&p->mglist, &port->mglist); + setup_timer(&p->timer, br_multicast_port_group_expired, + (unsigned long)p); + return p; +} + static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group) @@ -668,18 +674,9 @@ static int br_multicast_add_group(struct net_bridge *br, break; } - p = kzalloc(sizeof(*p), GFP_ATOMIC); - err = -ENOMEM; + p = br_multicast_new_port_group(port, group, *pp); if (unlikely(!p)) goto err; - - p->addr = *group; - p->port = port; - p->next = *pp; - hlist_add_head(&p->mglist, &port->mglist); - setup_timer(&p->timer, br_multicast_port_group_expired, - (unsigned long)p); - rcu_assign_pointer(*pp, p); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2807c7680c38..f21a739a6186 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -434,10 +434,33 @@ extern int br_multicast_set_port_router(struct net_bridge_port *p, extern int br_multicast_toggle(struct net_bridge *br, 
unsigned long val); extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val); extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); +extern struct net_bridge_mdb_entry *br_mdb_ip_get( + struct net_bridge_mdb_htable *mdb, + struct br_ip *dst); +extern struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, + struct net_bridge_port *port, struct br_ip *group); +extern void br_multicast_free_pg(struct rcu_head *head); +extern struct net_bridge_port_group *br_multicast_new_port_group( + struct net_bridge_port *port, + struct br_ip *group, + struct net_bridge_port_group *next); extern void br_mdb_init(void); extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_ip *group, int type); +#define mlock_dereference(X, br) \ + rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) + +#if IS_ENABLED(CONFIG_IPV6) +#include +static inline int ipv6_is_transient_multicast(const struct in6_addr *addr) +{ + if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr)) + return 1; + return 0; +} +#endif + static inline bool br_multicast_is_router(struct net_bridge *br) { return br->multicast_router == 2 || -- cgit v1.2.3 From d4676eac0de2e6d88eb3e2c02b4e9813d7d7f205 Mon Sep 17 00:00:00 2001 From: Yan Burman Date: Wed, 12 Dec 2012 02:13:17 +0000 Subject: net: ethtool: Add destination MAC address to flow steering API Add ability to specify destination MAC address for L3/L4 flow spec in order to be able to specify action for different VM's under vSwitch configuration. This change is transparent to older userspace. Signed-off-by: Yan Burman Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- include/uapi/linux/ethtool.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index d3eaaaf1009e..be8c41e2dc15 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -500,13 +500,15 @@ union ethtool_flow_union { struct ethtool_ah_espip4_spec esp_ip4_spec; struct ethtool_usrip4_spec usr_ip4_spec; struct ethhdr ether_spec; - __u8 hdata[60]; + __u8 hdata[52]; }; struct ethtool_flow_ext { - __be16 vlan_etype; - __be16 vlan_tci; - __be32 data[2]; + __u8 padding[2]; + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + __be16 vlan_etype; + __be16 vlan_tci; + __be32 data[2]; }; /** @@ -1027,6 +1029,7 @@ enum ethtool_sfeatures_retval_bits { #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ #define FLOW_EXT 0x80000000 +#define FLOW_MAC_EXT 0x40000000 /* L3-L4 network traffic flow hash options */ #define RXH_L2DA (1 << 1) -- cgit v1.2.3 From dc2e57340deb8be1133b1eae2c7d4303133c133c Mon Sep 17 00:00:00 2001 From: Yan Burman Date: Thu, 13 Dec 2012 05:20:59 +0000 Subject: net: ethtool: Document struct ethtool_flow_ext Add documentation for struct ethtool_flow_ext especially in regard to what flags are needed for which fields. Signed-off-by: Yan Burman Reviewed-by: Ben Hutchings Signed-off-by: David S.
Miller --- include/uapi/linux/ethtool.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index be8c41e2dc15..0c9b44871df0 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -503,9 +503,20 @@ union ethtool_flow_union { __u8 hdata[52]; }; +/** + * struct ethtool_flow_ext - additional RX flow fields + * @h_dest: destination MAC address + * @vlan_etype: VLAN EtherType + * @vlan_tci: VLAN tag control information + * @data: user defined data + * + * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT + * is set in &struct ethtool_rx_flow_spec @flow_type. + * @h_dest is valid if %FLOW_MAC_EXT is set. + */ struct ethtool_flow_ext { __u8 padding[2]; - unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + unsigned char h_dest[ETH_ALEN]; __be16 vlan_etype; __be16 vlan_tci; __be32 data[2]; @@ -519,7 +530,8 @@ struct ethtool_flow_ext { * @m_u: Masks for flow field bits to be matched * @m_ext: Masks for additional field bits to be matched * Note, all additional fields must be ignored unless @flow_type - * includes the %FLOW_EXT flag. + * includes the %FLOW_EXT or %FLOW_MAC_EXT flag + * (see &struct ethtool_flow_ext description). * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC * if packets should be discarded * @location: Location of rule in the table. Locations must be -- cgit v1.2.3 From 2f3238aebedb243804f58d62d57244edec4149b2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Oct 2012 18:09:41 +1030 Subject: module: add flags arg to sys_finit_module() Thanks to Michael Kerrisk for keeping us honest. These flags are actually useful for eliminating the only case where kmod has to mangle a module's internals: for overriding module versioning. 
Signed-off-by: Rusty Russell Acked-by: Lucas De Marchi Acked-by: Kees Cook --- include/linux/syscalls.h | 2 +- include/uapi/linux/module.h | 8 ++++++++ kernel/module.c | 40 ++++++++++++++++++++++++++-------------- 3 files changed, 35 insertions(+), 15 deletions(-) create mode 100644 include/uapi/linux/module.h (limited to 'include/uapi/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 32bc035bcd68..8cf7b508cb50 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -868,5 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); -asmlinkage long sys_finit_module(int fd, const char __user *uargs); +asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); #endif diff --git a/include/uapi/linux/module.h b/include/uapi/linux/module.h new file mode 100644 index 000000000000..38da4258b12f --- /dev/null +++ b/include/uapi/linux/module.h @@ -0,0 +1,8 @@ +#ifndef _UAPI_LINUX_MODULE_H +#define _UAPI_LINUX_MODULE_H + +/* Flags for sys_finit_module: */ +#define MODULE_INIT_IGNORE_MODVERSIONS 1 +#define MODULE_INIT_IGNORE_VERMAGIC 2 + +#endif /* _UAPI_LINUX_MODULE_H */ diff --git a/kernel/module.c b/kernel/module.c index 6d2c4e4ca1f5..1395ca382fb5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -60,6 +60,7 @@ #include #include #include +#include #include "module-internal.h" #define CREATE_TRACE_POINTS @@ -2553,7 +2554,7 @@ static void free_copy(struct load_info *info) vfree(info->hdr); } -static int rewrite_section_headers(struct load_info *info) +static int rewrite_section_headers(struct load_info *info, int flags) { unsigned int i; @@ -2581,7 +2582,10 @@ static int rewrite_section_headers(struct load_info *info) } /* Track but don't keep modinfo and version sections. 
*/ - info->index.vers = find_sec(info, "__versions"); + if (flags & MODULE_INIT_IGNORE_MODVERSIONS) + info->index.vers = 0; /* Pretend no __versions section! */ + else + info->index.vers = find_sec(info, "__versions"); info->index.info = find_sec(info, ".modinfo"); info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; @@ -2596,7 +2600,7 @@ static int rewrite_section_headers(struct load_info *info) * Return the temporary module pointer (we'll replace it with the final * one when we move the module sections around). */ -static struct module *setup_load_info(struct load_info *info) +static struct module *setup_load_info(struct load_info *info, int flags) { unsigned int i; int err; @@ -2607,7 +2611,7 @@ static struct module *setup_load_info(struct load_info *info) info->secstrings = (void *)info->hdr + info->sechdrs[info->hdr->e_shstrndx].sh_offset; - err = rewrite_section_headers(info); + err = rewrite_section_headers(info, flags); if (err) return ERR_PTR(err); @@ -2645,11 +2649,14 @@ static struct module *setup_load_info(struct load_info *info) return mod; } -static int check_modinfo(struct module *mod, struct load_info *info) +static int check_modinfo(struct module *mod, struct load_info *info, int flags) { const char *modmagic = get_modinfo(info, "vermagic"); int err; + if (flags & MODULE_INIT_IGNORE_VERMAGIC) + modmagic = NULL; + /* This is allowed: modprobe --force will invalidate it. */ if (!modmagic) { err = try_to_force_load(mod, "bad vermagic"); @@ -2885,18 +2892,18 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr, return 0; } -static struct module *layout_and_allocate(struct load_info *info) +static struct module *layout_and_allocate(struct load_info *info, int flags) { /* Module within temporary copy. 
*/ struct module *mod; Elf_Shdr *pcpusec; int err; - mod = setup_load_info(info); + mod = setup_load_info(info, flags); if (IS_ERR(mod)) return mod; - err = check_modinfo(mod, info); + err = check_modinfo(mod, info, flags); if (err) return ERR_PTR(err); @@ -3078,7 +3085,8 @@ static int may_init_module(void) /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ -static int load_module(struct load_info *info, const char __user *uargs) +static int load_module(struct load_info *info, const char __user *uargs, + int flags) { struct module *mod, *old; long err; @@ -3092,7 +3100,7 @@ static int load_module(struct load_info *info, const char __user *uargs) goto free_copy; /* Figure out module layout, and allocate all the memory. */ - mod = layout_and_allocate(info); + mod = layout_and_allocate(info, flags); if (IS_ERR(mod)) { err = PTR_ERR(mod); goto free_copy; @@ -3241,10 +3249,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, if (err) return err; - return load_module(&info, uargs); + return load_module(&info, uargs, 0); } -SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs) +SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) { int err; struct load_info info = { }; @@ -3253,13 +3261,17 @@ SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs) if (err) return err; - pr_debug("finit_module: fd=%d, uargs=%p\n", fd, uargs); + pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); + + if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS + |MODULE_INIT_IGNORE_VERMAGIC)) + return -EINVAL; err = copy_module_from_fd(fd, &info); if (err) return err; - return load_module(&info, uargs); + return load_module(&info, uargs, flags); } static inline int within(unsigned long addr, void *start, unsigned long size) -- cgit v1.2.3 From ccb1c31a7a8744cd153a7d92b726a56b56ad61d3 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 14 Dec 2012 22:09:51 
+0000 Subject: bridge: add flags to distinguish permanent mdb entries This patch adds a flag to each mdb entry, so that we can distinguish permanent entries from temporary entries. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 3 +++ net/bridge/br_mdb.c | 9 ++++++--- net/bridge/br_multicast.c | 8 +++++--- net/bridge/br_private.h | 4 +++- 4 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index afbb18a0227c..5db297514aec 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -163,6 +163,9 @@ struct br_port_msg { struct br_mdb_entry { __u32 ifindex; +#define MDB_TEMPORARY 0 +#define MDB_PERMANENT 1 + __u8 state; struct { union { __be32 ip4; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 6f0a2eebcb27..9cf5d2b28c76 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -83,6 +83,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (port) { struct br_mdb_entry e; e.ifindex = port->dev->ifindex; + e.state = p->state; e.addr.u.ip4 = p->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) e.addr.u.ip6 = p->addr.u.ip6; @@ -253,6 +254,8 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry) #endif } else return false; + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) + return false; return true; } @@ -310,7 +313,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, } static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group) + struct br_ip *group, unsigned char state) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; @@ -336,7 +339,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, break; } - p = br_multicast_new_port_group(port, group, *pp); + p =
br_multicast_new_port_group(port, group, *pp, state); if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); @@ -373,7 +376,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, #endif spin_lock_bh(&br->multicast_lock); - ret = br_mdb_add_group(br, p, &ip); + ret = br_mdb_add_group(br, p, &ip, entry->state); spin_unlock_bh(&br->multicast_lock); return ret; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2561af9d18a2..dce9defae3c6 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -279,7 +279,7 @@ static void br_multicast_port_group_expired(unsigned long data) spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || timer_pending(&pg->timer) || - hlist_unhashed(&pg->mglist)) + hlist_unhashed(&pg->mglist) || pg->state & MDB_PERMANENT) goto out; br_multicast_del_pg(br, pg); @@ -622,7 +622,8 @@ out: struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, - struct net_bridge_port_group __rcu *next) + struct net_bridge_port_group __rcu *next, + unsigned char state) { struct net_bridge_port_group *p; @@ -632,6 +633,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->addr = *group; p->port = port; + p->state = state; rcu_assign_pointer(p->next, next); hlist_add_head(&p->mglist, &port->mglist); setup_timer(&p->timer, br_multicast_port_group_expired, @@ -674,7 +676,7 @@ static int br_multicast_add_group(struct net_bridge *br, break; } - p = br_multicast_new_port_group(port, group, *pp); + p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY); if (unlikely(!p)) goto err; rcu_assign_pointer(*pp, p); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f21a739a6186..49b85af44016 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -83,6 +83,7 @@ struct net_bridge_port_group { struct rcu_head rcu; struct timer_list timer; struct br_ip addr; + unsigned char state; }; struct 
net_bridge_mdb_entry @@ -443,7 +444,8 @@ extern void br_multicast_free_pg(struct rcu_head *head); extern struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, - struct net_bridge_port_group *next); + struct net_bridge_port_group *next, + unsigned char state); extern void br_mdb_init(void); extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_ip *group, int type); -- cgit v1.2.3 From 992fb6e170639b0849bace8e49bf31bd37c4123c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:03:07 -0800 Subject: ptrace: introduce PTRACE_O_EXITKILL Ptrace jailers want to be sure that the tracee can never escape from the control. However if the tracer dies unexpectedly the tracee continues to run in potentially unsafe mode. Add the new ptrace option PTRACE_O_EXITKILL. If the tracer exits it sends SIGKILL to every tracee which has this bit set. Note that the new option is not equal to the last-option << 1. Because currently all options have an event, and the new one starts the eventless group. It uses the random 20 bit, so we have the room for 12 more events, but we can also add the new eventless options below this one. Suggested by Amnon Shiloh. Signed-off-by: Oleg Nesterov Tested-by: Amnon Shiloh Cc: Denys Vlasenko Cc: Michael Kerrisk Cc: Serge Hallyn Cc: Chris Evans Cc: David Howells Cc: "Eric W. 
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 2 ++ include/uapi/linux/ptrace.h | 5 ++++- kernel/ptrace.c | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index a89ff04bddd9..addfbe7c180e 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -32,6 +32,8 @@ #define PT_TRACE_EXIT PT_EVENT_FLAG(PTRACE_EVENT_EXIT) #define PT_TRACE_SECCOMP PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP) +#define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) + /* single stepping state bits (used on ARM and PA-RISC) */ #define PT_SINGLESTEP_BIT 31 #define PT_SINGLESTEP (1< diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1f5e55dda955..ec8118ab2a47 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -457,6 +457,9 @@ void exit_ptrace(struct task_struct *tracer) return; list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) { + if (unlikely(p->ptrace & PT_EXITKILL)) + send_sig_info(SIGKILL, SEND_SIG_FORCED, p); + if (__ptrace_detach(tracer, p)) list_add(&p->ptrace_entry, &ptrace_dead); } -- cgit v1.2.3 From 1b6370463e88b0c1c317de16d7b962acc1dab4f2 Mon Sep 17 00:00:00 2001 From: Sjur Brændeland Date: Fri, 14 Dec 2012 14:40:51 +1030 Subject: virtio_console: Add support for remoteproc serial MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a simple serial connection driver called VIRTIO_ID_RPROC_SERIAL (11) for communicating with a remote processor in an asymmetric multi-processing configuration. This implementation reuses the existing virtio_console implementation, and adds support for DMA allocation of data buffers and disables use of tty console and the virtio control queue. 
Signed-off-by: Sjur Brændeland Acked-by: Amit Shah Signed-off-by: Rusty Russell --- drivers/char/virtio_console.c | 192 +++++++++++++++++++++++++++++++++++----- include/uapi/linux/virtio_ids.h | 1 + 2 files changed, 170 insertions(+), 23 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 548224686963..55a89a4ae42f 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -37,8 +37,12 @@ #include #include #include +#include +#include #include "../tty/hvc/hvc_console.h" +#define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC) + /* * This is a global struct for storing common data for all the devices * this driver handles. @@ -112,6 +116,15 @@ struct port_buffer { /* offset in the buf from which to consume data */ size_t offset; + /* DMA address of buffer */ + dma_addr_t dma; + + /* Device we got DMA memory from */ + struct device *dev; + + /* List of pending dma buffers to free */ + struct list_head list; + /* If sgpages == 0 then buf is used */ unsigned int sgpages; @@ -331,6 +344,11 @@ static bool is_console_port(struct port *port) return false; } +static bool is_rproc_serial(const struct virtio_device *vdev) +{ + return is_rproc_enabled && vdev->id.device == VIRTIO_ID_RPROC_SERIAL; +} + static inline bool use_multiport(struct ports_device *portdev) { /* @@ -342,11 +360,13 @@ static inline bool use_multiport(struct ports_device *portdev) return portdev->vdev->features[0] & (1 << VIRTIO_CONSOLE_F_MULTIPORT); } -static void free_buf(struct port_buffer *buf) +static DEFINE_SPINLOCK(dma_bufs_lock); +static LIST_HEAD(pending_free_dma_bufs); + +static void free_buf(struct port_buffer *buf, bool can_sleep) { unsigned int i; - kfree(buf->buf); for (i = 0; i < buf->sgpages; i++) { struct page *page = sg_page(&buf->sg[i]); if (!page) @@ -354,14 +374,57 @@ static void free_buf(struct port_buffer *buf) put_page(page); } + if (!buf->dev) { + kfree(buf->buf); + } else if 
(is_rproc_enabled) { + unsigned long flags; + + /* dma_free_coherent requires interrupts to be enabled. */ + if (!can_sleep) { + /* queue up dma-buffers to be freed later */ + spin_lock_irqsave(&dma_bufs_lock, flags); + list_add_tail(&buf->list, &pending_free_dma_bufs); + spin_unlock_irqrestore(&dma_bufs_lock, flags); + return; + } + dma_free_coherent(buf->dev, buf->size, buf->buf, buf->dma); + + /* Release device refcnt and allow it to be freed */ + put_device(buf->dev); + } + kfree(buf); } +static void reclaim_dma_bufs(void) +{ + unsigned long flags; + struct port_buffer *buf, *tmp; + LIST_HEAD(tmp_list); + + if (list_empty(&pending_free_dma_bufs)) + return; + + /* Create a copy of the pending_free_dma_bufs while holding the lock */ + spin_lock_irqsave(&dma_bufs_lock, flags); + list_cut_position(&tmp_list, &pending_free_dma_bufs, + pending_free_dma_bufs.prev); + spin_unlock_irqrestore(&dma_bufs_lock, flags); + + /* Release the dma buffers, without irqs enabled */ + list_for_each_entry_safe(buf, tmp, &tmp_list, list) { + list_del(&buf->list); + free_buf(buf, true); + } +} + static struct port_buffer *alloc_buf(struct virtqueue *vq, size_t buf_size, int pages) { struct port_buffer *buf; + reclaim_dma_bufs(); + /* * Allocate buffer and the sg list. The sg list array is allocated * directly after the port_buffer struct. @@ -373,11 +436,34 @@ static struct port_buffer *alloc_buf(struct virtqueue *vq, size_t buf_size, buf->sgpages = pages; if (pages > 0) { + buf->dev = NULL; buf->buf = NULL; return buf; } - buf->buf = kmalloc(buf_size, GFP_KERNEL); + if (is_rproc_serial(vq->vdev)) { + /* + * Allocate DMA memory from ancestor. When a virtio + * device is created by remoteproc, the DMA memory is + * associated with the grandparent device: + * vdev => rproc => platform-dev. 
+ * The code here would have been less quirky if + * DMA_MEMORY_INCLUDES_CHILDREN had been supported + * in dma-coherent.c + */ + if (!vq->vdev->dev.parent || !vq->vdev->dev.parent->parent) + goto free_buf; + buf->dev = vq->vdev->dev.parent->parent; + + /* Increase device refcnt to avoid freeing it */ + get_device(buf->dev); + buf->buf = dma_alloc_coherent(buf->dev, buf_size, &buf->dma, + GFP_KERNEL); + } else { + buf->dev = NULL; + buf->buf = kmalloc(buf_size, GFP_KERNEL); + } + if (!buf->buf) goto free_buf; buf->len = 0; @@ -446,7 +532,7 @@ static void discard_port_data(struct port *port) port->stats.bytes_discarded += buf->len - buf->offset; if (add_inbuf(port->in_vq, buf) < 0) { err++; - free_buf(buf); + free_buf(buf, false); } port->inbuf = NULL; buf = get_inbuf(port); @@ -518,7 +604,7 @@ static void reclaim_consumed_buffers(struct port *port) return; } while ((buf = virtqueue_get_buf(port->out_vq, &len))) { - free_buf(buf); + free_buf(buf, false); port->outvq_full = false; } } @@ -765,7 +851,7 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, goto out; free_buf: - free_buf(buf); + free_buf(buf, true); out: return ret; } @@ -839,6 +925,15 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, .u.data = &sgl, }; + /* + * Rproc_serial does not yet support splice. To support splice + * pipe_to_sg() must allocate dma-buffers and copy content from + * regular pages to dma pages. And alloc_buf and free_buf must + * support allocating and freeing such a list of dma-buffers. 
+ */ + if (is_rproc_serial(port->out_vq->vdev)) + return -EINVAL; + ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK); if (ret < 0) return ret; @@ -857,7 +952,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, ret = __send_to_port(port, buf->sg, sgl.n, sgl.len, buf, true); if (unlikely(ret <= 0)) - kfree(sgl.sg); + free_buf(buf, true); return ret; } @@ -906,6 +1001,7 @@ static int port_fops_release(struct inode *inode, struct file *filp) reclaim_consumed_buffers(port); spin_unlock_irq(&port->outvq_lock); + reclaim_dma_bufs(); /* * Locks aren't necessary here as a port can't be opened after * unplug, and if a port isn't unplugged, a kref would already @@ -1057,7 +1153,10 @@ static void resize_console(struct port *port) return; vdev = port->portdev->vdev; - if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE)) + + /* Don't test F_SIZE at all if we're rproc: not a valid feature! */ + if (!is_rproc_serial(vdev) && + virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE)) hvc_resize(port->cons.hvc, port->cons.ws); } @@ -1249,7 +1348,7 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) ret = add_inbuf(vq, buf); if (ret < 0) { spin_unlock_irq(lock); - free_buf(buf); + free_buf(buf, true); break; } nr_added_bufs++; @@ -1337,10 +1436,18 @@ static int add_port(struct ports_device *portdev, u32 id) goto free_device; } - /* - * If we're not using multiport support, this has to be a console port - */ - if (!use_multiport(port->portdev)) { + if (is_rproc_serial(port->portdev->vdev)) + /* + * For rproc_serial assume remote processor is connected. + * rproc_serial does not want the console port, only + * the generic port implementation. + */ + port->host_connected = true; + else if (!use_multiport(port->portdev)) { + /* + * If we're not using multiport support, + * this has to be a console port. 
+ */ err = init_port_console(port); if (err) goto free_inbufs; @@ -1373,7 +1480,7 @@ static int add_port(struct ports_device *portdev, u32 id) free_inbufs: while ((buf = virtqueue_detach_unused_buf(port->in_vq))) - free_buf(buf); + free_buf(buf, true); free_device: device_destroy(pdrvdata.class, port->dev->devt); free_cdev: @@ -1415,11 +1522,11 @@ static void remove_port_data(struct port *port) /* Remove buffers we queued up for the Host to send us data in. */ while ((buf = virtqueue_detach_unused_buf(port->in_vq))) - free_buf(buf); + free_buf(buf, true); /* Free pending buffers from the out-queue. */ while ((buf = virtqueue_detach_unused_buf(port->out_vq))) - free_buf(buf); + free_buf(buf, true); } /* @@ -1621,7 +1728,7 @@ static void control_work_handler(struct work_struct *work) if (add_inbuf(portdev->c_ivq, buf) < 0) { dev_warn(&portdev->vdev->dev, "Error adding buffer to queue\n"); - free_buf(buf); + free_buf(buf, false); } } spin_unlock(&portdev->cvq_lock); @@ -1817,10 +1924,10 @@ static void remove_controlq_data(struct ports_device *portdev) return; while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) - free_buf(buf); + free_buf(buf, true); while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) - free_buf(buf); + free_buf(buf, true); } /* @@ -1867,11 +1974,15 @@ static int __devinit virtcons_probe(struct virtio_device *vdev) multiport = false; portdev->config.max_nr_ports = 1; - if (virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT, - offsetof(struct virtio_console_config, - max_nr_ports), - &portdev->config.max_nr_ports) == 0) + + /* Don't test MULTIPORT at all if we're rproc: not a valid feature! 
*/ + if (!is_rproc_serial(vdev) && + virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT, + offsetof(struct virtio_console_config, + max_nr_ports), + &portdev->config.max_nr_ports) == 0) { multiport = true; + } err = init_vqs(portdev); if (err < 0) { @@ -1981,6 +2092,16 @@ static unsigned int features[] = { VIRTIO_CONSOLE_F_MULTIPORT, }; +static struct virtio_device_id rproc_serial_id_table[] = { +#if IS_ENABLED(CONFIG_REMOTEPROC) + { VIRTIO_ID_RPROC_SERIAL, VIRTIO_DEV_ANY_ID }, +#endif + { 0 }, +}; + +static unsigned int rproc_serial_features[] = { +}; + #ifdef CONFIG_PM static int virtcons_freeze(struct virtio_device *vdev) { @@ -2065,6 +2186,20 @@ static struct virtio_driver virtio_console = { #endif }; +/* + * virtio_rproc_serial refers to __devinit function which causes + * section mismatch warnings. So use __refdata to silence warnings. + */ +static struct virtio_driver __refdata virtio_rproc_serial = { + .feature_table = rproc_serial_features, + .feature_table_size = ARRAY_SIZE(rproc_serial_features), + .driver.name = "virtio_rproc_serial", + .driver.owner = THIS_MODULE, + .id_table = rproc_serial_id_table, + .probe = virtcons_probe, + .remove = virtcons_remove, +}; + static int __init init(void) { int err; @@ -2089,7 +2224,15 @@ static int __init init(void) pr_err("Error %d registering virtio driver\n", err); goto free; } + err = register_virtio_driver(&virtio_rproc_serial); + if (err < 0) { + pr_err("Error %d registering virtio rproc serial driver\n", + err); + goto unregister; + } return 0; +unregister: + unregister_virtio_driver(&virtio_console); free: if (pdrvdata.debugfs_dir) debugfs_remove_recursive(pdrvdata.debugfs_dir); @@ -2099,7 +2242,10 @@ free: static void __exit fini(void) { + reclaim_dma_bufs(); + unregister_virtio_driver(&virtio_console); + unregister_virtio_driver(&virtio_rproc_serial); class_destroy(pdrvdata.class); if (pdrvdata.debugfs_dir) diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 
270fb22c5811..a7630d04029f 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -37,5 +37,6 @@ #define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ #define VIRTIO_ID_SCSI 8 /* virtio scsi */ #define VIRTIO_ID_9P 9 /* 9p virtio console */ +#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #endif /* _LINUX_VIRTIO_IDS_H */ -- cgit v1.2.3 From c196f6ee61e42df9acec797ab41a0c8d87e8aa2e Mon Sep 17 00:00:00 2001 From: Manjunath Hadli Date: Thu, 18 Oct 2012 07:54:59 -0300 Subject: [media] media: add new mediabus format enums for dm365 add new enum entries for supporting the media-bus formats on dm365. These include some bayer and some non-bayer formats. V4L2_MBUS_FMT_YDYUYDYV8_1X16 and V4L2_MBUS_FMT_UV8_1X8 are used internal to the hardware by the resizer. V4L2_MBUS_FMT_SBGGR10_ALAW8_1X8 represents the bayer ALAW format that is supported by dm365 hardware. Signed-off-by: Manjunath Hadli Signed-off-by: Lad, Prabhakar Acked-by: Sakari Ailus Acked-by: Laurent Pinchart Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/subdev-formats.xml | 250 ++++++++++++++++++++- include/uapi/linux/v4l2-mediabus.h | 10 +- 2 files changed, 252 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml index a0a936455fae..6f341d1eca1a 100644 --- a/Documentation/DocBook/media/v4l/subdev-formats.xml +++ b/Documentation/DocBook/media/v4l/subdev-formats.xml @@ -353,9 +353,9 @@ The number of bits per pixel component. All components are transferred on the same number of bits. Common values are 8, 10 and 12. - If the pixel components are DPCM-compressed, a mention of the - DPCM compression and the number of bits per compressed pixel component. - + The compression (optional). 
If the pixel components are + ALAW- or DPCM-compressed, a mention of the compression scheme and the + number of bits per compressed pixel component. The number of bus samples per pixel. Pixels that are wider than the bus width must be transferred in multiple samples. Common values are 1 and 2. @@ -504,6 +504,74 @@ r1 r0 + + V4L2_MBUS_FMT_SBGGR10_ALAW8_1X8 + 0x3015 + + - + - + - + - + b7 + b6 + b5 + b4 + b3 + b2 + b1 + b0 + + + V4L2_MBUS_FMT_SGBRG10_ALAW8_1X8 + 0x3016 + + - + - + - + - + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + + + V4L2_MBUS_FMT_SGRBG10_ALAW8_1X8 + 0x3017 + + - + - + - + - + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + + + V4L2_MBUS_FMT_SRGGB10_ALAW8_1X8 + 0x3018 + + - + - + - + - + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8 0x300b @@ -853,10 +921,16 @@ Packed YUV Formats Those data formats transfer pixel data as (possibly downsampled) Y, U - and V components. The format code is made of the following information. + and V components. Some formats include dummy bits in some of their samples + and are collectively referred to as "YDYC" (Y-Dummy-Y-Chroma) formats. + One cannot rely on the values of these dummy bits as those are undefined. + + The format code is made of the following information. The Y, U and V components order code, as transferred on the - bus. Possible values are YUYV, UYVY, YVYU and VYUY. + bus. Possible values are YUYV, UYVY, YVYU and VYUY for formats with no + dummy bit, and YDYUYDYV, YDYVYDYU, YUYDYVYD and YVYDYUYD for YDYC formats. + The number of bits per pixel component. All components are transferred on the same number of bits. Common values are 8, 10 and 12. @@ -877,7 +951,21 @@ U, Y, V, Y order will be named V4L2_MBUS_FMT_UYVY8_2X8. - The following table lisst existing packet YUV formats. + list existing packet YUV + formats and describes the organization of each pixel data in each sample. + When a format pattern is split across multiple samples each of the samples + in the pattern is described. 
+ + The role of each bit transferred over the bus is identified by one + of the following codes. + + + yx for luma component bit number x + ux for blue chroma component bit number x + vx for red chroma component bit number x + - for non-available bits (for positions higher than the bus width) + d for dummy bits + YUV Formats @@ -965,6 +1053,56 @@ y1y0 + + V4L2_MBUS_FMT_UV8_1X8 + 0x2015 + + - + - + - + - + - + - + - + - + - + - + - + - + u7 + u6 + u5 + u4 + u3 + u2 + u1 + u0 + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + v7 + v6 + v5 + v4 + v3 + v2 + v1 + v0 + V4L2_MBUS_FMT_UYVY8_1_5X8 0x2002 @@ -2415,6 +2553,106 @@ u1 u0 + + V4L2_MBUS_FMT_YDYUYDYV8_1X16 + 0x2014 + + - + - + - + - + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + d + d + d + d + d + d + d + d + + + + + + - + - + - + - + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + u7 + u6 + u5 + u4 + u3 + u2 + u1 + u0 + + + + + + - + - + - + - + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + d + d + d + d + d + d + d + d + + + + + + - + - + - + - + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + v7 + v6 + v5 + v4 + v3 + v2 + v1 + v0 + V4L2_MBUS_FMT_YUYV10_1X20 0x200d diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h index 7d64e0e1a18b..e860f55820ec 100644 --- a/include/uapi/linux/v4l2-mediabus.h +++ b/include/uapi/linux/v4l2-mediabus.h @@ -47,8 +47,9 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_RGB565_2X8_BE = 0x1007, V4L2_MBUS_FMT_RGB565_2X8_LE = 0x1008, - /* YUV (including grey) - next is 0x2014 */ + /* YUV (including grey) - next is 0x2016 */ V4L2_MBUS_FMT_Y8_1X8 = 0x2001, + V4L2_MBUS_FMT_UV8_1X8 = 0x2015, V4L2_MBUS_FMT_UYVY8_1_5X8 = 0x2002, V4L2_MBUS_FMT_VYUY8_1_5X8 = 0x2003, V4L2_MBUS_FMT_YUYV8_1_5X8 = 0x2004, @@ -65,14 +66,19 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_VYUY8_1X16 = 0x2010, V4L2_MBUS_FMT_YUYV8_1X16 = 0x2011, V4L2_MBUS_FMT_YVYU8_1X16 = 0x2012, + V4L2_MBUS_FMT_YDYUYDYV8_1X16 = 0x2014, V4L2_MBUS_FMT_YUYV10_1X20 = 0x200d, V4L2_MBUS_FMT_YVYU10_1X20 = 0x200e, - /* Bayer - next is 
0x3015 */ + /* Bayer - next is 0x3019 */ V4L2_MBUS_FMT_SBGGR8_1X8 = 0x3001, V4L2_MBUS_FMT_SGBRG8_1X8 = 0x3013, V4L2_MBUS_FMT_SGRBG8_1X8 = 0x3002, V4L2_MBUS_FMT_SRGGB8_1X8 = 0x3014, + V4L2_MBUS_FMT_SBGGR10_ALAW8_1X8 = 0x3015, + V4L2_MBUS_FMT_SGBRG10_ALAW8_1X8 = 0x3016, + V4L2_MBUS_FMT_SGRBG10_ALAW8_1X8 = 0x3017, + V4L2_MBUS_FMT_SRGGB10_ALAW8_1X8 = 0x3018, V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8 = 0x300b, V4L2_MBUS_FMT_SGBRG10_DPCM8_1X8 = 0x300c, V4L2_MBUS_FMT_SGRBG10_DPCM8_1X8 = 0x3009, -- cgit v1.2.3 From 05ad6fc1d54f106d5b8c598e2f9b59b12f3fb476 Mon Sep 17 00:00:00 2001 From: Manjunath Hadli Date: Thu, 18 Oct 2012 07:58:02 -0300 Subject: [media] v4l2: add new pixel formats supported on dm365 add new macro V4L2_PIX_FMT_SGRBG10ALAW8 and associated formats to represent Bayer format frames compressed by A-LAW algorithm, add V4L2_PIX_FMT_UV8 to represent storage of CbCr data (UV interleaved) only. Signed-off-by: Manjunath Hadli Signed-off-by: Lad, Prabhakar Acked-by: Sakari Ailus Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- .../DocBook/media/v4l/pixfmt-srggb10alaw8.xml | 34 ++++++++++++ Documentation/DocBook/media/v4l/pixfmt-uv8.xml | 62 ++++++++++++++++++++++ Documentation/DocBook/media/v4l/pixfmt.xml | 2 + include/uapi/linux/videodev2.h | 8 +++ 4 files changed, 106 insertions(+) create mode 100644 Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml create mode 100644 Documentation/DocBook/media/v4l/pixfmt-uv8.xml (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml b/Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml new file mode 100644 index 000000000000..c934192e98ce --- /dev/null +++ b/Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml @@ -0,0 +1,34 @@ + + + + V4L2_PIX_FMT_SBGGR10ALAW8 ('aBA8'), + V4L2_PIX_FMT_SGBRG10ALAW8 ('aGA8'), + V4L2_PIX_FMT_SGRBG10ALAW8 ('agA8'), + V4L2_PIX_FMT_SRGGB10ALAW8 ('aRA8'), + + &manvol; + + + + V4L2_PIX_FMT_SBGGR10ALAW8 + + + V4L2_PIX_FMT_SGBRG10ALAW8 + 
+ + V4L2_PIX_FMT_SGRBG10ALAW8 + + + V4L2_PIX_FMT_SRGGB10ALAW8 + + 10-bit Bayer formats compressed to 8 bits + + + Description + The following four pixel formats are raw sRGB / Bayer + formats with 10 bits per color compressed to 8 bits each, + using the A-LAW algorithm. Each color component consumes 8 + bits of memory. In other respects this format is similar to + V4L2_PIX_FMT_SRGGB10. + + diff --git a/Documentation/DocBook/media/v4l/pixfmt-uv8.xml b/Documentation/DocBook/media/v4l/pixfmt-uv8.xml new file mode 100644 index 000000000000..c507c1f73cd0 --- /dev/null +++ b/Documentation/DocBook/media/v4l/pixfmt-uv8.xml @@ -0,0 +1,62 @@ + + + V4L2_PIX_FMT_UV8 ('UV8') + &manvol; + + + V4L2_PIX_FMT_UV8 + UV plane interleaved + + + Description + In this format there is no Y plane, only a CbCr plane + (UV interleaved). + + + <constant>V4L2_PIX_FMT_UV8</constant> + pixel image + + + + Byte Order. + Each cell is one byte. + + + + + + start + 0: + Cb00 + Cr00 + Cb01 + Cr01 + + + start + 4: + Cb10 + Cr10 + Cb11 + Cr11 + + + start + 8: + Cb20 + Cr20 + Cb21 + Cr21 + + + start + 12: + Cb30 + Cr30 + Cb31 + Cr31 + + + + + + + + + diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml index bf94f417592c..99b8d2ad6e4f 100644 --- a/Documentation/DocBook/media/v4l/pixfmt.xml +++ b/Documentation/DocBook/media/v4l/pixfmt.xml @@ -673,6 +673,7 @@ access the palette, this must be done with ioctls of the Linux framebuffer API.< &sub-srggb8; &sub-sbggr16; &sub-srggb10; + &sub-srggb10alaw8; &sub-srggb10dpcm8; &sub-srggb12; @@ -701,6 +702,7 @@ information. 
&sub-y12; &sub-y10b; &sub-y16; + &sub-uv8; &sub-yuyv; &sub-uyvy; &sub-yvyu; diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 3cf3e946e331..39d2cecdf38c 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -334,6 +334,9 @@ struct v4l2_pix_format { /* Palette formats */ #define V4L2_PIX_FMT_PAL8 v4l2_fourcc('P', 'A', 'L', '8') /* 8 8-bit palette */ +/* Chrominance formats */ +#define V4L2_PIX_FMT_UV8 v4l2_fourcc('U', 'V', '8', ' ') /* 8 UV 4:4 */ + /* Luminance+Chrominance formats */ #define V4L2_PIX_FMT_YVU410 v4l2_fourcc('Y', 'V', 'U', '9') /* 9 YVU 4:1:0 */ #define V4L2_PIX_FMT_YVU420 v4l2_fourcc('Y', 'V', '1', '2') /* 12 YVU 4:2:0 */ @@ -386,6 +389,11 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_SGBRG12 v4l2_fourcc('G', 'B', '1', '2') /* 12 GBGB.. RGRG.. */ #define V4L2_PIX_FMT_SGRBG12 v4l2_fourcc('B', 'A', '1', '2') /* 12 GRGR.. BGBG.. */ #define V4L2_PIX_FMT_SRGGB12 v4l2_fourcc('R', 'G', '1', '2') /* 12 RGRG.. GBGB.. 
*/ + /* 10bit raw bayer a-law compressed to 8 bits */ +#define V4L2_PIX_FMT_SBGGR10ALAW8 v4l2_fourcc('a', 'B', 'A', '8') +#define V4L2_PIX_FMT_SGBRG10ALAW8 v4l2_fourcc('a', 'G', 'A', '8') +#define V4L2_PIX_FMT_SGRBG10ALAW8 v4l2_fourcc('a', 'g', 'A', '8') +#define V4L2_PIX_FMT_SRGGB10ALAW8 v4l2_fourcc('a', 'R', 'A', '8') /* 10bit raw bayer DPCM compressed to 8 bits */ #define V4L2_PIX_FMT_SBGGR10DPCM8 v4l2_fourcc('b', 'B', 'A', '8') #define V4L2_PIX_FMT_SGBRG10DPCM8 v4l2_fourcc('b', 'G', 'A', '8') -- cgit v1.2.3 From 031b6566983ad9c0247087f039af22b3f87596a3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 18 Nov 2012 15:13:17 -0500 Subject: unify SS_ONSTACK/SS_DISABLE definitions Signed-off-by: Al Viro --- arch/alpha/include/asm/signal.h | 6 ------ arch/arm/include/uapi/asm/signal.h | 7 ------- arch/avr32/include/uapi/asm/signal.h | 6 ------ arch/cris/include/asm/signal.h | 6 ------ arch/h8300/include/asm/signal.h | 6 ------ arch/ia64/include/uapi/asm/signal.h | 6 ------ arch/m32r/include/asm/signal.h | 6 ------ arch/m68k/include/uapi/asm/signal.h | 6 ------ arch/mips/include/uapi/asm/signal.h | 6 ------ arch/mn10300/include/uapi/asm/signal.h | 6 ------ arch/parisc/include/uapi/asm/signal.h | 6 ------ arch/powerpc/include/uapi/asm/signal.h | 6 ------ arch/s390/include/uapi/asm/signal.h | 6 ------ arch/sparc/include/uapi/asm/signal.h | 6 ------ arch/x86/include/asm/signal.h | 6 ------ arch/xtensa/include/uapi/asm/signal.h | 6 ------ include/uapi/asm-generic/signal.h | 6 ------ include/uapi/linux/signal.h | 2 ++ 18 files changed, 2 insertions(+), 103 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/alpha/include/asm/signal.h b/arch/alpha/include/asm/signal.h index 45552862cc10..a9aa0133f656 100644 --- a/arch/alpha/include/asm/signal.h +++ b/arch/alpha/include/asm/signal.h @@ -98,12 +98,6 @@ typedef unsigned long sigset_t; #define SA_ONESHOT SA_RESETHAND #define SA_NOMASK SA_NODEFER -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define 
SS_DISABLE 2 - #define MINSIGSTKSZ 4096 #define SIGSTKSZ 16384 diff --git a/arch/arm/include/uapi/asm/signal.h b/arch/arm/include/uapi/asm/signal.h index 921c57fdc52e..33073bdcf091 100644 --- a/arch/arm/include/uapi/asm/signal.h +++ b/arch/arm/include/uapi/asm/signal.h @@ -87,13 +87,6 @@ typedef unsigned long sigset_t; #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND - -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/avr32/include/uapi/asm/signal.h b/arch/avr32/include/uapi/asm/signal.h index eb46f61adb7d..1b77a93eff50 100644 --- a/arch/avr32/include/uapi/asm/signal.h +++ b/arch/avr32/include/uapi/asm/signal.h @@ -89,12 +89,6 @@ typedef unsigned long sigset_t; #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/cris/include/asm/signal.h b/arch/cris/include/asm/signal.h index 72dbbf59dfae..fdbffb773d81 100644 --- a/arch/cris/include/asm/signal.h +++ b/arch/cris/include/asm/signal.h @@ -97,12 +97,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/h8300/include/asm/signal.h b/arch/h8300/include/asm/signal.h index c43c0a7d2c2e..575c79a008f4 100644 --- a/arch/h8300/include/asm/signal.h +++ b/arch/h8300/include/asm/signal.h @@ -96,12 +96,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/ia64/include/uapi/asm/signal.h b/arch/ia64/include/uapi/asm/signal.h index e531c424434c..c0ea2855e96b 100644 --- a/arch/ia64/include/uapi/asm/signal.h +++ b/arch/ia64/include/uapi/asm/signal.h @@ -78,12 +78,6 @@ 
#define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - /* * The minimum stack size needs to be fairly large because we want to * be sure that an app compiled for today's CPUs will continue to run diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h index e4d2e2ad5f1e..ba714c45259d 100644 --- a/arch/m32r/include/asm/signal.h +++ b/arch/m32r/include/asm/signal.h @@ -98,12 +98,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/m68k/include/uapi/asm/signal.h b/arch/m68k/include/uapi/asm/signal.h index 2b450f311bd9..cba6f858bb46 100644 --- a/arch/m68k/include/uapi/asm/signal.h +++ b/arch/m68k/include/uapi/asm/signal.h @@ -80,12 +80,6 @@ typedef unsigned long sigset_t; #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/mips/include/uapi/asm/signal.h b/arch/mips/include/uapi/asm/signal.h index 3f1237c6c80e..770732cb8d03 100644 --- a/arch/mips/include/uapi/asm/signal.h +++ b/arch/mips/include/uapi/asm/signal.h @@ -86,12 +86,6 @@ typedef unsigned long old_sigset_t; /* at least 32 bits */ #define SA_RESTORER 0x04000000 /* Only for o32 */ -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/mn10300/include/uapi/asm/signal.h b/arch/mn10300/include/uapi/asm/signal.h index 08dcd6a85618..f423a08d7eeb 100644 --- a/arch/mn10300/include/uapi/asm/signal.h +++ b/arch/mn10300/include/uapi/asm/signal.h @@ -92,12 +92,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 
diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h index b1ddaa243376..a2fa297196bc 100644 --- a/arch/parisc/include/uapi/asm/signal.h +++ b/arch/parisc/include/uapi/asm/signal.h @@ -71,12 +71,6 @@ #define SA_RESTORER 0x04000000 /* obsolete -- ignored */ -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/powerpc/include/uapi/asm/signal.h b/arch/powerpc/include/uapi/asm/signal.h index 48fa8d3f2f9a..e079fb39d5bc 100644 --- a/arch/powerpc/include/uapi/asm/signal.h +++ b/arch/powerpc/include/uapi/asm/signal.h @@ -85,12 +85,6 @@ typedef struct { #define SA_RESTORER 0x04000000U -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h index 8c6a49e392ee..2f43cfbf5f1a 100644 --- a/arch/s390/include/uapi/asm/signal.h +++ b/arch/s390/include/uapi/asm/signal.h @@ -90,12 +90,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/sparc/include/uapi/asm/signal.h b/arch/sparc/include/uapi/asm/signal.h index 1a041892538f..c4ffd6c97106 100644 --- a/arch/sparc/include/uapi/asm/signal.h +++ b/arch/sparc/include/uapi/asm/signal.h @@ -147,12 +147,6 @@ struct sigstack { #define SIG_UNBLOCK 0x02 /* for unblocking signals */ #define SIG_SETMASK 0x04 /* for setting the signal mask */ -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 4096 #define SIGSTKSZ 16384 diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 0dba8b7a6ac7..7bb5490b3aa2 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -112,12 +112,6 @@ typedef unsigned long sigset_t; 
#define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/xtensa/include/uapi/asm/signal.h b/arch/xtensa/include/uapi/asm/signal.h index b88ce96f2af9..dacf716dd3e0 100644 --- a/arch/xtensa/include/uapi/asm/signal.h +++ b/arch/xtensa/include/uapi/asm/signal.h @@ -97,12 +97,6 @@ typedef struct { #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/include/uapi/asm-generic/signal.h b/include/uapi/asm-generic/signal.h index 0a78028984de..6fae30fd16ab 100644 --- a/include/uapi/asm-generic/signal.h +++ b/include/uapi/asm-generic/signal.h @@ -80,12 +80,6 @@ * SA_RESTORER 0x04000000 */ -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/include/uapi/linux/signal.h b/include/uapi/linux/signal.h index dff452ed6d00..e1bd50c29ded 100644 --- a/include/uapi/linux/signal.h +++ b/include/uapi/linux/signal.h @@ -4,5 +4,7 @@ #include #include +#define SS_ONSTACK 1 +#define SS_DISABLE 2 #endif /* _UAPI_LINUX_SIGNAL_H */ -- cgit v1.2.3 From 1202ecdc24fc88d5b144824f55ec9c8899591caf Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Sun, 21 Oct 2012 16:02:47 -0300 Subject: [media] v4l: Define video buffer flags for timestamp types Define video buffer flags for different timestamp types. Everything up to now have used either realtime clock or monotonic clock, without a way to tell which clock the timestamp was taken from. Also document that the clock source of the timestamp in the timestamp field depends on buffer flags. 
[mchehab@redhat.com: fix a few wrong references to Kernel 3.8 - as this patch is meant for 3.9] Signed-off-by: Sakari Ailus Acked-by: Laurent Pinchart Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/compat.xml | 12 +++++++ Documentation/DocBook/media/v4l/io.xml | 53 +++++++++++++++++++++++------- Documentation/DocBook/media/v4l/v4l2.xml | 12 ++++++- include/uapi/linux/videodev2.h | 4 +++ 4 files changed, 69 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml index 3dd9e78815d1..ebd2bfd1ee8e 100644 --- a/Documentation/DocBook/media/v4l/compat.xml +++ b/Documentation/DocBook/media/v4l/compat.xml @@ -2477,6 +2477,18 @@ that used it. It was originally scheduled for removal in 2.6.35. +
+ V4L2 in Linux 3.9 + + + Added timestamp types to the + flags field in + v4l2_buffer. See the buffer flags table. + + +
+
Relation of V4L2 to other Linux multimedia APIs diff --git a/Documentation/DocBook/media/v4l/io.xml b/Documentation/DocBook/media/v4l/io.xml index 388a34032653..09e8dcf5e9c4 100644 --- a/Documentation/DocBook/media/v4l/io.xml +++ b/Documentation/DocBook/media/v4l/io.xml @@ -741,17 +741,19 @@ applications when an output stream. struct timeval timestamp - For input streams this is the -system time (as returned by the gettimeofday() -function) when the first data byte was captured. For output streams -the data will not be displayed before this time, secondary to the -nominal frame rate determined by the current video standard in -enqueued order. Applications can for example zero this field to -display frames as soon as possible. The driver stores the time at -which the first data byte was actually sent out in the -timestamp field. This permits -applications to monitor the drift between the video and system -clock. + For input streams this is the time when the first data + byte was captured, as returned by the + clock_gettime() function for the relevant + clock id; see V4L2_BUF_FLAG_TIMESTAMP_* in + the buffer flags table. For output streams the data + will not be displayed before this time, secondary to the nominal + frame rate determined by the current video standard in enqueued + order. Applications can for example zero this field to display + frames as soon as possible. The driver stores the time at which + the first data byte was actually sent out in the + timestamp field. This permits + applications to monitor the drift between the video and system + clock. &v4l2-timecode; @@ -1114,6 +1116,35 @@ Typically applications shall use this flag for output buffers if the data in this buffer has not been created by the CPU but by some DMA-capable unit, in which case caches have not been used. + + V4L2_BUF_FLAG_TIMESTAMP_MASK + 0xe000 + Mask for timestamp types below. 
To test the + timestamp type, mask out bits not belonging to timestamp + type by performing a logical AND operation with buffer + flags and timestamp mask. + + + V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN + 0x0000 + Unknown timestamp type. This type is used by + drivers before Linux 3.9 and may be either monotonic (see + below) or realtime (wall clock). Monotonic clock has been + favoured in embedded systems whereas most of the drivers + use the realtime clock. Both kinds of timestamps are + available in user space via + clock_gettime(2) using clock IDs + CLOCK_MONOTONIC and + CLOCK_REALTIME, respectively. + + + V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC + 0x2000 + The buffer timestamp has been taken from the + CLOCK_MONOTONIC clock. To access the + same clock outside V4L2, use + clock_gettime(2). +
diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml index 4d110b1ad3e9..8fe29427c8e4 100644 --- a/Documentation/DocBook/media/v4l/v4l2.xml +++ b/Documentation/DocBook/media/v4l/v4l2.xml @@ -139,6 +139,16 @@ structs, ioctls) must be noted in more detail in the history chapter (compat.xml), along with the possible impact on existing drivers and applications. --> + + 3.9 + 2012-12-03 + sa + Added timestamp types to + v4l2_buffer, see . + + + 3.6 2012-07-02 @@ -472,7 +482,7 @@ and discussions on the V4L mailing list. Video for Linux Two API Specification - Revision 3.6 + Revision 3.9 &sub-common; diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 39d2cecdf38c..94cbe26e9f00 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -701,6 +701,10 @@ struct v4l2_buffer { /* Cache handling flags */ #define V4L2_BUF_FLAG_NO_CACHE_INVALIDATE 0x0800 #define V4L2_BUF_FLAG_NO_CACHE_CLEAN 0x1000 +/* Timestamp type */ +#define V4L2_BUF_FLAG_TIMESTAMP_MASK 0xe000 +#define V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN 0x0000 +#define V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC 0x2000 /** * struct v4l2_exportbuffer - export of video buffer as DMABUF file descriptor -- cgit v1.2.3 From b01189b85b7653a51ebe851fc4d70702cebfed3c Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 28 Nov 2012 14:40:32 -0300 Subject: [media] V4L: DocBook: Add V4L2_MBUS_FMT_YUV10_1X30 media bus pixel code This patch adds definition of media bus code for YUV pixel format transferred in 30-bit samples where each component has 10 bits width. 
[mchehab@redhat.com: fix a merge conflict at v4l2-mediabus.h] Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/subdev-formats.xml | 716 ++++++--------------- Documentation/DocBook/media_api.tmpl | 1 + include/uapi/linux/v4l2-mediabus.h | 3 +- 3 files changed, 195 insertions(+), 525 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml index 6f341d1eca1a..cc51372ed5e0 100644 --- a/Documentation/DocBook/media/v4l/subdev-formats.xml +++ b/Documentation/DocBook/media/v4l/subdev-formats.xml @@ -973,27 +973,37 @@ - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Identifier @@ -1005,6 +1015,16 @@ Bit + 29 + 28 + 27 + 26 + 25 + 24 + 23 + 22 + 21 + 10 19 18 17 @@ -1032,16 +1052,8 @@ V4L2_MBUS_FMT_Y8_1X8 0x2001 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1107,16 +1119,8 @@ V4L2_MBUS_FMT_UYVY8_1_5X8 0x2002 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1132,16 +1136,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1157,16 +1153,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1182,16 +1170,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -1207,16 +1187,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1232,16 +1204,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1257,16 +1221,8 @@ V4L2_MBUS_FMT_VYUY8_1_5X8 0x2003 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -1282,16 +1238,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1307,16 +1255,8 @@ - - - - - - - - - - - - - - - - - - - - + 
&dash-ent-10; + &dash-ent-10; - - y7 @@ -1332,16 +1272,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1357,16 +1289,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1382,16 +1306,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1407,16 +1323,8 @@ V4L2_MBUS_FMT_YUYV8_1_5X8 0x2004 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1432,16 +1340,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1457,16 +1357,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1482,16 +1374,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1507,16 +1391,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1532,16 +1408,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -1557,16 +1425,8 @@ V4L2_MBUS_FMT_YVYU8_1_5X8 0x2005 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1582,16 +1442,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1607,16 +1459,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -1632,16 +1476,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1657,16 +1493,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1682,16 +1510,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1707,16 +1527,8 @@ V4L2_MBUS_FMT_UYVY8_2X8 0x2006 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1732,16 +1544,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1754,19 +1558,11 @@ y0 - - - - - - - - - - - - - - - - - - - - - - - + + + + &dash-ent-10; + &dash-ent-10; - - v7 @@ -1782,16 
+1578,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1807,16 +1595,8 @@ V4L2_MBUS_FMT_VYUY8_2X8 0x2007 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -1832,16 +1612,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1857,16 +1629,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1882,16 +1646,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1907,16 +1663,8 @@ V4L2_MBUS_FMT_YUYV8_2X8 0x2008 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1932,16 +1680,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -1957,16 +1697,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -1982,16 +1714,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -2007,16 +1731,8 @@ V4L2_MBUS_FMT_YVYU8_2X8 0x2009 - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -2032,16 +1748,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - v7 @@ -2057,16 +1765,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - y7 @@ -2082,16 +1782,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; - - u7 @@ -2107,16 +1799,8 @@ V4L2_MBUS_FMT_Y10_1X10 0x200a - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; y9 y8 y7 @@ -2132,16 +1816,8 @@ V4L2_MBUS_FMT_YUYV10_2X10 0x200b - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; y9 y8 y7 @@ -2157,16 +1833,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; u9 u8 u7 @@ -2182,16 +1850,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; y9 y8 y7 @@ -2207,16 +1867,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; v9 v8 v7 @@ 
-2232,16 +1884,8 @@ V4L2_MBUS_FMT_YVYU10_2X10 0x200c - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; y9 y8 y7 @@ -2257,16 +1901,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; v9 v8 v7 @@ -2282,16 +1918,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; y9 y8 y7 @@ -2307,16 +1935,8 @@ - - - - - - - - - - - - - - - - - - - - + &dash-ent-10; + &dash-ent-10; u9 u8 u7 @@ -2332,6 +1952,7 @@ V4L2_MBUS_FMT_Y12_1X12 0x2013 + &dash-ent-10; - - - @@ -2357,6 +1978,7 @@ V4L2_MBUS_FMT_UYVY8_1X16 0x200f + &dash-ent-10; - - - @@ -2382,6 +2004,7 @@ + &dash-ent-10; - - - @@ -2407,6 +2030,7 @@ V4L2_MBUS_FMT_VYUY8_1X16 0x2010 + &dash-ent-10; - - - @@ -2432,6 +2056,7 @@ + &dash-ent-10; - - - @@ -2457,6 +2082,7 @@ V4L2_MBUS_FMT_YUYV8_1X16 0x2011 + &dash-ent-10; - - - @@ -2482,6 +2108,7 @@ + &dash-ent-10; - - - @@ -2507,6 +2134,7 @@ V4L2_MBUS_FMT_YVYU8_1X16 0x2012 + &dash-ent-10; - - - @@ -2532,6 +2160,7 @@ + &dash-ent-10; - - - @@ -2657,6 +2286,7 @@ V4L2_MBUS_FMT_YUYV10_1X20 0x200d + &dash-ent-10; y9 y8 y7 @@ -2682,6 +2312,7 @@ + &dash-ent-10; y9 y8 y7 @@ -2707,6 +2338,7 @@ V4L2_MBUS_FMT_YVYU10_1X20 0x200e + &dash-ent-10; y9 y8 y7 @@ -2732,6 +2364,32 @@ + &dash-ent-10; + y9 + y8 + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + u9 + u8 + u7 + u6 + u5 + u4 + u3 + u2 + u1 + u0 + + + V4L2_MBUS_FMT_YUV10_1X30 + 0x2014 + y9 y8 y7 @@ -2752,6 +2410,16 @@ u2 u1 u0 + v9 + v8 + v7 + v6 + v5 + v4 + v3 + v2 + v1 + v0 diff --git a/Documentation/DocBook/media_api.tmpl b/Documentation/DocBook/media_api.tmpl index f2413acfe241..1f6593deb995 100644 --- a/Documentation/DocBook/media_api.tmpl +++ b/Documentation/DocBook/media_api.tmpl @@ -22,6 +22,7 @@ http://linuxtv.org/repo/"> +----------"> ]> diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h index e860f55820ec..b9b7bea04537 100644 --- a/include/uapi/linux/v4l2-mediabus.h +++ b/include/uapi/linux/v4l2-mediabus.h @@ -47,7 +47,7 @@ enum 
v4l2_mbus_pixelcode { V4L2_MBUS_FMT_RGB565_2X8_BE = 0x1007, V4L2_MBUS_FMT_RGB565_2X8_LE = 0x1008, - /* YUV (including grey) - next is 0x2016 */ + /* YUV (including grey) - next is 0x2017 */ V4L2_MBUS_FMT_Y8_1X8 = 0x2001, V4L2_MBUS_FMT_UV8_1X8 = 0x2015, V4L2_MBUS_FMT_UYVY8_1_5X8 = 0x2002, @@ -69,6 +69,7 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_YDYUYDYV8_1X16 = 0x2014, V4L2_MBUS_FMT_YUYV10_1X20 = 0x200d, V4L2_MBUS_FMT_YVYU10_1X20 = 0x200e, + V4L2_MBUS_FMT_YUV10_1X30 = 0x2016, /* Bayer - next is 0x3019 */ V4L2_MBUS_FMT_SBGGR8_1X8 = 0x3001, -- cgit v1.2.3 From 5023e5cf58e1dae904e2e8b5b9779c33512b75a1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:36 +0000 Subject: dm ioctl: remove PF_MEMALLOC When allocating memory for the userspace ioctl data, set some appropriate GFP flags directly instead of using PF_MEMALLOC. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 13 ++++--------- include/uapi/linux/dm-ioctl.h | 4 ++-- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a651d528f80d..a37aeba7dc1b 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1556,7 +1556,10 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) secure_data = tmp.flags & DM_SECURE_DATA_FLAG; - dmi = vmalloc(tmp.data_size); + /* + * Try to avoid low memory issues when a device is suspended. + */ + dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); if (!dmi) { if (secure_data && clear_user(user, tmp.data_size)) return -EFAULT; @@ -1656,19 +1659,11 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) return -ENOTTY; } - /* - * Trying to avoid low memory issues when a device is - * suspended. - */ - current->flags |= PF_MEMALLOC; - /* * Copy the parameters into kernel space.
*/ r = copy_params(user, &param); - current->flags &= ~PF_MEMALLOC; - if (r) return r; diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 91e3a360f611..539b179b349c 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -268,8 +268,8 @@ enum { #define DM_VERSION_MAJOR 4 #define DM_VERSION_MINOR 23 -#define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2012-07-25)" +#define DM_VERSION_PATCHLEVEL 1 +#define DM_VERSION_EXTRA "-ioctl (2012-12-18)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 130f1b8f35f14d27c43da755f3c9226318c17f57 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 26 Dec 2012 10:39:23 -0700 Subject: PCI: Add PCIe Link Capability link speed and width names Add standard #defines for the Supported Link Speeds field in the PCIe Link Capabilities register. Note that prior to PCIe spec r3.0, these encodings were defined: 0001b 2.5GT/s Link speed supported 0010b 5.0GT/s and 2.5GT/s Link speed supported Starting with spec r3.0, these encodings refer to bits 0 and 1 in the Supported Link Speeds Vector in the Link Capabilities 2 register, and bits 0 and 1 there mean 2.5 GT/s and 5.0 GT/s, respectively. Therefore, code that followed r2.0 and interpreted 0x1 as 2.5GT/s and 0x2 as 5.0GT/s will continue to work, and we can identify a device using the new encodings because it will have a non-zero Link Capabilities 2 register.
Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 6b7b6f1e2fd6..ebfadc56d1b4 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -458,6 +458,8 @@ #define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ #define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ +#define PCI_EXP_LNKCAP_SLS_2_5GB 0x1 /* LNKCAP2 SLS Vector bit 0 (2.5GT/s) */ +#define PCI_EXP_LNKCAP_SLS_5_0GB 0x2 /* LNKCAP2 SLS Vector bit 1 (5.0GT/s) */ #define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ #define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ #define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ -- cgit v1.2.3 From 9a57247f31e361f80508c40363366222dbbb6aa5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 27 Dec 2012 23:49:39 +0000 Subject: rtnl: expose carrier value with possibility to set it Signed-off-by: Jiri Pirko Acked-by: Flavio Leitner Signed-off-by: David S. Miller --- Documentation/networking/operstates.txt | 4 ++++ include/uapi/linux/if_link.h | 1 + net/core/rtnetlink.c | 10 ++++++++++ 3 files changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt index 1a77a3cfae54..97694572338b 100644 --- a/Documentation/networking/operstates.txt +++ b/Documentation/networking/operstates.txt @@ -88,6 +88,10 @@ set this flag. On netif_carrier_off(), the scheduler stops sending packets. The name 'carrier' and the inversion are historical, think of it as lower layer. +Note that for certain kind of soft-devices, which are not managing any +real hardware, there is possible to set this bit from userpsace. +One should use TVL IFLA_CARRIER to do so. + netif_carrier_ok() can be used to query that bit. 
__LINK_STATE_DORMANT, maps to IFF_DORMANT: diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 60f3b6b90602..c4edfe11f1f7 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -142,6 +142,7 @@ enum { #define IFLA_PROMISCUITY IFLA_PROMISCUITY IFLA_NUM_TX_QUEUES, IFLA_NUM_RX_QUEUES, + IFLA_CARRIER, __IFLA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1868625af25e..2ef7a56ba117 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -780,6 +780,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MTU */ + nla_total_size(4) /* IFLA_LINK */ + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(1) /* IFLA_CARRIER */ + nla_total_size(4) /* IFLA_PROMISCUITY */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ @@ -909,6 +910,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u32(skb, IFLA_LINK, dev->iflink)) || (dev->master && nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || (dev->ifalias && @@ -1108,6 +1110,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_LINK] = { .type = NLA_U32 }, [IFLA_MASTER] = { .type = NLA_U32 }, + [IFLA_CARRIER] = { .type = NLA_U8 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, [IFLA_WEIGHT] = { .type = NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, @@ -1438,6 +1441,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_CARRIER]) { + err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); -- cgit v1.2.3 From fee5dfecb0c74c9eab475a2a20d7a5ababe2f8e6 Mon Sep 17 00:00:00 2001 From: David Herrmann 
Date: Mon, 17 Dec 2012 13:20:43 +0100 Subject: HID: uhid: use __packed__ for uhid_feature_answer_req We use __packed__ for all API structures so we can extend them without breaking alignment rules. We do try to explicitly align the structures, but to be safe we also use __packed__. uhid_feature_answer_req is already 64bit aligned so we can add __packed__ without breaking ABI. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- include/uapi/linux/uhid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/uhid.h b/include/uapi/linux/uhid.h index 9c6974f16966..e9ed951e2b09 100644 --- a/include/uapi/linux/uhid.h +++ b/include/uapi/linux/uhid.h @@ -86,7 +86,7 @@ struct uhid_feature_answer_req { __u16 err; __u16 size; __u8 data[UHID_DATA_MAX]; -}; +} __attribute__((__packed__)); struct uhid_event { __u32 type; -- cgit v1.2.3 From d582cffbcd04eae0bd8a83b05648bfd54bfd21c9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Oct 2012 17:53:44 +0200 Subject: nl80211/mac80211: support full station state in AP mode Today, stations are added already associated. That is inefficient if, for example, the driver has no room for stations any more because then the station will go through the entire auth/assoc handshake, only to be kicked out afterwards. To address this a bit better, at least with drivers using the new station state callback, allow hostapd to add stations in unauthenticated mode, just after receiving the AUTH frame, before even replying. Thus if there's no more space at that point, it can send a negative auth frame back. It still needs to handle later state transition errors though, of course. 
Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 16 ++++++ net/mac80211/cfg.c | 115 ++++++++++++++++++++++++++----------------- net/mac80211/main.c | 3 +- net/wireless/nl80211.c | 24 +++++++++ 4 files changed, 113 insertions(+), 45 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e3e19f8b16f2..547017100a30 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1697,6 +1697,9 @@ enum nl80211_iftype { * flag can't be changed, it is only valid while adding a station, and * attempts to change it will silently be ignored (rather than rejected * as errors.) + * @NL80211_STA_FLAG_ASSOCIATED: station is associated; used with drivers + * that support %NL80211_FEATURE_FULL_AP_CLIENT_STATE to transition a + * previously added station into associated state * @NL80211_STA_FLAG_MAX: highest station flag number currently defined * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ @@ -1708,6 +1711,7 @@ enum nl80211_sta_flags { NL80211_STA_FLAG_MFP, NL80211_STA_FLAG_AUTHENTICATED, NL80211_STA_FLAG_TDLS_PEER, + NL80211_STA_FLAG_ASSOCIATED, /* keep last */ __NL80211_STA_FLAG_AFTER_LAST, @@ -3140,6 +3144,17 @@ enum nl80211_ap_sme_features { * setting * @NL80211_FEATURE_P2P_GO_OPPPS: P2P GO implementation supports opportunistic * powersave + * @NL80211_FEATURE_FULL_AP_CLIENT_STATE: The driver supports full state + * transitions for AP clients. Without this flag (and if the driver + * doesn't have the AP SME in the device) the driver supports adding + * stations only when they're associated and adds them in associated + * state (to later be transitioned into authorized), with this flag + * they should be added before even sending the authentication reply + * and then transitioned into authenticated, associated and authorized + * states using station flags. 
+ * Note that even for drivers that support this, the default is to add + * stations in authenticated/associated state, so to add unauthenticated + * stations the authenticated/associated bits have to be set in the mask. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3155,6 +3170,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10, NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11, NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12, + NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 13, }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3e7d557fd481..f4d12c71928d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -510,6 +510,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_TDLS_PEER); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); @@ -521,6 +522,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); if (test_sta_flag(sta, WLAN_STA_AUTH)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); + if (test_sta_flag(sta, WLAN_STA_ASSOC)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); } @@ -1077,6 +1080,58 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) netif_rx_ni(skb); } +static int sta_apply_auth_flags(struct ieee80211_local *local, + struct sta_info *sta, + u32 mask, u32 set) +{ + int ret; + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + set & 
BIT(NL80211_STA_FLAG_ASSOCIATED) && + !test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { + if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); + else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); + else + ret = 0; + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_ASSOCIATED) && + !(set & BIT(NL80211_STA_FLAG_ASSOCIATED)) && + test_sta_flag(sta, WLAN_STA_ASSOC)) { + ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); + if (ret) + return ret; + } + + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED) && + !(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && + test_sta_flag(sta, WLAN_STA_AUTH)) { + ret = sta_info_move_state(sta, IEEE80211_STA_NONE); + if (ret) + return ret; + } + + return 0; +} + static int sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) @@ -1094,52 +1149,20 @@ static int sta_apply_parameters(struct ieee80211_local *local, mask = params->sta_flags_mask; set = params->sta_flags_set; - /* - * In mesh mode, we can clear AUTHENTICATED flag but must - * also make ASSOCIATED follow appropriately for the driver - * API. See also below, after AUTHORIZED changes. 
- */ - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED) && - !test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { - if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); - else if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) - ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); - if (ret) - return ret; - } - - if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - /* cfg80211 should not allow this in non-mesh modes */ - if (WARN_ON(!ieee80211_vif_is_mesh(&sdata->vif))) - return -EINVAL; - - if (!(set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) && - test_sta_flag(sta, WLAN_STA_AUTH)) { - ret = sta_info_move_state(sta, IEEE80211_STA_AUTH); - if (ret) - return ret; - ret = sta_info_move_state(sta, IEEE80211_STA_NONE); - if (ret) - return ret; - } + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* + * In mesh mode, ASSOCIATED isn't part of the nl80211 + * API but must follow AUTHENTICATED for driver state. 
+ */ + if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); + if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) + set |= BIT(NL80211_STA_FLAG_ASSOCIATED); } + ret = sta_apply_auth_flags(local, sta, mask, set); + if (ret) + return ret; if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) @@ -1273,6 +1296,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (!sta) return -ENOMEM; + /* + * defaults -- if userspace wants something else we'll + * change it accordingly in sta_apply_parameters() + */ sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e6514f240fce..39cfe8f10ad2 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -541,7 +541,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | NL80211_FEATURE_SAE | NL80211_FEATURE_HT_IBSS | - NL80211_FEATURE_VIF_TXPOWER; + NL80211_FEATURE_VIF_TXPOWER | + NL80211_FEATURE_FULL_AP_CLIENT_STATE; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b3cf7cc0d4a1..087f68ba6d7a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3231,11 +3231,21 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* accept only the listed bits */ if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP))) return -EINVAL; + /* but authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + 
BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) @@ -3393,17 +3403,31 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* but don't bother the driver with it */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + /* allow authenticated/associated only if driver handles it */ + if (!(rdev->wiphy.features & + NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + /* must be last in here for error handling */ params.vlan = get_vlan(info, rdev); if (IS_ERR(params.vlan)) return PTR_ERR(params.vlan); break; case NL80211_IFTYPE_MESH_POINT: + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* TDLS peers cannot be added */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) return -EINVAL; break; case NL80211_IFTYPE_STATION: + /* associated is disallowed */ + if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + return -EINVAL; /* Only TDLS peers can be added */ if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) return -EINVAL; -- cgit v1.2.3 From 4a674f34ba04a002244edaf891b5da7fc1473ae8 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:55 -0800 Subject: ipc: introduce message queue copy feature This patch is required for checkpoint/restore in userspace. c/r requires some way to get all pending IPC messages without deleting them from the queue (checkpoint can fail and in this case tasks will be resumed, so the queue has to be valid). To achieve this, a new operation flag MSG_COPY for sys_msgrcv() system call was introduced. If this flag was specified, then mtype is interpreted as number of the message to copy.
If MSG_COPY is set, then kernel will allocate dummy message with passed size, and then use new copy_msg() helper function to copy desired message (instead of unlinking it from the queue). Notes: 1) Return -ENOSYS if MSG_COPY is specified, but CONFIG_CHECKPOINT_RESTORE is not set. Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/msg.h | 1 + ipc/msg.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++-- ipc/msgutil.c | 38 ++++++++++++++++++++++++++++ ipc/util.h | 1 + 4 files changed, 102 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/msg.h b/include/uapi/linux/msg.h index 78dbd2f996a3..22d95c6854e0 100644 --- a/include/uapi/linux/msg.h +++ b/include/uapi/linux/msg.h @@ -10,6 +10,7 @@ /* msgrcv options */ #define MSG_NOERROR 010000 /* no error if message is too big */ #define MSG_EXCEPT 020000 /* recv any msg except of specified type.*/ +#define MSG_COPY 040000 /* copy (not remove) all queue messages */ /* Obsolete, used only for backwards compatibility and libc5 compiles */ struct msqid_ds { diff --git a/ipc/msg.c b/ipc/msg.c index cefc24f46e3e..d20ffc7d3f24 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -769,6 +769,45 @@ static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) return msgsz; } +#ifdef CONFIG_CHECKPOINT_RESTORE +static inline struct msg_msg *fill_copy(unsigned long copy_nr, + unsigned long msg_nr, + struct msg_msg *msg, + struct msg_msg *copy) +{ + if (copy_nr == msg_nr) + return copy_msg(msg, copy); + return NULL; +} + +static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, + int msgflg, long *msgtyp, + unsigned long *copy_number) +{ + struct msg_msg *copy; + + *copy_number = *msgtyp; + *msgtyp = 0; + /* + * Create dummy message to copy real message to. 
+ */ + copy = load_msg(buf, bufsz); + if (!IS_ERR(copy)) + copy->m_ts = bufsz; + return copy; +} + +static inline void free_copy(int msgflg, struct msg_msg *copy) +{ + if (msgflg & MSG_COPY) + free_msg(copy); +} +#else +#define free_copy(msgflg, copy) do {} while (0) +#define prepare_copy(buf, sz, msgflg, msgtyp, copy_nr) ERR_PTR(-ENOSYS) +#define fill_copy(copy_nr, msg_nr, msg, copy) NULL +#endif + long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, long (*msg_handler)(void __user *, struct msg_msg *, size_t)) @@ -777,19 +816,29 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, struct msg_msg *msg; int mode; struct ipc_namespace *ns; + struct msg_msg *copy; + unsigned long __maybe_unused copy_number; if (msqid < 0 || (long) bufsz < 0) return -EINVAL; + if (msgflg & MSG_COPY) { + copy = prepare_copy(buf, bufsz, msgflg, &msgtyp, &copy_number); + if (IS_ERR(copy)) + return PTR_ERR(copy); + } mode = convert_mode(&msgtyp, msgflg); ns = current->nsproxy->ipc_ns; msq = msg_lock_check(ns, msqid); - if (IS_ERR(msq)) + if (IS_ERR(msq)) { + free_copy(msgflg, copy); return PTR_ERR(msq); + } for (;;) { struct msg_receiver msr_d; struct list_head *tmp; + long msg_counter = 0; msg = ERR_PTR(-EACCES); if (ipcperms(ns, &msq->q_perm, S_IRUGO)) @@ -809,8 +858,15 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, if (mode == SEARCH_LESSEQUAL && walk_msg->m_type != 1) { msgtyp = walk_msg->m_type - 1; + } else if (msgflg & MSG_COPY) { + msg = fill_copy(copy_number, + msg_counter, + walk_msg, copy); + if (msg) + break; } else break; + msg_counter++; } tmp = tmp->next; } @@ -823,6 +879,8 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, msg = ERR_PTR(-E2BIG); goto out_unlock; } + if (msgflg & MSG_COPY) + goto out_unlock; list_del(&msg->m_list); msq->q_qnum--; msq->q_rtime = get_seconds(); @@ -906,8 +964,10 @@ out_unlock: break; } } - if (IS_ERR(msg)) + if (IS_ERR(msg)) { + free_copy(msgflg,
copy); return PTR_ERR(msg); + } bufsz = msg_handler(buf, msg, bufsz); free_msg(msg); diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 6471f1bdae96..7eecdad40efc 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -102,7 +102,45 @@ out_err: free_msg(msg); return ERR_PTR(err); } +#ifdef CONFIG_CHECKPOINT_RESTORE +struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) +{ + struct msg_msgseg *dst_pseg, *src_pseg; + int len = src->m_ts; + int alen; + + BUG_ON(dst == NULL); + if (src->m_ts > dst->m_ts) + return ERR_PTR(-EINVAL); + + alen = len; + if (alen > DATALEN_MSG) + alen = DATALEN_MSG; + + dst->next = NULL; + dst->security = NULL; + memcpy(dst + 1, src + 1, alen); + + len -= alen; + dst_pseg = dst->next; + src_pseg = src->next; + while (len > 0) { + alen = len; + if (alen > DATALEN_SEG) + alen = DATALEN_SEG; + memcpy(dst_pseg + 1, src_pseg + 1, alen); + dst_pseg = dst_pseg->next; + len -= alen; + src_pseg = src_pseg->next; + } + + dst->m_type = src->m_type; + dst->m_ts = src->m_ts; + + return dst; +} +#endif int store_msg(void __user *dest, struct msg_msg *msg, int len) { int alen; diff --git a/ipc/util.h b/ipc/util.h index a61e0ca2bffd..eeb79a1fbd83 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -140,6 +140,7 @@ int ipc_parse_version (int *cmd); extern void free_msg(struct msg_msg *msg); extern struct msg_msg *load_msg(const void __user *src, int len); +extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst); extern int store_msg(void __user *dest, struct msg_msg *msg, int len); extern void recompute_msgmni(struct ipc_namespace *); -- cgit v1.2.3 From 24b9f50170f55a3179c6f6d51022eb7d50502d05 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 3 Jan 2013 12:30:30 -0300 Subject: [media] V4L: Remove deprecated image centering controls It has been over 3 years since the V4L2_CID_[HV]CENTER were deprecated. Clean up the DocBook and remove the V4L2_CID_HCENTER_DEPRECATED, V4L2_CID_VCENTER_DEPRECATED control related paragraphs.
Remove the V4L2_CID_[HV]CENTER controls definitions from v4l2-controls.h, these controls are not used by any driver in the mainline now. Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/controls.xml | 23 ----------------------- drivers/media/v4l2-core/v4l2-ctrls.c | 2 -- include/uapi/linux/v4l2-controls.h | 4 ---- 3 files changed, 29 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml index 7fe5be1d3bbb..9e8f85498678 100644 --- a/Documentation/DocBook/media/v4l/controls.xml +++ b/Documentation/DocBook/media/v4l/controls.xml @@ -203,29 +203,6 @@ and should not be used in new drivers and applications. boolean Mirror the picture vertically.
- - V4L2_CID_HCENTER_DEPRECATED (formerly V4L2_CID_HCENTER) - integer - Horizontal image centering. This control is -deprecated. New drivers and applications should use the Camera class controls -V4L2_CID_PAN_ABSOLUTE, -V4L2_CID_PAN_RELATIVE and -V4L2_CID_PAN_RESET instead. - - - V4L2_CID_VCENTER_DEPRECATED - (formerly V4L2_CID_VCENTER) - integer - Vertical image centering. Centering is intended to -physically adjust cameras. For image cropping see -, for clipping . This -control is deprecated. New drivers and applications should use the -Camera class controls -V4L2_CID_TILT_ABSOLUTE, -V4L2_CID_TILT_RELATIVE and -V4L2_CID_TILT_RESET instead. - V4L2_CID_POWER_LINE_FREQUENCY enum diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index fa02363e7db4..7b486ac3f4d9 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -577,8 +577,6 @@ const char *v4l2_ctrl_get_name(u32 id) case V4L2_CID_GAIN: return "Gain"; case V4L2_CID_HFLIP: return "Horizontal Flip"; case V4L2_CID_VFLIP: return "Vertical Flip"; - case V4L2_CID_HCENTER: return "Horizontal Center"; - case V4L2_CID_VCENTER: return "Vertical Center"; case V4L2_CID_POWER_LINE_FREQUENCY: return "Power Line Frequency"; case V4L2_CID_HUE_AUTO: return "Hue, Automatic"; case V4L2_CID_WHITE_BALANCE_TEMPERATURE: return "White Balance Temperature"; diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index f56c945cecd4..4dc0822700fe 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -88,10 +88,6 @@ #define V4L2_CID_HFLIP (V4L2_CID_BASE+20) #define V4L2_CID_VFLIP (V4L2_CID_BASE+21) -/* Deprecated; use V4L2_CID_PAN_RESET and V4L2_CID_TILT_RESET */ -#define V4L2_CID_HCENTER (V4L2_CID_BASE+22) -#define V4L2_CID_VCENTER (V4L2_CID_BASE+23) - #define V4L2_CID_POWER_LINE_FREQUENCY (V4L2_CID_BASE+24) enum v4l2_power_line_frequency { V4L2_CID_POWER_LINE_FREQUENCY_DISABLED = 0, -- cgit 
v1.2.3 From bb65a9cb953fdfe9c507e8dbb6c4ec2540484bd3 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 28 Dec 2012 16:06:28 +0800 Subject: xfrm: removes a superfluous check and add a statistic Remove the check if x->km.state equal to XFRM_STATE_VALID in xfrm_state_check_expire(), which will be done before call xfrm_state_check_expire(). add a LINUX_MIB_XFRMOUTSTATEINVALID statistic to record the outbound error due to invalid xfrm state. Signed-off-by: Li RongQing Signed-off-by: Steffen Klassert --- include/uapi/linux/snmp.h | 1 + net/xfrm/xfrm_output.c | 6 ++++++ net/xfrm/xfrm_proc.c | 1 + net/xfrm/xfrm_state.c | 3 --- 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index fdfba235f9f1..b49eab89c9fd 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -278,6 +278,7 @@ enum LINUX_MIB_XFRMOUTPOLDEAD, /* XfrmOutPolDead */ LINUX_MIB_XFRMOUTPOLERROR, /* XfrmOutPolError */ LINUX_MIB_XFRMFWDHDRERROR, /* XfrmFwdHdrError*/ + LINUX_MIB_XFRMOUTSTATEINVALID, /* XfrmOutStateInvalid */ __LINUX_MIB_XFRMMAX }; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 95a338c89f99..3670526e70b9 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -61,6 +61,12 @@ static int xfrm_output_one(struct sk_buff *skb, int err) } spin_lock_bh(&x->lock); + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); + goto error_nolock; + } + err = xfrm_state_check_expire(x); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED); diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index d0a1af8ed584..603903853e89 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -43,6 +43,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD), SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), SNMP_MIB_ITEM("XfrmFwdHdrError", 
LINUX_MIB_XFRMFWDHDRERROR), + SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), SNMP_MIB_SENTINEL }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3459692092ec..05db2362a231 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1370,9 +1370,6 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (!x->curlft.use_time) x->curlft.use_time = get_seconds(); - if (x->km.state != XFRM_STATE_VALID) - return -EINVAL; - if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; -- cgit v1.2.3 From d8346b7d9bab37e6cc712ff1622c65ff98bdfef8 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 20 Dec 2012 15:32:08 +0100 Subject: KVM: s390: Support for I/O interrupts. Add support for handling I/O interrupts (standard, subchannel-related ones and rudimentary adapter interrupts). The subchannel-identifying parameters are encoded into the interrupt type. I/O interrupts are floating, so they can't be injected on a specific vcpu. 
Reviewed-by: Alexander Graf Reviewed-by: Marcelo Tosatti Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- Documentation/virtual/kvm/api.txt | 4 ++ arch/s390/include/asm/kvm_host.h | 2 + arch/s390/kvm/interrupt.c | 103 +++++++++++++++++++++++++++++++++++++- include/uapi/linux/kvm.h | 9 ++++ 4 files changed, 116 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a4df5535996b..83bd92b52936 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2069,6 +2069,10 @@ KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm KVM_S390_INT_EMERGENCY (vcpu) - sigp emergency; source cpu in parm KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm +KVM_S390_INT_IO(ai,cssid,ssid,schid) (vm) - compound value to indicate an + I/O interrupt (ai - adapter interrupt; cssid,ssid,schid - subchannel); + I/O interruption parameters in parm (subchannel) and parm64 (intparm, + interruption subclass) Note that the vcpu ioctl is asynchronous to vcpu execution. 
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 711c5ab391cf..a8e35c43df78 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -74,6 +74,7 @@ struct kvm_s390_sie_block { __u64 epoch; /* 0x0038 */ __u8 reserved40[4]; /* 0x0040 */ #define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 __u16 lctl; /* 0x0044 */ __s16 icpua; /* 0x0046 */ __u32 ictl; /* 0x0048 */ @@ -125,6 +126,7 @@ struct kvm_vcpu_stat { u32 deliver_prefix_signal; u32 deliver_restart_signal; u32 deliver_program_int; + u32 deliver_io_int; u32 exit_wait_state; u32 instruction_stidp; u32 instruction_spx; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c30615e605ac..52cdf20906ab 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -21,11 +21,26 @@ #include "gaccess.h" #include "trace-s390.h" +#define IOINT_SCHID_MASK 0x0000ffff +#define IOINT_SSID_MASK 0x00030000 +#define IOINT_CSSID_MASK 0x03fc0000 +#define IOINT_AI_MASK 0x04000000 + +static int is_ioint(u64 type) +{ + return ((type & 0xfffe0000u) != 0xfffe0000u); +} + static int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); } +static int psw_ioint_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO); +} + static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) { if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || @@ -67,7 +82,15 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, case KVM_S390_SIGP_SET_PREFIX: case KVM_S390_RESTART: return 1; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + if (psw_ioint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[6] & inti->io.io_int_word) + return 1; + return 0; default: + printk(KERN_WARNING "illegal interrupt type %llx\n", + inti->type); BUG(); } return 0; @@ -116,6 +139,12 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, case KVM_S390_SIGP_STOP: 
__set_cpuflag(vcpu, CPUSTAT_STOP_INT); break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + if (psw_ioint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_IO_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR6; + break; default: BUG(); } @@ -297,6 +326,47 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, exception = 1; break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + { + __u32 param0 = ((__u32)inti->io.subchannel_id << 16) | + inti->io.subchannel_nr; + __u64 param1 = ((__u64)inti->io.io_int_parm << 32) | + inti->io.io_int_word; + VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); + vcpu->stat.deliver_io_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + param0, param1); + rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID, + inti->io.subchannel_id); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR, + inti->io.subchannel_nr); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u32(vcpu, __LC_IO_INT_PARM, + inti->io.io_int_parm); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u32(vcpu, __LC_IO_INT_WORD, + inti->io.io_int_word); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_IO_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + } default: BUG(); } @@ -545,7 +615,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, { struct kvm_s390_local_interrupt *li; struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *inti; + struct kvm_s390_interrupt_info *inti, *iter; int sigcpu; inti = kzalloc(sizeof(*inti), GFP_KERNEL); @@ -569,6 +639,22 @@ int kvm_s390_inject_vm(struct kvm *kvm, case KVM_S390_SIGP_STOP: case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: + kfree(inti); + return -EINVAL; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + if (s390int->type 
& IOINT_AI_MASK) + VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); + else + VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", + s390int->type & IOINT_CSSID_MASK, + s390int->type & IOINT_SSID_MASK, + s390int->type & IOINT_SCHID_MASK); + inti->type = s390int->type; + inti->io.subchannel_id = s390int->parm >> 16; + inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; + inti->io.io_int_parm = s390int->parm64 >> 32; + inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull; + break; default: kfree(inti); return -EINVAL; @@ -579,7 +665,19 @@ int kvm_s390_inject_vm(struct kvm *kvm, mutex_lock(&kvm->lock); fi = &kvm->arch.float_int; spin_lock(&fi->lock); - list_add_tail(&inti->list, &fi->list); + if (!is_ioint(inti->type)) + list_add_tail(&inti->list, &fi->list); + else { + /* Keep I/O interrupts sorted in isc order. */ + list_for_each_entry(iter, &fi->list, list) { + if (!is_ioint(iter->type)) + continue; + if (iter->io.io_int_word <= inti->io.io_int_word) + continue; + break; + } + list_add_tail(&inti->list, &iter->list); + } atomic_set(&fi->active, 1); sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); if (sigcpu == KVM_MAX_VCPUS) { @@ -653,6 +751,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, break; case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: default: kfree(inti); return -EINVAL; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e6e5d4b13708..54540bdd3340 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -401,6 +401,15 @@ struct kvm_s390_psw { #define KVM_S390_INT_SERVICE 0xffff2401u #define KVM_S390_INT_EMERGENCY 0xffff1201u #define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u +/* Anything below 0xfffe0000u is taken by INT_IO */ +#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ + (((schid)) | \ + ((ssid) << 16) | \ + ((cssid) << 18) | \ + ((ai) << 26)) +#define KVM_S390_INT_IO_MIN 0x00000000u +#define KVM_S390_INT_IO_MAX 0xfffdffffu + struct 
kvm_s390_interrupt { __u32 type; -- cgit v1.2.3 From 48a3e950f4cee6a345ffbe9baf599f1e9a54c479 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 20 Dec 2012 15:32:09 +0100 Subject: KVM: s390: Add support for machine checks. Add support for injecting machine checks (only repressible conditions for now). This is a bit more involved than I/O interrupts, for these reasons: - Machine checks come in both floating and cpu varieties. - We don't have a bit for machine checks enabling, but have to use a roundabout approach with trapping PSW changing instructions and watching for opened machine checks. Reviewed-by: Alexander Graf Reviewed-by: Marcelo Tosatti Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- Documentation/virtual/kvm/api.txt | 4 ++ arch/s390/include/asm/kvm_host.h | 8 +++ arch/s390/kvm/intercept.c | 2 + arch/s390/kvm/interrupt.c | 112 +++++++++++++++++++++++++++++++ arch/s390/kvm/kvm-s390.h | 3 + arch/s390/kvm/priv.c | 135 ++++++++++++++++++++++++++++++++++++++ arch/s390/kvm/trace-s390.h | 6 +- include/uapi/linux/kvm.h | 1 + 8 files changed, 268 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 83bd92b52936..8a0de309932f 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2073,6 +2073,10 @@ KVM_S390_INT_IO(ai,cssid,ssid,schid) (vm) - compound value to indicate an I/O interrupt (ai - adapter interrupt; cssid,ssid,schid - subchannel); I/O interruption parameters in parm (subchannel) and parm64 (intparm, interruption subclass) +KVM_S390_MCHK (vm, vcpu) - machine check interrupt; cr 14 bits in parm, + machine check interrupt code in parm64 (note that + machine checks needing further payload are not + supported by this ioctl) Note that the vcpu ioctl is asynchronous to vcpu execution. 
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a8e35c43df78..29363d155cd5 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -75,8 +75,10 @@ struct kvm_s390_sie_block { __u8 reserved40[4]; /* 0x0040 */ #define LCTL_CR0 0x8000 #define LCTL_CR6 0x0200 +#define LCTL_CR14 0x0002 __u16 lctl; /* 0x0044 */ __s16 icpua; /* 0x0046 */ +#define ICTL_LPSW 0x00400000 __u32 ictl; /* 0x0048 */ __u32 eca; /* 0x004c */ __u8 icptcode; /* 0x0050 */ @@ -187,6 +189,11 @@ struct kvm_s390_emerg_info { __u16 code; }; +struct kvm_s390_mchk_info { + __u64 cr14; + __u64 mcic; +}; + struct kvm_s390_interrupt_info { struct list_head list; u64 type; @@ -197,6 +204,7 @@ struct kvm_s390_interrupt_info { struct kvm_s390_emerg_info emerg; struct kvm_s390_extcall_info extcall; struct kvm_s390_prefix_info prefix; + struct kvm_s390_mchk_info mchk; }; }; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index df6c0ad085aa..950c13ecaf60 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -97,10 +97,12 @@ static int handle_lctl(struct kvm_vcpu *vcpu) static const intercept_handler_t instruction_handlers[256] = { [0x01] = kvm_s390_handle_01, + [0x82] = kvm_s390_handle_lpsw, [0x83] = kvm_s390_handle_diag, [0xae] = kvm_s390_handle_sigp, [0xb2] = kvm_s390_handle_b2, [0xb7] = handle_lctl, + [0xb9] = kvm_s390_handle_b9, [0xe5] = kvm_s390_handle_e5, [0xeb] = handle_lctlg, }; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 52cdf20906ab..b3b4748485ee 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -41,6 +41,11 @@ static int psw_ioint_disabled(struct kvm_vcpu *vcpu) return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO); } +static int psw_mchk_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK); +} + static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) { if ((vcpu->arch.sie_block->gpsw.mask & 
PSW_MASK_PER) || @@ -82,6 +87,12 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, case KVM_S390_SIGP_SET_PREFIX: case KVM_S390_RESTART: return 1; + case KVM_S390_MCHK: + if (psw_mchk_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14) + return 1; + return 0; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: if (psw_ioint_disabled(vcpu)) return 0; @@ -116,6 +127,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); vcpu->arch.sie_block->lctl = 0x0000; + vcpu->arch.sie_block->ictl &= ~ICTL_LPSW; } static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) @@ -139,6 +151,12 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, case KVM_S390_SIGP_STOP: __set_cpuflag(vcpu, CPUSTAT_STOP_INT); break; + case KVM_S390_MCHK: + if (psw_mchk_disabled(vcpu)) + vcpu->arch.sie_block->ictl |= ICTL_LPSW; + else + vcpu->arch.sie_block->lctl |= LCTL_CR14; + break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: if (psw_ioint_disabled(vcpu)) __set_cpuflag(vcpu, CPUSTAT_IO_INT); @@ -326,6 +344,32 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, exception = 1; break; + case KVM_S390_MCHK: + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + inti->mchk.mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, + inti->mchk.cr14, + inti->mchk.mcic); + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_PREFIXED); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_MCK_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: { __u32 param0 = 
((__u32)inti->io.subchannel_id << 16) | @@ -588,6 +632,61 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) } } +void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; + struct kvm_s390_interrupt_info *n, *inti = NULL; + int deliver; + + __reset_intercept_indicators(vcpu); + if (atomic_read(&li->active)) { + do { + deliver = 0; + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + if ((inti->type == KVM_S390_MCHK) && + __interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&li->list)) + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } + + if (atomic_read(&fi->active)) { + do { + deliver = 0; + spin_lock(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + if ((inti->type == KVM_S390_MCHK) && + __interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } +} + int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -641,6 +740,13 @@ int kvm_s390_inject_vm(struct kvm *kvm, case KVM_S390_INT_EMERGENCY: kfree(inti); return -EINVAL; + case KVM_S390_MCHK: + VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", + s390int->parm64); + inti->type = s390int->type; + inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ + inti->mchk.mcic = s390int->parm64; + break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: if 
(s390int->type & IOINT_AI_MASK) VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); @@ -749,6 +855,12 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, inti->type = s390int->type; inti->emerg.code = s390int->parm; break; + case KVM_S390_MCHK: + VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", + s390int->parm64); + inti->type = s390int->type; + inti->mchk.mcic = s390int->parm64; + break; case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index dccc0242b7ca..1f7cc6ccf102 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -106,6 +106,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); +void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int); int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, @@ -117,6 +118,8 @@ int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); int kvm_s390_handle_01(struct kvm_vcpu *vcpu); +int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); +int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d715842f56ca..d3cbcd3c9ada 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include "gaccess.h" #include "kvm-s390.h" #include "trace.h" @@ -166,6 +168,99 @@ static int handle_stfl(struct kvm_vcpu *vcpu) return 0; } +static void handle_new_psw(struct kvm_vcpu *vcpu) +{ + /* Check whether the new psw is enabled for machine checks. 
*/ + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK) + kvm_s390_deliver_pending_machine_checks(vcpu); +} + +#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) +#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL +#define PSW_ADDR_24 0x00000000000fffffUL +#define PSW_ADDR_31 0x000000007fffffffUL + +int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) +{ + u64 addr; + psw_compat_t new_psw; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, + PGM_PRIVILEGED_OPERATION); + + addr = kvm_s390_get_base_disp_s(vcpu); + + if (addr & 7) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + if (!(new_psw.mask & PSW32_MASK_BASE)) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + vcpu->arch.sie_block->gpsw.mask = + (new_psw.mask & ~PSW32_MASK_BASE) << 32; + vcpu->arch.sie_block->gpsw.addr = new_psw.addr; + + if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || + (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && + (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || + ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == + PSW_MASK_EA)) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + handle_new_psw(vcpu); +out: + return 0; +} + +static int handle_lpswe(struct kvm_vcpu *vcpu) +{ + u64 addr; + psw_t new_psw; + + addr = kvm_s390_get_base_disp_s(vcpu); + + if (addr & 7) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + vcpu->arch.sie_block->gpsw.mask = new_psw.mask; + vcpu->arch.sie_block->gpsw.addr = new_psw.addr; + + if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || + (((vcpu->arch.sie_block->gpsw.mask & 
PSW_MASK_ADDR_MODE) == + PSW_MASK_BA) && + (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) || + (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && + (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || + ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == + PSW_MASK_EA)) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + handle_new_psw(vcpu); +out: + return 0; +} + static int handle_stidp(struct kvm_vcpu *vcpu) { u64 operand2; @@ -292,6 +387,7 @@ static const intercept_handler_t priv_handlers[256] = { [0x5f] = handle_chsc, [0x7d] = handle_stsi, [0xb1] = handle_stfl, + [0xb2] = handle_lpswe, }; int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) @@ -316,6 +412,45 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } +static int handle_epsw(struct kvm_vcpu *vcpu) +{ + int reg1, reg2; + + reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24; + reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; + + /* This basically extracts the mask half of the psw. */ + vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; + vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; + if (reg2) { + vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000; + vcpu->run->s.regs.gprs[reg2] |= + vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff; + } + return 0; +} + +static const intercept_handler_t b9_handlers[256] = { + [0x8d] = handle_epsw, +}; + +int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + /* This is handled just as for the B2 instructions. 
*/ + handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) { + if ((handler != handle_epsw) && + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)) + return kvm_s390_inject_program_int(vcpu, + PGM_PRIVILEGED_OPERATION); + else + return handler(vcpu); + } + return -EOPNOTSUPP; +} + static int handle_tprot(struct kvm_vcpu *vcpu) { u64 address1, address2; diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 90fdf85b5ff7..95fbc1ab88dc 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -141,13 +141,13 @@ TRACE_EVENT(kvm_s390_inject_vcpu, * Trace point for the actual delivery of interrupts. */ TRACE_EVENT(kvm_s390_deliver_interrupt, - TP_PROTO(unsigned int id, __u64 type, __u32 data0, __u64 data1), + TP_PROTO(unsigned int id, __u64 type, __u64 data0, __u64 data1), TP_ARGS(id, type, data0, data1), TP_STRUCT__entry( __field(int, id) __field(__u32, inttype) - __field(__u32, data0) + __field(__u64, data0) __field(__u64, data1) ), @@ -159,7 +159,7 @@ TRACE_EVENT(kvm_s390_deliver_interrupt, ), TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \ - "data:%08x %016llx", + "data:%08llx %016llx", __entry->id, __entry->inttype, __print_symbolic(__entry->inttype, kvm_s390_int_type), __entry->data0, __entry->data1) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 54540bdd3340..80bb3b801116 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -397,6 +397,7 @@ struct kvm_s390_psw { #define KVM_S390_PROGRAM_INT 0xfffe0001u #define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u #define KVM_S390_RESTART 0xfffe0003u +#define KVM_S390_MCHK 0xfffe1000u #define KVM_S390_INT_VIRTIO 0xffff2603u #define KVM_S390_INT_SERVICE 0xffff2401u #define KVM_S390_INT_EMERGENCY 0xffff1201u -- cgit v1.2.3 From fa6b7fe9928d50444c29b29c8563746c6b0c6299 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 20 Dec 2012 15:32:12 +0100 Subject: KVM: s390: Add support for channel I/O instructions. 
Add a new capability, KVM_CAP_S390_CSS_SUPPORT, which will pass intercepts for channel I/O instructions to userspace. Only I/O instructions interacting with I/O interrupts need to be handled in-kernel: - TEST PENDING INTERRUPTION (tpi) dequeues and stores pending interrupts entirely in-kernel. - TEST SUBCHANNEL (tsch) dequeues pending interrupts in-kernel and exits via KVM_EXIT_S390_TSCH to userspace for subchannel- related processing. Reviewed-by: Marcelo Tosatti Reviewed-by: Alexander Graf Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- Documentation/virtual/kvm/api.txt | 30 +++++++++++++ arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/intercept.c | 1 + arch/s390/kvm/interrupt.c | 37 ++++++++++++++++ arch/s390/kvm/kvm-s390.c | 12 ++++++ arch/s390/kvm/kvm-s390.h | 2 + arch/s390/kvm/priv.c | 91 +++++++++++++++++++++++++++++++++++++-- arch/s390/kvm/trace-s390.h | 20 +++++++++ include/trace/events/kvm.h | 2 +- include/uapi/linux/kvm.h | 11 +++++ 10 files changed, 202 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 73bd159c5559..f2d6391178b9 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2350,6 +2350,22 @@ The possible hypercalls are defined in the Power Architecture Platform Requirements (PAPR) document available from www.power.org (free developer registration required to access it). + /* KVM_EXIT_S390_TSCH */ + struct { + __u16 subchannel_id; + __u16 subchannel_nr; + __u32 io_int_parm; + __u32 io_int_word; + __u32 ipb; + __u8 dequeued; + } s390_tsch; + +s390 specific. This exit occurs when KVM_CAP_S390_CSS_SUPPORT has been enabled +and TEST SUBCHANNEL was intercepted. If dequeued is set, a pending I/O +interrupt for the target subchannel has been dequeued and subchannel_id, +subchannel_nr, io_int_parm and io_int_word contain the parameters for that +interrupt. 
ipb is needed for instruction parameter decoding. + /* Fix the size of the union. */ char padding[256]; }; @@ -2471,3 +2487,17 @@ For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV: where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value. - The tsize field of mas1 shall be set to 4K on TLB0, even though the hardware ignores this value for TLB0. + +6.4 KVM_CAP_S390_CSS_SUPPORT + +Architectures: s390 +Parameters: none +Returns: 0 on success; -1 on error + +This capability enables support for handling of channel I/O instructions. + +TEST PENDING INTERRUPTION and the interrupt portion of TEST SUBCHANNEL are +handled in-kernel, while the other I/O instructions are passed to userspace. + +When this capability is enabled, KVM_EXIT_S390_TSCH will occur on TEST +SUBCHANNEL intercepts. diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 29363d155cd5..16bd5d169cdb 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -262,6 +262,7 @@ struct kvm_arch{ debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; struct gmap *gmap; + int css_support; }; extern int sie64a(struct kvm_s390_sie_block *, u64 *); diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 71af87dbb42c..f26ff1e31bdb 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -264,6 +264,7 @@ static const intercept_handler_t intercept_funcs[] = { [0x0C >> 2] = handle_instruction_and_prog, [0x10 >> 2] = handle_noop, [0x14 >> 2] = handle_noop, + [0x18 >> 2] = handle_noop, [0x1C >> 2] = kvm_s390_handle_wait, [0x20 >> 2] = handle_validity, [0x28 >> 2] = handle_stop, diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index b3b4748485ee..9a128357fd15 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -709,6 +709,43 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) return 0; } +struct kvm_s390_interrupt_info 
*kvm_s390_get_io_int(struct kvm *kvm, + u64 cr6, u64 schid) +{ + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *inti, *iter; + + if ((!schid && !cr6) || (schid && cr6)) + return NULL; + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + inti = NULL; + list_for_each_entry(iter, &fi->list, list) { + if (!is_ioint(iter->type)) + continue; + if (cr6 && ((cr6 & iter->io.io_int_word) == 0)) + continue; + if (schid) { + if (((schid & 0x00000000ffff0000) >> 16) != + iter->io.subchannel_id) + continue; + if ((schid & 0x000000000000ffff) != + iter->io.subchannel_nr) + continue; + } + inti = iter; + break; + } + if (inti) + list_del_init(&inti->list); + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); + return inti; +} + int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int) { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5ff26033825c..5b01f0953900 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -141,6 +141,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SYNC_REGS: case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: + case KVM_CAP_S390_CSS_SUPPORT: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -235,6 +236,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.gmap) goto out_nogmap; } + + kvm->arch.css_support = 0; + return 0; out_nogmap: debug_unregister(kvm->arch.dbf); @@ -658,6 +662,7 @@ rerun_vcpu: case KVM_EXIT_INTR: case KVM_EXIT_S390_RESET: case KVM_EXIT_S390_UCONTROL: + case KVM_EXIT_S390_TSCH: break; default: BUG(); @@ -818,6 +823,13 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return -EINVAL; switch (cap->cap) { + case KVM_CAP_S390_CSS_SUPPORT: + if (!vcpu->kvm->arch.css_support) { + vcpu->kvm->arch.css_support = 1; + trace_kvm_s390_enable_css(vcpu->kvm); + } + r = 0; + break; default: r = -EINVAL; break; diff --git a/arch/s390/kvm/kvm-s390.h 
b/arch/s390/kvm/kvm-s390.h index 211b340385a7..3e05deff21b6 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -113,6 +113,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt *s390int); int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); +struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, + u64 cr6, u64 schid); /* implemented in priv.c */ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 8ad776f87856..0ef9894606e5 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -127,15 +127,98 @@ static int handle_skey(struct kvm_vcpu *vcpu) return 0; } -static int handle_io_inst(struct kvm_vcpu *vcpu) +static int handle_tpi(struct kvm_vcpu *vcpu) { - VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); - /* condition code 3 */ + u64 addr; + struct kvm_s390_interrupt_info *inti; + int cc; + + addr = kvm_s390_get_base_disp_s(vcpu); + + inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); + if (inti) { + if (addr) { + /* + * Store the two-word I/O interruption code into the + * provided area. + */ + put_guest_u16(vcpu, addr, inti->io.subchannel_id); + put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr); + put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm); + } else { + /* + * Store the three-word I/O interruption code into + * the appropriate lowcore area. + */ + put_guest_u16(vcpu, 184, inti->io.subchannel_id); + put_guest_u16(vcpu, 186, inti->io.subchannel_nr); + put_guest_u32(vcpu, 188, inti->io.io_int_parm); + put_guest_u32(vcpu, 192, inti->io.io_int_word); + } + cc = 1; + } else + cc = 0; + kfree(inti); + /* Set condition code and we're done. 
*/ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; return 0; } +static int handle_tsch(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_interrupt_info *inti; + + inti = kvm_s390_get_io_int(vcpu->kvm, 0, + vcpu->run->s.regs.gprs[1]); + + /* + * Prepare exit to userspace. + * We indicate whether we dequeued a pending I/O interrupt + * so that userspace can re-inject it if the instruction gets + * a program check. While this may re-order the pending I/O + * interrupts, this is no problem since the priority is kept + * intact. + */ + vcpu->run->exit_reason = KVM_EXIT_S390_TSCH; + vcpu->run->s390_tsch.dequeued = !!inti; + if (inti) { + vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id; + vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr; + vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm; + vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word; + } + vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb; + kfree(inti); + return -EREMOTE; +} + +static int handle_io_inst(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); + + if (vcpu->kvm->arch.css_support) { + /* + * Most I/O instructions will be handled by userspace. + * Exceptions are tpi and the interrupt portion of tsch. + */ + if (vcpu->arch.sie_block->ipa == 0xb236) + return handle_tpi(vcpu); + if (vcpu->arch.sie_block->ipa == 0xb235) + return handle_tsch(vcpu); + /* Handle in userspace. */ + return -EOPNOTSUPP; + } else { + /* + * Set condition code 3 to stop the guest from issueing channel + * I/O instructions. 
+ */ + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + return 0; + } +} + static int handle_stfl(struct kvm_vcpu *vcpu) { unsigned int facility_list; diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 95fbc1ab88dc..13f30f58a2df 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -204,6 +204,26 @@ TRACE_EVENT(kvm_s390_stop_request, ); +/* + * Trace point for enabling channel I/O instruction support. + */ +TRACE_EVENT(kvm_s390_enable_css, + TP_PROTO(void *kvm), + TP_ARGS(kvm), + + TP_STRUCT__entry( + __field(void *, kvm) + ), + + TP_fast_assign( + __entry->kvm = kvm; + ), + + TP_printk("enabling channel I/O support (kvm @ %p)\n", + __entry->kvm) + ); + + #endif /* _TRACE_KVMS390_H */ /* This part must be outside protection */ diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 7ef9e759f499..a23f47c884cf 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -14,7 +14,7 @@ ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR), \ ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\ ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL), \ - ERSN(S390_UCONTROL) + ERSN(S390_UCONTROL), ERSN(S390_TSCH) TRACE_EVENT(kvm_userspace_exit, TP_PROTO(__u32 reason, int errno), diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 80bb3b801116..8bb0bf83afc5 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -168,6 +168,7 @@ struct kvm_pit_config { #define KVM_EXIT_PAPR_HCALL 19 #define KVM_EXIT_S390_UCONTROL 20 #define KVM_EXIT_WATCHDOG 21 +#define KVM_EXIT_S390_TSCH 22 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. 
*/ @@ -285,6 +286,15 @@ struct kvm_run { __u64 ret; __u64 args[9]; } papr_hcall; + /* KVM_EXIT_S390_TSCH */ + struct { + __u16 subchannel_id; + __u16 subchannel_nr; + __u32 io_int_parm; + __u32 io_int_word; + __u32 ipb; + __u8 dequeued; + } s390_tsch; /* Fix the size of the union. */ char padding[256]; }; @@ -645,6 +655,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 +#define KVM_CAP_S390_CSS_SUPPORT 85 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 390a1bd8538132186ddb679cafe9e75b7ef7e2d2 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 19 Dec 2012 19:11:32 +0100 Subject: NFC: Initial Secure Element API Each NFC adapter can have several links to different secure elements and that property needs to be exported by the drivers. A secure element link can be enabled and disabled, and card emulation will be handled by the currently active one. Otherwise card emulation will be host implemented. Signed-off-by: Samuel Ortiz --- drivers/nfc/nfcwilink.c | 1 + drivers/nfc/pn533.c | 1 + drivers/nfc/pn544/pn544.c | 6 ++++-- include/net/nfc/hci.h | 3 +++ include/net/nfc/nci_core.h | 1 + include/net/nfc/nfc.h | 6 ++++++ include/uapi/linux/nfc.h | 14 ++++++++++++++ net/nfc/core.c | 3 +++ net/nfc/hci/core.c | 3 ++- net/nfc/nci/core.c | 2 ++ net/nfc/netlink.c | 1 + 11 files changed, 38 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/nfc/nfcwilink.c b/drivers/nfc/nfcwilink.c index c7c182d2b7df..3b731acbc408 100644 --- a/drivers/nfc/nfcwilink.c +++ b/drivers/nfc/nfcwilink.c @@ -542,6 +542,7 @@ static int nfcwilink_probe(struct platform_device *pdev) drv->ndev = nci_allocate_device(&nfcwilink_ops, protocols, + NFC_SE_NONE, NFCWILINK_HDR_LEN, 0); if (!drv->ndev) { diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c index e8c083203b33..31a5b3b53b2a 100644 --- a/drivers/nfc/pn533.c +++ b/drivers/nfc/pn533.c @@ -2525,6 +2525,7 @@ static int pn533_probe(struct 
usb_interface *interface, dev->nfc_dev = nfc_allocate_device(&pn533_nfc_ops, protocols, + NFC_SE_NONE, dev->ops->tx_header_len + PN533_CMD_DATAEXCH_HEAD_LEN, dev->ops->tx_tail_len); diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index d108c794008d..9c5f16e7baef 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -801,7 +801,7 @@ int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, struct nfc_hci_dev **hdev) { struct pn544_hci_info *info; - u32 protocols; + u32 protocols, se; struct nfc_hci_init_data init_data; int r; @@ -834,8 +834,10 @@ int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, NFC_PROTO_ISO14443_B_MASK | NFC_PROTO_NFC_DEP_MASK; + se = NFC_SE_UICC | NFC_SE_EMBEDDED; + info->hdev = nfc_hci_allocate_device(&pn544_hci_ops, &init_data, 0, - protocols, llc_name, + protocols, se, llc_name, phy_headroom + PN544_CMDS_HEADROOM, phy_tailroom, phy_payload); if (!info->hdev) { diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 2ff71750c428..b87a1692b086 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -59,6 +59,8 @@ struct nfc_hci_ops { struct nfc_target *target); int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event, struct sk_buff *skb); + int (*enable_se)(struct nfc_dev *dev, u32 secure_element); + int (*disable_se)(struct nfc_dev *dev, u32 secure_element); }; /* Pipes */ @@ -150,6 +152,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, + u32 supported_se, const char *llc_name, int tx_headroom, int tx_tailroom, diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index d705d8674949..5bc0c460edc0 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -147,6 +147,7 @@ struct nci_dev { /* ----- NCI Devices ----- */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, + 
__u32 supported_se, int tx_headroom, int tx_tailroom); void nci_free_device(struct nci_dev *ndev); diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 1665674e86b2..87a6417fc934 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -68,6 +68,8 @@ struct nfc_ops { void *cb_context); int (*tm_send)(struct nfc_dev *dev, struct sk_buff *skb); int (*check_presence)(struct nfc_dev *dev, struct nfc_target *target); + int (*enable_se)(struct nfc_dev *dev, u32 secure_element); + int (*disable_se)(struct nfc_dev *dev, u32 secure_element); }; #define NFC_TARGET_IDX_ANY -1 @@ -109,6 +111,9 @@ struct nfc_dev { struct nfc_genl_data genl_data; u32 supported_protocols; + u32 supported_se; + u32 active_se; + int tx_headroom; int tx_tailroom; @@ -125,6 +130,7 @@ extern struct class nfc_class; struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, + u32 supported_se, int tx_headroom, int tx_tailroom); diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 0e63cee8d810..80e4ecd8c04c 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -67,6 +67,11 @@ * subsequent CONNECT and CC messages. * If one of the passed parameters is wrong none is set and -EINVAL is * returned. + * @NFC_CMD_ENABLE_SE: Enable the physical link to a specific secure element. + * Once enabled a secure element will handle card emulation mode, i.e. + * starting a poll from a device which has a secure element enabled means + * we want to do SE based card emulation. + * @NFC_CMD_DISABLE_SE: Disable the physical link to a specific secure element. 
*/ enum nfc_commands { NFC_CMD_UNSPEC, @@ -86,6 +91,8 @@ enum nfc_commands { NFC_EVENT_TM_DEACTIVATED, NFC_CMD_LLC_GET_PARAMS, NFC_CMD_LLC_SET_PARAMS, + NFC_CMD_ENABLE_SE, + NFC_CMD_DISABLE_SE, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -114,6 +121,7 @@ enum nfc_commands { * @NFC_ATTR_LLC_PARAM_LTO: Link TimeOut parameter * @NFC_ATTR_LLC_PARAM_RW: Receive Window size parameter * @NFC_ATTR_LLC_PARAM_MIUX: MIU eXtension parameter + * @NFC_ATTR_SE: Available Secure Elements */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -134,6 +142,7 @@ enum nfc_attrs { NFC_ATTR_LLC_PARAM_LTO, NFC_ATTR_LLC_PARAM_RW, NFC_ATTR_LLC_PARAM_MIUX, + NFC_ATTR_SE, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; @@ -172,6 +181,11 @@ enum nfc_attrs { #define NFC_PROTO_NFC_DEP_MASK (1 << NFC_PROTO_NFC_DEP) #define NFC_PROTO_ISO14443_B_MASK (1 << NFC_PROTO_ISO14443_B) +/* NFC Secure Elements */ +#define NFC_SE_NONE 0x0 +#define NFC_SE_UICC 0x1 +#define NFC_SE_EMBEDDED 0x2 + struct sockaddr_nfc { sa_family_t sa_family; __u32 dev_idx; diff --git a/net/nfc/core.c b/net/nfc/core.c index 7d7b4ee34015..25522e56d350 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -757,6 +757,7 @@ struct nfc_dev *nfc_get_device(unsigned int idx) */ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, + u32 supported_se, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; @@ -774,6 +775,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; + dev->supported_se = supported_se; + dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 755a6b9774ab..91020b210d87 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -797,6 +797,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, + u32 supported_se, const 
char *llc_name, int tx_headroom, int tx_tailroom, @@ -822,7 +823,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, return NULL; } - hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, + hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, supported_se, tx_headroom + HCI_CMDS_HEADROOM, tx_tailroom); if (!hdev->ndev) { diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 5f98dc1bf039..48ada0ec749e 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -658,6 +658,7 @@ static struct nfc_ops nci_nfc_ops = { */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, + __u32 supported_se, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; @@ -680,6 +681,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, + supported_se, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 3568ae16786d..504b883439f1 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -366,6 +366,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || + nla_put_u32(msg, NFC_ATTR_SE, dev->supported_se) || nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; -- cgit v1.2.3 From 1c810636556c8d53a37406b34a64d9b9b0161aa6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 4 Jan 2013 18:12:48 +0100 Subject: KVM: PPC: BookE: Implement EPR exit The External Proxy Facility in FSL BookE chips allows the interrupt controller to automatically acknowledge an interrupt as soon as a core gets its pending external interrupt delivered. 
Today, user space implements the interrupt controller, so we need to check on it during such a cycle. This patch implements logic for user space to enable EPR exiting, disable EPR exiting and EPR exiting itself, so that user space can acknowledge an interrupt when an external interrupt has successfully been delivered into the guest vcpu. Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 40 +++++++++++++++++++++++++++++++++++-- arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/include/asm/kvm_ppc.h | 9 +++++++++ arch/powerpc/kvm/booke.c | 14 ++++++++++++- arch/powerpc/kvm/powerpc.c | 10 ++++++++++ include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 6 ++++++ 7 files changed, 79 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4fc2bfcb16d5..a98ed09269d7 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2246,8 +2246,8 @@ executed a memory-mapped I/O instruction which could not be satisfied by kvm. The 'data' member contains the written data if 'is_write' is true, and should be filled by application code otherwise. -NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR - and KVM_EXIT_PAPR the corresponding +NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR, + KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. Userspace @@ -2366,6 +2366,25 @@ interrupt for the target subchannel has been dequeued and subchannel_id, subchannel_nr, io_int_parm and io_int_word contain the parameters for that interrupt. ipb is needed for instruction parameter decoding. 
+ /* KVM_EXIT_EPR */ + struct { + __u32 epr; + } epr; + +On FSL BookE PowerPC chips, the interrupt controller has a fast +interrupt acknowledge path to the core. When the core successfully +delivers an interrupt, it automatically populates the EPR register with +the interrupt vector number and acknowledges the interrupt inside +the interrupt controller. + +In case the interrupt controller lives in user space, we need to do +the interrupt acknowledge cycle through it to fetch the next to be +delivered interrupt vector using this exit. + +It gets triggered whenever KVM_CAP_PPC_EPR is enabled and an +external interrupt has just been delivered into the guest. User space +should put the acknowledged interrupt vector into the 'epr' field. + /* Fix the size of the union. */ char padding[256]; }; @@ -2501,3 +2520,20 @@ handled in-kernel, while the other I/O instructions are passed to userspace. When this capability is enabled, KVM_EXIT_S390_TSCH will occur on TEST SUBCHANNEL intercepts. + +6.5 KVM_CAP_PPC_EPR + +Architectures: ppc +Parameters: args[0] defines whether the proxy facility is active +Returns: 0 on success; -1 on error + +This capability enables or disables the delivery of interrupts through the +external proxy facility. + +When enabled (args[0] != 0), every time the guest gets an external interrupt +delivered, it automatically exits into user space with a KVM_EXIT_EPR exit +to receive the topmost interrupt vector. + +When disabled (args[0] == 0), behavior is as if this facility is unsupported. + +When this capability is enabled, KVM_EXIT_EPR can occur.
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index ab49c6cf891c..8a72d59467eb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -520,6 +520,8 @@ struct kvm_vcpu_arch { u8 sane; u8 cpu_type; u8 hcall_needed; + u8 epr_enabled; + u8 epr_needed; u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 5f5f69abd281..493630e209c8 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -264,6 +264,15 @@ static inline void kvm_linear_init(void) {} #endif +static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) +{ +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GEPR, epr); +#elif defined(CONFIG_BOOKE) + vcpu->arch.epr = epr; +#endif +} + int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, struct kvm_config_tlb *cfg); int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 964f4475f55c..940ec806187e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -306,7 +306,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, { int allowed = 0; ulong msr_mask = 0; - bool update_esr = false, update_dear = false; + bool update_esr = false, update_dear = false, update_epr = false; ulong crit_raw = vcpu->arch.shared->critical; ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); bool crit; @@ -330,6 +330,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, keep_irq = true; } + if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) + update_epr = true; + switch (priority) { case BOOKE_IRQPRIO_DTLB_MISS: case BOOKE_IRQPRIO_DATA_STORAGE: @@ -408,6 +411,8 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) set_guest_dear(vcpu, vcpu->arch.queued_dear); + if (update_epr == true) + 
kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); new_msr &= msr_mask; #if defined(CONFIG_64BIT) @@ -615,6 +620,13 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) r = 0; } + if (kvm_check_request(KVM_REQ_EPR_EXIT, vcpu)) { + vcpu->run->epr.epr = 0; + vcpu->arch.epr_needed = true; + vcpu->run->exit_reason = KVM_EXIT_EPR; + r = 0; + } + return r; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index e2225e5b8a4c..934413cd3a1b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -306,6 +306,7 @@ int kvm_dev_ioctl_check_extension(long ext) #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: case KVM_CAP_PPC_BOOKE_WATCHDOG: + case KVM_CAP_PPC_EPR: #else case KVM_CAP_PPC_SEGSTATE: case KVM_CAP_PPC_HIOR: @@ -721,6 +722,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) for (i = 0; i < 9; ++i) kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]); vcpu->arch.hcall_needed = 0; +#ifdef CONFIG_BOOKE + } else if (vcpu->arch.epr_needed) { + kvmppc_set_epr(vcpu, run->epr.epr); + vcpu->arch.epr_needed = 0; +#endif } r = kvmppc_vcpu_run(run, vcpu); @@ -762,6 +768,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; + case KVM_CAP_PPC_EPR: + r = 0; + vcpu->arch.epr_enabled = cap->args[0]; + break; #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_WATCHDOG: r = 0; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cbe0d683e2e5..4dd7d7531e69 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -122,6 +122,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_WATCHDOG 18 #define KVM_REQ_MASTERCLOCK_UPDATE 19 #define KVM_REQ_MCLOCK_INPROGRESS 20 +#define KVM_REQ_EPR_EXIT 21 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 8bb0bf83afc5..9a2db5767ed5 100644 --- a/include/uapi/linux/kvm.h +++ 
b/include/uapi/linux/kvm.h @@ -169,6 +169,7 @@ struct kvm_pit_config { #define KVM_EXIT_S390_UCONTROL 20 #define KVM_EXIT_WATCHDOG 21 #define KVM_EXIT_S390_TSCH 22 +#define KVM_EXIT_EPR 23 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -295,6 +296,10 @@ struct kvm_run { __u32 ipb; __u8 dequeued; } s390_tsch; + /* KVM_EXIT_EPR */ + struct { + __u32 epr; + } epr; /* Fix the size of the union. */ char padding[256]; }; @@ -656,6 +661,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 #define KVM_CAP_S390_CSS_SUPPORT 85 +#define KVM_CAP_PPC_EPR 86 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 7b9205bd775afc4439ed86d617f9042ee9e76a71 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Jan 2013 14:32:05 -0800 Subject: audit: create explicit AUDIT_SECCOMP event type The seccomp path was using AUDIT_ANOM_ABEND from when seccomp mode 1 could only kill a process. While we still want to make sure an audit record is forced on a kill, this should use a separate record type since seccomp mode 2 introduces other behaviors. In the case of "handled" behaviors (process wasn't killed), only emit a record if the process is under inspection. This change also fixes userspace examination of seccomp audit events, since it was considered malformed due to missing fields of the AUDIT_ANOM_ABEND event type. Signed-off-by: Kees Cook Cc: Al Viro Cc: Eric Paris Cc: Jeff Layton Cc: "Eric W. 
Biederman" Cc: Julien Tinnes Acked-by: Will Drewry Acked-by: Steve Grubb Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/audit.h | 3 ++- include/uapi/linux/audit.h | 1 + kernel/auditsc.c | 14 +++++++++++--- 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index bce729afbcf9..9d5104d7aba9 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -157,7 +157,8 @@ void audit_core_dumps(long signr); static inline void audit_seccomp(unsigned long syscall, long signr, int code) { - if (unlikely(!audit_dummy_context())) + /* Force a record to be reported if a signal was delivered. */ + if (signr || unlikely(!audit_dummy_context())) __audit_seccomp(syscall, signr, code); } diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 76352ac45f24..09a2d94ab113 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -106,6 +106,7 @@ #define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ #define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */ #define AUDIT_NETFILTER_CFG 1325 /* Netfilter chain modifications */ +#define AUDIT_SECCOMP 1326 /* Secure Computing event */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e37e6a12c5e3..3e46d1dec613 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2675,7 +2675,7 @@ void __audit_mmap_fd(int fd, int flags) context->type = AUDIT_MMAP; } -static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) +static void audit_log_task(struct audit_buffer *ab) { kuid_t auid, uid; kgid_t gid; @@ -2693,6 +2693,11 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) audit_log_task_context(ab); audit_log_format(ab, " pid=%d comm=", current->pid); 
audit_log_untrustedstring(ab, current->comm); +} + +static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) +{ + audit_log_task(ab); audit_log_format(ab, " reason="); audit_log_string(ab, reason); audit_log_format(ab, " sig=%ld", signr); @@ -2723,8 +2728,11 @@ void __audit_seccomp(unsigned long syscall, long signr, int code) { struct audit_buffer *ab; - ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); - audit_log_abend(ab, "seccomp", signr); + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP); + if (unlikely(!ab)) + return; + audit_log_task(ab); + audit_log_format(ab, " sig=%ld", signr); audit_log_format(ab, " syscall=%ld", syscall); audit_log_format(ab, " compat=%d", is_compat_task()); audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current)); -- cgit v1.2.3 From c0a3a20b6c4b5229ef5d26fd9b1c4b1957632aa7 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 11 Jan 2013 14:32:13 -0800 Subject: linux/audit.h: move ptrace.h include to kernel header While the kernel internals want pt_regs (and so it includes linux/ptrace.h), the user version of audit.h does not need it. So move the include out of the uapi version. This avoids issues where people want the audit defines and userland ptrace api. Including both the kernel ptrace and the userland ptrace headers can easily lead to failure. 
Signed-off-by: Mike Frysinger Cc: Eric Paris Cc: Al Viro Reviewed-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/audit.h | 1 + include/uapi/linux/audit.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 9d5104d7aba9..5a6d718adf34 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -24,6 +24,7 @@ #define _LINUX_AUDIT_H_ #include +#include #include struct audit_sig_info { diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 09a2d94ab113..9f096f1c0907 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -26,7 +26,6 @@ #include #include -#include /* The netlink messages for the audit system is divided into blocks: * 1000 - 1099 are for commanding the audit system -- cgit v1.2.3 From dd3332bfcb2223458f553f341d3388cb84040e6a Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 13 Jan 2013 05:02:45 +0000 Subject: ipv6: Store Router Alert option in IP6CB directly. Router Alert option is very small and we can store the value itself in the skb. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 3 ++- include/uapi/linux/ipv6.h | 2 ++ net/ipv6/exthdrs.c | 3 ++- net/ipv6/ip6_input.c | 5 ++--- 4 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 304a9f46b578..e971e3742172 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -84,7 +84,7 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) struct inet6_skb_parm { int iif; - __u16 ra; + __be16 ra; __u16 hop; __u16 dst0; __u16 srcrt; @@ -100,6 +100,7 @@ struct inet6_skb_parm { #define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_FORWARDED 2 #define IP6SKB_REROUTED 4 +#define IP6SKB_ROUTERALERT 8 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 5a2991cf0251..4bda4cf5b0f5 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -63,6 +63,8 @@ struct ipv6_opt_hdr { #define ipv6_destopt_hdr ipv6_opt_hdr #define ipv6_hopopt_hdr ipv6_opt_hdr +/* Router Alert option values (RFC2711) */ +#define IPV6_OPT_ROUTERALERT_MLD 0x0000 /* MLD(RFC2710) */ /* * routing header type 0 (used in cmsghdr struct) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 473f628f9f20..07a7d65a7cb6 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -553,7 +553,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { - IP6CB(skb)->ra = optoff; + IP6CB(skb)->flags |= IP6SKB_ROUTERALERT; + memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 2ccd35ec3628..4ac5bf30e16a 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -280,9 +280,8 @@ int ip6_mc_input(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); /* Check for MLD */ - if 
(unlikely(opt->ra)) { + if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { /* Check if this is a mld message */ - u8 *ptr = skb_network_header(skb) + opt->ra; u8 nexthdr = hdr->nexthdr; __be16 frag_off; int offset; @@ -290,7 +289,7 @@ int ip6_mc_input(struct sk_buff *skb) /* Check if the value of Router Alert * is for MLD (0x0000). */ - if ((ptr[2] | ptr[3]) == 0) { + if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) { deliver = false; if (!ipv6_ext_hdr(nexthdr)) { -- cgit v1.2.3 From 25d46f43a911b08c5aa8c8fd4fe7fa9b36445068 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 13 Jan 2013 16:02:06 +0000 Subject: ipv6: Move comment to right place. IN6ADDR_* and in6addr_* are not exported to userspace, and are defined in include/linux/in6.h. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/in6.h | 4 ++++ include/uapi/linux/in6.h | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index 9e2ae26fb598..a16e19349ec0 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -22,6 +22,10 @@ #include +/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 + * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined + * in network byte order, not in host byte order as are the IPv4 equivalents + */ extern const struct in6_addr in6addr_any; #define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } } extern const struct in6_addr in6addr_loopback; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index f79c3721da6e..5673b97dcf54 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -38,11 +38,6 @@ struct in6_addr { #define s6_addr32 in6_u.u6_addr32 }; -/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 - * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined - * in network byte order, not in host byte order as 
are the IPv4 equivalents - */ - struct sockaddr_in6 { unsigned short int sin6_family; /* AF_INET6 */ __be16 sin6_port; /* Transport layer port # */ -- cgit v1.2.3 From d3710e74cf329839dea8d13b1ad56e572b06b173 Mon Sep 17 00:00:00 2001 From: Lauro Ramos Venancio Date: Wed, 5 Dec 2012 21:12:25 -0300 Subject: NFC: Change nfc.h license nfc.h being GPL makes it quite controversial for non GPL applications to include it. Moreover, nfc.h only includes structures and API definitions that are hardly copyrightable. Signed-off-by: Lauro Ramos Venancio Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 80e4ecd8c04c..7969f46f1bb3 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -5,20 +5,17 @@ * Lauro Ramos Venancio * Aloisio Almeida Jr * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #ifndef __LINUX_NFC_H -- cgit v1.2.3 From 3b1c5a5307fb5277f395efdcf330c064d79df07d Mon Sep 17 00:00:00 2001 From: Marco Porsch Date: Mon, 7 Jan 2013 16:04:52 +0100 Subject: {cfg,nl}80211: mesh power mode primitives and userspace access Add the nl80211_mesh_power_mode enumeration which holds possible values for the mesh power mode. These modes are unknown, active, light sleep and deep sleep. Add power_mode entry to the mesh config structure to hold the user-configured default mesh power mode. This value will be used for new peer links. Add the dot11MeshAwakeWindowDuration value to the mesh config. The awake window is a duration in TU describing how long the STA will stay awake after transmitting its beacon in PS mode. Add access routines to: - get/set local link-specific power mode (STA) - get remote STA's link-specific power mode (STA) - get remote STA's non-peer power mode (STA) - get/set default mesh power mode (mesh config) - get/set mesh awake window duration (mesh config) All config changes may be done at mesh runtime and take effect immediately. 
Signed-off-by: Marco Porsch Signed-off-by: Ivan Bezyazychnyy Signed-off-by: Mike Krinkin [fix commit message line length, error handling in set station] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 21 ++++++++++++++++++++ include/uapi/linux/nl80211.h | 47 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/mesh.c | 3 +++ net/wireless/nl80211.c | 43 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 113 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 516aded3697f..d9f08f65f7a5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -610,6 +610,8 @@ enum station_parameters_apply_mask { * @sta_modify_mask: bitmap indicating which parameters changed * (for those that don't have a natural "no change" value), * see &enum station_parameters_apply_mask + * @local_pm: local link-specific mesh power save mode (no change when set + * to unknown) */ struct station_parameters { u8 *supported_rates; @@ -625,6 +627,7 @@ struct station_parameters { struct ieee80211_vht_cap *vht_capa; u8 uapsd_queues; u8 max_sp; + enum nl80211_mesh_power_mode local_pm; }; /** @@ -655,6 +658,9 @@ struct station_parameters { * @STATION_INFO_STA_FLAGS: @sta_flags filled * @STATION_INFO_BEACON_LOSS_COUNT: @beacon_loss_count filled * @STATION_INFO_T_OFFSET: @t_offset filled + * @STATION_INFO_LOCAL_PM: @local_pm filled + * @STATION_INFO_PEER_PM: @peer_pm filled + * @STATION_INFO_NONPEER_PM: @nonpeer_pm filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -678,6 +684,9 @@ enum station_info_flags { STATION_INFO_STA_FLAGS = 1<<18, STATION_INFO_BEACON_LOSS_COUNT = 1<<19, STATION_INFO_T_OFFSET = 1<<20, + STATION_INFO_LOCAL_PM = 1<<21, + STATION_INFO_PEER_PM = 1<<22, + STATION_INFO_NONPEER_PM = 1<<23, }; /** @@ -791,6 +800,9 @@ struct sta_bss_parameters { * @sta_flags: station flags mask & values * @beacon_loss_count: Number of times beacon loss event has triggered. 
* @t_offset: Time offset of the station relative to this host. + * @local_pm: local mesh STA power save mode + * @peer_pm: peer mesh STA power save mode + * @nonpeer_pm: non-peer mesh STA power save mode */ struct station_info { u32 filled; @@ -820,6 +832,9 @@ struct station_info { u32 beacon_loss_count; s64 t_offset; + enum nl80211_mesh_power_mode local_pm; + enum nl80211_mesh_power_mode peer_pm; + enum nl80211_mesh_power_mode nonpeer_pm; /* * Note: Add a new enum station_info_flags value for each new field and @@ -995,6 +1010,10 @@ struct bss_parameters { * @dot11MeshHWMPconfirmationInterval: The minimum interval of time (in TUs) * during which a mesh STA can send only one Action frame containing * a PREQ element for root path confirmation. + * @power_mode: The default mesh power save mode which will be the initial + * setting for new peer links. + * @dot11MeshAwakeWindowDuration: The duration in TUs the STA will remain awake + * after transmitting its beacon. */ struct mesh_config { u16 dot11MeshRetryTimeout; @@ -1022,6 +1041,8 @@ struct mesh_config { u32 dot11MeshHWMPactivePathToRootTimeout; u16 dot11MeshHWMProotInterval; u16 dot11MeshHWMPconfirmationInterval; + enum nl80211_mesh_power_mode power_mode; + u16 dot11MeshAwakeWindowDuration; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 547017100a30..6c4f703ae890 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1310,6 +1310,9 @@ enum nl80211_commands { * if not given in START_AP 0 is assumed, if not given in SET_BSS * no change is made. * + * @NL80211_ATTR_LOCAL_MESH_POWER_MODE: local mesh STA link-specific power mode + * defined in &enum nl80211_mesh_power_mode. 
+ * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1580,6 +1583,8 @@ enum nl80211_attrs { NL80211_ATTR_P2P_CTWINDOW, NL80211_ATTR_P2P_OPPPS, + NL80211_ATTR_LOCAL_MESH_POWER_MODE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1838,6 +1843,10 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_STA_FLAGS: Contains a struct nl80211_sta_flag_update. * @NL80211_STA_INFO_BEACON_LOSS: count of times beacon loss was detected (u32) * @NL80211_STA_INFO_T_OFFSET: timing offset with respect to this STA (s64) + * @NL80211_STA_INFO_LOCAL_PM: local mesh STA link-specific power mode + * @NL80211_STA_INFO_PEER_PM: peer mesh STA link-specific power mode + * @NL80211_STA_INFO_NONPEER_PM: neighbor mesh STA power save mode towards + * non-peer STA * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -1862,6 +1871,9 @@ enum nl80211_sta_info { NL80211_STA_INFO_STA_FLAGS, NL80211_STA_INFO_BEACON_LOSS, NL80211_STA_INFO_T_OFFSET, + NL80211_STA_INFO_LOCAL_PM, + NL80211_STA_INFO_PEER_PM, + NL80211_STA_INFO_NONPEER_PM, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, @@ -2252,6 +2264,34 @@ enum nl80211_mntr_flags { NL80211_MNTR_FLAG_MAX = __NL80211_MNTR_FLAG_AFTER_LAST - 1 }; +/** + * enum nl80211_mesh_power_mode - mesh power save modes + * + * @NL80211_MESH_POWER_UNKNOWN: The mesh power mode of the mesh STA is + * not known or has not been set yet. + * @NL80211_MESH_POWER_ACTIVE: Active mesh power mode. The mesh STA is + * in Awake state all the time. + * @NL80211_MESH_POWER_LIGHT_SLEEP: Light sleep mode. The mesh STA will + * alternate between Active and Doze states, but will wake up for + * neighbor's beacons. + * @NL80211_MESH_POWER_DEEP_SLEEP: Deep sleep mode. The mesh STA will + * alternate between Active and Doze states, but may not wake up + * for neighbor's beacons. 
+ * + * @__NL80211_MESH_POWER_AFTER_LAST - internal use + * @NL80211_MESH_POWER_MAX - highest possible power save level + */ + +enum nl80211_mesh_power_mode { + NL80211_MESH_POWER_UNKNOWN, + NL80211_MESH_POWER_ACTIVE, + NL80211_MESH_POWER_LIGHT_SLEEP, + NL80211_MESH_POWER_DEEP_SLEEP, + + __NL80211_MESH_POWER_AFTER_LAST, + NL80211_MESH_POWER_MAX = __NL80211_MESH_POWER_AFTER_LAST - 1 +}; + /** * enum nl80211_meshconf_params - mesh configuration parameters * @@ -2346,6 +2386,11 @@ enum nl80211_mntr_flags { * (in TUs) during which a mesh STA can send only one Action frame * containing a PREQ element for root path confirmation. * + * @NL80211_MESHCONF_POWER_MODE: Default mesh power mode for new peer links. + * type &enum nl80211_mesh_power_mode (u32) + * + * @NL80211_MESHCONF_AWAKE_WINDOW: awake window duration (in TUs) + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -2375,6 +2420,8 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, + NL80211_MESHCONF_POWER_MODE, + NL80211_MESHCONF_AWAKE_WINDOW, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0fe8ceb5444e..55957a284f6c 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -46,6 +46,7 @@ #define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units (=TUs) */ #define MESH_DEFAULT_DTIM_PERIOD 2 +#define MESH_DEFAULT_AWAKE_WINDOW 10 /* in 1024 us units (=TUs) */ const struct mesh_config default_mesh_config = { .dot11MeshRetryTimeout = MESH_RET_T, @@ -72,6 +73,8 @@ const struct mesh_config default_mesh_config = { .dot11MeshHWMPactivePathToRootTimeout = MESH_PATH_TO_ROOT_TIMEOUT, .dot11MeshHWMProotInterval = MESH_ROOT_INTERVAL, .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, + .power_mode = NL80211_MESH_POWER_ACTIVE, + .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, }; 
const struct mesh_setup default_mesh_setup = { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d5842eb35aec..1a7a710fe9bf 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3001,6 +3001,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, sinfo->beacon_loss_count)) goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_LOCAL_PM) && + nla_put_u32(msg, NL80211_STA_INFO_LOCAL_PM, + sinfo->local_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_PEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_PEER_PM, + sinfo->peer_pm)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_NONPEER_PM) && + nla_put_u32(msg, NL80211_STA_INFO_NONPEER_PM, + sinfo->nonpeer_pm)) + goto nla_put_failure; if (sinfo->filled & STATION_INFO_BSS_PARAM) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) @@ -3206,6 +3218,17 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.plink_state = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) { + enum nl80211_mesh_power_mode pm = nla_get_u32( + info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]); + + if (pm <= NL80211_MESH_POWER_UNKNOWN || + pm > NL80211_MESH_POWER_MAX) + return -EINVAL; + + params.local_pm = pm; + } + switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: @@ -3213,6 +3236,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow mesh-specific things */ if (params.plink_action) return -EINVAL; + if (params.local_pm) + return -EINVAL; /* TDLS can't be set, ... 
*/ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) @@ -3265,6 +3290,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* disallow things sta doesn't support */ if (params.plink_action) return -EINVAL; + if (params.local_pm) + return -EINVAL; /* reject any changes other than AUTHORIZED */ if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) return -EINVAL; @@ -3922,7 +3949,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, cur_params.dot11MeshHWMProotInterval) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, - cur_params.dot11MeshHWMPconfirmationInterval)) + cur_params.dot11MeshHWMPconfirmationInterval) || + nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, + cur_params.power_mode) || + nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, + cur_params.dot11MeshAwakeWindowDuration)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -3961,6 +3992,8 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 }, + [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -4088,6 +4121,14 @@ do { \ 1, 65535, mask, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode, + NL80211_MESH_POWER_ACTIVE, + NL80211_MESH_POWER_MAX, + mask, NL80211_MESHCONF_POWER_MODE, + nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, + 0, 65535, mask, + NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); if (mask_out) *mask_out = mask; -- cgit v1.2.3 From cee00a959c0a86571e6f99cf42f0261d7e54d2ae Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 15 Jan 2013 
17:15:57 +0200 Subject: cfg80211: Allow use_mfp to be specified with the connect command The NL80211_ATTR_USE_MFP attribute was originally added for NL80211_CMD_ASSOCIATE, but it is actually as useful (if not even more useful) with NL80211_CMD_CONNECT, so process that attribute with the connect command, too. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 6 +++--- net/wireless/nl80211.c | 9 +++++++++ net/wireless/sme.c | 3 ++- 4 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d81e730962cc..f1686d460e6b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1465,6 +1465,7 @@ struct cfg80211_ibss_params { * @ie: IEs for association request * @ie_len: Length of assoc_ie in octets * @privacy: indicates whether privacy-enabled APs should be used + * @mfp: indicate whether management frame protection is used * @crypto: crypto settings * @key_len: length of WEP key for shared key authentication * @key_idx: index of WEP key for shared key authentication @@ -1485,6 +1486,7 @@ struct cfg80211_connect_params { u8 *ie; size_t ie_len; bool privacy; + enum nl80211_mfp mfp; struct cfg80211_crypto_settings crypto; const u8 *key; u8 key_len, key_idx; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6c4f703ae890..d01c16220dc5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -374,8 +374,8 @@ * requests to connect to a specified network but without separating * auth and assoc steps. 
For this, you need to specify the SSID in a * %NL80211_ATTR_SSID attribute, and can optionally specify the association - * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_MAC, - * %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, + * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_USE_MFP, + * %NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE and * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT. * Background scan period can optionally be @@ -958,7 +958,7 @@ enum nl80211_commands { * @NL80211_ATTR_USE_MFP: Whether management frame protection (IEEE 802.11w) is * used for the association (&enum nl80211_mfp, represented as a u32); * this attribute can be used - * with %NL80211_CMD_ASSOCIATE request + * with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests * * @NL80211_ATTR_STA_FLAGS2: Attribute containing a * &struct nl80211_sta_flag_update. diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d543cf152100..df82a5c9faee 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5932,6 +5932,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } + if (info->attrs[NL80211_ATTR_USE_MFP]) { + connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); + if (connect.mfp != NL80211_MFP_REQUIRED && + connect.mfp != NL80211_MFP_NO) + return -EINVAL; + } else { + connect.mfp = NL80211_MFP_NO; + } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { connect.channel = ieee80211_get_channel(wiphy, diff --git a/net/wireless/sme.c b/net/wireless/sme.c index d2d26518cdd7..a825dfe12cf7 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -192,7 +192,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) prev_bssid, params->ssid, params->ssid_len, params->ie, params->ie_len, - false, &params->crypto, + params->mfp != NL80211_MFP_NO, + &params->crypto, params->flags, &params->ht_capa,
&params->ht_capa_mask); if (err) -- cgit v1.2.3 From 11c4a075db2f8774d37544342c8cb9752b4db9e1 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 8 Jan 2013 14:04:07 +0100 Subject: cfg80211: check radar interface combinations To ease further DFS development regarding interface combinations, use the interface combinations structure to test for radar capabilities. Drivers can specify which channel widths they support, and in which modes. Right now only a single AP interface is allowed, but as the DFS code evolves other combinations can be enabled. Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 3 +++ net/wireless/core.c | 12 ++++++++++-- net/wireless/core.h | 7 ++++--- net/wireless/nl80211.c | 3 +++ net/wireless/util.c | 41 ++++++++++++++++++++++++++++++++++++++--- 6 files changed, 60 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f1686d460e6b..970da4420676 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2125,6 +2125,7 @@ struct ieee80211_iface_limit { * @beacon_int_infra_match: In this combination, the beacon intervals * between infrastructure and AP types must match. This is required * only in special cases. + * @radar_detect_widths: bitmap of channel widths supported for radar detection * * These examples can be expressed as follows: * @@ -2177,6 +2178,7 @@ struct ieee80211_iface_combination { u16 max_interfaces; u8 n_limits; bool beacon_int_infra_match; + u8 radar_detect_widths; }; struct mac_address { diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d01c16220dc5..e6eeb4ba5dc5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2984,6 +2984,8 @@ enum nl80211_iface_limit_attrs { * the infrastructure network's beacon interval.
* @NL80211_IFACE_COMB_NUM_CHANNELS: u32 attribute specifying how many * different channels may be used within this group. + * @NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS: u32 attribute containing the bitmap + * of supported channel widths for radar detection. * @NUM_NL80211_IFACE_COMB: number of attributes * @MAX_NL80211_IFACE_COMB: highest attribute number * @@ -3016,6 +3018,7 @@ enum nl80211_if_combination_attrs { NL80211_IFACE_COMB_MAXNUM, NL80211_IFACE_COMB_STA_AP_BI_MATCH, NL80211_IFACE_COMB_NUM_CHANNELS, + NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, /* keep last */ NUM_NL80211_IFACE_COMB, diff --git a/net/wireless/core.c b/net/wireless/core.c index 747dd9365a44..0e702cdc6043 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -382,8 +382,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c = &wiphy->iface_combinations[i]; - /* Combinations with just one interface aren't real */ - if (WARN_ON(c->max_interfaces < 2)) + /* + * Combinations with just one interface aren't real, + * however we make an exception for DFS. + */ + if (WARN_ON((c->max_interfaces < 2) && !c->radar_detect_widths)) return -EINVAL; /* Need at least one channel */ @@ -398,6 +401,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) CFG80211_MAX_NUM_DIFFERENT_CHANNELS)) return -EINVAL; + /* DFS only works on one channel. 
*/ + if (WARN_ON(c->radar_detect_widths && + (c->num_different_channels > 1))) + return -EINVAL; + if (WARN_ON(!c->n_limits)) return -EINVAL; diff --git a/net/wireless/core.h b/net/wireless/core.h index f342267e3620..8396f7671c8d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -425,7 +425,8 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode); + enum cfg80211_chan_mode chanmode, + u8 radar_detect); static inline int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, @@ -433,7 +434,7 @@ cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL, - CHAN_MODE_UNDEFINED); + CHAN_MODE_UNDEFINED, 0); } static inline int @@ -450,7 +451,7 @@ cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode) { return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, - chan, chanmode); + chan, chanmode, 0); } void diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index df82a5c9faee..33de80364c5c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -856,6 +856,9 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, c->max_interfaces)) goto nla_put_failure; + if (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + c->radar_detect_widths)) + goto nla_put_failure; nla_nest_end(msg, nl_combi); } diff --git a/net/wireless/util.c b/net/wireless/util.c index 16d76a807c2f..1c2795d52db0 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1184,7 +1184,8 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_iftype iftype, struct ieee80211_channel *chan, - enum cfg80211_chan_mode chanmode) + enum cfg80211_chan_mode 
chanmode, + u8 radar_detect) { struct wireless_dev *wdev_iter; u32 used_iftypes = BIT(iftype); @@ -1195,14 +1196,45 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chmode; int num_different_channels = 0; int total = 1; + bool radar_required; int i, j; ASSERT_RTNL(); lockdep_assert_held(&rdev->devlist_mtx); + if (WARN_ON(hweight32(radar_detect) > 1)) + return -EINVAL; + + switch (iftype) { + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_WDS: + radar_required = !!(chan->flags & IEEE80211_CHAN_RADAR); + break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_MONITOR: + radar_required = false; + break; + case NL80211_IFTYPE_P2P_DEVICE: + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_UNSPECIFIED: + default: + return -EINVAL; + } + + if (radar_required && !radar_detect) + return -EINVAL; + /* Always allow software iftypes */ - if (rdev->wiphy.software_iftypes & BIT(iftype)) + if (rdev->wiphy.software_iftypes & BIT(iftype)) { + if (radar_detect) + return -EINVAL; return 0; + } memset(num, 0, sizeof(num)); memset(used_channels, 0, sizeof(used_channels)); @@ -1275,7 +1307,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, used_iftypes |= BIT(wdev_iter->iftype); } - if (total == 1) + if (total == 1 && !radar_detect) return 0; for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) { @@ -1308,6 +1340,9 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, } } + if (radar_detect && !(c->radar_detect_widths & radar_detect)) + goto cont; + /* * Finally check that all iftypes that we're currently * using are actually part of this combination. 
If they -- cgit v1.2.3 From ebebd49a8eab5e9aa1b1f8f1614ccc3c2120f886 Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Thu, 17 Jan 2013 14:14:53 -0800 Subject: 8250/16?50: Add support for Broadcom TruManage redirected serial port Add support for the UART device present in Broadcom TruManage capable NetXtreme chips (ie: 5761m 5762, and 5725). This implementation has a hidden transmit FIFO, so running in single-byte interrupt mode results in too many interrupts. The UART_CAP_HFIFO capability was added to track this. It continues to reload the THR as long as the THRE and TSRE bits are set in the LSR up to a specified limit (1024 is used here). Signed-off-by: Stephen Hurd Signed-off-by: Michael Chan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.c | 11 +++++++++++ drivers/tty/serial/8250/8250.h | 1 + drivers/tty/serial/8250/8250_pci.c | 38 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/serial_core.h | 3 ++- 4 files changed, 52 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c index d085e3a8ec06..f9320437a649 100644 --- a/drivers/tty/serial/8250/8250.c +++ b/drivers/tty/serial/8250/8250.c @@ -300,6 +300,12 @@ static const struct serial8250_config uart_config[] = { UART_FCR_R_TRIG_00 | UART_FCR_T_TRIG_00, .flags = UART_CAP_FIFO, }, + [PORT_BRCM_TRUMANAGE] = { + .name = "TruManage", + .fifo_size = 1, + .tx_loadsz = 1024, + .flags = UART_CAP_HFIFO, + }, [PORT_8250_CIR] = { .name = "CIR port" } @@ -1490,6 +1496,11 @@ void serial8250_tx_chars(struct uart_8250_port *up) port->icount.tx++; if (uart_circ_empty(xmit)) break; + if (up->capabilities & UART_CAP_HFIFO) { + if ((serial_port_in(port, UART_LSR) & BOTH_EMPTY) != + BOTH_EMPTY) + break; + } } while (--count > 0); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 3b4ea84898c2..12caa1292b75 100644 --- 
a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -40,6 +40,7 @@ struct serial8250_config { #define UART_CAP_AFE (1 << 11) /* MCR-based hw flow control */ #define UART_CAP_UUE (1 << 12) /* UART needs IER bit 6 set (Xscale) */ #define UART_CAP_RTOIE (1 << 13) /* UART needs IER bit 4 set (Xscale, Tegra) */ +#define UART_CAP_HFIFO (1 << 14) /* UART has a "hidden" FIFO */ #define UART_BUG_QUOT (1 << 0) /* UART has buggy quot LSB */ #define UART_BUG_TXEN (1 << 1) /* UART has buggy TX IIR status */ diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 8a2c3d934187..a27a98e1b066 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1085,6 +1085,18 @@ pci_omegapci_setup(struct serial_private *priv, return setup_port(priv, port, 2, idx * 8, 0); } +static int +pci_brcm_trumanage_setup(struct serial_private *priv, + const struct pciserial_board *board, + struct uart_8250_port *port, int idx) +{ + int ret = pci_default_setup(priv, board, port, idx); + + port->port.type = PORT_BRCM_TRUMANAGE; + port->port.flags = (port->port.flags | UPF_FIXED_PORT | UPF_FIXED_TYPE); + return ret; +} + static int skip_tx_en_setup(struct serial_private *priv, const struct pciserial_board *board, struct uart_8250_port *port, int idx) @@ -1304,6 +1316,7 @@ pci_wch_ch353_setup(struct serial_private *priv, #define PCI_DEVICE_ID_COMMTECH_4224PCIE 0x0020 #define PCI_DEVICE_ID_COMMTECH_4228PCIE 0x0021 #define PCI_DEVICE_ID_COMMTECH_4222PCIE 0x0022 +#define PCI_DEVICE_ID_BROADCOM_TRUMANAGE 0x160a /* Unknown vendors/cards - this should not be in linux/pci_ids.h */ @@ -1953,6 +1966,17 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_xr17v35x_setup, }, + /* + * Broadcom TruManage (NetXtreme) + */ + { + .vendor = PCI_VENDOR_ID_BROADCOM, + .device = PCI_DEVICE_ID_BROADCOM_TRUMANAGE, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = 
pci_brcm_trumanage_setup, + }, + /* * Default "match everything" terminator entry */ @@ -2148,6 +2172,7 @@ enum pci_board_num_t { pbn_ce4100_1_115200, pbn_omegapci, pbn_NETMOS9900_2s_115200, + pbn_brcm_trumanage, }; /* @@ -2892,6 +2917,12 @@ static struct pciserial_board pci_boards[] = { .num_ports = 2, .base_baud = 115200, }, + [pbn_brcm_trumanage] = { + .flags = FL_BASE0, + .num_ports = 1, + .reg_shift = 2, + .base_baud = 115200, + }, }; static const struct pci_device_id blacklist[] = { @@ -4470,6 +4501,13 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_omegapci }, + /* + * Broadcom TruManage + */ + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_BROADCOM_TRUMANAGE, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_brcm_trumanage }, + /* * AgeStar as-prs2-009 */ diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 78f99d97475b..2c6c85f18ea0 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -50,7 +50,8 @@ #define PORT_LPC3220 22 /* NXP LPC32xx SoC "Standard" UART */ #define PORT_8250_CIR 23 /* CIR infrared port, has its own driver */ #define PORT_XR17V35X 24 /* Exar XR17V35x UARTs */ -#define PORT_MAX_8250 24 /* max port ID */ +#define PORT_BRCM_TRUMANAGE 24 +#define PORT_MAX_8250 25 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed -- cgit v1.2.3 From c539f01717c239cfa0921dd43927afc976f1eedc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 11 Jan 2013 06:30:44 +0000 Subject: netfilter: add connlabel conntrack extension similar to connmarks, except labels are bit-based; i.e. all labels may be attached to a flow at the same time. Up to 128 labels are supported. Supporting more labels is possible, but requires increasing the ct offset delta from u8 to u16 type due to increased extension sizes. Mapping of bit-identifier to label name is done in userspace. 
The extension is enabled at run-time once "-m connlabel" netfilter rules are added. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_extend.h | 4 ++ include/net/netfilter/nf_conntrack_labels.h | 55 ++++++++++++++++ include/net/netns/conntrack.h | 4 ++ include/uapi/linux/netfilter/xt_connlabel.h | 12 ++++ net/netfilter/Kconfig | 18 ++++++ net/netfilter/Makefile | 2 + net/netfilter/nf_conntrack_core.c | 12 ++++ net/netfilter/nf_conntrack_labels.c | 72 +++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 3 + net/netfilter/xt_connlabel.c | 99 +++++++++++++++++++++++++++++ 10 files changed, 281 insertions(+) create mode 100644 include/net/netfilter/nf_conntrack_labels.h create mode 100644 include/uapi/linux/netfilter/xt_connlabel.h create mode 100644 net/netfilter/nf_conntrack_labels.c create mode 100644 net/netfilter/xt_connlabel.c (limited to 'include/uapi/linux') diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 8b4d1fc29096..977bc8a46444 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -22,6 +22,9 @@ enum nf_ct_ext_id { #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT NF_CT_EXT_TIMEOUT, +#endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + NF_CT_EXT_LABELS, #endif NF_CT_EXT_NUM, }; @@ -33,6 +36,7 @@ enum nf_ct_ext_id { #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout +#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels /* Extensions: optional stuff which isn't permanently in struct. 
*/ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h new file mode 100644 index 000000000000..b94fe31c7b39 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include +#include + +#include + +struct nf_conn_labels { + u8 words; + unsigned long bits[]; +}; + +static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + return nf_ct_ext_find(ct, NF_CT_EXT_LABELS); +#else + return NULL; +#endif +} + +static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + struct nf_conn_labels *cl_ext; + struct net *net = nf_ct_net(ct); + u8 words; + + words = ACCESS_ONCE(net->ct.label_words); + if (words == 0 || WARN_ON_ONCE(words > 8)) + return NULL; + + cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, + words * sizeof(long), GFP_ATOMIC); + if (cl_ext != NULL) + cl_ext->words = words; + + return cl_ext; +#else + return NULL; +#endif +} + +bool nf_connlabel_match(const struct nf_conn *ct, u16 bit); +int nf_connlabel_set(struct nf_conn *ct, u16 bit); + +#ifdef CONFIG_NF_CONNTRACK_LABELS +int nf_conntrack_labels_init(struct net *net); +void nf_conntrack_labels_fini(struct net *net); +#else +static inline int nf_conntrack_labels_init(struct net *n) { return 0; } +static inline void nf_conntrack_labels_fini(struct net *net) {} +#endif diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 923cb20051ed..c9c0c538b68b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -84,6 +84,10 @@ struct netns_ct { int sysctl_auto_assign_helper; bool auto_assign_helper_warned; struct nf_ip_net nf_ct_proto; +#if defined(CONFIG_NF_CONNTRACK_LABELS) + unsigned int labels_used; + u8 label_words; +#endif #ifdef CONFIG_NF_NAT_NEEDED struct hlist_head *nat_bysource; unsigned int 
nat_htable_size; diff --git a/include/uapi/linux/netfilter/xt_connlabel.h b/include/uapi/linux/netfilter/xt_connlabel.h new file mode 100644 index 000000000000..c4bc9ee9b330 --- /dev/null +++ b/include/uapi/linux/netfilter/xt_connlabel.h @@ -0,0 +1,12 @@ +#include + +#define XT_CONNLABEL_MAXBIT 127 +enum xt_connlabel_mtopts { + XT_CONNLABEL_OP_INVERT = 1 << 0, + XT_CONNLABEL_OP_SET = 1 << 1, +}; + +struct xt_connlabel_mtinfo { + __u16 bit; + __u16 options; +}; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49e96df5fbc4..bb48607d4ee4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -124,6 +124,12 @@ config NF_CONNTRACK_TIMESTAMP If unsure, say `N'. +config NF_CONNTRACK_LABELS + bool + help + This option enables support for assigning user-defined flag bits + to connection tracking entries. It is selected by the connlabel match. + config NF_CT_PROTO_DCCP tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' depends on EXPERIMENTAL @@ -842,6 +848,18 @@ config NETFILTER_XT_MATCH_CONNBYTES If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_CONNLABEL + tristate '"connlabel" match support' + select NF_CONNTRACK_LABELS + depends on NETFILTER_ADVANCED + ---help--- + This match allows you to test and assign userspace-defined label names + to a connection. The kernel only stores bit values - mapping + names to bits is done by userspace. + + Unlike connmark, more than 32 flag bits may be assigned to a + connection simultaneously. 
+ config NETFILTER_XT_MATCH_CONNLIMIT tristate '"connlimit" match support"' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 32596978df1d..b3bbda60945e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -4,6 +4,7 @@ nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_exp nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o +nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -101,6 +102,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLABEL) += xt_connlabel.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e4a0c4fb3a7c..85aa4b7149c5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -763,6 +764,7 @@ void nf_conntrack_free(struct nf_conn *ct) } EXPORT_SYMBOL_GPL(nf_conntrack_free); + /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * @@ -809,6 +811,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; nf_ct_ecache_ext_add(ct, ecache ? 
ecache->ctmask : 0, @@ -1352,6 +1355,7 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); + nf_conntrack_labels_fini(net); nf_conntrack_helper_fini(net); nf_conntrack_timeout_fini(net); nf_conntrack_ecache_fini(net); @@ -1583,7 +1587,15 @@ static int nf_conntrack_init_net(struct net *net) ret = nf_conntrack_helper_init(net); if (ret < 0) goto err_helper; + + ret = nf_conntrack_labels_init(net); + if (ret < 0) + goto err_labels; + return 0; + +err_labels: + nf_conntrack_helper_fini(net); err_helper: nf_conntrack_timeout_fini(net); err_timeout: diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c new file mode 100644 index 000000000000..0c542f41f338 --- /dev/null +++ b/net/netfilter/nf_conntrack_labels.c @@ -0,0 +1,72 @@ +/* + * test/set flag bits stored in conntrack extension area. + * + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +static unsigned int label_bits(const struct nf_conn_labels *l) +{ + unsigned int longs = l->words; + return longs * BITS_PER_LONG; +} + +bool nf_connlabel_match(const struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return false; + + return bit < label_bits(labels) && test_bit(bit, labels->bits); +} +EXPORT_SYMBOL_GPL(nf_connlabel_match); + +int nf_connlabel_set(struct nf_conn *ct, u16 bit) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels || bit >= label_bits(labels)) + return -ENOSPC; + + if (test_bit(bit, labels->bits)) + return 0; + + if (test_and_set_bit(bit, labels->bits)) + return 0; + + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabel_set); + +static struct nf_ct_ext_type labels_extend __read_mostly = { + .len = sizeof(struct nf_conn_labels), + .align = __alignof__(struct nf_conn_labels), + .id = NF_CT_EXT_LABELS, +}; + +int nf_conntrack_labels_init(struct net *net) +{ + if (net_eq(net, &init_net)) + return nf_ct_extend_register(&labels_extend); + return 0; +} + +void nf_conntrack_labels_fini(struct net *net) +{ + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&labels_extend); +} diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 627b0e50b238..e0b10ee180ef 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_NF_NAT_NEEDED #include #include @@ -1598,6 +1599,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + nf_ct_labels_ext_add(ct); + /* we must add conntrack extensions before confirmation. 
*/ ct->status |= IPS_CONFIRMED; diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c new file mode 100644 index 000000000000..9f8719df2001 --- /dev/null +++ b/net/netfilter/xt_connlabel.c @@ -0,0 +1,99 @@ +/* + * (C) 2013 Astaro GmbH & Co KG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal "); +MODULE_DESCRIPTION("Xtables: add/match connection trackling labels"); +MODULE_ALIAS("ipt_connlabel"); +MODULE_ALIAS("ip6t_connlabel"); + +static bool +connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_connlabel_mtinfo *info = par->matchinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + bool invert = info->options & XT_CONNLABEL_OP_INVERT; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL || nf_ct_is_untracked(ct)) + return invert; + + if (info->options & XT_CONNLABEL_OP_SET) + return (nf_connlabel_set(ct, info->bit) == 0) ^ invert; + + return nf_connlabel_match(ct, info->bit) ^ invert; +} + +static int connlabel_mt_check(const struct xt_mtchk_param *par) +{ + const int options = XT_CONNLABEL_OP_INVERT | + XT_CONNLABEL_OP_SET; + struct xt_connlabel_mtinfo *info = par->matchinfo; + int ret; + size_t words; + + if (info->bit > XT_CONNLABEL_MAXBIT) + return -ERANGE; + + if (info->options & ~options) { + pr_err("Unknown options in mask %x\n", info->options); + return -EINVAL; + } + + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) { + pr_info("cannot load conntrack support for proto=%u\n", + par->family); + return ret; + } + + par->net->ct.labels_used++; + words = BITS_TO_LONGS(info->bit+1); + if (words > par->net->ct.label_words) + par->net->ct.label_words = words; + + return ret; +} + +static void connlabel_mt_destroy(const struct 
xt_mtdtor_param *par) +{ + par->net->ct.labels_used--; + if (par->net->ct.labels_used == 0) + par->net->ct.label_words = 0; + nf_ct_l3proto_module_put(par->family); +} + +static struct xt_match connlabels_mt_reg __read_mostly = { + .name = "connlabel", + .family = NFPROTO_UNSPEC, + .checkentry = connlabel_mt_check, + .match = connlabel_mt, + .matchsize = sizeof(struct xt_connlabel_mtinfo), + .destroy = connlabel_mt_destroy, + .me = THIS_MODULE, +}; + +static int __init connlabel_mt_init(void) +{ + return xt_register_match(&connlabels_mt_reg); +} + +static void __exit connlabel_mt_exit(void) +{ + xt_unregister_match(&connlabels_mt_reg); +} + +module_init(connlabel_mt_init); +module_exit(connlabel_mt_exit); -- cgit v1.2.3 From 0ceabd83875b72a29f33db4ab703d6ba40ea4c58 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 11 Jan 2013 06:30:45 +0000 Subject: netfilter: ctnetlink: deliver labels to userspace Introduce CTA_LABELS attribute to send a bit-vector of currently active labels to userspace. Future patch will permit userspace to also set/delete active labels. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 1 + include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + net/netfilter/nf_conntrack_labels.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 41 ++++++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 1644cdd8be91..d69483fb3825 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -101,6 +101,7 @@ enum ip_conntrack_events { IPCT_MARK, /* new mark has been set */ IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ IPCT_SECMARK, /* new security mark has been set */ + IPCT_LABEL, /* new connlabel has been set */ }; enum ip_conntrack_expect_events { diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 86e930cf3dfb..9e71e0c081fd 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -49,6 +49,7 @@ enum ctattr_type { CTA_SECCTX, CTA_TIMESTAMP, CTA_MARK_MASK, + CTA_LABELS, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 0c542f41f338..ac5d0807d681 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -46,7 +46,7 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit) return 0; if (test_and_set_bit(bit, labels->bits)) - return 0; + nf_conntrack_event_cache(IPCT_LABEL, ct); return 0; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index e0b10ee180ef..5f5386382f13 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -324,6 +324,40 @@ nla_put_failure: #define 
ctnetlink_dump_secctx(a, b) (0) #endif +#ifdef CONFIG_NF_CONNTRACK_LABELS +static int ctnetlink_label_size(const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + + if (!labels) + return 0; + return nla_total_size(labels->words * sizeof(long)); +} + +static int +ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) +{ + struct nf_conn_labels *labels = nf_ct_labels_find(ct); + unsigned int len, i; + + if (!labels) + return 0; + + len = labels->words * sizeof(long); + i = 0; + do { + if (labels->bits[i] != 0) + return nla_put(skb, CTA_LABELS, len, labels->bits); + i++; + } while (i < labels->words); + + return 0; +} +#else +#define ctnetlink_dump_labels(a, b) (0) +#define ctnetlink_label_size(a) (0) +#endif + #define master_tuple(ct) &(ct->master->tuplehash[IP_CT_DIR_ORIGINAL].tuple) static inline int @@ -464,6 +498,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ctnetlink_dump_helpinfo(skb, ct) < 0 || ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || + ctnetlink_dump_labels(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || ctnetlink_dump_master(skb, ct) < 0 || @@ -562,6 +597,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif + ctnetlink_proto_size(ct) + + ctnetlink_label_size(ct) ; } @@ -663,6 +699,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) && ctnetlink_dump_secctx(skb, ct) < 0) goto nla_put_failure; #endif + if (events & (1 << IPCT_LABEL) && + ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; if (events & (1 << IPCT_RELATED) && ctnetlink_dump_master(skb, ct) < 0) @@ -1986,6 +2025,8 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif + if (ctnetlink_dump_labels(skb, ct) < 0) + goto nla_put_failure; rcu_read_unlock(); return 0; -- cgit 
v1.2.3 From 9b21f6a90924dfe8e5e686c314ddb441fb06501e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 11 Jan 2013 06:30:46 +0000 Subject: netfilter: ctnetlink: allow userspace to modify labels Add the ability to set/clear labels assigned to a conntrack via ctnetlink. To allow userspace to only alter specific bits, Pablo suggested to add a new CTA_LABELS_MASK attribute: The new set of active labels is then determined via active = (active & ~mask) ^ changeset i.e., the mask selects those bits in the existing set that should be changed. This follows the same method already used by MARK and CONNMARK targets. Omitting CTA_LABELS_MASK is the same as setting all bits in CTA_LABELS_MASK to 1: The existing set is replaced by the one from userspace. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 3 ++ include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + net/netfilter/nf_conntrack_labels.c | 43 +++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 44 ++++++++++++++++++++++ 4 files changed, 91 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index b94fe31c7b39..a3ce5d076fca 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -46,6 +46,9 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) bool nf_connlabel_match(const struct nf_conn *ct, u16 bit); int nf_connlabel_set(struct nf_conn *ct, u16 bit); +int nf_connlabels_replace(struct nf_conn *ct, + const u32 *data, const u32 *mask, unsigned int words); + #ifdef CONFIG_NF_CONNTRACK_LABELS int nf_conntrack_labels_init(struct net *net); void nf_conntrack_labels_fini(struct net *net); diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 9e71e0c081fd..08fabc6c93f3 100644 --- 
a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -50,6 +50,7 @@ enum ctattr_type { CTA_TIMESTAMP, CTA_MARK_MASK, CTA_LABELS, + CTA_LABELS_MASK, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index ac5d0807d681..e1d1eb850e7f 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -52,6 +52,49 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit) } EXPORT_SYMBOL_GPL(nf_connlabel_set); +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) +static void replace_u32(u32 *address, u32 mask, u32 new) +{ + u32 old, tmp; + + do { + old = *address; + tmp = (old & mask) ^ new; + } while (cmpxchg(address, old, tmp) != old); +} + +int nf_connlabels_replace(struct nf_conn *ct, + const u32 *data, + const u32 *mask, unsigned int words32) +{ + struct nf_conn_labels *labels; + unsigned int size, i; + u32 *dst; + + labels = nf_ct_labels_find(ct); + if (!labels) + return -ENOSPC; + + size = labels->words * sizeof(long); + if (size < (words32 * sizeof(u32))) + words32 = size / sizeof(u32); + + dst = (u32 *) labels->bits; + if (words32) { + for (i = 0; i < words32; i++) + replace_u32(&dst[i], mask ? 
~mask[i] : 0, data[i]); + } + + size /= sizeof(u32); + for (i = words32; i < size; i++) /* pad */ + replace_u32(&dst[i], 0, 0); + + nf_conntrack_event_cache(IPCT_LABEL, ct); + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabels_replace); +#endif + static struct nf_ct_ext_type labels_extend __read_mostly = { .len = sizeof(struct nf_conn_labels), .align = __alignof__(struct nf_conn_labels), diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 5f5386382f13..2334cc5d2b16 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -961,6 +961,7 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, return 0; } +#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, @@ -977,6 +978,10 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NLA_NESTED }, [CTA_ZONE] = { .type = NLA_U16 }, [CTA_MARK_MASK] = { .type = NLA_U32 }, + [CTA_LABELS] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, + [CTA_LABELS_MASK] = { .type = NLA_BINARY, + .len = __CTA_LABELS_MAX_LENGTH }, }; static int @@ -1504,6 +1509,31 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct, } #endif +static int +ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[]) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + size_t len = nla_len(cda[CTA_LABELS]); + const void *mask = cda[CTA_LABELS_MASK]; + + if (len & (sizeof(u32)-1)) /* must be multiple of u32 */ + return -EINVAL; + + if (mask) { + if (nla_len(cda[CTA_LABELS_MASK]) == 0 || + nla_len(cda[CTA_LABELS_MASK]) != len) + return -EINVAL; + mask = nla_data(cda[CTA_LABELS_MASK]); + } + + len /= sizeof(u32); + + return nf_connlabels_replace(ct, nla_data(cda[CTA_LABELS]), mask, len); +#else + return -EOPNOTSUPP; +#endif +} + static int 
ctnetlink_change_conntrack(struct nf_conn *ct, const struct nlattr * const cda[]) @@ -1550,6 +1580,11 @@ ctnetlink_change_conntrack(struct nf_conn *ct, return err; } #endif + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } return 0; } @@ -1758,6 +1793,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; + if (cda[CTA_LABELS] && + ctnetlink_attach_labels(ct, cda) == 0) + events |= (1 << IPCT_LABEL); + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | @@ -2055,6 +2094,11 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) if (err < 0) return err; } + if (cda[CTA_LABELS]) { + err = ctnetlink_attach_labels(ct, cda); + if (err < 0) + return err; + } #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); -- cgit v1.2.3 From 7d9f49afa451d8565d00a5cea39acf9bb26feb50 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Wed, 16 Jan 2013 20:28:40 -0800 Subject: serial: rp2: New driver for Comtrol RocketPort 2 cards This driver supports the RocketPort EXPRESS and RocketPort INFINITY families of PCI/PCIe multiport serial adapters. These adapters use a "RocketPort 2" ASIC that is not compatible with the original RocketPort driver (CONFIG_ROCKETPORT). Tested with the RocketPort EXPRESS Octa DB9 and Quad DB9. Also added an old RocketPort 8J PCI card to the same system to verify that rocket.c and rp2.c coexist peacefully. 
Signed-off-by: Kevin Cernekee Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 24 ++ drivers/tty/serial/Makefile | 1 + drivers/tty/serial/rp2.c | 885 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/serial_core.h | 3 + 4 files changed, 913 insertions(+) create mode 100644 drivers/tty/serial/rp2.c (limited to 'include/uapi/linux') diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index aff3cd356662..2dc429357fe3 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1458,4 +1458,28 @@ config SERIAL_ARC_NR_PORTS Set this to the number of serial ports you want the driver to support. +config SERIAL_RP2 + tristate "Comtrol RocketPort EXPRESS/INFINITY support" + depends on PCI + select SERIAL_CORE + help + This driver supports the Comtrol RocketPort EXPRESS and + RocketPort INFINITY families of PCI/PCIe multiport serial adapters. + These adapters use a "RocketPort 2" ASIC that is not compatible + with the original RocketPort driver (CONFIG_ROCKETPORT). + + To compile this driver as a module, choose M here: the + module will be called rp2. + + If you want to compile this driver into the kernel, say Y here. If + you don't have a suitable RocketPort card installed, say N. + +config SERIAL_RP2_NR_UARTS + int "Maximum number of RocketPort EXPRESS/INFINITY ports" + depends on SERIAL_RP2 + default "32" + help + If multiple cards are present, the default limit of 32 ports may + need to be increased. 
+ endmenu diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index 82e4306bf962..eedfec40e3dd 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -84,3 +84,4 @@ obj-$(CONFIG_SERIAL_TEGRA) += serial-tegra.o obj-$(CONFIG_SERIAL_AR933X) += ar933x_uart.o obj-$(CONFIG_SERIAL_EFM32_UART) += efm32-uart.o obj-$(CONFIG_SERIAL_ARC) += arc_uart.o +obj-$(CONFIG_SERIAL_RP2) += rp2.o diff --git a/drivers/tty/serial/rp2.c b/drivers/tty/serial/rp2.c new file mode 100644 index 000000000000..a314a943f124 --- /dev/null +++ b/drivers/tty/serial/rp2.c @@ -0,0 +1,885 @@ +/* + * Driver for Comtrol RocketPort EXPRESS/INFINITY cards + * + * Copyright (C) 2012 Kevin Cernekee + * + * Inspired by, and loosely based on: + * + * ar933x_uart.c + * Copyright (C) 2011 Gabor Juhos + * + * rocketport_infinity_express-linux-1.20.tar.gz + * Copyright (C) 2004-2011 Comtrol, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "rp2" + +#define RP2_FW_NAME "rp2.fw" +#define RP2_UCODE_BYTES 0x3f + +#define PORTS_PER_ASIC 16 +#define ALL_PORTS_MASK (BIT(PORTS_PER_ASIC) - 1) + +#define UART_CLOCK 44236800 +#define DEFAULT_BAUD_DIV (UART_CLOCK / (9600 * 16)) +#define FIFO_SIZE 512 + +/* BAR0 registers */ +#define RP2_FPGA_CTL0 0x110 +#define RP2_FPGA_CTL1 0x11c +#define RP2_IRQ_MASK 0x1ec +#define RP2_IRQ_MASK_EN_m BIT(0) +#define RP2_IRQ_STATUS 0x1f0 + +/* BAR1 registers */ +#define RP2_ASIC_SPACING 0x1000 +#define RP2_ASIC_OFFSET(i) ((i) << ilog2(RP2_ASIC_SPACING)) + +#define RP2_PORT_BASE 0x000 +#define RP2_PORT_SPACING 0x040 + +#define RP2_UCODE_BASE 0x400 +#define RP2_UCODE_SPACING 0x80 + +#define RP2_CLK_PRESCALER 0xc00 +#define RP2_CH_IRQ_STAT 0xc04 +#define RP2_CH_IRQ_MASK 0xc08 +#define RP2_ASIC_IRQ 0xd00 +#define RP2_ASIC_IRQ_EN_m BIT(20) +#define RP2_GLOBAL_CMD 0xd0c +#define RP2_ASIC_CFG 0xd04 + +/* port registers */ +#define RP2_DATA_DWORD 0x000 + +#define RP2_DATA_BYTE 0x008 +#define RP2_DATA_BYTE_ERR_PARITY_m BIT(8) +#define RP2_DATA_BYTE_ERR_OVERRUN_m BIT(9) +#define RP2_DATA_BYTE_ERR_FRAMING_m BIT(10) +#define RP2_DATA_BYTE_BREAK_m BIT(11) + +/* This lets uart_insert_char() drop bytes received on a !CREAD port */ +#define RP2_DUMMY_READ BIT(16) + +#define RP2_DATA_BYTE_EXCEPTION_MASK (RP2_DATA_BYTE_ERR_PARITY_m | \ + RP2_DATA_BYTE_ERR_OVERRUN_m | \ + RP2_DATA_BYTE_ERR_FRAMING_m | \ + RP2_DATA_BYTE_BREAK_m) + +#define RP2_RX_FIFO_COUNT 0x00c +#define RP2_TX_FIFO_COUNT 0x00e + +#define RP2_CHAN_STAT 0x010 +#define RP2_CHAN_STAT_RXDATA_m BIT(0) +#define RP2_CHAN_STAT_DCD_m BIT(3) +#define RP2_CHAN_STAT_DSR_m BIT(4) +#define RP2_CHAN_STAT_CTS_m BIT(5) +#define RP2_CHAN_STAT_RI_m BIT(6) +#define RP2_CHAN_STAT_OVERRUN_m BIT(13) +#define 
RP2_CHAN_STAT_DSR_CHANGED_m BIT(16) +#define RP2_CHAN_STAT_CTS_CHANGED_m BIT(17) +#define RP2_CHAN_STAT_CD_CHANGED_m BIT(18) +#define RP2_CHAN_STAT_RI_CHANGED_m BIT(22) +#define RP2_CHAN_STAT_TXEMPTY_m BIT(25) + +#define RP2_CHAN_STAT_MS_CHANGED_MASK (RP2_CHAN_STAT_DSR_CHANGED_m | \ + RP2_CHAN_STAT_CTS_CHANGED_m | \ + RP2_CHAN_STAT_CD_CHANGED_m | \ + RP2_CHAN_STAT_RI_CHANGED_m) + +#define RP2_TXRX_CTL 0x014 +#define RP2_TXRX_CTL_MSRIRQ_m BIT(0) +#define RP2_TXRX_CTL_RXIRQ_m BIT(2) +#define RP2_TXRX_CTL_RX_TRIG_s 3 +#define RP2_TXRX_CTL_RX_TRIG_m (0x3 << RP2_TXRX_CTL_RX_TRIG_s) +#define RP2_TXRX_CTL_RX_TRIG_1 (0x1 << RP2_TXRX_CTL_RX_TRIG_s) +#define RP2_TXRX_CTL_RX_TRIG_256 (0x2 << RP2_TXRX_CTL_RX_TRIG_s) +#define RP2_TXRX_CTL_RX_TRIG_448 (0x3 << RP2_TXRX_CTL_RX_TRIG_s) +#define RP2_TXRX_CTL_RX_EN_m BIT(5) +#define RP2_TXRX_CTL_RTSFLOW_m BIT(6) +#define RP2_TXRX_CTL_DTRFLOW_m BIT(7) +#define RP2_TXRX_CTL_TX_TRIG_s 16 +#define RP2_TXRX_CTL_TX_TRIG_m (0x3 << RP2_TXRX_CTL_RX_TRIG_s) +#define RP2_TXRX_CTL_DSRFLOW_m BIT(18) +#define RP2_TXRX_CTL_TXIRQ_m BIT(19) +#define RP2_TXRX_CTL_CTSFLOW_m BIT(23) +#define RP2_TXRX_CTL_TX_EN_m BIT(24) +#define RP2_TXRX_CTL_RTS_m BIT(25) +#define RP2_TXRX_CTL_DTR_m BIT(26) +#define RP2_TXRX_CTL_LOOP_m BIT(27) +#define RP2_TXRX_CTL_BREAK_m BIT(28) +#define RP2_TXRX_CTL_CMSPAR_m BIT(29) +#define RP2_TXRX_CTL_nPARODD_m BIT(30) +#define RP2_TXRX_CTL_PARENB_m BIT(31) + +#define RP2_UART_CTL 0x018 +#define RP2_UART_CTL_MODE_s 0 +#define RP2_UART_CTL_MODE_m (0x7 << RP2_UART_CTL_MODE_s) +#define RP2_UART_CTL_MODE_rs232 (0x1 << RP2_UART_CTL_MODE_s) +#define RP2_UART_CTL_FLUSH_RX_m BIT(3) +#define RP2_UART_CTL_FLUSH_TX_m BIT(4) +#define RP2_UART_CTL_RESET_CH_m BIT(5) +#define RP2_UART_CTL_XMIT_EN_m BIT(6) +#define RP2_UART_CTL_DATABITS_s 8 +#define RP2_UART_CTL_DATABITS_m (0x3 << RP2_UART_CTL_DATABITS_s) +#define RP2_UART_CTL_DATABITS_8 (0x3 << RP2_UART_CTL_DATABITS_s) +#define RP2_UART_CTL_DATABITS_7 (0x2 << RP2_UART_CTL_DATABITS_s) +#define 
RP2_UART_CTL_DATABITS_6 (0x1 << RP2_UART_CTL_DATABITS_s) +#define RP2_UART_CTL_DATABITS_5 (0x0 << RP2_UART_CTL_DATABITS_s) +#define RP2_UART_CTL_STOPBITS_m BIT(10) + +#define RP2_BAUD 0x01c + +/* ucode registers */ +#define RP2_TX_SWFLOW 0x02 +#define RP2_TX_SWFLOW_ena 0x81 +#define RP2_TX_SWFLOW_dis 0x9d + +#define RP2_RX_SWFLOW 0x0c +#define RP2_RX_SWFLOW_ena 0x81 +#define RP2_RX_SWFLOW_dis 0x8d + +#define RP2_RX_FIFO 0x37 +#define RP2_RX_FIFO_ena 0x08 +#define RP2_RX_FIFO_dis 0x81 + +static struct uart_driver rp2_uart_driver = { + .owner = THIS_MODULE, + .driver_name = DRV_NAME, + .dev_name = "ttyRP", + .nr = CONFIG_SERIAL_RP2_NR_UARTS, +}; + +struct rp2_card; + +struct rp2_uart_port { + struct uart_port port; + int idx; + int ignore_rx; + struct rp2_card *card; + void __iomem *asic_base; + void __iomem *base; + void __iomem *ucode; +}; + +struct rp2_card { + struct pci_dev *pdev; + struct rp2_uart_port *ports; + int n_ports; + int initialized_ports; + int minor_start; + int smpte; + void __iomem *bar0; + void __iomem *bar1; + spinlock_t card_lock; + struct completion fw_loaded; +}; + +#define RP_ID(prod) PCI_VDEVICE(RP, (prod)) +#define RP_CAP(ports, smpte) (((ports) << 8) | ((smpte) << 0)) + +static inline void rp2_decode_cap(const struct pci_device_id *id, + int *ports, int *smpte) +{ + *ports = id->driver_data >> 8; + *smpte = id->driver_data & 0xff; +} + +static DEFINE_SPINLOCK(rp2_minor_lock); +static int rp2_minor_next; + +static int rp2_alloc_ports(int n_ports) +{ + int ret = -ENOSPC; + + spin_lock(&rp2_minor_lock); + if (rp2_minor_next + n_ports <= CONFIG_SERIAL_RP2_NR_UARTS) { + /* sorry, no support for hot unplugging individual cards */ + ret = rp2_minor_next; + rp2_minor_next += n_ports; + } + spin_unlock(&rp2_minor_lock); + + return ret; +} + +static inline struct rp2_uart_port *port_to_up(struct uart_port *port) +{ + return container_of(port, struct rp2_uart_port, port); +} + +static void rp2_rmw(struct rp2_uart_port *up, int reg, + u32 clr_bits, 
u32 set_bits) +{ + u32 tmp = readl(up->base + reg); + tmp &= ~clr_bits; + tmp |= set_bits; + writel(tmp, up->base + reg); +} + +static void rp2_rmw_clr(struct rp2_uart_port *up, int reg, u32 val) +{ + rp2_rmw(up, reg, val, 0); +} + +static void rp2_rmw_set(struct rp2_uart_port *up, int reg, u32 val) +{ + rp2_rmw(up, reg, 0, val); +} + +static void rp2_mask_ch_irq(struct rp2_uart_port *up, int ch_num, + int is_enabled) +{ + unsigned long flags, irq_mask; + + spin_lock_irqsave(&up->card->card_lock, flags); + + irq_mask = readl(up->asic_base + RP2_CH_IRQ_MASK); + if (is_enabled) + irq_mask &= ~BIT(ch_num); + else + irq_mask |= BIT(ch_num); + writel(irq_mask, up->asic_base + RP2_CH_IRQ_MASK); + + spin_unlock_irqrestore(&up->card->card_lock, flags); +} + +static unsigned int rp2_uart_tx_empty(struct uart_port *port) +{ + struct rp2_uart_port *up = port_to_up(port); + unsigned long tx_fifo_bytes, flags; + + /* + * This should probably check the transmitter, not the FIFO. + * But the TXEMPTY bit doesn't seem to work unless the TX IRQ is + * enabled. + */ + spin_lock_irqsave(&up->port.lock, flags); + tx_fifo_bytes = readw(up->base + RP2_TX_FIFO_COUNT); + spin_unlock_irqrestore(&up->port.lock, flags); + + return tx_fifo_bytes ? 0 : TIOCSER_TEMT; +} + +static unsigned int rp2_uart_get_mctrl(struct uart_port *port) +{ + struct rp2_uart_port *up = port_to_up(port); + u32 status; + + status = readl(up->base + RP2_CHAN_STAT); + return ((status & RP2_CHAN_STAT_DCD_m) ? TIOCM_CAR : 0) | + ((status & RP2_CHAN_STAT_DSR_m) ? TIOCM_DSR : 0) | + ((status & RP2_CHAN_STAT_CTS_m) ? TIOCM_CTS : 0) | + ((status & RP2_CHAN_STAT_RI_m) ? TIOCM_RI : 0); +} + +static void rp2_uart_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + rp2_rmw(port_to_up(port), RP2_TXRX_CTL, + RP2_TXRX_CTL_DTR_m | RP2_TXRX_CTL_RTS_m | RP2_TXRX_CTL_LOOP_m, + ((mctrl & TIOCM_DTR) ? RP2_TXRX_CTL_DTR_m : 0) | + ((mctrl & TIOCM_RTS) ? RP2_TXRX_CTL_RTS_m : 0) | + ((mctrl & TIOCM_LOOP) ? 
RP2_TXRX_CTL_LOOP_m : 0)); +} + +static void rp2_uart_start_tx(struct uart_port *port) +{ + rp2_rmw_set(port_to_up(port), RP2_TXRX_CTL, RP2_TXRX_CTL_TXIRQ_m); +} + +static void rp2_uart_stop_tx(struct uart_port *port) +{ + rp2_rmw_clr(port_to_up(port), RP2_TXRX_CTL, RP2_TXRX_CTL_TXIRQ_m); +} + +static void rp2_uart_stop_rx(struct uart_port *port) +{ + rp2_rmw_clr(port_to_up(port), RP2_TXRX_CTL, RP2_TXRX_CTL_RXIRQ_m); +} + +static void rp2_uart_break_ctl(struct uart_port *port, int break_state) +{ + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + rp2_rmw(port_to_up(port), RP2_TXRX_CTL, RP2_TXRX_CTL_BREAK_m, + break_state ? RP2_TXRX_CTL_BREAK_m : 0); + spin_unlock_irqrestore(&port->lock, flags); +} + +static void rp2_uart_enable_ms(struct uart_port *port) +{ + rp2_rmw_set(port_to_up(port), RP2_TXRX_CTL, RP2_TXRX_CTL_MSRIRQ_m); +} + +static void __rp2_uart_set_termios(struct rp2_uart_port *up, + unsigned long cfl, + unsigned long ifl, + unsigned int baud_div) +{ + /* baud rate divisor (calculated elsewhere). 0 = divide-by-1 */ + writew(baud_div - 1, up->base + RP2_BAUD); + + /* data bits and stop bits */ + rp2_rmw(up, RP2_UART_CTL, + RP2_UART_CTL_STOPBITS_m | RP2_UART_CTL_DATABITS_m, + ((cfl & CSTOPB) ? RP2_UART_CTL_STOPBITS_m : 0) | + (((cfl & CSIZE) == CS8) ? RP2_UART_CTL_DATABITS_8 : 0) | + (((cfl & CSIZE) == CS7) ? RP2_UART_CTL_DATABITS_7 : 0) | + (((cfl & CSIZE) == CS6) ? RP2_UART_CTL_DATABITS_6 : 0) | + (((cfl & CSIZE) == CS5) ? RP2_UART_CTL_DATABITS_5 : 0)); + + /* parity and hardware flow control */ + rp2_rmw(up, RP2_TXRX_CTL, + RP2_TXRX_CTL_PARENB_m | RP2_TXRX_CTL_nPARODD_m | + RP2_TXRX_CTL_CMSPAR_m | RP2_TXRX_CTL_DTRFLOW_m | + RP2_TXRX_CTL_DSRFLOW_m | RP2_TXRX_CTL_RTSFLOW_m | + RP2_TXRX_CTL_CTSFLOW_m, + ((cfl & PARENB) ? RP2_TXRX_CTL_PARENB_m : 0) | + ((cfl & PARODD) ? 0 : RP2_TXRX_CTL_nPARODD_m) | + ((cfl & CMSPAR) ? RP2_TXRX_CTL_CMSPAR_m : 0) | + ((cfl & CRTSCTS) ? 
(RP2_TXRX_CTL_RTSFLOW_m | + RP2_TXRX_CTL_CTSFLOW_m) : 0)); + + /* XON/XOFF software flow control */ + writeb((ifl & IXON) ? RP2_TX_SWFLOW_ena : RP2_TX_SWFLOW_dis, + up->ucode + RP2_TX_SWFLOW); + writeb((ifl & IXOFF) ? RP2_RX_SWFLOW_ena : RP2_RX_SWFLOW_dis, + up->ucode + RP2_RX_SWFLOW); +} + +static void rp2_uart_set_termios(struct uart_port *port, + struct ktermios *new, + struct ktermios *old) +{ + struct rp2_uart_port *up = port_to_up(port); + unsigned long flags; + unsigned int baud, baud_div; + + baud = uart_get_baud_rate(port, new, old, 0, port->uartclk / 16); + baud_div = uart_get_divisor(port, baud); + + if (tty_termios_baud_rate(new)) + tty_termios_encode_baud_rate(new, baud, baud); + + spin_lock_irqsave(&port->lock, flags); + + /* ignore all characters if CREAD is not set */ + port->ignore_status_mask = (new->c_cflag & CREAD) ? 0 : RP2_DUMMY_READ; + + __rp2_uart_set_termios(up, new->c_cflag, new->c_iflag, baud_div); + uart_update_timeout(port, new->c_cflag, baud); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static void rp2_rx_chars(struct rp2_uart_port *up) +{ + u16 bytes = readw(up->base + RP2_RX_FIFO_COUNT); + struct tty_port *port = &up->port.state->port; + + for (; bytes != 0; bytes--) { + u32 byte = readw(up->base + RP2_DATA_BYTE) | RP2_DUMMY_READ; + char ch = byte & 0xff; + + if (likely(!(byte & RP2_DATA_BYTE_EXCEPTION_MASK))) { + if (!uart_handle_sysrq_char(&up->port, ch)) + uart_insert_char(&up->port, byte, 0, ch, + TTY_NORMAL); + } else { + char flag = TTY_NORMAL; + + if (byte & RP2_DATA_BYTE_BREAK_m) + flag = TTY_BREAK; + else if (byte & RP2_DATA_BYTE_ERR_FRAMING_m) + flag = TTY_FRAME; + else if (byte & RP2_DATA_BYTE_ERR_PARITY_m) + flag = TTY_PARITY; + uart_insert_char(&up->port, byte, + RP2_DATA_BYTE_ERR_OVERRUN_m, ch, flag); + } + up->port.icount.rx++; + } + + tty_flip_buffer_push(port); +} + +static void rp2_tx_chars(struct rp2_uart_port *up) +{ + u16 max_tx = FIFO_SIZE - readw(up->base + RP2_TX_FIFO_COUNT); + struct circ_buf *xmit 
= &up->port.state->xmit; + + if (uart_tx_stopped(&up->port)) { + rp2_uart_stop_tx(&up->port); + return; + } + + for (; max_tx != 0; max_tx--) { + if (up->port.x_char) { + writeb(up->port.x_char, up->base + RP2_DATA_BYTE); + up->port.x_char = 0; + up->port.icount.tx++; + continue; + } + if (uart_circ_empty(xmit)) { + rp2_uart_stop_tx(&up->port); + break; + } + writeb(xmit->buf[xmit->tail], up->base + RP2_DATA_BYTE); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + up->port.icount.tx++; + } + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(&up->port); +} + +static void rp2_ch_interrupt(struct rp2_uart_port *up) +{ + u32 status; + + spin_lock(&up->port.lock); + + /* + * The IRQ status bits are clear-on-write. Other status bits in + * this register aren't, so it's harmless to write to them. + */ + status = readl(up->base + RP2_CHAN_STAT); + writel(status, up->base + RP2_CHAN_STAT); + + if (status & RP2_CHAN_STAT_RXDATA_m) + rp2_rx_chars(up); + if (status & RP2_CHAN_STAT_TXEMPTY_m) + rp2_tx_chars(up); + if (status & RP2_CHAN_STAT_MS_CHANGED_MASK) + wake_up_interruptible(&up->port.state->port.delta_msr_wait); + + spin_unlock(&up->port.lock); +} + +static int rp2_asic_interrupt(struct rp2_card *card, unsigned int asic_id) +{ + void __iomem *base = card->bar1 + RP2_ASIC_OFFSET(asic_id); + int ch, handled = 0; + unsigned long status = readl(base + RP2_CH_IRQ_STAT) & + ~readl(base + RP2_CH_IRQ_MASK); + + for_each_set_bit(ch, &status, PORTS_PER_ASIC) { + rp2_ch_interrupt(&card->ports[ch]); + handled++; + } + return handled; +} + +static irqreturn_t rp2_uart_interrupt(int irq, void *dev_id) +{ + struct rp2_card *card = dev_id; + int handled; + + handled = rp2_asic_interrupt(card, 0); + if (card->n_ports >= PORTS_PER_ASIC) + handled += rp2_asic_interrupt(card, 1); + + return handled ? 
IRQ_HANDLED : IRQ_NONE; +} + +static inline void rp2_flush_fifos(struct rp2_uart_port *up) +{ + rp2_rmw_set(up, RP2_UART_CTL, + RP2_UART_CTL_FLUSH_RX_m | RP2_UART_CTL_FLUSH_TX_m); + readl(up->base + RP2_UART_CTL); + udelay(10); + rp2_rmw_clr(up, RP2_UART_CTL, + RP2_UART_CTL_FLUSH_RX_m | RP2_UART_CTL_FLUSH_TX_m); +} + +static int rp2_uart_startup(struct uart_port *port) +{ + struct rp2_uart_port *up = port_to_up(port); + + rp2_flush_fifos(up); + rp2_rmw(up, RP2_TXRX_CTL, RP2_TXRX_CTL_MSRIRQ_m, RP2_TXRX_CTL_RXIRQ_m); + rp2_rmw(up, RP2_TXRX_CTL, RP2_TXRX_CTL_RX_TRIG_m, + RP2_TXRX_CTL_RX_TRIG_1); + rp2_rmw(up, RP2_CHAN_STAT, 0, 0); + rp2_mask_ch_irq(up, up->idx, 1); + + return 0; +} + +static void rp2_uart_shutdown(struct uart_port *port) +{ + struct rp2_uart_port *up = port_to_up(port); + unsigned long flags; + + rp2_uart_break_ctl(port, 0); + + spin_lock_irqsave(&port->lock, flags); + rp2_mask_ch_irq(up, up->idx, 0); + rp2_rmw(up, RP2_CHAN_STAT, 0, 0); + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *rp2_uart_type(struct uart_port *port) +{ + return (port->type == PORT_RP2) ? "RocketPort 2 UART" : NULL; +} + +static void rp2_uart_release_port(struct uart_port *port) +{ + /* Nothing to release ... 
*/ +} + +static int rp2_uart_request_port(struct uart_port *port) +{ + /* UARTs always present */ + return 0; +} + +static void rp2_uart_config_port(struct uart_port *port, int flags) +{ + if (flags & UART_CONFIG_TYPE) + port->type = PORT_RP2; +} + +static int rp2_uart_verify_port(struct uart_port *port, + struct serial_struct *ser) +{ + if (ser->type != PORT_UNKNOWN && ser->type != PORT_RP2) + return -EINVAL; + + return 0; +} + +static const struct uart_ops rp2_uart_ops = { + .tx_empty = rp2_uart_tx_empty, + .set_mctrl = rp2_uart_set_mctrl, + .get_mctrl = rp2_uart_get_mctrl, + .stop_tx = rp2_uart_stop_tx, + .start_tx = rp2_uart_start_tx, + .stop_rx = rp2_uart_stop_rx, + .enable_ms = rp2_uart_enable_ms, + .break_ctl = rp2_uart_break_ctl, + .startup = rp2_uart_startup, + .shutdown = rp2_uart_shutdown, + .set_termios = rp2_uart_set_termios, + .type = rp2_uart_type, + .release_port = rp2_uart_release_port, + .request_port = rp2_uart_request_port, + .config_port = rp2_uart_config_port, + .verify_port = rp2_uart_verify_port, +}; + +static void rp2_reset_asic(struct rp2_card *card, unsigned int asic_id) +{ + void __iomem *base = card->bar1 + RP2_ASIC_OFFSET(asic_id); + u32 clk_cfg; + + writew(1, base + RP2_GLOBAL_CMD); + readw(base + RP2_GLOBAL_CMD); + msleep(100); + writel(0, base + RP2_CLK_PRESCALER); + + /* TDM clock configuration */ + clk_cfg = readw(base + RP2_ASIC_CFG); + clk_cfg = (clk_cfg & ~BIT(8)) | BIT(9); + writew(clk_cfg, base + RP2_ASIC_CFG); + + /* IRQ routing */ + writel(ALL_PORTS_MASK, base + RP2_CH_IRQ_MASK); + writel(RP2_ASIC_IRQ_EN_m, base + RP2_ASIC_IRQ); +} + +static void rp2_init_card(struct rp2_card *card) +{ + writel(4, card->bar0 + RP2_FPGA_CTL0); + writel(0, card->bar0 + RP2_FPGA_CTL1); + + rp2_reset_asic(card, 0); + if (card->n_ports >= PORTS_PER_ASIC) + rp2_reset_asic(card, 1); + + writel(RP2_IRQ_MASK_EN_m, card->bar0 + RP2_IRQ_MASK); +} + +static void rp2_init_port(struct rp2_uart_port *up, const struct firmware *fw) +{ + int i; + + 
writel(RP2_UART_CTL_RESET_CH_m, up->base + RP2_UART_CTL); + readl(up->base + RP2_UART_CTL); + udelay(1); + + writel(0, up->base + RP2_TXRX_CTL); + writel(0, up->base + RP2_UART_CTL); + readl(up->base + RP2_UART_CTL); + udelay(1); + + rp2_flush_fifos(up); + + for (i = 0; i < min_t(int, fw->size, RP2_UCODE_BYTES); i++) + writeb(fw->data[i], up->ucode + i); + + __rp2_uart_set_termios(up, CS8 | CREAD | CLOCAL, 0, DEFAULT_BAUD_DIV); + rp2_uart_set_mctrl(&up->port, 0); + + writeb(RP2_RX_FIFO_ena, up->ucode + RP2_RX_FIFO); + rp2_rmw(up, RP2_UART_CTL, RP2_UART_CTL_MODE_m, + RP2_UART_CTL_XMIT_EN_m | RP2_UART_CTL_MODE_rs232); + rp2_rmw_set(up, RP2_TXRX_CTL, + RP2_TXRX_CTL_TX_EN_m | RP2_TXRX_CTL_RX_EN_m); +} + +static void rp2_remove_ports(struct rp2_card *card) +{ + int i; + + for (i = 0; i < card->initialized_ports; i++) + uart_remove_one_port(&rp2_uart_driver, &card->ports[i].port); + card->initialized_ports = 0; +} + +static void rp2_fw_cb(const struct firmware *fw, void *context) +{ + struct rp2_card *card = context; + resource_size_t phys_base; + int i, rc = -ENOENT; + + if (!fw) { + dev_err(&card->pdev->dev, "cannot find '%s' firmware image\n", + RP2_FW_NAME); + goto no_fw; + } + + phys_base = pci_resource_start(card->pdev, 1); + + for (i = 0; i < card->n_ports; i++) { + struct rp2_uart_port *rp = &card->ports[i]; + struct uart_port *p; + int j = (unsigned)i % PORTS_PER_ASIC; + + rp->asic_base = card->bar1; + rp->base = card->bar1 + RP2_PORT_BASE + j*RP2_PORT_SPACING; + rp->ucode = card->bar1 + RP2_UCODE_BASE + j*RP2_UCODE_SPACING; + rp->card = card; + rp->idx = j; + + p = &rp->port; + p->line = card->minor_start + i; + p->dev = &card->pdev->dev; + p->type = PORT_RP2; + p->iotype = UPIO_MEM32; + p->uartclk = UART_CLOCK; + p->regshift = 2; + p->fifosize = FIFO_SIZE; + p->ops = &rp2_uart_ops; + p->irq = card->pdev->irq; + p->membase = rp->base; + p->mapbase = phys_base + RP2_PORT_BASE + j*RP2_PORT_SPACING; + + if (i >= PORTS_PER_ASIC) { + rp->asic_base += 
RP2_ASIC_SPACING; + rp->base += RP2_ASIC_SPACING; + rp->ucode += RP2_ASIC_SPACING; + p->mapbase += RP2_ASIC_SPACING; + } + + rp2_init_port(rp, fw); + rc = uart_add_one_port(&rp2_uart_driver, p); + if (rc) { + dev_err(&card->pdev->dev, + "error registering port %d: %d\n", i, rc); + rp2_remove_ports(card); + break; + } + card->initialized_ports++; + } + + release_firmware(fw); +no_fw: + /* + * rp2_fw_cb() is called from a workqueue long after rp2_probe() + * has already returned success. So if something failed here, + * we'll just leave the now-dormant device in place until somebody + * unbinds it. + */ + if (rc) + dev_warn(&card->pdev->dev, "driver initialization failed\n"); + + complete(&card->fw_loaded); +} + +static int rp2_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct rp2_card *card; + struct rp2_uart_port *ports; + void __iomem * const *bars; + int rc; + + card = devm_kzalloc(&pdev->dev, sizeof(*card), GFP_KERNEL); + if (!card) + return -ENOMEM; + pci_set_drvdata(pdev, card); + spin_lock_init(&card->card_lock); + init_completion(&card->fw_loaded); + + rc = pcim_enable_device(pdev); + if (rc) + return rc; + + rc = pcim_iomap_regions_request_all(pdev, 0x03, DRV_NAME); + if (rc) + return rc; + + bars = pcim_iomap_table(pdev); + card->bar0 = bars[0]; + card->bar1 = bars[1]; + card->pdev = pdev; + + rp2_decode_cap(id, &card->n_ports, &card->smpte); + dev_info(&pdev->dev, "found new card with %d ports\n", card->n_ports); + + card->minor_start = rp2_alloc_ports(card->n_ports); + if (card->minor_start < 0) { + dev_err(&pdev->dev, + "too many ports (try increasing CONFIG_SERIAL_RP2_NR_UARTS)\n"); + return -EINVAL; + } + + rp2_init_card(card); + + ports = devm_kzalloc(&pdev->dev, sizeof(*ports) * card->n_ports, + GFP_KERNEL); + if (!ports) + return -ENOMEM; + card->ports = ports; + + rc = devm_request_irq(&pdev->dev, pdev->irq, rp2_uart_interrupt, + IRQF_SHARED, DRV_NAME, card); + if (rc) + return rc; + + /* + * Only catastrophic errors (e.g. 
ENOMEM) are reported here. + * If the FW image is missing, we'll find out in rp2_fw_cb() + * and print an error message. + */ + rc = request_firmware_nowait(THIS_MODULE, 1, RP2_FW_NAME, &pdev->dev, + GFP_KERNEL, card, rp2_fw_cb); + if (rc) + return rc; + dev_dbg(&pdev->dev, "waiting for firmware blob...\n"); + + return 0; +} + +static void rp2_remove(struct pci_dev *pdev) +{ + struct rp2_card *card = pci_get_drvdata(pdev); + + wait_for_completion(&card->fw_loaded); + rp2_remove_ports(card); +} + +static DEFINE_PCI_DEVICE_TABLE(rp2_pci_tbl) = { + + /* RocketPort INFINITY cards */ + + { RP_ID(0x0040), RP_CAP(8, 0) }, /* INF Octa, RJ45, selectable */ + { RP_ID(0x0041), RP_CAP(32, 0) }, /* INF 32, ext interface */ + { RP_ID(0x0042), RP_CAP(8, 0) }, /* INF Octa, ext interface */ + { RP_ID(0x0043), RP_CAP(16, 0) }, /* INF 16, ext interface */ + { RP_ID(0x0044), RP_CAP(4, 0) }, /* INF Quad, DB, selectable */ + { RP_ID(0x0045), RP_CAP(8, 0) }, /* INF Octa, DB, selectable */ + { RP_ID(0x0046), RP_CAP(4, 0) }, /* INF Quad, ext interface */ + { RP_ID(0x0047), RP_CAP(4, 0) }, /* INF Quad, RJ45 */ + { RP_ID(0x004a), RP_CAP(4, 0) }, /* INF Plus, Quad */ + { RP_ID(0x004b), RP_CAP(8, 0) }, /* INF Plus, Octa */ + { RP_ID(0x004c), RP_CAP(8, 0) }, /* INF III, Octa */ + { RP_ID(0x004d), RP_CAP(4, 0) }, /* INF III, Quad */ + { RP_ID(0x004e), RP_CAP(2, 0) }, /* INF Plus, 2, RS232 */ + { RP_ID(0x004f), RP_CAP(2, 1) }, /* INF Plus, 2, SMPTE */ + { RP_ID(0x0050), RP_CAP(4, 0) }, /* INF Plus, Quad, RJ45 */ + { RP_ID(0x0051), RP_CAP(8, 0) }, /* INF Plus, Octa, RJ45 */ + { RP_ID(0x0052), RP_CAP(8, 1) }, /* INF Octa, SMPTE */ + + /* RocketPort EXPRESS cards */ + + { RP_ID(0x0060), RP_CAP(8, 0) }, /* EXP Octa, RJ45, selectable */ + { RP_ID(0x0061), RP_CAP(32, 0) }, /* EXP 32, ext interface */ + { RP_ID(0x0062), RP_CAP(8, 0) }, /* EXP Octa, ext interface */ + { RP_ID(0x0063), RP_CAP(16, 0) }, /* EXP 16, ext interface */ + { RP_ID(0x0064), RP_CAP(4, 0) }, /* EXP Quad, DB, selectable */ + { 
RP_ID(0x0065), RP_CAP(8, 0) }, /* EXP Octa, DB, selectable */ + { RP_ID(0x0066), RP_CAP(4, 0) }, /* EXP Quad, ext interface */ + { RP_ID(0x0067), RP_CAP(4, 0) }, /* EXP Quad, RJ45 */ + { RP_ID(0x0068), RP_CAP(8, 0) }, /* EXP Octa, RJ11 */ + { RP_ID(0x0072), RP_CAP(8, 1) }, /* EXP Octa, SMPTE */ + { } +}; +MODULE_DEVICE_TABLE(pci, rp2_pci_tbl); + +static struct pci_driver rp2_pci_driver = { + .name = DRV_NAME, + .id_table = rp2_pci_tbl, + .probe = rp2_probe, + .remove = rp2_remove, +}; + +static int __init rp2_uart_init(void) +{ + int rc; + + rc = uart_register_driver(&rp2_uart_driver); + if (rc) + return rc; + + rc = pci_register_driver(&rp2_pci_driver); + if (rc) { + uart_unregister_driver(&rp2_uart_driver); + return rc; + } + + return 0; +} + +static void __exit rp2_uart_exit(void) +{ + pci_unregister_driver(&rp2_pci_driver); + uart_unregister_driver(&rp2_uart_driver); +} + +module_init(rp2_uart_init); +module_exit(rp2_uart_exit); + +MODULE_DESCRIPTION("Comtrol RocketPort EXPRESS/INFINITY driver"); +MODULE_AUTHOR("Kevin Cernekee "); +MODULE_LICENSE("GPL v2"); +MODULE_FIRMWARE(RP2_FW_NAME); diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 78f99d97475b..9dd47a569726 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -219,4 +219,7 @@ /* ARC (Synopsys) on-chip UART */ #define PORT_ARC 101 +/* Rocketport EXPRESS/INFINITY */ +#define PORT_RP2 102 + #endif /* _UAPILINUX_SERIAL_CORE_H */ -- cgit v1.2.3 From e6f30c731718db45cec380964dfee210307cfc4a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 18 Jan 2013 07:17:30 +0000 Subject: netfilter: x_tables: add xt_bpf match Support arbitrary linux socket filter (BPF) programs as x_tables match rules. This allows for very expressive filters, and on platforms with BPF JIT appears competitive with traditional hardcoded iptables rules using the u32 match. 
The size of the filter has been artificially limited to 64 instructions maximum to avoid bloating the size of each rule using this new match. Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_bpf.h | 17 ++++++++ net/netfilter/Kconfig | 9 +++++ net/netfilter/Makefile | 1 + net/netfilter/xt_bpf.c | 73 +++++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+) create mode 100644 include/uapi/linux/netfilter/xt_bpf.h create mode 100644 net/netfilter/xt_bpf.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h new file mode 100644 index 000000000000..5dda450eb55b --- /dev/null +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -0,0 +1,17 @@ +#ifndef _XT_BPF_H +#define _XT_BPF_H + +#include <linux/filter.h> +#include <linux/types.h> + +#define XT_BPF_MAX_NUM_INSTR 64 + +struct xt_bpf_info { + __u16 bpf_program_num_elem; + struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; + + /* only used in the kernel */ + struct sk_filter *filter __attribute__((aligned(8))); +}; + +#endif /*_XT_BPF_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index bb48607d4ee4..eb2c8ebf6d99 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -811,6 +811,15 @@ config NETFILTER_XT_MATCH_ADDRTYPE If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_BPF + tristate '"bpf" match support' + depends on NETFILTER_ADVANCED + help + BPF matching applies a linux socket filter to each packet and + accepts those for which the filter returns non-zero. + + To compile it as a module, choose M here. If unsure, say N.
+ config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index b3bbda60945e..a1abf87d43bf 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o +obj-$(CONFIG_NETFILTER_XT_MATCH_BPF) += xt_bpf.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c new file mode 100644 index 000000000000..12d4da8e6c77 --- /dev/null +++ b/net/netfilter/xt_bpf.c @@ -0,0 +1,73 @@ +/* Xtables module to match packets using a BPF filter. + * Copyright 2013 Google Inc. + * Written by Willem de Bruijn + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/filter.h> + +#include <linux/netfilter/xt_bpf.h> +#include <linux/netfilter/x_tables.h> + +MODULE_AUTHOR("Willem de Bruijn "); +MODULE_DESCRIPTION("Xtables: BPF filter match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_bpf"); +MODULE_ALIAS("ip6t_bpf"); + +static int bpf_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_bpf_info *info = par->matchinfo; + struct sock_fprog program; + + program.len = info->bpf_program_num_elem; + program.filter = (struct sock_filter __user *) info->bpf_program; + if (sk_unattached_filter_create(&info->filter, &program)) { + pr_info("bpf: check failed: parse error\n"); + return -EINVAL; + } + + return 0; +} + +static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + + return SK_RUN_FILTER(info->filter, skb); +} + +static void bpf_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_bpf_info *info = par->matchinfo; + sk_unattached_filter_destroy(info->filter); +} + +static struct xt_match bpf_mt_reg __read_mostly = { + .name = "bpf", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = bpf_mt_check, + .match = bpf_mt, + .destroy = bpf_mt_destroy, + .matchsize = sizeof(struct xt_bpf_info), + .me = THIS_MODULE, +}; + +static int __init bpf_mt_init(void) +{ + return xt_register_match(&bpf_mt_reg); +} + +static void __exit bpf_mt_exit(void) +{ + xt_unregister_match(&bpf_mt_reg); +} + +module_init(bpf_mt_init); +module_exit(bpf_mt_exit); -- cgit v1.2.3 From e7db3cbcd6508235d63ba4a31bbd1ce4fdece6e1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 21 Jan 2013 12:30:59 +0100 Subject: netfilter: add missing xt_bpf.h header in installation Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 08f555fef13f..8b4bd36a7a84 100644 --- a/include/uapi/linux/netfilter/Kbuild +++
b/include/uapi/linux/netfilter/Kbuild @@ -35,6 +35,7 @@ header-y += xt_TCPOPTSTRIP.h header-y += xt_TEE.h header-y += xt_TPROXY.h header-y += xt_addrtype.h +header-y += xt_bpf.h header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h -- cgit v1.2.3 From 8a454ab95e5ccbffd04363e9c028f60739bc3fa4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 21 Jan 2013 13:02:19 +0100 Subject: netfilter: add missing xt_connlabel.h header in installation Commit c539f01 (netfilter: add connlabel conntrack extension) was missing the change to the Kbuild file that installs the header in the system. Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 8b4bd36a7a84..41115776d76f 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -39,6 +39,7 @@ header-y += xt_bpf.h header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h +header-y += xt_connlabel.h header-y += xt_connlimit.h header-y += xt_connmark.h header-y += xt_conntrack.h -- cgit v1.2.3 From bbb923a4c2d17ebd5ec34755fe19a33914cbd86f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 21 Jan 2013 06:00:25 +0000 Subject: mcast: define and use MRT[6]_MAX in ip[6]_mroute_opt() This will ease further addition of new MRT[6]_* values and avoid having to update in6.h each time. Note that we reduce the maximum value from 210 to 209, but 210 does not match any known value in ip[6]_mroute_setsockopt(). Signed-off-by: Nicolas Dichtel Acked-by: David L Stevens Signed-off-by: David S.
Miller --- include/linux/mroute.h | 2 +- include/linux/mroute6.h | 2 +- include/uapi/linux/in6.h | 15 ++++----------- include/uapi/linux/mroute.h | 1 + include/uapi/linux/mroute6.h | 1 + 5 files changed, 8 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index ea00d9162ee5..79aaa9fc1a15 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -9,7 +9,7 @@ #ifdef CONFIG_IP_MROUTE static inline int ip_mroute_opt(int opt) { - return (opt >= MRT_BASE) && (opt <= MRT_BASE + 10); + return (opt >= MRT_BASE) && (opt <= MRT_MAX); } #else static inline int ip_mroute_opt(int opt) diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index a223561ba12e..66982e764051 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -10,7 +10,7 @@ #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) { - return (opt >= MRT6_BASE) && (opt <= MRT6_BASE + 10); + return (opt >= MRT6_BASE) && (opt <= MRT6_MAX); } #else static inline int ip6_mroute_opt(int opt) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 5673b97dcf54..53b1d56a6e7f 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -259,17 +259,10 @@ struct in6_flowlabel_req { /* * Multicast Routing: - * see include/linux/mroute6.h. + * see include/uapi/linux/mroute6.h. * - * MRT6_INIT 200 - * MRT6_DONE 201 - * MRT6_ADD_MIF 202 - * MRT6_DEL_MIF 203 - * MRT6_ADD_MFC 204 - * MRT6_DEL_MFC 205 - * MRT6_VERSION 206 - * MRT6_ASSERT 207 - * MRT6_PIM 208 - * (reserved) 209 + * MRT6_BASE 200 + * ... 
+ * MRT6_MAX */ #endif /* _UAPI_LINUX_IN6_H */ diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index 16929993acc4..1c11004af5db 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -26,6 +26,7 @@ #define MRT_ASSERT (MRT_BASE+7) /* Activate PIM assert mode */ #define MRT_PIM (MRT_BASE+8) /* enable PIM code */ #define MRT_TABLE (MRT_BASE+9) /* Specify mroute table ID */ +#define MRT_MAX (MRT_BASE+9) #define SIOCGETVIFCNT SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index 3e89b5e7f9e3..c206ae3a2327 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -26,6 +26,7 @@ #define MRT6_ASSERT (MRT6_BASE+7) /* Activate PIM assert mode */ #define MRT6_PIM (MRT6_BASE+8) /* enable PIM code */ #define MRT6_TABLE (MRT6_BASE+9) /* Specify mroute table ID */ +#define MRT6_MAX (MRT6_BASE+9) #define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1) -- cgit v1.2.3 From 660b26dc1a8aeb33c2a2246ebf1b3684449a74b7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 21 Jan 2013 06:00:26 +0000 Subject: mcast: add multicast proxy support (IPv4 and IPv6) This patch adds support for proxy multicast, i.e. being able to build a static multicast tree. It adds support for (*,*) and (*,G) entries. The user should define a (*,*) entry which is not used for real forwarding. This entry defines the upstream in iif and contains all interfaces from the static tree in its oifs. It will be used to forward packets upstream when they come from an interface belonging to the static tree. Hence, the user should define (*,G) entries to build their static tree. Note that the upstream interface must be part of oifs: packets are sent to all oifs interfaces except the input interface.
This ensures to always join the whole static tree, even if the packet is not coming from the upstream interface. Signed-off-by: Nicolas Dichtel Acked-by: David L Stevens Signed-off-by: David S. Miller --- include/uapi/linux/mroute.h | 4 +- include/uapi/linux/mroute6.h | 4 +- net/ipv4/ipmr.c | 119 +++++++++++++++++++++++++++++++++++----- net/ipv6/ip6mr.c | 126 ++++++++++++++++++++++++++++++++++++++----- 4 files changed, 225 insertions(+), 28 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index 1c11004af5db..a382d2c04a42 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -26,7 +26,9 @@ #define MRT_ASSERT (MRT_BASE+7) /* Activate PIM assert mode */ #define MRT_PIM (MRT_BASE+8) /* enable PIM code */ #define MRT_TABLE (MRT_BASE+9) /* Specify mroute table ID */ -#define MRT_MAX (MRT_BASE+9) +#define MRT_ADD_MFC_PROXY (MRT_BASE+10) /* Add a (*,*|G) mfc entry */ +#define MRT_DEL_MFC_PROXY (MRT_BASE+11) /* Del a (*,*|G) mfc entry */ +#define MRT_MAX (MRT_BASE+11) #define SIOCGETVIFCNT SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index c206ae3a2327..ce91215cf7e6 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -26,7 +26,9 @@ #define MRT6_ASSERT (MRT6_BASE+7) /* Activate PIM assert mode */ #define MRT6_PIM (MRT6_BASE+8) /* enable PIM code */ #define MRT6_TABLE (MRT6_BASE+9) /* Specify mroute table ID */ -#define MRT6_MAX (MRT6_BASE+9) +#define MRT6_ADD_MFC_PROXY (MRT6_BASE+10) /* Add a (*,*|G) mfc entry */ +#define MRT6_DEL_MFC_PROXY (MRT6_BASE+11) /* Del a (*,*|G) mfc entry */ +#define MRT6_MAX (MRT6_BASE+11) #define SIOCGETMIFCNT_IN6 SIOCPROTOPRIVATE /* IP protocol privates */ #define SIOCGETSGCNT_IN6 (SIOCPROTOPRIVATE+1) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a9454cbd953c..4b5e22670d44 100644 --- a/net/ipv4/ipmr.c +++ 
b/net/ipv4/ipmr.c @@ -828,6 +828,49 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, + int vifi) +{ + int line = MFC_HASH(INADDR_ANY, INADDR_ANY); + struct mfc_cache *c; + + list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) + if (c->mfc_origin == INADDR_ANY && + c->mfc_mcastgrp == INADDR_ANY && + c->mfc_un.res.ttls[vifi] < 255) + return c; + + return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, + __be32 mcastgrp, int vifi) +{ + int line = MFC_HASH(mcastgrp, INADDR_ANY); + struct mfc_cache *c, *proxy; + + if (mcastgrp == INADDR_ANY) + goto skip; + + list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) + if (c->mfc_origin == INADDR_ANY && + c->mfc_mcastgrp == mcastgrp) { + if (c->mfc_un.res.ttls[vifi] < 255) + return c; + + /* It's ok if the vifi is part of the static tree */ + proxy = ipmr_cache_find_any_parent(mrt, + c->mfc_parent); + if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) + return c; + } + +skip: + return ipmr_cache_find_any_parent(mrt, vifi); +} + /* * Allocate a multicast cache entry */ @@ -1053,7 +1096,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) * MFC cache manipulation by user space mroute daemon */ -static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) +static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { int line; struct mfc_cache *c, *next; @@ -1062,7 +1105,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && + (parent == -1 || parent == c->mfc_parent)) { list_del_rcu(&c->list); mroute_netlink_event(mrt, c, 
RTM_DELROUTE); ipmr_cache_free(c); @@ -1073,7 +1117,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc) } static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, - struct mfcctl *mfc, int mrtsock) + struct mfcctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1086,7 +1130,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { + c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && + (parent == -1 || parent == c->mfc_parent)) { found = true; break; } @@ -1103,7 +1148,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, return 0; } - if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) + if (mfc->mfcc_mcastgrp.s_addr != INADDR_ANY && + !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; c = ipmr_cache_alloc(); @@ -1218,7 +1264,7 @@ static void mrtsock_destruct(struct sock *sk) int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct vifctl vif; struct mfcctl mfc; struct net *net = sock_net(sk); @@ -1287,16 +1333,22 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi */ case MRT_ADD_MFC: case MRT_DEL_MFC: + parent = -1; + case MRT_ADD_MFC_PROXY: + case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mfcc_parent; rtnl_lock(); - if (optname == MRT_DEL_MFC) - ret = ipmr_mfc_delete(mrt, &mfc); + if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) + ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, - sk == rtnl_dereference(mrt->mroute_sk)); + sk == rtnl_dereference(mrt->mroute_sk), + parent); rtnl_unlock(); return ret; /* @@ -1749,17 +1801,28 @@ static int ip_mr_forward(struct 
net *net, struct mr_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ipmr_find_vif(mrt, skb->dev); vif = cache->mfc_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (cache->mfc_origin == INADDR_ANY && true_vifi >= 0) { + struct mfc_cache *cache_proxy; + + /* For an (*,G) entry, we only check that the incomming + * interface is part of the static tree. + */ + cache_proxy = ipmr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif_table[vif].dev != skb->dev) { - int true_vifi; - if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... @@ -1776,7 +1839,6 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, } cache->mfc_un.res.wrong_if++; - true_vifi = ipmr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -1794,15 +1856,33 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, goto dont_forward; } +forward: mrt->vif_table[vif].pkt_in++; mrt->vif_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (cache->mfc_origin == INADDR_ANY && + cache->mfc_mcastgrp == INADDR_ANY) { + if (true_vifi >= 0 && + true_vifi != cache->mfc_parent && + ip_hdr(skb)->ttl > + cache->mfc_un.res.ttls[cache->mfc_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. 
+ */ + psend = cache->mfc_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((cache->mfc_origin != INADDR_ANY || ct != true_vifi) && + ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -1813,6 +1893,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { if (local) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -1902,6 +1983,13 @@ int ip_mr_input(struct sk_buff *skb) /* already under rcu_read_lock() */ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ipmr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2107,7 +2195,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, rcu_read_lock(); cache = ipmr_cache_find(mrt, saddr, daddr); + if (cache == NULL && skb->dev) { + int vif = ipmr_find_vif(mrt, skb->dev); + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, daddr, vif); + } if (cache == NULL) { struct sk_buff *skb2; struct iphdr *iph; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 26dcdec9e3a5..acc32494006a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1017,6 +1017,50 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, return NULL; } +/* Look for a (*,*,oif) entry */ +static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt, + mifi_t mifi) +{ + int line = MFC6_HASH(&in6addr_any, &in6addr_any); + struct mfc6_cache *c; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_any(&c->mf6c_mcastgrp) && + (c->mfc_un.res.ttls[mifi] < 255)) + return c; + + 
return NULL; +} + +/* Look for a (*,G) entry */ +static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt, + struct in6_addr *mcastgrp, + mifi_t mifi) +{ + int line = MFC6_HASH(mcastgrp, &in6addr_any); + struct mfc6_cache *c, *proxy; + + if (ipv6_addr_any(mcastgrp)) + goto skip; + + list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) { + if (c->mfc_un.res.ttls[mifi] < 255) + return c; + + /* It's ok if the mifi is part of the static tree */ + proxy = ip6mr_cache_find_any_parent(mrt, + c->mf6c_parent); + if (proxy && proxy->mfc_un.res.ttls[mifi] < 255) + return c; + } + +skip: + return ip6mr_cache_find_any_parent(mrt, mifi); +} + /* * Allocate a multicast cache entry */ @@ -1247,7 +1291,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) +static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, + int parent) { int line; struct mfc6_cache *c, *next; @@ -1256,7 +1301,9 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == c->mf6c_parent)) { write_lock_bh(&mrt_lock); list_del(&c->list); write_unlock_bh(&mrt_lock); @@ -1391,7 +1438,7 @@ void ip6_mr_cleanup(void) } static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, - struct mf6cctl *mfc, int mrtsock) + struct mf6cctl *mfc, int mrtsock, int parent) { bool found = false; int line; @@ -1413,7 +1460,9 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, list_for_each_entry(c, 
&mrt->mfc6_cache_array[line], list) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { + ipv6_addr_equal(&c->mf6c_mcastgrp, + &mfc->mf6cc_mcastgrp.sin6_addr) && + (parent == -1 || parent == mfc->mf6cc_parent)) { found = true; break; } @@ -1430,7 +1479,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, return 0; } - if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) + if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) && + !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; c = ip6mr_cache_alloc(); @@ -1596,7 +1646,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { - int ret; + int ret, parent = 0; struct mif6ctl vif; struct mf6cctl mfc; mifi_t mifi; @@ -1653,15 +1703,21 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns */ case MRT6_ADD_MFC: case MRT6_DEL_MFC: + parent = -1; + case MRT6_ADD_MFC_PROXY: + case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) return -EINVAL; if (copy_from_user(&mfc, optval, sizeof(mfc))) return -EFAULT; + if (parent == 0) + parent = mfc.mf6cc_parent; rtnl_lock(); - if (optname == MRT6_DEL_MFC) - ret = ip6mr_mfc_delete(mrt, &mfc); + if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY) + ret = ip6mr_mfc_delete(mrt, &mfc, parent); else - ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk); + ret = ip6mr_mfc_add(net, mrt, &mfc, + sk == mrt->mroute6_sk, parent); rtnl_unlock(); return ret; @@ -2015,19 +2071,29 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, { int psend = -1; int vif, ct; + int true_vifi = ip6mr_find_vif(mrt, skb->dev); vif = cache->mf6c_parent; cache->mfc_un.res.pkt++; cache->mfc_un.res.bytes += skb->len; + if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { + struct mfc6_cache *cache_proxy; 
+ + /* For an (*,G) entry, we only check that the incoming + * interface is part of the static tree. + */ + cache_proxy = ip6mr_cache_find_any_parent(mrt, vif); + if (cache_proxy && + cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + goto forward; + } + /* * Wrong interface: drop packet and (maybe) send PIM assert. */ if (mrt->vif6_table[vif].dev != skb->dev) { - int true_vifi; - cache->mfc_un.res.wrong_if++; - true_vifi = ip6mr_find_vif(mrt, skb->dev); if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -2045,14 +2111,32 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, goto dont_forward; } +forward: mrt->vif6_table[vif].pkt_in++; mrt->vif6_table[vif].bytes_in += skb->len; /* * Forward the frame */ + if (ipv6_addr_any(&cache->mf6c_origin) && + ipv6_addr_any(&cache->mf6c_mcastgrp)) { + if (true_vifi >= 0 && + true_vifi != cache->mf6c_parent && + ipv6_hdr(skb)->hop_limit > + cache->mfc_un.res.ttls[cache->mf6c_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only.
+ */ + psend = cache->mf6c_parent; + goto last_forward; + } + goto dont_forward; + } for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { - if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { + /* For (*,G) entry, don't forward to the incoming interface */ + if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) && + ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) @@ -2061,6 +2145,7 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt, psend = ct; } } +last_forward: if (psend != -1) { ip6mr_forward2(net, mrt, skb, cache, psend); return 0; @@ -2096,6 +2181,14 @@ int ip6_mr_input(struct sk_buff *skb) read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + if (cache == NULL) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, + &ipv6_hdr(skb)->daddr, + vif); + } /* * No usable cache entry @@ -2183,6 +2276,13 @@ int ip6mr_get_route(struct net *net, read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); + if (!cache && skb->dev) { + int vif = ip6mr_find_vif(mrt, skb->dev); + + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr, + vif); + } if (!cache) { struct sk_buff *skb2; -- cgit v1.2.3 From 7e58d5aea8abb993983a3f3088fd4a3f06180a1c Mon Sep 17 00:00:00 2001 From: Amos Kong Date: Mon, 21 Jan 2013 01:17:23 +0000 Subject: virtio-net: introduce a new control to set macaddr Currently we write MAC address to pci config space byte by byte, this means that we have an intermediate step where mac is wrong. This patch introduced a new control command to set MAC address, it's atomic. VIRTIO_NET_F_CTRL_MAC_ADDR is a new feature bit for compatibility. Signed-off-by: Amos Kong Signed-off-by: David S. 
Miller --- drivers/net/virtio_net.c | 21 ++++++++++++++++++--- include/uapi/linux/virtio_net.h | 8 +++++++- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 395ab4ff3e64..701408a1ded6 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -802,14 +802,28 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) struct virtnet_info *vi = netdev_priv(dev); struct virtio_device *vdev = vi->vdev; int ret; + struct sockaddr *addr = p; + struct scatterlist sg; - ret = eth_mac_addr(dev, p); + ret = eth_prepare_mac_addr_change(dev, p); if (ret) return ret; - if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) + if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { + sg_init_one(&sg, addr->sa_data, dev->addr_len); + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, + VIRTIO_NET_CTRL_MAC_ADDR_SET, + &sg, 1, 0)) { + dev_warn(&vdev->dev, + "Failed to set mac address by vq command.\n"); + return -EINVAL; + } + } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { vdev->config->set(vdev, offsetof(struct virtio_net_config, mac), - dev->dev_addr, dev->addr_len); + addr->sa_data, dev->addr_len); + } + + eth_commit_mac_addr_change(dev, p); return 0; } @@ -1627,6 +1641,7 @@ static unsigned int features[] = { VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, + VIRTIO_NET_F_CTRL_MAC_ADDR, }; static struct virtio_driver virtio_net_driver = { diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 848e3584d7c8..a5a8c88753b9 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -53,6 +53,7 @@ * network */ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ +#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up 
*/ #define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ @@ -127,7 +128,7 @@ typedef __u8 virtio_net_ctrl_ack; #define VIRTIO_NET_CTRL_RX_NOBCAST 5 /* - * Control the MAC filter table. + * Control the MAC * * The MAC filter table is managed by the hypervisor, the guest should * assume the size is infinite. Filtering should be considered @@ -140,6 +141,10 @@ typedef __u8 virtio_net_ctrl_ack; * first sg list contains unicast addresses, the second is for multicast. * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature * is available. + * + * The ADDR_SET command requests one out scatterlist, which contains a + * 6-byte MAC address. This functionality is present if the + * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available. */ struct virtio_net_ctrl_mac { __u32 entries; @@ -148,6 +153,7 @@ struct virtio_net_ctrl_mac { #define VIRTIO_NET_CTRL_MAC 1 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 + #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 /* * Control VLAN filtering -- cgit v1.2.3 From afe759511808cd5bb508b598007cf0c7b0ca8e08 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Tue, 15 Jan 2013 17:20:58 +0800 Subject: libata: identify and init ZPODD devices The ODD can be enabled for ZPODD if the following three conditions are satisfied: 1 The ODD supports device attention; 2 The platform can runtime power off the ODD through ACPI; 3 The ODD is either slot type or drawer type. For such ODDs, zpodd_init is called and a new structure is allocated for it to store ZPODD related stuff. And the zpodd_dev_enabled function is used to test if ZPODD is currently enabled for this ODD. A new config CONFIG_SATA_ZPODD is added to selectively build ZPODD code.
Signed-off-by: Aaron Lu Acked-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/Kconfig | 13 ++++++ drivers/ata/Makefile | 1 + drivers/ata/libata-core.c | 4 +- drivers/ata/libata-scsi.c | 2 + drivers/ata/libata-zpodd.c | 100 +++++++++++++++++++++++++++++++++++++++++++++ drivers/ata/libata.h | 14 +++++++ include/linux/libata.h | 3 ++ include/uapi/linux/cdrom.h | 34 +++++++++++++++ 8 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 drivers/ata/libata-zpodd.c (limited to 'include/uapi/linux') diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index e08d322d01d7..996d16c9c6e5 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -58,6 +58,19 @@ config ATA_ACPI You can disable this at kernel boot time by using the option libata.noacpi=1 +config SATA_ZPODD + bool "SATA Zero Power ODD Support" + depends on ATA_ACPI + default n + help + This option adds support for SATA ZPODD. It requires both + ODD and the platform support, and if enabled, will automatically + power on/off the ODD when certain condition is satisfied. This + does not impact user's experience of the ODD, only power is saved + when ODD is not in use(i.e. no disc inside). + + If unsure, say N. 
+ config SATA_PMP bool "SATA Port Multiplier support" default y diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 9329dafba91b..85e3de463ed1 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -107,3 +107,4 @@ libata-y := libata-core.o libata-scsi.o libata-eh.o libata-transport.o libata-$(CONFIG_ATA_SFF) += libata-sff.o libata-$(CONFIG_SATA_PMP) += libata-pmp.o libata-$(CONFIG_ATA_ACPI) += libata-acpi.o +libata-$(CONFIG_SATA_ZPODD) += libata-zpodd.o diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 275941b576a8..c7ecd8492f1e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2401,8 +2401,10 @@ int ata_dev_configure(struct ata_device *dev) dma_dir_string = ", DMADIR"; } - if (ata_id_has_da(dev->id)) + if (ata_id_has_da(dev->id)) { dev->flags |= ATA_DFLAG_DA; + zpodd_init(dev); + } /* print device info to dmesg */ if (ata_msg_drv(ap) && print_info) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7c337e754dab..1ff018525e3b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3755,6 +3755,8 @@ static void ata_scsi_remove_dev(struct ata_device *dev) mutex_lock(&ap->scsi_host->scan_mutex); spin_lock_irqsave(ap->lock, flags); + if (zpodd_dev_enabled(dev)) + zpodd_exit(dev); ata_acpi_unbind(dev); /* clearing dev->sdev is protected by host lock */ diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c new file mode 100644 index 000000000000..27eed2f09a8a --- /dev/null +++ b/drivers/ata/libata-zpodd.c @@ -0,0 +1,100 @@ +#include +#include + +#include "libata.h" + +enum odd_mech_type { + ODD_MECH_TYPE_SLOT, + ODD_MECH_TYPE_DRAWER, + ODD_MECH_TYPE_UNSUPPORTED, +}; + +struct zpodd { + enum odd_mech_type mech_type; /* init during probe, RO afterwards */ + struct ata_device *dev; +}; + +/* Per the spec, only slot type and drawer type ODD can be supported */ +static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) +{ + char buf[16]; + 
unsigned int ret; + struct rm_feature_desc *desc = (void *)(buf + 8); + struct ata_taskfile tf = {}; + + char cdb[] = { GPCMD_GET_CONFIGURATION, + 2, /* only 1 feature descriptor requested */ + 0, 3, /* 3, removable medium feature */ + 0, 0, 0,/* reserved */ + 0, sizeof(buf), + 0, 0, 0, + }; + + tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; + tf.command = ATA_CMD_PACKET; + tf.protocol = ATAPI_PROT_PIO; + tf.lbam = sizeof(buf); + + ret = ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, + buf, sizeof(buf), 0); + if (ret) + return ODD_MECH_TYPE_UNSUPPORTED; + + if (be16_to_cpu(desc->feature_code) != 3) + return ODD_MECH_TYPE_UNSUPPORTED; + + if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) + return ODD_MECH_TYPE_SLOT; + else if (desc->mech_type == 1 && desc->load == 0 && desc->eject == 1) + return ODD_MECH_TYPE_DRAWER; + else + return ODD_MECH_TYPE_UNSUPPORTED; +} + +static bool odd_can_poweroff(struct ata_device *ata_dev) +{ + acpi_handle handle; + acpi_status status; + struct acpi_device *acpi_dev; + + handle = ata_dev_acpi_handle(ata_dev); + if (!handle) + return false; + + status = acpi_bus_get_device(handle, &acpi_dev); + if (ACPI_FAILURE(status)) + return false; + + return acpi_device_can_poweroff(acpi_dev); +} + +void zpodd_init(struct ata_device *dev) +{ + enum odd_mech_type mech_type; + struct zpodd *zpodd; + + if (dev->zpodd) + return; + + if (!odd_can_poweroff(dev)) + return; + + mech_type = zpodd_get_mech_type(dev); + if (mech_type == ODD_MECH_TYPE_UNSUPPORTED) + return; + + zpodd = kzalloc(sizeof(struct zpodd), GFP_KERNEL); + if (!zpodd) + return; + + zpodd->mech_type = mech_type; + + zpodd->dev = dev; + dev->zpodd = zpodd; +} + +void zpodd_exit(struct ata_device *dev) +{ + kfree(dev->zpodd); + dev->zpodd = NULL; +} diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 7148a58020b9..a21740b4ee11 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -230,4 +230,18 @@ static inline void ata_sff_exit(void) { } #endif /* 
CONFIG_ATA_SFF */ +/* libata-zpodd.c */ +#ifdef CONFIG_SATA_ZPODD +void zpodd_init(struct ata_device *dev); +void zpodd_exit(struct ata_device *dev); +static inline bool zpodd_dev_enabled(struct ata_device *dev) +{ + return dev->zpodd != NULL; +} +#else /* CONFIG_SATA_ZPODD */ +static inline void zpodd_init(struct ata_device *dev) {} +static inline void zpodd_exit(struct ata_device *dev) {} +static inline bool zpodd_dev_enabled(struct ata_device *dev) { return false; } +#endif /* CONFIG_SATA_ZPODD */ + #endif /* __LIBATA_H__ */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 7ae207eb29a0..65ff67e34b77 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -620,6 +620,9 @@ struct ata_device { #ifdef CONFIG_ATA_ACPI union acpi_object *gtf_cache; unsigned int gtf_filter; +#endif +#ifdef CONFIG_SATA_ZPODD + void *zpodd; #endif struct device tdev; /* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */ diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h index 898b866b300c..bd17ad5aa06d 100644 --- a/include/uapi/linux/cdrom.h +++ b/include/uapi/linux/cdrom.h @@ -908,5 +908,39 @@ struct mode_page_header { __be16 desc_length; }; +/* removable medium feature descriptor */ +struct rm_feature_desc { + __be16 feature_code; +#if defined(__BIG_ENDIAN_BITFIELD) + __u8 reserved1:2; + __u8 feature_version:4; + __u8 persistent:1; + __u8 curr:1; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + __u8 curr:1; + __u8 persistent:1; + __u8 feature_version:4; + __u8 reserved1:2; +#endif + __u8 add_len; +#if defined(__BIG_ENDIAN_BITFIELD) + __u8 mech_type:3; + __u8 load:1; + __u8 eject:1; + __u8 pvnt_jmpr:1; + __u8 dbml:1; + __u8 lock:1; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + __u8 lock:1; + __u8 dbml:1; + __u8 pvnt_jmpr:1; + __u8 eject:1; + __u8 load:1; + __u8 mech_type:3; +#endif + __u8 reserved2; + __u8 reserved3; + __u8 reserved4; +}; #endif /* _UAPI_LINUX_CDROM_H */ -- cgit v1.2.3 From 749cf76c5a363e1383108a914ea09530bfa0bd43 Mon Sep 
17 00:00:00 2001 From: Christoffer Dall Date: Sun, 20 Jan 2013 18:28:06 -0500 Subject: KVM: ARM: Initial skeleton to compile KVM support Targets KVM support for Cortex A-15 processors. Contains all the framework components, make files, header files, some tracing functionality, and basic user space API. Only supported core is Cortex-A15 for now. Most functionality is in arch/arm/kvm/* or arch/arm/include/asm/kvm_*.h. Reviewed-by: Will Deacon Reviewed-by: Marcelo Tosatti Signed-off-by: Rusty Russell Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 57 +++++- arch/arm/Kconfig | 2 + arch/arm/Makefile | 1 + arch/arm/include/asm/kvm_arm.h | 24 +++ arch/arm/include/asm/kvm_asm.h | 58 ++++++ arch/arm/include/asm/kvm_coproc.h | 24 +++ arch/arm/include/asm/kvm_emulate.h | 50 ++++++ arch/arm/include/asm/kvm_host.h | 114 ++++++++++++ arch/arm/include/uapi/asm/kvm.h | 106 +++++++++++ arch/arm/kvm/Kconfig | 55 ++++++ arch/arm/kvm/Makefile | 21 +++ arch/arm/kvm/arm.c | 350 +++++++++++++++++++++++++++++++++++++ arch/arm/kvm/coproc.c | 23 +++ arch/arm/kvm/emulate.c | 155 ++++++++++++++++ arch/arm/kvm/guest.c | 221 +++++++++++++++++++++++ arch/arm/kvm/init.S | 19 ++ arch/arm/kvm/interrupts.S | 19 ++ arch/arm/kvm/mmu.c | 17 ++ arch/arm/kvm/reset.c | 74 ++++++++ arch/arm/kvm/trace.h | 52 ++++++ include/uapi/linux/kvm.h | 7 + 21 files changed, 1445 insertions(+), 4 deletions(-) create mode 100644 arch/arm/include/asm/kvm_arm.h create mode 100644 arch/arm/include/asm/kvm_asm.h create mode 100644 arch/arm/include/asm/kvm_coproc.h create mode 100644 arch/arm/include/asm/kvm_emulate.h create mode 100644 arch/arm/include/asm/kvm_host.h create mode 100644 arch/arm/include/uapi/asm/kvm.h create mode 100644 arch/arm/kvm/Kconfig create mode 100644 arch/arm/kvm/Makefile create mode 100644 arch/arm/kvm/arm.c create mode 100644 arch/arm/kvm/coproc.c create mode 100644 arch/arm/kvm/emulate.c create mode 100644 arch/arm/kvm/guest.c create mode 100644 
arch/arm/kvm/init.S create mode 100644 arch/arm/kvm/interrupts.S create mode 100644 arch/arm/kvm/mmu.c create mode 100644 arch/arm/kvm/reset.c create mode 100644 arch/arm/kvm/trace.h (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a4df5535996b..4237c27ea612 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -293,7 +293,7 @@ kvm_run' (see below). 4.11 KVM_GET_REGS Capability: basic -Architectures: all +Architectures: all except ARM Type: vcpu ioctl Parameters: struct kvm_regs (out) Returns: 0 on success, -1 on error @@ -314,7 +314,7 @@ struct kvm_regs { 4.12 KVM_SET_REGS Capability: basic -Architectures: all +Architectures: all except ARM Type: vcpu ioctl Parameters: struct kvm_regs (in) Returns: 0 on success, -1 on error @@ -600,7 +600,7 @@ struct kvm_fpu { 4.24 KVM_CREATE_IRQCHIP Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 +Architectures: x86, ia64, ARM Type: vm ioctl Parameters: none Returns: 0 on success, -1 on error @@ -608,7 +608,8 @@ Returns: 0 on success, -1 on error Creates an interrupt controller model in the kernel. On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 -only go to the IOAPIC. On ia64, a IOSAPIC is created. +only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM, a GIC is +created. 4.25 KVM_IRQ_LINE @@ -1775,6 +1776,14 @@ registers, find a list below: PPC | KVM_REG_PPC_VPA_DTL | 128 PPC | KVM_REG_PPC_EPCR | 32 +ARM registers are mapped using the lower 32 bits. The upper 16 of that +is the register group type, or coprocessor number: + +ARM core registers have the following id bit patterns: + 0x4002 0000 0010 + + + 4.69 KVM_GET_ONE_REG Capability: KVM_CAP_ONE_REG @@ -2127,6 +2136,46 @@ written, then `n_invalid' invalid entries, invalidating any previously valid entries found. 
+4.77 KVM_ARM_VCPU_INIT + +Capability: basic +Architectures: arm +Type: vcpu ioctl +Parameters: struct kvm_vcpu_init (in) +Returns: 0 on success; -1 on error +Errors: +  EINVAL:    the target is unknown, or the combination of features is invalid. +  ENOENT:    a features bit specified is unknown. + +This tells KVM what type of CPU to present to the guest, and what +optional features it should have.  This will cause a reset of the cpu +registers to their initial values.  If this is not called, KVM_RUN will +return ENOEXEC for that vcpu. + +Note that because some registers reflect machine topology, all vcpus +should be created before this ioctl is invoked. + + +4.78 KVM_GET_REG_LIST + +Capability: basic +Architectures: arm +Type: vcpu ioctl +Parameters: struct kvm_reg_list (in/out) +Returns: 0 on success; -1 on error +Errors: +  E2BIG:     the reg index list is too big to fit in the array specified by +             the user (the number required will be written into n). + +struct kvm_reg_list { + __u64 n; /* number of registers in reg[] */ + __u64 reg[0]; +}; + +This ioctl returns the guest registers that are supported for the +KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. + + 5. The kvm_run structure ------------------------ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 67874b82a4ed..e0627cdbcda5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2322,3 +2322,5 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" + +source "arch/arm/kvm/Kconfig" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 30c443c406f3..4bcd2d6b0535 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -252,6 +252,7 @@ core-$(CONFIG_FPE_NWFPE) += arch/arm/nwfpe/ core-$(CONFIG_FPE_FASTFPE) += $(FASTFPE_OBJ) core-$(CONFIG_VFP) += arch/arm/vfp/ core-$(CONFIG_XEN) += arch/arm/xen/ +core-$(CONFIG_KVM_ARM_HOST) += arch/arm/kvm/ # If we have a machine-specific directory, then include it in the build.
core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/ diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h new file mode 100644 index 000000000000..dc678e193417 --- /dev/null +++ b/arch/arm/include/asm/kvm_arm.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_ARM_H__ +#define __ARM_KVM_ARM_H__ + +#include + +#endif /* __ARM_KVM_ARM_H__ */ diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h new file mode 100644 index 000000000000..f9993e5fb695 --- /dev/null +++ b/arch/arm/include/asm/kvm_asm.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_ASM_H__ +#define __ARM_KVM_ASM_H__ + +/* 0 is reserved as an invalid value. */ +#define c0_MPIDR 1 /* MultiProcessor ID Register */ +#define c0_CSSELR 2 /* Cache Size Selection Register */ +#define c1_SCTLR 3 /* System Control Register */ +#define c1_ACTLR 4 /* Auxiliary Control Register */ +#define c1_CPACR 5 /* Coprocessor Access Control */ +#define c2_TTBR0 6 /* Translation Table Base Register 0 */ +#define c2_TTBR0_high 7 /* TTBR0 top 32 bits */ +#define c2_TTBR1 8 /* Translation Table Base Register 1 */ +#define c2_TTBR1_high 9 /* TTBR1 top 32 bits */ +#define c2_TTBCR 10 /* Translation Table Base Control R. */ +#define c3_DACR 11 /* Domain Access Control Register */ +#define c5_DFSR 12 /* Data Fault Status Register */ +#define c5_IFSR 13 /* Instruction Fault Status Register */ +#define c5_ADFSR 14 /* Auxiliary Data Fault Status R */ +#define c5_AIFSR 15 /* Auxiliary Instruction Fault Status R */ +#define c6_DFAR 16 /* Data Fault Address Register */ +#define c6_IFAR 17 /* Instruction Fault Address Register */ +#define c9_L2CTLR 18 /* Cortex A15 L2 Control Register */ +#define c10_PRRR 19 /* Primary Region Remap Register */ +#define c10_NMRR 20 /* Normal Memory Remap Register */ +#define c12_VBAR 21 /* Vector Base Address Register */ +#define c13_CID 22 /* Context ID Register */ +#define c13_TID_URW 23 /* Thread ID, User R/W */ +#define c13_TID_URO 24 /* Thread ID, User R/O */ +#define c13_TID_PRIV 25 /* Thread ID, Privileged */ +#define NR_CP15_REGS 26 /* Number of regs (incl.
invalid) */ + +#define ARM_EXCEPTION_RESET 0 +#define ARM_EXCEPTION_UNDEFINED 1 +#define ARM_EXCEPTION_SOFTWARE 2 +#define ARM_EXCEPTION_PREF_ABORT 3 +#define ARM_EXCEPTION_DATA_ABORT 4 +#define ARM_EXCEPTION_IRQ 5 +#define ARM_EXCEPTION_FIQ 6 + +#endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm/include/asm/kvm_coproc.h b/arch/arm/include/asm/kvm_coproc.h new file mode 100644 index 000000000000..b6d023deb426 --- /dev/null +++ b/arch/arm/include/asm/kvm_coproc.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2012 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_COPROC_H__ +#define __ARM_KVM_COPROC_H__ +#include + +void kvm_reset_coprocs(struct kvm_vcpu *vcpu); + +#endif /* __ARM_KVM_COPROC_H__ */ diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h new file mode 100644 index 000000000000..17dad674b90f --- /dev/null +++ b/arch/arm/include/asm/kvm_emulate.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_EMULATE_H__ +#define __ARM_KVM_EMULATE_H__ + +#include +#include + +u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); +u32 *vcpu_spsr(struct kvm_vcpu *vcpu); + +static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu) +{ + return (u32 *)&vcpu->arch.regs.usr_regs.ARM_pc; +} + +static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu) +{ + return (u32 *)&vcpu->arch.regs.usr_regs.ARM_cpsr; +} + +static inline bool mode_has_spsr(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; + return (cpsr_mode > USR_MODE && cpsr_mode < SYSTEM_MODE); +} + +static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; + return cpsr_mode > USR_MODE;; +} + +#endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h new file mode 100644 index 000000000000..0d9938a20751 --- /dev/null +++ b/arch/arm/include/asm/kvm_host.h @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_HOST_H__ +#define __ARM_KVM_HOST_H__ + +#include +#include + +#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS +#define KVM_MEMORY_SLOTS 32 +#define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +#define KVM_VCPU_MAX_FEATURES 0 + +/* We don't currently support large pages. */ +#define KVM_HPAGE_GFN_SHIFT(x) 0 +#define KVM_NR_PAGE_SIZES 1 +#define KVM_PAGES_PER_HPAGE(x) (1UL<<31) + +struct kvm_vcpu; +u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); +int kvm_target_cpu(void); +int kvm_reset_vcpu(struct kvm_vcpu *vcpu); +void kvm_reset_coprocs(struct kvm_vcpu *vcpu); + +struct kvm_arch { + /* VTTBR value associated with below pgd and vmid */ + u64 vttbr; + + /* + * Anything that is not used directly from assembly code goes + * here. + */ + + /* The VMID generation used for the virt. memory system */ + u64 vmid_gen; + u32 vmid; + + /* Stage-2 page table */ + pgd_t *pgd; +}; + +#define KVM_NR_MEM_OBJS 40 + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. 
+ */ +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + +struct kvm_vcpu_arch { + struct kvm_regs regs; + + int target; /* Processor target */ + DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); + + /* System control coprocessor (cp15) */ + u32 cp15[NR_CP15_REGS]; + + /* The CPU type we expose to the VM */ + u32 midr; + + /* Exception Information */ + u32 hsr; /* Hyp Syndrome Register */ + u32 hxfar; /* Hyp Data/Inst Fault Address Register */ + u32 hpfar; /* Hyp IPA Fault Address Register */ + + /* Interrupt related fields */ + u32 irq_lines; /* IRQ and FIQ levels */ + + /* Hyp exception information */ + u32 hyp_pc; /* PC when exception was taken from Hyp mode */ + + /* Cache some mmu pages needed inside spinlock regions */ + struct kvm_mmu_memory_cache mmu_page_cache; +}; + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct kvm_vcpu_stat { + u32 halt_wakeup; +}; + +struct kvm_vcpu_init; +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init); +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); +struct kvm_one_reg; +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +#endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h new file mode 100644 index 000000000000..1083327b5fcd --- /dev/null +++ b/arch/arm/include/uapi/asm/kvm.h @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __ARM_KVM_H__ +#define __ARM_KVM_H__ + +#include +#include + +#define __KVM_HAVE_GUEST_DEBUG + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + +/* Valid for svc_regs, abt_regs, und_regs, irq_regs in struct kvm_regs */ +#define KVM_ARM_SVC_sp svc_regs[0] +#define KVM_ARM_SVC_lr svc_regs[1] +#define KVM_ARM_SVC_spsr svc_regs[2] +#define KVM_ARM_ABT_sp abt_regs[0] +#define KVM_ARM_ABT_lr abt_regs[1] +#define KVM_ARM_ABT_spsr abt_regs[2] +#define KVM_ARM_UND_sp und_regs[0] +#define KVM_ARM_UND_lr und_regs[1] +#define KVM_ARM_UND_spsr und_regs[2] +#define KVM_ARM_IRQ_sp irq_regs[0] +#define KVM_ARM_IRQ_lr irq_regs[1] +#define KVM_ARM_IRQ_spsr irq_regs[2] + +/* Valid only for fiq_regs in struct kvm_regs */ +#define KVM_ARM_FIQ_r8 fiq_regs[0] +#define KVM_ARM_FIQ_r9 fiq_regs[1] +#define KVM_ARM_FIQ_r10 fiq_regs[2] +#define KVM_ARM_FIQ_fp fiq_regs[3] +#define KVM_ARM_FIQ_ip fiq_regs[4] +#define KVM_ARM_FIQ_sp fiq_regs[5] +#define KVM_ARM_FIQ_lr fiq_regs[6] +#define KVM_ARM_FIQ_spsr fiq_regs[7] + +struct kvm_regs { + struct pt_regs usr_regs;/* R0_usr - R14_usr, PC, CPSR */ + __u32 svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */ + __u32 abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */ + __u32 und_regs[3]; /* SP_und, LR_und, SPSR_und */ + __u32 irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */ + __u32 fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */ +}; + +/* Supported Processor Types */ +#define KVM_ARM_TARGET_CORTEX_A15 0 +#define KVM_ARM_NUM_TARGETS 1 + +struct 
kvm_vcpu_init { + __u32 target; + __u32 features[7]; +}; + +struct kvm_sregs { +}; + +struct kvm_fpu { +}; + +struct kvm_guest_debug_arch { +}; + +struct kvm_debug_exit_arch { +}; + +struct kvm_sync_regs { +}; + +struct kvm_arch_memory_slot { +}; + +/* If you need to interpret the index values, here is the key: */ +#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 +#define KVM_REG_ARM_COPROC_SHIFT 16 +#define KVM_REG_ARM_32_OPC2_MASK 0x0000000000000007 +#define KVM_REG_ARM_32_OPC2_SHIFT 0 +#define KVM_REG_ARM_OPC1_MASK 0x0000000000000078 +#define KVM_REG_ARM_OPC1_SHIFT 3 +#define KVM_REG_ARM_CRM_MASK 0x0000000000000780 +#define KVM_REG_ARM_CRM_SHIFT 7 +#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 +#define KVM_REG_ARM_32_CRN_SHIFT 11 + +/* Normal registers are mapped as coprocessor 16. */ +#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4) + +#endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig new file mode 100644 index 000000000000..4a01b6fbf380 --- /dev/null +++ b/arch/arm/kvm/Kconfig @@ -0,0 +1,55 @@ +# +# KVM configuration +# + +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + bool "Virtualization" + ---help--- + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + bool "Kernel-based Virtual Machine (KVM) support" + select PREEMPT_NOTIFIERS + select ANON_INODES + select KVM_MMIO + select KVM_ARM_HOST + depends on ARM_VIRT_EXT && ARM_LPAE + ---help--- + Support hosting virtualized guest machines. You will also + need to select one or more of the processor modules below. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. 
+ +config KVM_ARM_HOST + bool "KVM host support for ARM cpus." + depends on KVM + depends on MMU + ---help--- + Provides host support for ARM processors. + +config KVM_ARM_MAX_VCPUS + int "Number maximum supported virtual CPUs per VM" + depends on KVM_ARM_HOST + default 4 + help + Static number of max supported virtual CPUs per VM. + + If you choose a high number, the vcpu structures will be quite + large, so only choose a reasonable number that you expect to + actually use. + +source drivers/virtio/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile new file mode 100644 index 000000000000..dfc293f277b3 --- /dev/null +++ b/arch/arm/kvm/Makefile @@ -0,0 +1,21 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +plus_virt := $(call as-instr,.arch_extension virt,+virt) +ifeq ($(plus_virt),+virt) + plus_virt_def := -DREQUIRES_VIRT=1 +endif + +ccflags-y += -Ivirt/kvm -Iarch/arm/kvm +CFLAGS_arm.o := -I. $(plus_virt_def) +CFLAGS_mmu.o := -I. + +AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) +AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) + +kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) + +obj-y += kvm-arm.o init.o interrupts.o +obj-y += arm.o guest.o mmu.o emulate.o reset.o +obj-y += coproc.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c new file mode 100644 index 000000000000..d3506b4001aa --- /dev/null +++ b/arch/arm/kvm/arm.c @@ -0,0 +1,350 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include "trace.h" + +#include +#include +#include +#include +#include + +#ifdef REQUIRES_VIRT +__asm__(".arch_extension virt"); +#endif + +int kvm_arch_hardware_enable(void *garbage) +{ + return 0; +} + +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; +} + +void kvm_arch_hardware_disable(void *garbage) +{ +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +void kvm_arch_check_processor_compat(void *rtn) +{ + *(int *)rtn = 0; +} + +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + if (type) + return -EINVAL; + + return 0; +} + +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + return 0; +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + int i; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_vcpu_free(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + int r; + switch (ext) { + case KVM_CAP_USER_MEMORY: + case KVM_CAP_SYNC_MMU: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_ONE_REG: + r = 1; + break; + case KVM_CAP_COALESCED_MMIO: + r = KVM_COALESCED_MMIO_PAGE_OFFSET; + break; + case KVM_CAP_NR_VCPUS: + r = num_online_cpus(); + break; + case KVM_CAP_MAX_VCPUS: + r 
= KVM_MAX_VCPUS; + break; + default: + r = 0; + break; + } + return r; +} + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + return 0; +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_memory_slot old, + struct kvm_userspace_memory_region *mem, + int user_alloc) +{ + return 0; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +{ + int err; + struct kvm_vcpu *vcpu; + + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) { + err = -ENOMEM; + goto out; + } + + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + return vcpu; +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu); +out: + return ERR_PTR(err); +} + +int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_arch_vcpu_free(vcpu); +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int __attribute_const__ kvm_target_cpu(void) +{ + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); + + if (implementor != ARM_CPU_IMP_ARM) + return -EINVAL; + + switch (part_number) { + case ARM_CPU_PART_CORTEX_A15: + return KVM_ARM_TARGET_CORTEX_A15; + default: + return -EINVAL; + } +} + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 
+{ +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; +} + + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) +{ + return 0; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + return -EINVAL; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case KVM_ARM_VCPU_INIT: { + struct kvm_vcpu_init init; + + if (copy_from_user(&init, argp, sizeof(init))) + return -EFAULT; + + return kvm_vcpu_set_target(vcpu, &init); + + } + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: { + struct kvm_one_reg reg; + if (copy_from_user(&reg, argp, sizeof(reg))) + return -EFAULT; + if (ioctl == KVM_SET_ONE_REG) + return kvm_arm_set_reg(vcpu, &reg); + else + return kvm_arm_get_reg(vcpu, &reg); + } + case KVM_GET_REG_LIST: { + struct kvm_reg_list __user *user_list = argp; + struct kvm_reg_list reg_list; + unsigned n; + + if (copy_from_user(&reg_list, user_list, sizeof(reg_list))) + return -EFAULT; + n = reg_list.n; + reg_list.n = kvm_arm_num_regs(vcpu); + if (copy_to_user(user_list, &reg_list, sizeof(reg_list))) + return -EFAULT; + if (n < reg_list.n) + return -E2BIG; + return kvm_arm_copy_reg_indices(vcpu, user_list->reg); + } + default: + return -EINVAL; + } +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +{ + return -EINVAL; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_init(void *opaque) +{
+ return 0; +} + +/* NOP: Compiling as a module not supported */ +void kvm_arch_exit(void) +{ +} + +static int arm_init(void) +{ + int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + return rc; +} + +module_init(arm_init); diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c new file mode 100644 index 000000000000..0c433558591c --- /dev/null +++ b/arch/arm/kvm/coproc.c @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Rusty Russell + * Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include + +void kvm_reset_coprocs(struct kvm_vcpu *vcpu) +{ +} diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c new file mode 100644 index 000000000000..3eadc25e95de --- /dev/null +++ b/arch/arm/kvm/emulate.c @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include + +#define VCPU_NR_MODES 6 +#define VCPU_REG_OFFSET_USR 0 +#define VCPU_REG_OFFSET_FIQ 1 +#define VCPU_REG_OFFSET_IRQ 2 +#define VCPU_REG_OFFSET_SVC 3 +#define VCPU_REG_OFFSET_ABT 4 +#define VCPU_REG_OFFSET_UND 5 +#define REG_OFFSET(_reg) \ + (offsetof(struct kvm_regs, _reg) / sizeof(u32)) + +#define USR_REG_OFFSET(_num) REG_OFFSET(usr_regs.uregs[_num]) + +static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][15] = { + /* USR/SYS Registers */ + [VCPU_REG_OFFSET_USR] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), + }, + + /* FIQ Registers */ + [VCPU_REG_OFFSET_FIQ] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), + REG_OFFSET(fiq_regs[0]), /* r8 */ + REG_OFFSET(fiq_regs[1]), /* r9 */ + REG_OFFSET(fiq_regs[2]), /* r10 */ + REG_OFFSET(fiq_regs[3]), /* r11 */ + REG_OFFSET(fiq_regs[4]), /* r12 */ + REG_OFFSET(fiq_regs[5]), /* r13 */ + REG_OFFSET(fiq_regs[6]), /* r14 */ + }, + + /* IRQ Registers */ + [VCPU_REG_OFFSET_IRQ] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(irq_regs[0]), /* r13 */ + REG_OFFSET(irq_regs[1]), /* r14 */ + }, + + /* SVC Registers */ + [VCPU_REG_OFFSET_SVC] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + 
USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(svc_regs[0]), /* r13 */ + REG_OFFSET(svc_regs[1]), /* r14 */ + }, + + /* ABT Registers */ + [VCPU_REG_OFFSET_ABT] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(abt_regs[0]), /* r13 */ + REG_OFFSET(abt_regs[1]), /* r14 */ + }, + + /* UND Registers */ + [VCPU_REG_OFFSET_UND] = { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(und_regs[0]), /* r13 */ + REG_OFFSET(und_regs[1]), /* r14 */ + }, +}; + +/* + * Return a pointer to the register number valid in the current mode of + * the virtual CPU. + */ +u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) +{ + u32 *reg_array = (u32 *)&vcpu->arch.regs; + u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK; + + switch (mode) { + case USR_MODE...SVC_MODE: + mode &= ~MODE32_BIT; /* 0 ... 3 */ + break; + + case ABT_MODE: + mode = VCPU_REG_OFFSET_ABT; + break; + + case UND_MODE: + mode = VCPU_REG_OFFSET_UND; + break; + + case SYSTEM_MODE: + mode = VCPU_REG_OFFSET_USR; + break; + + default: + BUG(); + } + + return reg_array + vcpu_reg_offsets[mode][reg_num]; +} + +/* + * Return the SPSR for the current mode of the virtual CPU. 
+ */ +u32 *vcpu_spsr(struct kvm_vcpu *vcpu) +{ + u32 mode = *vcpu_cpsr(vcpu) & MODE_MASK; + switch (mode) { + case SVC_MODE: + return &vcpu->arch.regs.KVM_ARM_SVC_spsr; + case ABT_MODE: + return &vcpu->arch.regs.KVM_ARM_ABT_spsr; + case UND_MODE: + return &vcpu->arch.regs.KVM_ARM_UND_spsr; + case IRQ_MODE: + return &vcpu->arch.regs.KVM_ARM_IRQ_spsr; + case FIQ_MODE: + return &vcpu->arch.regs.KVM_ARM_FIQ_spsr; + default: + BUG(); + } +} diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c new file mode 100644 index 000000000000..a12eb229021d --- /dev/null +++ b/arch/arm/kvm/guest.c @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM } +#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static u64 core_reg_offset_from_id(u64 id) +{ + return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); +} + +static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + u32 __user *uaddr = (u32 __user *)(long)reg->addr; + struct kvm_regs *regs = &vcpu->arch.regs; + u64 off; + + if (KVM_REG_SIZE(reg->id) != 4) + return -ENOENT; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg->id); + if (off >= sizeof(*regs) / KVM_REG_SIZE(reg->id)) + return -ENOENT; + + return put_user(((u32 *)regs)[off], uaddr); +} + +static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + u32 __user *uaddr = (u32 __user *)(long)reg->addr; + struct kvm_regs *regs = &vcpu->arch.regs; + u64 off, val; + + if (KVM_REG_SIZE(reg->id) != 4) + return -ENOENT; + + /* Our ID is an index into the kvm_regs struct. 
*/ + off = core_reg_offset_from_id(reg->id); + if (off >= sizeof(*regs) / KVM_REG_SIZE(reg->id)) + return -ENOENT; + + if (get_user(val, uaddr) != 0) + return -EFAULT; + + if (off == KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr)) { + unsigned long mode = val & MODE_MASK; + switch (mode) { + case USR_MODE: + case FIQ_MODE: + case IRQ_MODE: + case SVC_MODE: + case ABT_MODE: + case UND_MODE: + break; + default: + return -EINVAL; + } + } + + ((u32 *)regs)[off] = val; + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +static unsigned long num_core_regs(void) +{ + return sizeof(struct kvm_regs) / sizeof(u32); +} + +/** + * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG + * + * This is for all registers. + */ +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) +{ + return num_core_regs(); +} + +/** + * kvm_arm_copy_reg_indices - get indices of all registers. + * + * We do core registers right here, then we append coproc regs. + */ +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE; + + for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) { + if (put_user(core_reg | i, uindices)) + return -EFAULT; + uindices++; + } + + return 0; +} + +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM >> 32) + return -EINVAL; + + /* Register group 16 means we want a core register.
*/ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return get_core_reg(vcpu, reg); + + return -EINVAL; +} + +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM >> 32) + return -EINVAL; + + /* Register group 16 means we set a core register. */ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) + return set_core_reg(vcpu, reg); + + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i; + + /* We can only do a cortex A15 for now. */ + if (init->target != kvm_target_cpu()) + return -EINVAL; + + vcpu->arch.target = init->target; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ + for (i = 0; i < sizeof(init->features) * 8; i++) { + if (test_bit(i, (void *)init->features)) { + if (i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + set_bit(i, vcpu->arch.features); + } + } + + /* Now we know what it is, we can reset it. 
*/ + return kvm_reset_vcpu(vcpu); +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; +} diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S new file mode 100644 index 000000000000..1dc8926e26d2 --- /dev/null +++ b/arch/arm/kvm/init.S @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include +#include diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S new file mode 100644 index 000000000000..1dc8926e26d2 --- /dev/null +++ b/arch/arm/kvm/interrupts.S @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include +#include diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c new file mode 100644 index 000000000000..10ed4643269f --- /dev/null +++ b/arch/arm/kvm/mmu.c @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c new file mode 100644 index 000000000000..b80256b554cd --- /dev/null +++ b/arch/arm/kvm/reset.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/****************************************************************************** + * Cortex-A15 Reset Values + */ + +static const int a15_max_cpu_idx = 3; + +static struct kvm_regs a15_regs_reset = { + .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, +}; + + +/******************************************************************************* + * Exported reset function + */ + +/** + * kvm_reset_vcpu - sets core registers and cp15 registers to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on the + * virtual CPU struct to their architecturally defined reset values. + */ +int kvm_reset_vcpu(struct kvm_vcpu *vcpu) +{ + struct kvm_regs *cpu_reset; + + switch (vcpu->arch.target) { + case KVM_ARM_TARGET_CORTEX_A15: + if (vcpu->vcpu_id > a15_max_cpu_idx) + return -EINVAL; + cpu_reset = &a15_regs_reset; + vcpu->arch.midr = read_cpuid_id(); + break; + default: + return -ENODEV; + } + + /* Reset core registers */ + memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs)); + + /* Reset CP15 registers */ + kvm_reset_coprocs(vcpu); + + return 0; +} diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h new file mode 100644 index 000000000000..f8869c19c0a3 --- /dev/null +++ b/arch/arm/kvm/trace.h @@ -0,0 +1,52 @@ +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +/* + * Tracepoints for entry/exit to guest + */ +TRACE_EVENT(kvm_entry, + TP_PROTO(unsigned long vcpu_pc), + TP_ARGS(vcpu_pc), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + ), + + TP_fast_assign( +
__entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("PC: 0x%08lx", __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_exit, + TP_PROTO(unsigned long vcpu_pc), + TP_ARGS(vcpu_pc), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("PC: 0x%08lx", __entry->vcpu_pc) +); + + + +#endif /* _TRACE_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH arch/arm/kvm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e6e5d4b13708..24978d525c6e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -764,6 +764,11 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_U512 0x0060000000000000ULL #define KVM_REG_SIZE_U1024 0x0070000000000000ULL +struct kvm_reg_list { + __u64 n; /* number of regs */ + __u64 reg[0]; +}; + struct kvm_one_reg { __u64 id; __u64 addr; @@ -932,6 +937,8 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) /* VM is being stopped by host */ #define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) +#define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) +#define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -- cgit v1.2.3 From 86ce85352f0da7e1431ad8efcb04323819a620e7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 20 Jan 2013 18:28:08 -0500 Subject: KVM: ARM: Inject IRQs and FIQs from userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All interrupt injection is now based on the VM ioctl KVM_IRQ_LINE. This works semantically well for the GIC as we in fact raise/lower a line on a machine component (the gic). The IOCTL uses the following struct.
struct kvm_irq_level { union { __u32 irq; /* GSI */ __s32 status; /* not used for KVM_IRQ_LEVEL */ }; __u32 level; /* 0 or 1 */ }; ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for specific cpus. The irq field is interpreted like this:  bits: | 31 ... 24 | 23 ... 16 | 15 ... 0 | field: | irq_type | vcpu_index | irq_number | The irq_type field has the following values: - irq_type[0]: out-of-kernel GIC: irq_number 0 is IRQ, irq_number 1 is FIQ - irq_type[1]: in-kernel GIC: SPI, irq_number between 32 and 1019 (incl.) (the vcpu_index field is ignored) - irq_type[2]: in-kernel GIC: PPI, irq_number between 16 and 31 (incl.) The irq_number thus corresponds to the irq ID in as in the GICv2 specs. This is documented in Documentation/kvm/api.txt. Reviewed-by: Will Deacon Reviewed-by: Marcelo Tosatti Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 25 ++++++++++++--- arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/include/uapi/asm/kvm.h | 21 +++++++++++++ arch/arm/kvm/arm.c | 65 +++++++++++++++++++++++++++++++++++++++ arch/arm/kvm/trace.h | 25 +++++++++++++++ include/uapi/linux/kvm.h | 1 + 6 files changed, 134 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4237c27ea612..505049299298 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -615,15 +615,32 @@ created. 4.25 KVM_IRQ_LINE Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 +Architectures: x86, ia64, arm Type: vm ioctl Parameters: struct kvm_irq_level Returns: 0 on success, -1 on error Sets the level of a GSI input to the interrupt controller model in the kernel. -Requires that an interrupt controller model has been previously created with -KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level -to be set to 1 and then back to 0. 
+On some architectures it is required that an interrupt controller model has +been previously created with KVM_CREATE_IRQCHIP. Note that edge-triggered +interrupts require the level to be set to 1 and then back to 0. + +ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip +(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for +specific cpus. The irq field is interpreted like this: + +  bits: | 31 ... 24 | 23 ... 16 | 15 ... 0 | + field: | irq_type | vcpu_index | irq_id | + +The irq_type field has the following values: +- irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ +- irq_type[1]: in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.) + (the vcpu_index field is ignored) +- irq_type[2]: in-kernel GIC: PPI, irq_id between 16 and 31 (incl.) + +(The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs) + +In both cases, level is used to raise/lower the line. struct kvm_irq_level { union { diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 8875b3f605a7..d64b5250ad4e 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -68,6 +68,7 @@ #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ HCR_SWIO | HCR_TIDCP) +#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) /* Hyp System Control Register (HSCTLR) bits */ #define HSCTLR_TE (1 << 30) diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 1083327b5fcd..53f45f146875 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -23,6 +23,7 @@ #include #define __KVM_HAVE_GUEST_DEBUG +#define __KVM_HAVE_IRQ_LINE #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -103,4 +104,24 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) #define KVM_REG_ARM_CORE_REG(name) 
(offsetof(struct kvm_regs, name) / 4) +/* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_TYPE_SHIFT 24 +#define KVM_ARM_IRQ_TYPE_MASK 0xff +#define KVM_ARM_IRQ_VCPU_SHIFT 16 +#define KVM_ARM_IRQ_VCPU_MASK 0xff +#define KVM_ARM_IRQ_NUM_SHIFT 0 +#define KVM_ARM_IRQ_NUM_MASK 0xffff + +/* irq_type field */ +#define KVM_ARM_IRQ_TYPE_CPU 0 +#define KVM_ARM_IRQ_TYPE_SPI 1 +#define KVM_ARM_IRQ_TYPE_PPI 2 + +/* out-of-kernel GIC cpu interrupt injection irq_number field */ +#define KVM_ARM_IRQ_CPU_IRQ 0 +#define KVM_ARM_IRQ_CPU_FIQ 1 + +/* Highest supported SPI, from VGIC_NR_IRQS */ +#define KVM_ARM_IRQ_GIC_MAX 127 + #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d810afb6cb84..2101152c3a4b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -284,6 +285,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + vcpu->cpu = cpu; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -319,6 +321,69 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) return -EINVAL; } +static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) +{ + int bit_index; + bool set; + unsigned long *ptr; + + if (number == KVM_ARM_IRQ_CPU_IRQ) + bit_index = __ffs(HCR_VI); + else /* KVM_ARM_IRQ_CPU_FIQ */ + bit_index = __ffs(HCR_VF); + + ptr = (unsigned long *)&vcpu->arch.irq_lines; + if (level) + set = test_and_set_bit(bit_index, ptr); + else + set = test_and_clear_bit(bit_index, ptr); + + /* + * If we didn't change anything, no need to wake up or kick other CPUs + */ + if (set == level) + return 0; + + /* + * The vcpu irq_lines field was updated, wake up sleeping VCPUs and + * trigger a world-switch round on the running physical CPU to set the + * virtual IRQ/FIQ fields in the HCR appropriately. 
+ */ + kvm_vcpu_kick(vcpu); + + return 0; +} + +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level) +{ + u32 irq = irq_level->irq; + unsigned int irq_type, vcpu_idx, irq_num; + int nrcpus = atomic_read(&kvm->online_vcpus); + struct kvm_vcpu *vcpu = NULL; + bool level = irq_level->level; + + irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK; + vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; + irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK; + + trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level); + + if (irq_type != KVM_ARM_IRQ_TYPE_CPU) + return -EINVAL; + + if (vcpu_idx >= nrcpus) + return -EINVAL; + + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (!vcpu) + return -EINVAL; + + if (irq_num > KVM_ARM_IRQ_CPU_FIQ) + return -EINVAL; + + return vcpu_interrupt_line(vcpu, irq_num, level); +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index 862b2cc12fbe..105d1f79909a 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -39,6 +39,31 @@ TRACE_EVENT(kvm_exit, TP_printk("PC: 0x%08lx", __entry->vcpu_pc) ); +TRACE_EVENT(kvm_irq_line, + TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level), + TP_ARGS(type, vcpu_idx, irq_num, level), + + TP_STRUCT__entry( + __field( unsigned int, type ) + __field( int, vcpu_idx ) + __field( int, irq_num ) + __field( int, level ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->vcpu_idx = vcpu_idx; + __entry->irq_num = irq_num; + __entry->level = level; + ), + + TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d", + (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" : + (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" : + (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? 
"VGIC SPI" : "UNKNOWN", + __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level) +); + TRACE_EVENT(kvm_unmap_hva, TP_PROTO(unsigned long hva), TP_ARGS(hva), diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 24978d525c6e..dc63665e73ad 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -115,6 +115,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. + * For ARM: See Documentation/virtual/kvm/api.txt */ union { __u32 irq; -- cgit v1.2.3 From aa024c2f35a07cc32e48c5f62a5807be01c09249 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 20 Jan 2013 18:28:13 -0500 Subject: KVM: ARM: Power State Coordination Interface implementation Implement the PSCI specification (ARM DEN 0022A) to control virtual CPUs being "powered" on or off. PSCI/KVM is detected using the KVM_CAP_ARM_PSCI capability. A virtual CPU can now be initialized in a "powered off" state, using the KVM_ARM_VCPU_POWER_OFF feature flag. The guest can use either SMC or HVC to execute a PSCI function. 
Reviewed-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/api.txt | 4 ++ arch/arm/include/asm/kvm_emulate.h | 10 ++++ arch/arm/include/asm/kvm_host.h | 5 +- arch/arm/include/asm/kvm_psci.h | 23 ++++++++ arch/arm/include/uapi/asm/kvm.h | 16 ++++++ arch/arm/kvm/Makefile | 2 +- arch/arm/kvm/arm.c | 30 ++++++++++- arch/arm/kvm/psci.c | 108 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 9 files changed, 195 insertions(+), 4 deletions(-) create mode 100644 arch/arm/include/asm/kvm_psci.h create mode 100644 arch/arm/kvm/psci.c (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 38066a7a74e1..c25439a58274 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2185,6 +2185,10 @@ return ENOEXEC for that vcpu. Note that because some registers reflect machine topology, all vcpus should be created before this ioctl is invoked. +Possible features: + - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state. + Depends on KVM_CAP_ARM_PSCI. 
+ 4.78 KVM_GET_REG_LIST diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 4c1a073280be..fd611996bfb5 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -32,6 +32,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) +{ + return 1; +} + static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu) { return (u32 *)&vcpu->arch.regs.usr_regs.ARM_pc; @@ -42,6 +47,11 @@ static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu) return (u32 *)&vcpu->arch.regs.usr_regs.ARM_cpsr; } +static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) +{ + *vcpu_cpsr(vcpu) |= PSR_T_BIT; +} + static inline bool mode_has_spsr(struct kvm_vcpu *vcpu) { unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index e65fc967a71d..98b4d1a72923 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -30,7 +30,7 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG -#define KVM_VCPU_MAX_FEATURES 0 +#define KVM_VCPU_MAX_FEATURES 1 /* We don't currently support large pages. 
*/ #define KVM_HPAGE_GFN_SHIFT(x) 0 @@ -100,6 +100,9 @@ struct kvm_vcpu_arch { int last_pcpu; cpumask_t require_dcache_flush; + /* Don't run the guest on this vcpu */ + bool pause; + /* IO related fields */ struct kvm_decode mmio_decode; diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h new file mode 100644 index 000000000000..9a83d98bf170 --- /dev/null +++ b/arch/arm/include/asm/kvm_psci.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef __ARM_KVM_PSCI_H__ +#define __ARM_KVM_PSCI_H__ + +bool kvm_psci_call(struct kvm_vcpu *vcpu); + +#endif /* __ARM_KVM_PSCI_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index bbb6b2328004..3303ff5adbf3 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -65,6 +65,8 @@ struct kvm_regs { #define KVM_ARM_TARGET_CORTEX_A15 0 #define KVM_ARM_NUM_TARGETS 1 +#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ + struct kvm_vcpu_init { __u32 target; __u32 features[7]; @@ -145,4 +147,18 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX 127 +/* PSCI interface */ +#define KVM_PSCI_FN_BASE 0x95c1ba5e +#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) + +#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0) +#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1) +#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) +#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3) + +#define KVM_PSCI_RET_SUCCESS 0 +#define KVM_PSCI_RET_NI ((unsigned long)-1) +#define KVM_PSCI_RET_INVAL ((unsigned long)-2) +#define KVM_PSCI_RET_DENIED ((unsigned long)-3) + #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 1e45cd97a7fc..ea27987bd07f 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -18,4 +18,4 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o +obj-y += coproc.o coproc_a15.o mmio.o psci.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 8680b9ffd2ae..2d30e3afdaf9 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #ifdef REQUIRES_VIRT @@ -160,6 +161,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_ONE_REG: + case 
KVM_CAP_ARM_PSCI: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -443,14 +445,18 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), vcpu->arch.hsr & HSR_HVC_IMM_MASK); + if (kvm_psci_call(vcpu)) + return 1; + kvm_inject_undefined(vcpu); return 1; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - /* We don't support SMC; don't do that. */ - kvm_debug("smc: at %08x", *vcpu_pc(vcpu)); + if (kvm_psci_call(vcpu)) + return 1; + kvm_inject_undefined(vcpu); return 1; } @@ -589,9 +595,26 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return 0; vcpu->arch.has_run_once = true; + + /* + * Handle the "start in power-off" case by calling into the + * PSCI code. + */ + if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) { + *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF; + kvm_psci_call(vcpu); + } + return 0; } +static void vcpu_pause(struct kvm_vcpu *vcpu) +{ + wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); + + wait_event_interruptible(*wq, !vcpu->arch.pause); +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -635,6 +658,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) update_vttbr(vcpu->kvm); + if (vcpu->arch.pause) + vcpu_pause(vcpu); + local_irq_disable(); /* diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c new file mode 100644 index 000000000000..7ee5bb7a3667 --- /dev/null +++ b/arch/arm/kvm/psci.c @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include +#include + +/* + * This is an implementation of the Power State Coordination Interface + * as described in ARM document number ARM DEN 0022A. + */ + +static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pause = true; +} + +static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) +{ + struct kvm *kvm = source_vcpu->kvm; + struct kvm_vcpu *vcpu; + wait_queue_head_t *wq; + unsigned long cpu_id; + phys_addr_t target_pc; + + cpu_id = *vcpu_reg(source_vcpu, 1); + if (vcpu_mode_is_32bit(source_vcpu)) + cpu_id &= ~((u32) 0); + + if (cpu_id >= atomic_read(&kvm->online_vcpus)) + return KVM_PSCI_RET_INVAL; + + target_pc = *vcpu_reg(source_vcpu, 2); + + vcpu = kvm_get_vcpu(kvm, cpu_id); + + wq = kvm_arch_vcpu_wq(vcpu); + if (!waitqueue_active(wq)) + return KVM_PSCI_RET_INVAL; + + kvm_reset_vcpu(vcpu); + + /* Gracefully handle Thumb2 entry point */ + if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) { + target_pc &= ~((phys_addr_t) 1); + vcpu_set_thumb(vcpu); + } + + *vcpu_pc(vcpu) = target_pc; + vcpu->arch.pause = false; + smp_mb(); /* Make sure the above is visible */ + + wake_up_interruptible(wq); + + return KVM_PSCI_RET_SUCCESS; +} + +/** + * kvm_psci_call - handle PSCI call if r0 value is in range + * @vcpu: Pointer to the VCPU struct + * + * Handle PSCI calls from guests through traps from HVC or SMC instructions. + * The calling convention is similar to SMC calls to the secure world where + * the function number is placed in r0 and this function returns true if the + * function number specified in r0 is withing the PSCI range, and false + * otherwise. 
+ */ +bool kvm_psci_call(struct kvm_vcpu *vcpu) +{ + unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); + unsigned long val; + + switch (psci_fn) { + case KVM_PSCI_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = KVM_PSCI_RET_SUCCESS; + break; + case KVM_PSCI_FN_CPU_ON: + val = kvm_psci_vcpu_on(vcpu); + break; + case KVM_PSCI_FN_CPU_SUSPEND: + case KVM_PSCI_FN_MIGRATE: + val = KVM_PSCI_RET_NI; + break; + + default: + return false; + } + + *vcpu_reg(vcpu, 0) = val; + return true; +} diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index dc63665e73ad..7f2360a46fc2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -636,6 +636,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 +#define KVM_CAP_ARM_PSCI 87 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 9569793a79836320c33d400c686dcb78f886bdad Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 6 Jan 2013 12:22:06 -0300 Subject: [media] dvb: Add DVBv5 statistics properties The DVBv3 statistics parameters are limited on several ways: - It doesn't provide any way to indicate the used measure, so userspace need to guess how to calculate/use it; - Only a limited set of stats are supported; - Can't be called in a way to require them to be filled all at once (atomic reads from the hardware), with may cause troubles on interpreting them on userspace; - On some OFDM delivery systems, the carriers can be independently modulated, having different properties. Currently, there's no way to report per-layer stats. To address the above issues, adding a new DVBv5-based stats API. While here, correct inner code nomenclature on a few places. 
Reviewed-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/dvbapi.xml | 2 +- Documentation/DocBook/media/dvb/dvbproperty.xml | 180 +++++++++++++++++++++++- Documentation/DocBook/media/dvb/frontend.xml | 2 +- include/uapi/linux/dvb/frontend.h | 79 ++++++++++- include/uapi/linux/dvb/version.h | 2 +- 5 files changed, 260 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/dvbapi.xml b/Documentation/DocBook/media/dvb/dvbapi.xml index 757488b24f4f..0197bcc7842d 100644 --- a/Documentation/DocBook/media/dvb/dvbapi.xml +++ b/Documentation/DocBook/media/dvb/dvbapi.xml @@ -84,7 +84,7 @@ Added ISDB-T test originally written by Patrick Boettcher LINUX DVB API -Version 5.8 +Version 5.10 &sub-intro; diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index 957e3acaae8e..4a5eaeed0b9e 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -7,14 +7,41 @@ the capability ioctls weren't implemented yet via the new way. The typical usage for the FE_GET_PROPERTY/FE_SET_PROPERTY API is to replace the ioctl's were the struct dvb_frontend_parameters were used. +
+DTV stats type + +struct dtv_stats { + __u8 scale; /* enum fecap_scale_params type */ + union { + __u64 uvalue; /* for counters and relative scales */ + __s64 svalue; /* for 1/1000 dB measures */ + }; +} __packed; + +
+
+DTV stats type + +#define MAX_DTV_STATS 4 + +struct dtv_fe_stats { + __u8 len; + struct dtv_stats stat[MAX_DTV_STATS]; +} __packed; + +
+
DTV property type /* Reserved fields should be set to 0 */ + struct dtv_property { __u32 cmd; + __u32 reserved[3]; union { __u32 data; + struct dtv_fe_stats st; struct { __u8 data[32]; __u32 len; @@ -440,7 +467,7 @@ typedef enum fe_delivery_system { <constant>DTV-ISDBT-LAYER*</constant> parameters ISDB-T channels can be coded hierarchically. As opposed to DVB-T in ISDB-T hierarchical layers can be decoded simultaneously. For that - reason a ISDB-T demodulator has 3 viterbi and 3 reed-solomon-decoders. + reason a ISDB-T demodulator has 3 Viterbi and 3 Reed-Solomon decoders. ISDB-T has 3 hierarchical layers which each can use a part of the available segments. The total number of segments over all layers has to 13 in ISDB-T. @@ -850,6 +877,147 @@ enum fe_interleaving { use the special macro LNA_AUTO to set LNA auto
+ +
+ Frontend statistics indicators + The values are returned via dtv_property.stat. + If the property is supported, dtv_property.stat.len is bigger than zero. + For most delivery systems, dtv_property.stat.len + will be 1 if the stats are supported, and the properties will + return a single value for each parameter. + It should be noted, however, that new OFDM delivery systems + like ISDB can use different modulation types for each group of + carriers. On such standards, up to 3 groups of statistics can be + provided, and dtv_property.stat.len is updated + to reflect the "global" metrics, plus one metric for each carrier + group (called "layer" on ISDB). + So, in order to be consistent with other delivery systems, the first + value at dtv_property.stat.dtv_stats + array refers to the global metric. The other elements of the array + represent each layer, starting from layer A (index 1), + layer B (index 2) and so on. + The number of filled elements is stored at dtv_property.stat.len. + Each element of the dtv_property.stat.dtv_stats array consists of two elements: + + svalue or uvalue, where + svalue is for signed values of the measure (dB measures) + and uvalue is for unsigned values (counters, relative scale) + scale - Scale for the value. It can be: +
+ + FE_SCALE_NOT_AVAILABLE - The parameter is supported by the frontend, but it was not possible to collect it (could be a transitory or permanent condition) + FE_SCALE_DECIBEL - parameter is a signed value, measured in 1/1000 dB + FE_SCALE_RELATIVE - parameter is an unsigned value, where 0 means 0% and 65535 means 100%. + FE_SCALE_COUNTER - parameter is an unsigned value that counts the occurrence of an event, like bit error, block error, or elapsed time. + +
+
+
+
+ <constant>DTV_STAT_SIGNAL_STRENGTH</constant> + Indicates the signal strength level at the analog part of the tuner or of the demod. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_DECIBEL - signal strength is in 0.001 dBm units, power measured in milliwatts. This value is generally negative. + FE_SCALE_RELATIVE - The frontend provides a 0% to 100% measurement for power (actually, 0 to 65535). + +
+
+ <constant>DTV_STAT_CNR</constant> + Indicates the Signal to Noise ratio for the main carrier. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_DECIBEL - Signal/Noise ratio is in 0.001 dB units. + FE_SCALE_RELATIVE - The frontend provides a 0% to 100% measurement for Signal/Noise (actually, 0 to 65535). + +
+
+ <constant>DTV_STAT_PRE_ERROR_BIT_COUNT</constant> + Measures the number of bit errors before the forward error correction (FEC) on the inner coding block (before Viterbi, LDPC or other inner code). + This measure is taken during the same interval as DTV_STAT_PRE_TOTAL_BIT_COUNT. + In order to get the BER (Bit Error Rate) measurement, it should be divided by + DTV_STAT_PRE_TOTAL_BIT_COUNT. + This measurement is monotonically increased, as the frontend gets more bit count measurements. + The frontend may reset it when a channel/transponder is tuned. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_COUNTER - Number of error bits counted before the inner coding. + +
+
+ <constant>DTV_STAT_PRE_TOTAL_BIT_COUNT</constant> + Measures the amount of bits received before the inner code block, during the same period as + DTV_STAT_PRE_ERROR_BIT_COUNT measurement was taken. + It should be noted that this measurement can be smaller than the total amount of bits on the transport stream, + as the frontend may need to manually restart the measurement, losing some data between each measurement interval. + This measurement is monotonically increased, as the frontend gets more bit count measurements. + The frontend may reset it when a channel/transponder is tuned. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_COUNTER - Number of bits counted while measuring + DTV_STAT_PRE_ERROR_BIT_COUNT. + +
+
+ <constant>DTV_STAT_POST_ERROR_BIT_COUNT</constant> + Measures the number of bit errors after the forward error correction (FEC) done by inner code block (after Viterbi, LDPC or other inner code). + This measure is taken during the same interval as DTV_STAT_POST_TOTAL_BIT_COUNT. + In order to get the BER (Bit Error Rate) measurement, it should be divided by + DTV_STAT_POST_TOTAL_BIT_COUNT. + This measurement is monotonically increased, as the frontend gets more bit count measurements. + The frontend may reset it when a channel/transponder is tuned. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_COUNTER - Number of error bits counted after the inner coding. + +
+
+ <constant>DTV_STAT_POST_TOTAL_BIT_COUNT</constant> + Measures the amount of bits received after the inner coding, during the same period as + DTV_STAT_POST_ERROR_BIT_COUNT measurement was taken. + It should be noted that this measurement can be smaller than the total amount of bits on the transport stream, + as the frontend may need to manually restart the measurement, losing some data between each measurement interval. + This measurement is monotonically increased, as the frontend gets more bit count measurements. + The frontend may reset it when a channel/transponder is tuned. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_COUNTER - Number of bits counted while measuring + DTV_STAT_POST_ERROR_BIT_COUNT. + +
+
+ <constant>DTV_STAT_ERROR_BLOCK_COUNT</constant> + Measures the number of block errors after the outer forward error correction coding (after Reed-Solomon or other outer code). + This measurement is monotonically increased, as the frontend gets more block count measurements. + The frontend may reset it when a channel/transponder is tuned. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_COUNTER - Number of error blocks counted after the outer coding. + +
+
+ <constant>DTV_STAT_TOTAL_BLOCK_COUNT</constant> + Measures the total number of blocks received during the same period as + DTV_STAT_ERROR_BLOCK_COUNT measurement was taken. + It can be used to calculate the PER indicator, by dividing + DTV_STAT_ERROR_BLOCK_COUNT + by DTV_STAT_TOTAL_BLOCK_COUNT. + Possible scales for this metric are: + + FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. + FE_SCALE_COUNTER - Number of blocks counted while measuring + DTV_STAT_ERROR_BLOCK_COUNT. + +
+
+
Properties used on terrestrial delivery systems
@@ -871,6 +1039,7 @@ enum fe_interleaving { DTV_HIERARCHY DTV_LNA + In addition, the DTV QoS statistics are also valid.
DVB-T2 delivery system @@ -895,6 +1064,7 @@ enum fe_interleaving { DTV_STREAM_ID DTV_LNA + In addition, the DTV QoS statistics are also valid.
ISDB-T delivery system @@ -948,6 +1118,7 @@ enum fe_interleaving { DTV_ISDBT_LAYERC_SEGMENT_COUNT DTV_ISDBT_LAYERC_TIME_INTERLEAVING + In addition, the DTV QoS statistics are also valid.
ATSC delivery system @@ -961,6 +1132,7 @@ enum fe_interleaving { DTV_MODULATION DTV_BANDWIDTH_HZ + In addition, the DTV QoS statistics are also valid.
ATSC-MH delivery system @@ -988,6 +1160,7 @@ enum fe_interleaving { DTV_ATSCMH_SCCC_CODE_MODE_C DTV_ATSCMH_SCCC_CODE_MODE_D + In addition, the DTV QoS statistics are also valid.
DTMB delivery system @@ -1007,6 +1180,7 @@ enum fe_interleaving { DTV_INTERLEAVING DTV_LNA + In addition, the DTV QoS statistics are also valid.
@@ -1028,6 +1202,7 @@ enum fe_interleaving { DTV_INNER_FEC DTV_LNA + In addition, the DTV QoS statistics are also valid.
DVB-C Annex B delivery system @@ -1043,6 +1218,7 @@ enum fe_interleaving { DTV_INVERSION DTV_LNA + In addition, the DTV QoS statistics are also valid.
@@ -1062,6 +1238,7 @@ enum fe_interleaving { DTV_VOLTAGE DTV_TONE + In addition, the DTV QoS statistics are also valid. Future implementations might add those two missing parameters: DTV_DISEQC_MASTER @@ -1077,6 +1254,7 @@ enum fe_interleaving { DTV_ROLLOFF DTV_STREAM_ID + In addition, the DTV QoS statistics are also valid.
Turbo code delivery system diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml index 426c2526a454..df39ba395df0 100644 --- a/Documentation/DocBook/media/dvb/frontend.xml +++ b/Documentation/DocBook/media/dvb/frontend.xml @@ -230,7 +230,7 @@ typedef enum fe_status { The frontend has found a DVB signal FE_HAS_VITERBI -The frontend FEC code is stable +The frontend FEC inner coding (Viterbi, LDPC or other inner code) is stable FE_HAS_SYNC Syncronization bytes was found diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index c12d452cb40d..c56d77c496a5 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -365,7 +365,17 @@ struct dvb_frontend_event { #define DTV_INTERLEAVING 60 #define DTV_LNA 61 -#define DTV_MAX_COMMAND DTV_LNA +/* Quality parameters */ +#define DTV_STAT_SIGNAL_STRENGTH 62 +#define DTV_STAT_CNR 63 +#define DTV_STAT_PRE_ERROR_BIT_COUNT 64 +#define DTV_STAT_PRE_TOTAL_BIT_COUNT 65 +#define DTV_STAT_POST_ERROR_BIT_COUNT 66 +#define DTV_STAT_POST_TOTAL_BIT_COUNT 67 +#define DTV_STAT_ERROR_BLOCK_COUNT 68 +#define DTV_STAT_TOTAL_BLOCK_COUNT 69 + +#define DTV_MAX_COMMAND DTV_STAT_TOTAL_BLOCK_COUNT typedef enum fe_pilot { PILOT_ON, @@ -452,11 +462,78 @@ struct dtv_cmds_h { __u32 reserved:30; /* Align */ }; +/** + * Scale types for the quality parameters. + * @FE_SCALE_NOT_AVAILABLE: That QoS measure is not available. That + * could indicate a temporary or a permanent + * condition. + * @FE_SCALE_DECIBEL: The scale is measured in 0.0001 dB steps, typically + * used on signal measures. + * @FE_SCALE_RELATIVE: The scale is a relative percentual measure, + * ranging from 0 (0%) to 0xffff (100%). + * @FE_SCALE_COUNTER: The scale counts the occurrence of an event, like + * bit error, block error, lapsed time. 
+ */ +enum fecap_scale_params { + FE_SCALE_NOT_AVAILABLE = 0, + FE_SCALE_DECIBEL, + FE_SCALE_RELATIVE, + FE_SCALE_COUNTER +}; + +/** + * struct dtv_stats - Used for reading a DTV status property + * + * @value: value of the measure. Should range from 0 to 0xffff; + * @scale: Filled with enum fecap_scale_params - the scale + * in usage for that parameter + * + * For most delivery systems, this will return a single value for each + * parameter. + * It should be noticed, however, that new OFDM delivery systems like + * ISDB can use different modulation types for each group of carriers. + * On such standards, up to 8 groups of statistics can be provided, one + * for each carrier group (called "layer" on ISDB). + * In order to be consistent with other delivery systems, the first + * value refers to the entire set of carriers ("global"). + * dtv_status:scale should use the value FE_SCALE_NOT_AVAILABLE when + * the value for the entire group of carriers or from one specific layer + * is not provided by the hardware. + * st.len should be filled with the latest filled status + 1. 
+ * + * In other words, for ISDB, those values should be filled like: + * u.st.stat.svalue[0] = global statistics; + * u.st.stat.scale[0] = FE_SCALE_DECIBELS; + * u.st.stat.value[1] = layer A statistics; + * u.st.stat.scale[1] = FE_SCALE_NOT_AVAILABLE (if not available); + * u.st.stat.svalue[2] = layer B statistics; + * u.st.stat.scale[2] = FE_SCALE_DECIBELS; + * u.st.stat.svalue[3] = layer C statistics; + * u.st.stat.scale[3] = FE_SCALE_DECIBELS; + * u.st.len = 4; + */ +struct dtv_stats { + __u8 scale; /* enum fecap_scale_params type */ + union { + __u64 uvalue; /* for counters and relative scales */ + __s64 svalue; /* for 0.0001 dB measures */ + }; +} __attribute__ ((packed)); + + +#define MAX_DTV_STATS 4 + +struct dtv_fe_stats { + __u8 len; + struct dtv_stats stat[MAX_DTV_STATS]; +} __attribute__ ((packed)); + struct dtv_property { __u32 cmd; __u32 reserved[3]; union { __u32 data; + struct dtv_fe_stats st; struct { __u8 data[32]; __u32 len; diff --git a/include/uapi/linux/dvb/version.h b/include/uapi/linux/dvb/version.h index 827cce7e33e3..e53e2ad4444f 100644 --- a/include/uapi/linux/dvb/version.h +++ b/include/uapi/linux/dvb/version.h @@ -24,6 +24,6 @@ #define _DVBVERSION_H_ #define DVB_API_VERSION 5 -#define DVB_API_VERSION_MINOR 9 +#define DVB_API_VERSION_MINOR 10 #endif /*_DVBVERSION_H_*/ -- cgit v1.2.3 From 0b05b18381eea98c9c9ada95629bf659a88c9374 Mon Sep 17 00:00:00 2001 From: "Anand V. Avati" Date: Sun, 19 Aug 2012 08:53:23 -0400 Subject: fuse: implement NFS-like readdirplus support This patch implements readdirplus support in FUSE, similar to NFS. The payload returned in the readdirplus call contains 'fuse_entry_out' structure thereby providing all the necessary inputs for 'faking' a lookup() operation on the spot. If the dentry and inode already existed (for e.g. in a re-run of ls -l) then just the inode attributes timeout and dentry timeout are refreshed. 
With a simple client->network->server implementation of a FUSE based filesystem, the following performance observations were made: Test: Performing a filesystem crawl over 20,000 files with sh# time ls -lR /mnt Without readdirplus: Run 1: 18.1s Run 2: 16.0s Run 3: 16.2s With readdirplus: Run 1: 4.1s Run 2: 3.8s Run 3: 3.8s The performance improvement is significant as it avoided 20,000 upcalls calls (lookup). Cache consistency is no worse than what already is. Signed-off-by: Anand V. Avati Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 19 ++++++ fs/fuse/dir.c | 160 ++++++++++++++++++++++++++++++++++++++++++++-- fs/fuse/fuse_i.h | 6 ++ fs/fuse/inode.c | 5 +- include/uapi/linux/fuse.h | 12 ++++ 5 files changed, 197 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e83351aa5bad..05c3eec298f2 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -491,6 +491,25 @@ void fuse_request_send_background_locked(struct fuse_conn *fc, fuse_request_send_nowait_locked(fc, req); } +void fuse_force_forget(struct file *file, u64 nodeid) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_forget_in inarg; + + memset(&inarg, 0, sizeof(inarg)); + inarg.nlookup = 1; + req = fuse_get_req_nofail(fc, file); + req->in.h.opcode = FUSE_FORGET; + req->in.h.nodeid = nodeid; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->isreply = 0; + fuse_request_send_nowait(fc, req); +} + /* * Lock the request. Up to the next unlock_request() there mustn't be * anything that could cause a page-fault. 
If the request was already diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b7c09f9eb40c..dcc1e522c7d4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1155,6 +1155,143 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file, return 0; } +static int fuse_direntplus_link(struct file *file, + struct fuse_direntplus *direntplus, + u64 attr_version) +{ + int err; + struct fuse_entry_out *o = &direntplus->entry_out; + struct fuse_dirent *dirent = &direntplus->dirent; + struct dentry *parent = file->f_path.dentry; + struct qstr name = QSTR_INIT(dirent->name, dirent->namelen); + struct dentry *dentry; + struct dentry *alias; + struct inode *dir = parent->d_inode; + struct fuse_conn *fc; + struct inode *inode; + + if (!o->nodeid) { + /* + * Unlike in the case of fuse_lookup, zero nodeid does not mean + * ENOENT. Instead, it only means the userspace filesystem did + * not want to return attributes/handle for this entry. + * + * So do nothing. + */ + return 0; + } + + if (name.name[0] == '.') { + /* + * We could potentially refresh the attributes of the directory + * and its parent? + */ + if (name.len == 1) + return 0; + if (name.name[1] == '.' 
&& name.len == 2) + return 0; + } + fc = get_fuse_conn(dir); + + name.hash = full_name_hash(name.name, name.len); + dentry = d_lookup(parent, &name); + if (dentry && dentry->d_inode) { + inode = dentry->d_inode; + if (get_node_id(inode) == o->nodeid) { + struct fuse_inode *fi; + fi = get_fuse_inode(inode); + spin_lock(&fc->lock); + fi->nlookup++; + spin_unlock(&fc->lock); + + /* + * The other branch to 'found' comes via fuse_iget() + * which bumps nlookup inside + */ + goto found; + } + err = d_invalidate(dentry); + if (err) + goto out; + dput(dentry); + dentry = NULL; + } + + dentry = d_alloc(parent, &name); + err = -ENOMEM; + if (!dentry) + goto out; + + inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, + &o->attr, entry_attr_timeout(o), attr_version); + if (!inode) + goto out; + + alias = d_materialise_unique(dentry, inode); + err = PTR_ERR(alias); + if (IS_ERR(alias)) + goto out; + if (alias) { + dput(dentry); + dentry = alias; + } + +found: + fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o), + attr_version); + + fuse_change_entry_timeout(dentry, o); + + err = 0; +out: + if (dentry) + dput(dentry); + return err; +} + +static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, + void *dstbuf, filldir_t filldir, u64 attr_version) +{ + struct fuse_direntplus *direntplus; + struct fuse_dirent *dirent; + size_t reclen; + int over = 0; + int ret; + + while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) { + direntplus = (struct fuse_direntplus *) buf; + dirent = &direntplus->dirent; + reclen = FUSE_DIRENTPLUS_SIZE(direntplus); + + if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) + return -EIO; + if (reclen > nbytes) + break; + + if (!over) { + /* We fill entries into dstbuf only as much as + it can hold. But we still continue iterating + over remaining entries to link them. If not, + we need to send a FORGET for each of those + which we did not link. 
+ */ + over = filldir(dstbuf, dirent->name, dirent->namelen, + file->f_pos, dirent->ino, + dirent->type); + file->f_pos = dirent->off; + } + + buf += reclen; + nbytes -= reclen; + + ret = fuse_direntplus_link(file, direntplus, attr_version); + if (ret) + fuse_force_forget(file, direntplus->entry_out.nodeid); + } + + return 0; +} + static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) { int err; @@ -1163,6 +1300,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) struct inode *inode = file->f_path.dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; + u64 attr_version = 0; if (is_bad_inode(inode)) return -EIO; @@ -1179,14 +1317,28 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; - fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); + if (fc->do_readdirplus) { + attr_version = fuse_get_attr_version(fc); + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, + FUSE_READDIRPLUS); + } else { + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, + FUSE_READDIR); + } fuse_request_send(fc, req); nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); - if (!err) - err = parse_dirfile(page_address(page), nbytes, file, dstbuf, - filldir); + if (!err) { + if (fc->do_readdirplus) { + err = parse_dirplusfile(page_address(page), nbytes, + file, dstbuf, filldir, + attr_version); + } else { + err = parse_dirfile(page_address(page), nbytes, file, + dstbuf, filldir); + } + } __free_page(page); fuse_invalidate_attr(inode); /* atime changed */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e105a53fc72d..5c5055306d3c 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -487,6 +487,9 @@ struct fuse_conn { /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; + /** Does the filesystem support readdir-plus? 
*/ + unsigned do_readdirplus:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -578,6 +581,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, struct fuse_forget_link *fuse_alloc_forget(void); +/* Used by READDIRPLUS */ +void fuse_force_forget(struct file *file, u64 nodeid); + /** * Initialize READ or READDIR request */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 73ca6b72beaf..6f7d5746bf52 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -863,6 +863,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->dont_mask = 1; if (arg->flags & FUSE_AUTO_INVAL_DATA) fc->auto_inval_data = 1; + if (arg->flags & FUSE_DO_READDIRPLUS) + fc->do_readdirplus = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -889,7 +891,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | - FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA; + FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | + FUSE_DO_READDIRPLUS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index d8c713e148e3..5dc1fea49ecd 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -193,6 +193,7 @@ struct fuse_file_lock { #define FUSE_FLOCK_LOCKS (1 << 10) #define FUSE_HAS_IOCTL_DIR (1 << 11) #define FUSE_AUTO_INVAL_DATA (1 << 12) +#define FUSE_DO_READDIRPLUS (1 << 13) /** * CUSE INIT request/reply flags @@ -299,6 +300,7 @@ enum fuse_opcode { FUSE_NOTIFY_REPLY = 41, FUSE_BATCH_FORGET = 42, FUSE_FALLOCATE = 43, + FUSE_READDIRPLUS = 44, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -630,6 +632,16 @@ struct fuse_dirent { #define FUSE_DIRENT_SIZE(d) \ 
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) +struct fuse_direntplus { + struct fuse_entry_out entry_out; + struct fuse_dirent dirent; +}; + +#define FUSE_NAME_OFFSET_DIRENTPLUS \ + offsetof(struct fuse_direntplus, dirent.name) +#define FUSE_DIRENTPLUS_SIZE(d) \ + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) + struct fuse_notify_inval_inode_out { __u64 ino; __s64 off; -- cgit v1.2.3 From 54a3ac0c9e5b7213daa358ce74d154352657353a Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 24 Jan 2013 10:31:28 +0800 Subject: usb: Using correct way to clear usb3.0 device's remote wakeup feature. Usb3.0 device defines function remote wakeup which is only for interface recipient rather than device recipient. This is different with usb2.0 device's remote wakeup feature which is defined for device recipient. According usb3.0 spec 9.4.5, the function remote wakeup can be modified by the SetFeature() requests using the FUNCTION_SUSPEND feature selector. This patch is to use correct way to disable usb3.0 device's function remote wakeup after suspend error and resuming. This should be backported to kernels as old as 3.4, that contain the commit 623bef9e03a60adc623b09673297ca7a1cdfb367 "USB/xhci: Enable remote wakeup for USB3 devices." 
Signed-off-by: Lan Tianyu Signed-off-by: Sarah Sharp Cc: stable@vger.kernel.org --- drivers/usb/core/hub.c | 70 ++++++++++++++++++++++++++++++++------------ include/uapi/linux/usb/ch9.h | 6 ++++ 2 files changed, 58 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 957ed2c41482..cbf7168e3ce7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2838,6 +2838,23 @@ void usb_enable_ltm(struct usb_device *udev) EXPORT_SYMBOL_GPL(usb_enable_ltm); #ifdef CONFIG_USB_SUSPEND +/* + * usb_disable_function_remotewakeup - disable usb3.0 + * device's function remote wakeup + * @udev: target device + * + * Assume there's only one function on the USB 3.0 + * device and disable remote wake for the first + * interface. FIXME if the interface association + * descriptor shows there's more than one function. + */ +static int usb_disable_function_remotewakeup(struct usb_device *udev) +{ + return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), + USB_REQ_CLEAR_FEATURE, USB_RECIP_INTERFACE, + USB_INTRF_FUNC_SUSPEND, 0, NULL, 0, + USB_CTRL_SET_TIMEOUT); +} /* * usb_port_suspend - suspend a usb device's upstream port @@ -2955,12 +2972,19 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) dev_dbg(hub->intfdev, "can't suspend port %d, status %d\n", port1, status); /* paranoia: "should not happen" */ - if (udev->do_remote_wakeup) - (void) usb_control_msg(udev, usb_sndctrlpipe(udev, 0), - USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, - USB_DEVICE_REMOTE_WAKEUP, 0, - NULL, 0, - USB_CTRL_SET_TIMEOUT); + if (udev->do_remote_wakeup) { + if (!hub_is_superspeed(hub->hdev)) { + (void) usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + USB_REQ_CLEAR_FEATURE, + USB_RECIP_DEVICE, + USB_DEVICE_REMOTE_WAKEUP, 0, + NULL, 0, + USB_CTRL_SET_TIMEOUT); + } else + (void) usb_disable_function_remotewakeup(udev); + + } /* Try to enable USB2 hardware LPM again */ if (udev->usb2_hw_lpm_capable == 1) @@ 
-3052,20 +3076,30 @@ static int finish_port_resume(struct usb_device *udev) * udev->reset_resume */ } else if (udev->actconfig && !udev->reset_resume) { - le16_to_cpus(&devstatus); - if (devstatus & (1 << USB_DEVICE_REMOTE_WAKEUP)) { - status = usb_control_msg(udev, - usb_sndctrlpipe(udev, 0), - USB_REQ_CLEAR_FEATURE, + if (!hub_is_superspeed(udev->parent)) { + le16_to_cpus(&devstatus); + if (devstatus & (1 << USB_DEVICE_REMOTE_WAKEUP)) + status = usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, - USB_DEVICE_REMOTE_WAKEUP, 0, - NULL, 0, - USB_CTRL_SET_TIMEOUT); - if (status) - dev_dbg(&udev->dev, - "disable remote wakeup, status %d\n", - status); + USB_DEVICE_REMOTE_WAKEUP, 0, + NULL, 0, + USB_CTRL_SET_TIMEOUT); + } else { + status = usb_get_status(udev, USB_RECIP_INTERFACE, 0, + &devstatus); + le16_to_cpus(&devstatus); + if (!status && devstatus & (USB_INTRF_STAT_FUNC_RW_CAP + | USB_INTRF_STAT_FUNC_RW)) + status = + usb_disable_function_remotewakeup(udev); } + + if (status) + dev_dbg(&udev->dev, + "disable remote wakeup, status %d\n", + status); status = 0; } return status; diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 50598472dc41..f738e25377ff 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -152,6 +152,12 @@ #define USB_INTRF_FUNC_SUSPEND_LP (1 << (8 + 0)) #define USB_INTRF_FUNC_SUSPEND_RW (1 << (8 + 1)) +/* + * Interface status, Figure 9-5 USB 3.0 spec + */ +#define USB_INTRF_STAT_FUNC_RW_CAP 1 +#define USB_INTRF_STAT_FUNC_RW 2 + #define USB_ENDPOINT_HALT 0 /* IN/OUT will STALL */ /* Bit array elements as returned by the USB_REQ_GET_STATUS request. 
*/ -- cgit v1.2.3 From b878e7fb22ea48b0585bbbbef249f7efc6d42748 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Tue, 8 Jan 2013 14:44:25 -0500 Subject: perf: Missing field in PERF_RECORD_SAMPLE documentation While trying to write a perf_event/mmap test for my perf_event test-suite I came across a missing field description in the PERF_RECORD_SAMPLE documentation in perf_event.h Signed-off-by: Vince Weaver Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1301081439300.24507@vincent-weaver-1.um.maine.edu Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4f63c05d27c9..9fa9c622a7f4 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -579,7 +579,8 @@ enum perf_event_type { * { u32 size; * char data[size];}&& PERF_SAMPLE_RAW * - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * { u64 nr; + * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER -- cgit v1.2.3 From 77765eaf5cfb6b8dd98ec8b54b411d74ff6095f1 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 18 Jan 2013 11:18:45 +0530 Subject: cfg80211/nl80211: add API for MAC address ACLs Add API to enable drivers to implement MAC address based access control in AP/P2P GO mode. Capable drivers advertise this capability by setting the maximum number of MAC addresses in such a list in wiphy->max_acl_mac_addrs. An initial ACL may be given to the NL80211_CMD_START_AP command and/or changed later with NL80211_CMD_SET_MAC_ACL. Black- and whitelists are supported, but not simultaneously. 
Signed-off-by: Vasanthakumar Thiagarajan [rewrite commit log, many cleanups] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 34 +++++++++++++ include/uapi/linux/nl80211.h | 51 ++++++++++++++++++- net/wireless/core.c | 5 ++ net/wireless/nl80211.c | 116 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 12 +++++ net/wireless/trace.h | 18 +++++++ 6 files changed, 234 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 183033789e69..36e076e374d2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -531,6 +531,22 @@ struct mac_address { u8 addr[ETH_ALEN]; }; +/** + * struct cfg80211_acl_data - Access control list data + * + * @acl_policy: ACL policy to be applied on the station's + entry specified by mac_addr + * @n_acl_entries: Number of MAC address entries passed + * @mac_addrs: List of MAC addresses of stations to be used for ACL + */ +struct cfg80211_acl_data { + enum nl80211_acl_policy acl_policy; + int n_acl_entries; + + /* Keep it last */ + struct mac_address mac_addrs[]; +}; + /** * struct cfg80211_ap_settings - AP configuration * @@ -550,6 +566,8 @@ struct mac_address { * @inactivity_timeout: time in seconds to determine station's inactivity. * @p2p_ctwindow: P2P CT Window * @p2p_opp_ps: P2P opportunistic PS + * @acl: ACL configuration used by the drivers which has support for + * MAC address based access control */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -566,6 +584,7 @@ struct cfg80211_ap_settings { int inactivity_timeout; u8 p2p_ctwindow; bool p2p_opp_ps; + const struct cfg80211_acl_data *acl; }; /** @@ -1800,6 +1819,13 @@ struct cfg80211_gtk_rekey_data { * * @start_p2p_device: Start the given P2P device. * @stop_p2p_device: Stop the given P2P device. + * + * @set_mac_acl: Sets MAC address control list in AP and P2P GO mode. 
+ * Parameters include ACL policy, an array of MAC address of stations + * and the number of MAC addresses. If there is already a list in driver + * this new list replaces the existing one. Driver has to clear its ACL + * when number of MAC addresses entries is passed as 0. Drivers which + * advertise the support for MAC based ACL have to implement this callback. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2020,6 +2046,9 @@ struct cfg80211_ops { struct wireless_dev *wdev); void (*stop_p2p_device)(struct wiphy *wiphy, struct wireless_dev *wdev); + + int (*set_mac_acl)(struct wiphy *wiphy, struct net_device *dev, + const struct cfg80211_acl_data *params); }; /* @@ -2325,6 +2354,9 @@ struct wiphy_wowlan_support { * @ap_sme_capa: AP SME capabilities, flags from &enum nl80211_ap_sme_features. * @ht_capa_mod_mask: Specify what ht_cap values can be over-ridden. * If null, then none can be over-ridden. + * + * @max_acl_mac_addrs: Maximum number of MAC addresses that the device + * supports for ACL. */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -2346,6 +2378,8 @@ struct wiphy { /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; + u16 max_acl_mac_addrs; + u32 flags, features; u32 ap_sme_capa; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e6eeb4ba5dc5..5b7dbc1ea966 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -170,7 +170,8 @@ * %NL80211_ATTR_HIDDEN_SSID, %NL80211_ATTR_CIPHERS_PAIRWISE, * %NL80211_ATTR_CIPHER_GROUP, %NL80211_ATTR_WPA_VERSIONS, * %NL80211_ATTR_AKM_SUITES, %NL80211_ATTR_PRIVACY, - * %NL80211_ATTR_AUTH_TYPE and %NL80211_ATTR_INACTIVITY_TIMEOUT. + * %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_INACTIVITY_TIMEOUT, + * %NL80211_ATTR_ACL_POLICY and %NL80211_ATTR_MAC_ADDRS. 
* The channel to use can be set on the interface or be given using the * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel width. * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP @@ -586,6 +587,16 @@ * @NL80211_CMD_SET_MCAST_RATE: Change the rate used to send multicast frames * for IBSS or MESH vif. * + * @NL80211_CMD_SET_MAC_ACL: sets ACL for MAC address based access control. + * This is to be used with the drivers advertising the support of MAC + * address based access control. List of MAC addresses is passed in + * %NL80211_ATTR_MAC_ADDRS and ACL policy is passed in + * %NL80211_ATTR_ACL_POLICY. Driver will enable ACL with this list, if it + * is not already done. The new list will replace any existing list. Driver + * will clear its ACL when the list of MAC addresses passed is empty. This + * command is used in AP/P2P GO mode. Driver has to make sure to clear its + * ACL list during %NL80211_CMD_STOP_AP. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -736,6 +747,8 @@ enum nl80211_commands { NL80211_CMD_SET_MCAST_RATE, + NL80211_CMD_SET_MAC_ACL, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1313,6 +1326,16 @@ enum nl80211_commands { * @NL80211_ATTR_LOCAL_MESH_POWER_MODE: local mesh STA link-specific power mode * defined in &enum nl80211_mesh_power_mode. * + * @NL80211_ATTR_ACL_POLICY: ACL policy, see &enum nl80211_acl_policy, + * carried in a u32 attribute + * + * @NL80211_ATTR_MAC_ADDRS: Array of nested MAC addresses, used for + * MAC ACL. + * + * @NL80211_ATTR_MAC_ACL_MAX: u32 attribute to advertise the maximum + * number of MAC addresses that a device can support for MAC + * ACL. 
+ * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1585,6 +1608,12 @@ enum nl80211_attrs { NL80211_ATTR_LOCAL_MESH_POWER_MODE, + NL80211_ATTR_ACL_POLICY, + + NL80211_ATTR_MAC_ADDRS, + + NL80211_ATTR_MAC_ACL_MAX, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3248,7 +3277,7 @@ enum nl80211_probe_resp_offload_support_attr { * enum nl80211_connect_failed_reason - connection request failed reasons * @NL80211_CONN_FAIL_MAX_CLIENTS: Maximum number of clients that can be * handled by the AP is reached. - * @NL80211_CONN_FAIL_BLOCKED_CLIENT: Client's MAC is in the AP's blocklist. + * @NL80211_CONN_FAIL_BLOCKED_CLIENT: Connection request is rejected due to ACL. */ enum nl80211_connect_failed_reason { NL80211_CONN_FAIL_MAX_CLIENTS, @@ -3276,4 +3305,22 @@ enum nl80211_scan_flags { NL80211_SCAN_FLAG_AP = 1<<2, }; +/** + * enum nl80211_acl_policy - access control policy + * + * Access control policy is applied on a MAC list set by + * %NL80211_CMD_START_AP and %NL80211_CMD_SET_MAC_ACL, to + * be used with %NL80211_ATTR_ACL_POLICY. + * + * @NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED: Deny stations which are + * listed in ACL, i.e. allow all the stations which are not listed + * in ACL to authenticate. + * @NL80211_ACL_POLICY_DENY_UNLESS_LISTED: Allow the stations which are listed + * in ACL, i.e. deny all the stations which are not listed in ACL. 
+ */ +enum nl80211_acl_policy { + NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED, + NL80211_ACL_POLICY_DENY_UNLESS_LISTED, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 0e702cdc6043..ce827242f390 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -478,6 +478,11 @@ int wiphy_register(struct wiphy *wiphy) ETH_ALEN))) return -EINVAL; + if (WARN_ON(wiphy->max_acl_mac_addrs && + (!(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME) || + !rdev->ops->set_mac_acl))) + return -EINVAL; + if (wiphy->addresses) memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 33de80364c5c..b5978ab4ad7a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -365,6 +365,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, + [NL80211_ATTR_ACL_POLICY] = {. 
type = NLA_U32 }, + [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -1268,6 +1270,12 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.ht_capa_mod_mask)) goto nla_put_failure; + if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && + dev->wiphy.max_acl_mac_addrs && + nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, + dev->wiphy.max_acl_mac_addrs)) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: @@ -2491,6 +2499,97 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) return err; } +/* This function returns an error or the number of nested attributes */ +static int validate_acl_mac_addrs(struct nlattr *nl_attr) +{ + struct nlattr *attr; + int n_entries = 0, tmp; + + nla_for_each_nested(attr, nl_attr, tmp) { + if (nla_len(attr) != ETH_ALEN) + return -EINVAL; + + n_entries++; + } + + return n_entries; +} + +/* + * This function parses ACL information and allocates memory for ACL data. + * On successful return, the calling function is responsible to free the + * ACL buffer returned by this function. 
+ */ +static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy, + struct genl_info *info) +{ + enum nl80211_acl_policy acl_policy; + struct nlattr *attr; + struct cfg80211_acl_data *acl; + int i = 0, n_entries, tmp; + + if (!wiphy->max_acl_mac_addrs) + return ERR_PTR(-EOPNOTSUPP); + + if (!info->attrs[NL80211_ATTR_ACL_POLICY]) + return ERR_PTR(-EINVAL); + + acl_policy = nla_get_u32(info->attrs[NL80211_ATTR_ACL_POLICY]); + if (acl_policy != NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED && + acl_policy != NL80211_ACL_POLICY_DENY_UNLESS_LISTED) + return ERR_PTR(-EINVAL); + + if (!info->attrs[NL80211_ATTR_MAC_ADDRS]) + return ERR_PTR(-EINVAL); + + n_entries = validate_acl_mac_addrs(info->attrs[NL80211_ATTR_MAC_ADDRS]); + if (n_entries < 0) + return ERR_PTR(n_entries); + + if (n_entries > wiphy->max_acl_mac_addrs) + return ERR_PTR(-ENOTSUPP); + + acl = kzalloc(sizeof(*acl) + (sizeof(struct mac_address) * n_entries), + GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_MAC_ADDRS], tmp) { + memcpy(acl->mac_addrs[i].addr, nla_data(attr), ETH_ALEN); + i++; + } + + acl->n_acl_entries = n_entries; + acl->acl_policy = acl_policy; + + return acl; +} + +static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_acl_data *acl; + int err; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + if (!dev->ieee80211_ptr->beacon_interval) + return -EINVAL; + + acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + err = rdev_set_mac_acl(rdev, dev, acl); + + kfree(acl); + + return err; +} + static int nl80211_parse_beacon(struct genl_info *info, struct cfg80211_beacon_data *bcn) { @@ -2734,6 +2833,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct 
genl_info *info) if (err) return err; + if (info->attrs[NL80211_ATTR_ACL_POLICY]) { + params.acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(params.acl)) + return PTR_ERR(params.acl); + } + err = rdev_start_ap(rdev, dev, &params); if (!err) { wdev->preset_chandef = params.chandef; @@ -2742,6 +2847,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->ssid_len = params.ssid_len; memcpy(wdev->ssid, params.ssid, wdev->ssid_len); } + + kfree(params.acl); + return err; } @@ -7876,6 +7984,14 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_MAC_ACL, + .doit = nl80211_set_mac_acl, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 6c0c8191f837..422d38291d66 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -875,4 +875,16 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); trace_rdev_return_void(&rdev->wiphy); } + +static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_acl_data *params) +{ + int ret; + + trace_rdev_set_mac_acl(&rdev->wiphy, dev, params); + ret = rdev->ops->set_mac_acl(&rdev->wiphy, dev, params); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 2134576f426e..8bc553199686 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1767,6 +1767,24 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_set_mac_acl, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_acl_data *params), +
TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u32, acl_policy) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->acl_policy = params->acl_policy; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ -- cgit v1.2.3 From d904d3edcbb26efc86ea3575bb4265559801a94b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 17 Jan 2013 18:43:41 +0100 Subject: can: gw: make routing to the incoming CAN interface configurable Introduce new configuration flag CGW_FLAGS_CAN_IIF_TX_OK to configure if a CAN sk_buff that has been routed with can-gw is allowed to be send back to the originating CAN interface. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/gw.h | 1 + net/can/gw.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 8e1db18c3cb6..0505c7f86213 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -51,6 +51,7 @@ enum { #define CGW_FLAGS_CAN_ECHO 0x01 #define CGW_FLAGS_CAN_SRC_TSTAMP 0x02 +#define CGW_FLAGS_CAN_IIF_TX_OK 0x04 #define CGW_MOD_FUNCS 4 /* AND OR XOR SET */ diff --git a/net/can/gw.c b/net/can/gw.c index 574dda78eb0f..37a3efb7cc9d 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -347,6 +348,13 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) return; } + /* is sending the skb back to the incoming interface not allowed?
*/ + if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) && + skb_headroom(skb) == sizeof(struct can_skb_priv) && + (((struct can_skb_priv *)(skb->head))->ifindex == + gwj->dst.dev->ifindex)) + return; + /* * clone the given skb, which has not been done in can_rcv() * -- cgit v1.2.3 From e6afa00a1409bc3bceed9ccb33111519463dfe7b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 17 Jan 2013 18:43:46 +0100 Subject: can: gw: indicate and count deleted frames due to misconfiguration Add a statistic counter to detect deleted frames due to misconfiguration with a new read-only CGW_DELETED netlink attribute for the CAN gateway. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/gw.h | 1 + net/can/gw.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 0505c7f86213..ae07bec74f4b 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -44,6 +44,7 @@ enum { CGW_SRC_IF, /* ifindex of source network interface */ CGW_DST_IF, /* ifindex of destination network interface */ CGW_FILTER, /* specify struct can_filter on source CAN device */ + CGW_DELETED, /* number of deleted CAN frames (see max_hops param) */ __CGW_MAX }; diff --git a/net/can/gw.c b/net/can/gw.c index 4216a80618cb..acdd4656cc3b 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -131,6 +131,7 @@ struct cgw_job { struct rcu_head rcu; u32 handled_frames; u32 dropped_frames; + u32 deleted_frames; struct cf_mod mod; union { /* CAN frame data source */ @@ -367,8 +368,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) BUG_ON(skb->ip_summed != CHECKSUM_UNNECESSARY); - if (cgw_hops(skb) >= max_hops) + if (cgw_hops(skb) >= max_hops) { + /* indicate deleted frames due to misconfiguration */ + gwj->deleted_frames++; return; + } if (!(gwj->dst.dev->flags & IFF_UP)) { gwj->dropped_frames++; @@ -500,6 +504,11 @@ static int 
cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + if (gwj->deleted_frames) { + if (nla_put_u32(skb, CGW_DELETED, gwj->deleted_frames) < 0) + goto cancel; + } + /* check non default settings of attributes */ if (gwj->mod.modtype.and) { @@ -799,6 +808,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->handled_frames = 0; gwj->dropped_frames = 0; + gwj->deleted_frames = 0; gwj->flags = r->flags; gwj->gwtype = r->gwtype; -- cgit v1.2.3 From cd8f7cb4e6dfa4ea08fc250a814240b883ef7911 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 22 Jan 2013 12:34:29 +0100 Subject: cfg80211/mac80211: support reporting wakeup reason When waking up from WoWLAN, it is useful to know what triggered the wakeup. Support reporting the wakeup reason(s) in cfg80211 (and a pass-through in mac80211) to allow userspace to know. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 41 +++++++++++++++++++ include/net/mac80211.h | 12 ++++++ include/uapi/linux/nl80211.h | 31 ++++++++++++++ net/mac80211/pm.c | 10 +++++ net/wireless/nl80211.c | 97 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/trace.h | 35 ++++++++++++++++ 6 files changed, 226 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 36e076e374d2..48add7e3ba1d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1596,6 +1596,32 @@ struct cfg80211_wowlan { int n_patterns; }; +/** + * struct cfg80211_wowlan_wakeup - wakeup report + * @disconnect: woke up by getting disconnected + * @magic_pkt: woke up by receiving magic packet + * @gtk_rekey_failure: woke up by GTK rekey failure + * @eap_identity_req: woke up by EAP identity request packet + * @four_way_handshake: woke up by 4-way handshake + * @rfkill_release: woke up by rfkill being released + * @pattern_idx: pattern that caused wakeup, -1 if not due to pattern + * @packet_present_len: copied wakeup packet data + * @packet_len: original 
wakeup packet length + * @packet: The packet causing the wakeup, if any. + * @packet_80211: For pattern match, magic packet and other data + * frame triggers an 802.3 frame should be reported, for + * disconnect due to deauth 802.11 frame. This indicates which + * it is. + */ +struct cfg80211_wowlan_wakeup { + bool disconnect, magic_pkt, gtk_rekey_failure, + eap_identity_req, four_way_handshake, + rfkill_release, packet_80211; + s32 pattern_idx; + u32 packet_present_len, packet_len; + const void *packet; +}; + /** * struct cfg80211_gtk_rekey_data - rekey data * @kek: key encryption key @@ -3852,6 +3878,21 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, enum ieee80211_p2p_attr_id attr, u8 *buf, unsigned int bufsize); +/** + * cfg80211_report_wowlan_wakeup - report wakeup from WoWLAN + * @wdev: the wireless device reporting the wakeup + * @wakeup: the wakeup report + * @gfp: allocation flags + * + * This function reports that the given device woke up. If it + * caused the wakeup, report the reason(s), otherwise you may + * pass %NULL as the @wakeup parameter to advertise that something + * else caused the wakeup. + */ +void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 21831ee57e3c..7a27e00c513a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4206,4 +4206,16 @@ void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif); */ int ieee80211_ave_rssi(struct ieee80211_vif *vif); +/** + * ieee80211_report_wowlan_wakeup - report WoWLAN wakeup + * @vif: virtual interface + * @wakeup: wakeup reason(s) + * @gfp: allocation flags + * + * See cfg80211_report_wowlan_wakeup(). 
+ */ +void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp); + #endif /* MAC80211_H */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5b7dbc1ea966..225a65e72219 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -513,6 +513,12 @@ * command with the %NL80211_ATTR_WOWLAN_TRIGGERS attribute. For * more background information, see * http://wireless.kernel.org/en/users/Documentation/WoWLAN. + * The @NL80211_CMD_SET_WOWLAN command can also be used as a notification + * from the driver reporting the wakeup reason. In this case, the + * @NL80211_ATTR_WOWLAN_TRIGGERS attribute will contain the reason + * for the wakeup, if it was caused by wireless. If it is not present + * in the wakeup notification, the wireless device didn't cause the + * wakeup but reports that it was woken up. * * @NL80211_CMD_SET_REKEY_OFFLOAD: This command is used give the driver * the necessary information for supporting GTK rekey offload. This @@ -2947,6 +2953,10 @@ struct nl80211_wowlan_pattern_support { * * In %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, it is a binary attribute * carrying a &struct nl80211_wowlan_pattern_support. + * + * When reporting wakeup, it is a u32 attribute containing the 0-based + * index of the pattern that caused the wakeup, in the patterns passed + * to the kernel when configuring. * @NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED: Not a real trigger, and cannot be * used when setting, used only to indicate that GTK rekeying is supported * by the device (flag) @@ -2957,8 +2967,25 @@ struct nl80211_wowlan_pattern_support { * @NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE: wake up on 4-way handshake (flag) * @NL80211_WOWLAN_TRIG_RFKILL_RELEASE: wake up when rfkill is released * (on devices that have rfkill in the device) (flag) + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211: For wakeup reporting only, contains + * the 802.11 packet that caused the wakeup, e.g.
a deauth frame. The frame + * may be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN + * attribute contains the original length. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN: Original length of the 802.11 + * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211 + * attribute if the packet was truncated somewhere. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023: For wakeup reporting only, contains the + * 802.3 packet that caused the wakeup, e.g. a magic packet. The frame may + * be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN attribute + * contains the original length. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN: Original length of the 802.3 + * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023 + * attribute if the packet was truncated somewhere. * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number + * + * These nested attributes are used to configure the wakeup triggers and + * to report the wakeup reason(s). */ enum nl80211_wowlan_triggers { __NL80211_WOWLAN_TRIG_INVALID, @@ -2971,6 +2998,10 @@ enum nl80211_wowlan_triggers { NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE, NL80211_WOWLAN_TRIG_RFKILL_RELEASE, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN, /* keep last */ NUM_NL80211_WOWLAN_TRIG, diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index e45b83610e85..53801d20176d 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -228,3 +228,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) * ieee80211_reconfig(), which is also needed for hardware * hang/firmware failure/etc. recovery.
*/ + +void ieee80211_report_wowlan_wakeup(struct ieee80211_vif *vif, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + cfg80211_report_wowlan_wakeup(&sdata->wdev, wakeup, gfp); +} +EXPORT_SYMBOL(ieee80211_report_wowlan_wakeup); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b5978ab4ad7a..d359734b6972 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9323,6 +9323,103 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_report_obss_beacon); +#ifdef CONFIG_PM +void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct sk_buff *msg; + void *hdr; + int err, size = 200; + + trace_cfg80211_report_wowlan_wakeup(wdev->wiphy, wdev, wakeup); + + if (wakeup) + size += wakeup->packet_present_len; + + msg = nlmsg_new(size, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_SET_WOWLAN); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto free_msg; + + if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) + goto free_msg; + + if (wakeup) { + struct nlattr *reasons; + + reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + + if (wakeup->disconnect && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) + goto free_msg; + if (wakeup->magic_pkt && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) + goto free_msg; + if (wakeup->gtk_rekey_failure && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) + goto free_msg; + if (wakeup->eap_identity_req && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) + goto free_msg; + if (wakeup->four_way_handshake && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) + goto free_msg; + 
if (wakeup->rfkill_release && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)) + goto free_msg; + + if (wakeup->pattern_idx >= 0 && + nla_put_u32(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, + wakeup->pattern_idx)) + goto free_msg; + + if (wakeup->packet) { + u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; + u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN; + + if (!wakeup->packet_80211) { + pkt_attr = + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023; + len_attr = + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN; + } + + if (wakeup->packet_len && + nla_put_u32(msg, len_attr, wakeup->packet_len)) + goto free_msg; + + if (nla_put(msg, pkt_attr, wakeup->packet_present_len, + wakeup->packet)) + goto free_msg; + } + + nla_nest_end(msg, reasons); + } + + err = genlmsg_end(msg, hdr); + if (err < 0) + goto free_msg; + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + free_msg: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_report_wowlan_wakeup); +#endif + void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper, u16 reason_code, gfp_t gfp) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 8bc553199686..c9cafb0ea95f 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2333,6 +2333,41 @@ TRACE_EVENT(cfg80211_return_u32, TP_printk("ret: %u", __entry->ret) ); +TRACE_EVENT(cfg80211_report_wowlan_wakeup, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_wowlan_wakeup *wakeup), + TP_ARGS(wiphy, wdev, wakeup), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(bool, disconnect) + __field(bool, magic_pkt) + __field(bool, gtk_rekey_failure) + __field(bool, eap_identity_req) + __field(bool, four_way_handshake) + __field(bool, rfkill_release) + __field(s32, pattern_idx) + __field(u32, packet_len) + __dynamic_array(u8, packet, wakeup->packet_present_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->disconnect = 
wakeup->disconnect; + __entry->magic_pkt = wakeup->magic_pkt; + __entry->gtk_rekey_failure = wakeup->gtk_rekey_failure; + __entry->eap_identity_req = wakeup->eap_identity_req; + __entry->four_way_handshake = wakeup->four_way_handshake; + __entry->rfkill_release = wakeup->rfkill_release; + __entry->pattern_idx = wakeup->pattern_idx; + __entry->packet_len = wakeup->packet_len; + if (wakeup->packet && wakeup->packet_present_len) + memcpy(__get_dynamic_array(packet), wakeup->packet, + wakeup->packet_present_len); + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 23c153e54197171f30b889d9654929d74b6599d5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 31 Jan 2013 17:08:11 +0100 Subject: fuse: bump version for READDIRPLUS Yeah, we have a capability flag for this as well, so this is not strictly necessary, but it doesn't hurt either. Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 5dc1fea49ecd..3451b6061e69 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -60,6 +60,9 @@ * * 7.20 * - add FUSE_AUTO_INVAL_DATA + * + * 7.21 + * - add FUSE_READDIRPLUS */ #ifndef _LINUX_FUSE_H @@ -91,7 +94,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 20 +#define FUSE_KERNEL_MINOR_VERSION 21 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 -- cgit v1.2.3 From 6fcdf4facb85e7d54ff6195378dd2ba8e0baccc4 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 30 Jan 2013 21:50:08 -0500 Subject: wanrouter: delete now orphaned header content, files/drivers The wanrouter support was identified earlier as unused for years, and so the previous commit totally decoupled it from the 
kernel, leaving the related wanrouter files present, but totally inert. Here we take the final step in that cleanup, by doing a wholesale removal of these files. The two step process is used so that the large deletion is decoupled from the git history of files that we still care about. The drivers deleted here all were dependent on the Kconfig setting CONFIG_WAN_ROUTER_DRIVERS. A stub wanrouter.h header (kernel & uapi) are left behind so that drivers/isdn/i4l/isdn_x25iface.c continues to compile, and so that we don't accidentally break userspace that expected these defines. Cc: Joe Perches Cc: Dan Carpenter Cc: Arnaldo Carvalho de Melo Signed-off-by: Paul Gortmaker --- drivers/net/wan/cycx_drv.c | 569 -------------- drivers/net/wan/cycx_main.c | 346 --------- drivers/net/wan/cycx_x25.c | 1602 ---------------------------------------- include/linux/cyclomx.h | 77 -- include/linux/cycx_drv.h | 64 -- include/linux/wanrouter.h | 127 +--- include/uapi/linux/wanrouter.h | 443 +---------- net/wanrouter/Kconfig | 27 - net/wanrouter/Makefile | 7 - net/wanrouter/patchlevel | 1 - net/wanrouter/wanmain.c | 782 -------------------- net/wanrouter/wanproc.c | 380 ---------- 12 files changed, 8 insertions(+), 4417 deletions(-) delete mode 100644 drivers/net/wan/cycx_drv.c delete mode 100644 drivers/net/wan/cycx_main.c delete mode 100644 drivers/net/wan/cycx_x25.c delete mode 100644 include/linux/cyclomx.h delete mode 100644 include/linux/cycx_drv.h delete mode 100644 net/wanrouter/Kconfig delete mode 100644 net/wanrouter/Makefile delete mode 100644 net/wanrouter/patchlevel delete mode 100644 net/wanrouter/wanmain.c delete mode 100644 net/wanrouter/wanproc.c (limited to 'include/uapi/linux') diff --git a/drivers/net/wan/cycx_drv.c b/drivers/net/wan/cycx_drv.c deleted file mode 100644 index 2a3ecae67a90..000000000000 --- a/drivers/net/wan/cycx_drv.c +++ /dev/null @@ -1,569 +0,0 @@ -/* -* cycx_drv.c Cyclom 2X Support Module. 
-* -* This module is a library of common hardware specific -* functions used by the Cyclades Cyclom 2X sync card. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdladrv.c by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 1999/11/11 acme set_current_state(TASK_INTERRUPTIBLE), code -* cleanup -* 1999/11/08 acme init_cyc2x deleted, doing nothing -* 1999/11/06 acme back to read[bw], write[bw] and memcpy_to and -* fromio to use dpmbase ioremaped -* 1999/10/26 acme use isa_read[bw], isa_write[bw] & isa_memcpy_to -* & fromio -* 1999/10/23 acme cleanup to only supports cyclom2x: all the other -* boards are no longer manufactured by cyclades, -* if someone wants to support them... be my guest! -* 1999/05/28 acme cycx_intack & cycx_intde gone for good -* 1999/05/18 acme lots of unlogged work, submitting to Linus... -* 1999/01/03 acme more judicious use of data types -* 1999/01/03 acme judicious use of data types :> -* cycx_inten trying to reset pending interrupts -* from cyclom 2x - I think this isn't the way to -* go, but for now... -* 1999/01/02 acme cycx_intack ok, I think there's nothing to do -* to ack an int in cycx_drv.c, only handle it in -* cyx_isr (or in the other protocols: cyp_isr, -* cyf_isr, when they get implemented. -* Dec 31, 1998 acme cycx_data_boot & cycx_code_boot fixed, crossing -* fingers to see x25_configure in cycx_x25.c -* work... :) -* Dec 26, 1998 acme load implementation fixed, seems to work! :) -* cycx_2x_dpmbase_options with all the possible -* DPM addresses (20). -* cycx_intr implemented (test this!) 
-* general code cleanup -* Dec 8, 1998 Ivan Passos Cyclom-2X firmware load implementation. -* Aug 8, 1998 acme Initial version. -*/ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include /* __init */ -#include -#include /* printk(), and other useful stuff */ -#include /* offsetof(), etc. */ -#include /* return codes */ -#include /* API definitions */ -#include /* CYCX firmware module definitions */ -#include /* udelay, msleep_interruptible */ -#include /* read[wl], write[wl], ioremap, iounmap */ - -#define MOD_VERSION 0 -#define MOD_RELEASE 6 - -MODULE_AUTHOR("Arnaldo Carvalho de Melo"); -MODULE_DESCRIPTION("Cyclom 2x Sync Card Driver"); -MODULE_LICENSE("GPL"); - -/* Hardware-specific functions */ -static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len); -static void cycx_bootcfg(struct cycx_hw *hw); - -static int reset_cyc2x(void __iomem *addr); -static int detect_cyc2x(void __iomem *addr); - -/* Miscellaneous functions */ -static int get_option_index(const long *optlist, long optval); -static u16 checksum(u8 *buf, u32 len); - -#define wait_cyc(addr) cycx_exec(addr + CMD_OFFSET) - -/* Global Data */ - -/* private data */ -static const char fullname[] = "Cyclom 2X Support Module"; -static const char copyright[] = - "(c) 1998-2003 Arnaldo Carvalho de Melo "; - -/* Hardware configuration options. - * These are arrays of configuration options used by verification routines. - * The first element of each array is its size (i.e. number of options). - */ -static const long cyc2x_dpmbase_options[] = { - 20, - 0xA0000, 0xA4000, 0xA8000, 0xAC000, 0xB0000, 0xB4000, 0xB8000, - 0xBC000, 0xC0000, 0xC4000, 0xC8000, 0xCC000, 0xD0000, 0xD4000, - 0xD8000, 0xDC000, 0xE0000, 0xE4000, 0xE8000, 0xEC000 -}; - -static const long cycx_2x_irq_options[] = { 7, 3, 5, 9, 10, 11, 12, 15 }; - -/* Kernel Loadable Module Entry Points */ -/* Module 'insert' entry point. - * o print announcement - * o initialize static data - * - * Return: 0 Ok - * < 0 error. 
- * Context: process */ - -static int __init cycx_drv_init(void) -{ - pr_info("%s v%u.%u %s\n", - fullname, MOD_VERSION, MOD_RELEASE, copyright); - - return 0; -} - -/* Module 'remove' entry point. - * o release all remaining system resources */ -static void cycx_drv_cleanup(void) -{ -} - -/* Kernel APIs */ -/* Set up adapter. - * o detect adapter type - * o verify hardware configuration options - * o check for hardware conflicts - * o set up adapter shared memory - * o test adapter memory - * o load firmware - * Return: 0 ok. - * < 0 error */ -EXPORT_SYMBOL(cycx_setup); -int cycx_setup(struct cycx_hw *hw, void *cfm, u32 len, unsigned long dpmbase) -{ - int err; - - /* Verify IRQ configuration options */ - if (!get_option_index(cycx_2x_irq_options, hw->irq)) { - pr_err("IRQ %d is invalid!\n", hw->irq); - return -EINVAL; - } - - /* Setup adapter dual-port memory window and test memory */ - if (!dpmbase) { - pr_err("you must specify the dpm address!\n"); - return -EINVAL; - } else if (!get_option_index(cyc2x_dpmbase_options, dpmbase)) { - pr_err("memory address 0x%lX is invalid!\n", dpmbase); - return -EINVAL; - } - - hw->dpmbase = ioremap(dpmbase, CYCX_WINDOWSIZE); - hw->dpmsize = CYCX_WINDOWSIZE; - - if (!detect_cyc2x(hw->dpmbase)) { - pr_err("adapter Cyclom 2X not found at address 0x%lX!\n", - dpmbase); - return -EINVAL; - } - - pr_info("found Cyclom 2X card at address 0x%lX\n", dpmbase); - - /* Load firmware. If loader fails then shut down adapter */ - err = load_cyc2x(hw, cfm, len); - - if (err) - cycx_down(hw); /* shutdown adapter */ - - return err; -} - -EXPORT_SYMBOL(cycx_down); -int cycx_down(struct cycx_hw *hw) -{ - iounmap(hw->dpmbase); - return 0; -} - -/* Enable interrupt generation. */ -static void cycx_inten(struct cycx_hw *hw) -{ - writeb(0, hw->dpmbase); -} - -/* Generate an interrupt to adapter's CPU. */ -EXPORT_SYMBOL(cycx_intr); -void cycx_intr(struct cycx_hw *hw) -{ - writew(0, hw->dpmbase + GEN_CYCX_INTR); -} - -/* Execute Adapter Command. 
- * o Set exec flag. - * o Busy-wait until flag is reset. */ -EXPORT_SYMBOL(cycx_exec); -int cycx_exec(void __iomem *addr) -{ - u16 i = 0; - /* wait till addr content is zeroed */ - - while (readw(addr)) { - udelay(1000); - - if (++i > 50) - return -1; - } - - return 0; -} - -/* Read absolute adapter memory. - * Transfer data from adapter's memory to data buffer. */ -EXPORT_SYMBOL(cycx_peek); -int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len) -{ - if (len == 1) - *(u8*)buf = readb(hw->dpmbase + addr); - else - memcpy_fromio(buf, hw->dpmbase + addr, len); - - return 0; -} - -/* Write Absolute Adapter Memory. - * Transfer data from data buffer to adapter's memory. */ -EXPORT_SYMBOL(cycx_poke); -int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len) -{ - if (len == 1) - writeb(*(u8*)buf, hw->dpmbase + addr); - else - memcpy_toio(hw->dpmbase + addr, buf, len); - - return 0; -} - -/* Hardware-Specific Functions */ - -/* Load Aux Routines */ -/* Reset board hardware. - return 1 if memory exists at addr and 0 if not. */ -static int memory_exists(void __iomem *addr) -{ - int tries = 0; - - for (; tries < 3 ; tries++) { - writew(TEST_PATTERN, addr + 0x10); - - if (readw(addr + 0x10) == TEST_PATTERN) - if (readw(addr + 0x10) == TEST_PATTERN) - return 1; - - msleep_interruptible(1 * 1000); - } - - return 0; -} - -/* Load reset code. */ -static void reset_load(void __iomem *addr, u8 *buffer, u32 cnt) -{ - void __iomem *pt_code = addr + RESET_OFFSET; - u16 i; /*, j; */ - - for (i = 0 ; i < cnt ; i++) { -/* for (j = 0 ; j < 50 ; j++); Delay - FIXME busy waiting... */ - writeb(*buffer++, pt_code++); - } -} - -/* Load buffer using boot interface. 
- * o copy data from buffer to Cyclom-X memory - * o wait for reset code to copy it to right portion of memory */ -static int buffer_load(void __iomem *addr, u8 *buffer, u32 cnt) -{ - memcpy_toio(addr + DATA_OFFSET, buffer, cnt); - writew(GEN_BOOT_DAT, addr + CMD_OFFSET); - - return wait_cyc(addr); -} - -/* Set up entry point and kick start Cyclom-X CPU. */ -static void cycx_start(void __iomem *addr) -{ - /* put in 0x30 offset the jump instruction to the code entry point */ - writeb(0xea, addr + 0x30); - writeb(0x00, addr + 0x31); - writeb(0xc4, addr + 0x32); - writeb(0x00, addr + 0x33); - writeb(0x00, addr + 0x34); - - /* cmd to start executing code */ - writew(GEN_START, addr + CMD_OFFSET); -} - -/* Load and boot reset code. */ -static void cycx_reset_boot(void __iomem *addr, u8 *code, u32 len) -{ - void __iomem *pt_start = addr + START_OFFSET; - - writeb(0xea, pt_start++); /* jmp to f000:3f00 */ - writeb(0x00, pt_start++); - writeb(0xfc, pt_start++); - writeb(0x00, pt_start++); - writeb(0xf0, pt_start); - reset_load(addr, code, len); - - /* 80186 was in hold, go */ - writeb(0, addr + START_CPU); - msleep_interruptible(1 * 1000); -} - -/* Load data.bin file through boot (reset) interface. */ -static int cycx_data_boot(void __iomem *addr, u8 *code, u32 len) -{ - void __iomem *pt_boot_cmd = addr + CMD_OFFSET; - u32 i; - - /* boot buffer length */ - writew(CFM_LOAD_BUFSZ, pt_boot_cmd + sizeof(u16)); - writew(GEN_DEFPAR, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - writew(0, pt_boot_cmd + sizeof(u16)); - writew(0x4000, pt_boot_cmd + 2 * sizeof(u16)); - writew(GEN_SET_SEG, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - for (i = 0 ; i < len ; i += CFM_LOAD_BUFSZ) - if (buffer_load(addr, code + i, - min_t(u32, CFM_LOAD_BUFSZ, (len - i))) < 0) { - pr_err("Error !!\n"); - return -1; - } - - return 0; -} - - -/* Load code.bin file through boot (reset) interface. 
*/ -static int cycx_code_boot(void __iomem *addr, u8 *code, u32 len) -{ - void __iomem *pt_boot_cmd = addr + CMD_OFFSET; - u32 i; - - /* boot buffer length */ - writew(CFM_LOAD_BUFSZ, pt_boot_cmd + sizeof(u16)); - writew(GEN_DEFPAR, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - writew(0x0000, pt_boot_cmd + sizeof(u16)); - writew(0xc400, pt_boot_cmd + 2 * sizeof(u16)); - writew(GEN_SET_SEG, pt_boot_cmd); - - if (wait_cyc(addr) < 0) - return -1; - - for (i = 0 ; i < len ; i += CFM_LOAD_BUFSZ) - if (buffer_load(addr, code + i, - min_t(u32, CFM_LOAD_BUFSZ, (len - i)))) { - pr_err("Error !!\n"); - return -1; - } - - return 0; -} - -/* Load adapter from the memory image of the CYCX firmware module. - * o verify firmware integrity and compatibility - * o start adapter up */ -static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len) -{ - int i, j; - struct cycx_fw_header *img_hdr; - u8 *reset_image, - *data_image, - *code_image; - void __iomem *pt_cycld = hw->dpmbase + 0x400; - u16 cksum; - - /* Announce */ - pr_info("firmware signature=\"%s\"\n", cfm->signature); - - /* Verify firmware signature */ - if (strcmp(cfm->signature, CFM_SIGNATURE)) { - pr_err("load_cyc2x: not Cyclom-2X firmware!\n"); - return -EINVAL; - } - - pr_info("firmware version=%u\n", cfm->version); - - /* Verify firmware module format version */ - if (cfm->version != CFM_VERSION) { - pr_err("%s: firmware format %u rejected! 
Expecting %u.\n", - __func__, cfm->version, CFM_VERSION); - return -EINVAL; - } - - /* Verify firmware module length and checksum */ - cksum = checksum((u8*)&cfm->info, sizeof(struct cycx_fw_info) + - cfm->info.codesize); -/* - FIXME cfm->info.codesize is off by 2 - if (((len - sizeof(struct cycx_firmware) - 1) != cfm->info.codesize) || -*/ - if (cksum != cfm->checksum) { - pr_err("%s: firmware corrupted!\n", __func__); - pr_err(" cdsize = 0x%x (expected 0x%lx)\n", - len - (int)sizeof(struct cycx_firmware) - 1, - cfm->info.codesize); - pr_err(" chksum = 0x%x (expected 0x%x)\n", - cksum, cfm->checksum); - return -EINVAL; - } - - /* If everything is ok, set reset, data and code pointers */ - img_hdr = (struct cycx_fw_header *)&cfm->image; -#ifdef FIRMWARE_DEBUG - pr_info("%s: image sizes\n", __func__); - pr_info(" reset=%lu\n", img_hdr->reset_size); - pr_info(" data=%lu\n", img_hdr->data_size); - pr_info(" code=%lu\n", img_hdr->code_size); -#endif - reset_image = ((u8 *)img_hdr) + sizeof(struct cycx_fw_header); - data_image = reset_image + img_hdr->reset_size; - code_image = data_image + img_hdr->data_size; - - /*---- Start load ----*/ - /* Announce */ - pr_info("loading firmware %s (ID=%u)...\n", - cfm->descr[0] ? 
cfm->descr : "unknown firmware", - cfm->info.codeid); - - for (i = 0 ; i < 5 ; i++) { - /* Reset Cyclom hardware */ - if (!reset_cyc2x(hw->dpmbase)) { - pr_err("dpm problem or board not found\n"); - return -EINVAL; - } - - /* Load reset.bin */ - cycx_reset_boot(hw->dpmbase, reset_image, img_hdr->reset_size); - /* reset is waiting for boot */ - writew(GEN_POWER_ON, pt_cycld); - msleep_interruptible(1 * 1000); - - for (j = 0 ; j < 3 ; j++) - if (!readw(pt_cycld)) - goto reset_loaded; - else - msleep_interruptible(1 * 1000); - } - - pr_err("reset not started\n"); - return -EINVAL; - -reset_loaded: - /* Load data.bin */ - if (cycx_data_boot(hw->dpmbase, data_image, img_hdr->data_size)) { - pr_err("cannot load data file\n"); - return -EINVAL; - } - - /* Load code.bin */ - if (cycx_code_boot(hw->dpmbase, code_image, img_hdr->code_size)) { - pr_err("cannot load code file\n"); - return -EINVAL; - } - - /* Prepare boot-time configuration data */ - cycx_bootcfg(hw); - - /* kick-off CPU */ - cycx_start(hw->dpmbase); - - /* Arthur Ganzert's tip: wait a while after the firmware loading... - seg abr 26 17:17:12 EST 1999 - acme */ - msleep_interruptible(7 * 1000); - pr_info("firmware loaded!\n"); - - /* enable interrupts */ - cycx_inten(hw); - - return 0; -} - -/* Prepare boot-time firmware configuration data. - * o initialize configuration data area - From async.doc - V_3.4.0 - 07/18/1994 - - As of now, only static buffers are available to the user. - So, the bit VD_RXDIRC must be set in 'valid'. That means that user - wants to use the static transmission and reception buffers. */ -static void cycx_bootcfg(struct cycx_hw *hw) -{ - /* use fixed buffers */ - writeb(FIXED_BUFFERS, hw->dpmbase + CONF_OFFSET); -} - -/* Detect Cyclom 2x adapter. - * Following tests are used to detect Cyclom 2x adapter: - * to be completed based on the tests done below - * Return 1 if detected o.k. or 0 if failed. - * Note: This test is destructive! 
Adapter will be left in shutdown - * state after the test. */ -static int detect_cyc2x(void __iomem *addr) -{ - reset_cyc2x(addr); - - return memory_exists(addr); -} - -/* Miscellaneous */ -/* Get option's index into the options list. - * Return option's index (1 .. N) or zero if option is invalid. */ -static int get_option_index(const long *optlist, long optval) -{ - int i = 1; - - for (; i <= optlist[0]; ++i) - if (optlist[i] == optval) - return i; - - return 0; -} - -/* Reset adapter's CPU. */ -static int reset_cyc2x(void __iomem *addr) -{ - writeb(0, addr + RST_ENABLE); - msleep_interruptible(2 * 1000); - writeb(0, addr + RST_DISABLE); - msleep_interruptible(2 * 1000); - - return memory_exists(addr); -} - -/* Calculate 16-bit CRC using CCITT polynomial. */ -static u16 checksum(u8 *buf, u32 len) -{ - u16 crc = 0; - u16 mask, flag; - - for (; len; --len, ++buf) - for (mask = 0x80; mask; mask >>= 1) { - flag = (crc & 0x8000); - crc <<= 1; - crc |= ((*buf & mask) ? 1 : 0); - - if (flag) - crc ^= 0x1021; - } - - return crc; -} - -module_init(cycx_drv_init); -module_exit(cycx_drv_cleanup); - -/* End */ diff --git a/drivers/net/wan/cycx_main.c b/drivers/net/wan/cycx_main.c deleted file mode 100644 index 81fbbad406be..000000000000 --- a/drivers/net/wan/cycx_main.c +++ /dev/null @@ -1,346 +0,0 @@ -/* -* cycx_main.c Cyclades Cyclom 2X WAN Link Driver. Main module. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdlamain.c by Gene Kozin & -* Jaspreet Singh -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. 
-* ============================================================================ -* Please look at the bitkeeper changelog (or any other scm tool that ends up -* importing bitkeeper changelog or that replaces bitkeeper in the future as -* main tool for linux development). -* -* 2001/05/09 acme Fix MODULE_DESC for debug, .bss nitpicks, -* some cleanups -* 2000/07/13 acme remove useless #ifdef MODULE and crap -* #if KERNEL_VERSION > blah -* 2000/07/06 acme __exit at cyclomx_cleanup -* 2000/04/02 acme dprintk and cycx_debug -* module_init/module_exit -* 2000/01/21 acme rename cyclomx_open to cyclomx_mod_inc_use_count -* and cyclomx_close to cyclomx_mod_dec_use_count -* 2000/01/08 acme cleanup -* 1999/11/06 acme cycx_down back to life (it needs to be -* called to iounmap the dpmbase) -* 1999/08/09 acme removed references to enable_tx_int -* use spinlocks instead of cli/sti in -* cyclomx_set_state -* 1999/05/19 acme works directly linked into the kernel -* init_waitqueue_head for 2.3.* kernel -* 1999/05/18 acme major cleanup (polling not needed), etc -* 1998/08/28 acme minor cleanup (ioctls for firmware deleted) -* queue_task activated -* 1998/08/08 acme Initial version. -*/ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include /* offsetof(), etc. */ -#include /* return codes */ -#include /* inline memset(), etc. 
*/ -#include /* kmalloc(), kfree() */ -#include /* printk(), and other useful stuff */ -#include /* support for loadable modules */ -#include /* request_region(), release_region() */ -#include /* WAN router definitions */ -#include /* cyclomx common user API definitions */ -#include /* __init (when not using as a module) */ -#include - -unsigned int cycx_debug; - -MODULE_AUTHOR("Arnaldo Carvalho de Melo"); -MODULE_DESCRIPTION("Cyclom 2X Sync Card Driver."); -MODULE_LICENSE("GPL"); -module_param(cycx_debug, int, 0); -MODULE_PARM_DESC(cycx_debug, "cyclomx debug level"); - -/* Defines & Macros */ - -#define CYCX_DRV_VERSION 0 /* version number */ -#define CYCX_DRV_RELEASE 11 /* release (minor version) number */ -#define CYCX_MAX_CARDS 1 /* max number of adapters */ - -#define CONFIG_CYCX_CARDS 1 - -/* Function Prototypes */ - -/* WAN link driver entry points */ -static int cycx_wan_setup(struct wan_device *wandev, wandev_conf_t *conf); -static int cycx_wan_shutdown(struct wan_device *wandev); - -/* Miscellaneous functions */ -static irqreturn_t cycx_isr(int irq, void *dev_id); - -/* Global Data - * Note: All data must be explicitly initialized!!! - */ - -/* private data */ -static const char cycx_drvname[] = "cyclomx"; -static const char cycx_fullname[] = "CYCLOM 2X(tm) Sync Card Driver"; -static const char cycx_copyright[] = "(c) 1998-2003 Arnaldo Carvalho de Melo " - ""; -static int cycx_ncards = CONFIG_CYCX_CARDS; -static struct cycx_device *cycx_card_array; /* adapter data space */ - -/* Kernel Loadable Module Entry Points */ - -/* - * Module 'insert' entry point. - * o print announcement - * o allocate adapter data space - * o initialize static data - * o register all cards with WAN router - * o calibrate Cyclom 2X shared memory access delay. - * - * Return: 0 Ok - * < 0 error. 
- * Context: process - */ -static int __init cycx_init(void) -{ - int cnt, err = -ENOMEM; - - pr_info("%s v%u.%u %s\n", - cycx_fullname, CYCX_DRV_VERSION, CYCX_DRV_RELEASE, - cycx_copyright); - - /* Verify number of cards and allocate adapter data space */ - cycx_ncards = min_t(int, cycx_ncards, CYCX_MAX_CARDS); - cycx_ncards = max_t(int, cycx_ncards, 1); - cycx_card_array = kcalloc(cycx_ncards, sizeof(struct cycx_device), GFP_KERNEL); - if (!cycx_card_array) - goto out; - - - /* Register adapters with WAN router */ - for (cnt = 0; cnt < cycx_ncards; ++cnt) { - struct cycx_device *card = &cycx_card_array[cnt]; - struct wan_device *wandev = &card->wandev; - - sprintf(card->devname, "%s%d", cycx_drvname, cnt + 1); - wandev->magic = ROUTER_MAGIC; - wandev->name = card->devname; - wandev->private = card; - wandev->setup = cycx_wan_setup; - wandev->shutdown = cycx_wan_shutdown; - err = register_wan_device(wandev); - - if (err) { - pr_err("%s registration failed with error %d!\n", - card->devname, err); - break; - } - } - - err = -ENODEV; - if (!cnt) { - kfree(cycx_card_array); - goto out; - } - err = 0; - cycx_ncards = cnt; /* adjust actual number of cards */ -out: return err; -} - -/* - * Module 'remove' entry point. - * o unregister all adapters from the WAN router - * o release all remaining system resources - */ -static void __exit cycx_exit(void) -{ - int i = 0; - - for (; i < cycx_ncards; ++i) { - struct cycx_device *card = &cycx_card_array[i]; - unregister_wan_device(card->devname); - } - - kfree(cycx_card_array); -} - -/* WAN Device Driver Entry Points */ -/* - * Setup/configure WAN link driver. - * o check adapter state - * o make sure firmware is present in configuration - * o allocate interrupt vector - * o setup Cyclom 2X hardware - * o call appropriate routine to perform protocol-specific initialization - * - * This function is called when router handles ROUTER_SETUP IOCTL. 
The - * configuration structure is in kernel memory (including extended data, if - * any). - */ -static int cycx_wan_setup(struct wan_device *wandev, wandev_conf_t *conf) -{ - int rc = -EFAULT; - struct cycx_device *card; - int irq; - - /* Sanity checks */ - - if (!wandev || !wandev->private || !conf) - goto out; - - card = wandev->private; - rc = -EBUSY; - if (wandev->state != WAN_UNCONFIGURED) - goto out; - - rc = -EINVAL; - if (!conf->data_size || !conf->data) { - pr_err("%s: firmware not found in configuration data!\n", - wandev->name); - goto out; - } - - if (conf->irq <= 0) { - pr_err("%s: can't configure without IRQ!\n", wandev->name); - goto out; - } - - /* Allocate IRQ */ - irq = conf->irq == 2 ? 9 : conf->irq; /* IRQ2 -> IRQ9 */ - - if (request_irq(irq, cycx_isr, 0, wandev->name, card)) { - pr_err("%s: can't reserve IRQ %d!\n", wandev->name, irq); - goto out; - } - - /* Configure hardware, load firmware, etc. */ - memset(&card->hw, 0, sizeof(card->hw)); - card->hw.irq = irq; - card->hw.dpmsize = CYCX_WINDOWSIZE; - card->hw.fwid = CFID_X25_2X; - spin_lock_init(&card->lock); - init_waitqueue_head(&card->wait_stats); - - rc = cycx_setup(&card->hw, conf->data, conf->data_size, conf->maddr); - if (rc) - goto out_irq; - - /* Initialize WAN device data space */ - wandev->irq = irq; - wandev->dma = wandev->ioport = 0; - wandev->maddr = (unsigned long)card->hw.dpmbase; - wandev->msize = card->hw.dpmsize; - wandev->hw_opt[2] = 0; - wandev->hw_opt[3] = card->hw.fwid; - - /* Protocol-specific initialization */ - switch (card->hw.fwid) { -#ifdef CONFIG_CYCLOMX_X25 - case CFID_X25_2X: - rc = cycx_x25_wan_init(card, conf); - break; -#endif - default: - pr_err("%s: this firmware is not supported!\n", wandev->name); - rc = -EINVAL; - } - - if (rc) { - cycx_down(&card->hw); - goto out_irq; - } - - rc = 0; -out: - return rc; -out_irq: - free_irq(irq, card); - goto out; -} - -/* - * Shut down WAN link driver. - * o shut down adapter hardware - * o release system resources. 
- * - * This function is called by the router when device is being unregistered or - * when it handles ROUTER_DOWN IOCTL. - */ -static int cycx_wan_shutdown(struct wan_device *wandev) -{ - int ret = -EFAULT; - struct cycx_device *card; - - /* sanity checks */ - if (!wandev || !wandev->private) - goto out; - - ret = 0; - if (wandev->state == WAN_UNCONFIGURED) - goto out; - - card = wandev->private; - wandev->state = WAN_UNCONFIGURED; - cycx_down(&card->hw); - pr_info("%s: irq %d being freed!\n", wandev->name, wandev->irq); - free_irq(wandev->irq, card); -out: return ret; -} - -/* Miscellaneous */ -/* - * Cyclom 2X Interrupt Service Routine. - * o acknowledge Cyclom 2X hardware interrupt. - * o call protocol-specific interrupt service routine, if any. - */ -static irqreturn_t cycx_isr(int irq, void *dev_id) -{ - struct cycx_device *card = dev_id; - - if (card->wandev.state == WAN_UNCONFIGURED) - goto out; - - if (card->in_isr) { - pr_warn("%s: interrupt re-entrancy on IRQ %d!\n", - card->devname, card->wandev.irq); - goto out; - } - - if (card->isr) - card->isr(card); - return IRQ_HANDLED; -out: - return IRQ_NONE; -} - -/* Set WAN device state. */ -void cycx_set_state(struct cycx_device *card, int state) -{ - unsigned long flags; - char *string_state = NULL; - - spin_lock_irqsave(&card->lock, flags); - - if (card->wandev.state != state) { - switch (state) { - case WAN_CONNECTED: - string_state = "connected!"; - break; - case WAN_DISCONNECTED: - string_state = "disconnected!"; - break; - } - pr_info("%s: link %s\n", card->devname, string_state); - card->wandev.state = state; - } - - card->state_tick = jiffies; - spin_unlock_irqrestore(&card->lock, flags); -} - -module_init(cycx_init); -module_exit(cycx_exit); diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c deleted file mode 100644 index 06f3f6309e4b..000000000000 --- a/drivers/net/wan/cycx_x25.c +++ /dev/null @@ -1,1602 +0,0 @@ -/* -* cycx_x25.c Cyclom 2X WAN Link Driver. X.25 module. 
-* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdla_x25.c by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 2001/01/12 acme use dev_kfree_skb_irq on interrupt context -* 2000/04/02 acme dprintk, cycx_debug -* fixed the bug introduced in get_dev_by_lcn and -* get_dev_by_dte_addr by the anonymous hacker -* that converted this driver to softnet -* 2000/01/08 acme cleanup -* 1999/10/27 acme use ARPHRD_HWX25 so that the X.25 stack know -* that we have a X.25 stack implemented in -* firmware onboard -* 1999/10/18 acme support for X.25 sockets in if_send, -* beware: socket(AF_X25...) IS WORK IN PROGRESS, -* TCP/IP over X.25 via wanrouter not affected, -* working. -* 1999/10/09 acme chan_disc renamed to chan_disconnect, -* began adding support for X.25 sockets: -* conf->protocol in new_if -* 1999/10/05 acme fixed return E... to return -E... -* 1999/08/10 acme serialized access to the card thru a spinlock -* in x25_exec -* 1999/08/09 acme removed per channel spinlocks -* removed references to enable_tx_int -* 1999/05/28 acme fixed nibble_to_byte, ackvc now properly treated -* if_send simplified -* 1999/05/25 acme fixed t1, t2, t21 & t23 configuration -* use spinlocks instead of cli/sti in some points -* 1999/05/24 acme finished the x25_get_stat function -* 1999/05/23 acme dev->type = ARPHRD_X25 (tcpdump only works, -* AFAIT, with ARPHRD_ETHER). This seems to be -* needed to use socket(AF_X25)... -* Now the config file must specify a peer media -* address for svc channels over a crossover cable. -* Removed hold_timeout from x25_channel_t, -* not used. 
-* A little enhancement in the DEBUG processing -* 1999/05/22 acme go to DISCONNECTED in disconnect_confirm_intr, -* instead of chan_disc. -* 1999/05/16 marcelo fixed timer initialization in SVCs -* 1999/01/05 acme x25_configure now get (most of) all -* parameters... -* 1999/01/05 acme pktlen now (correctly) uses log2 (value -* configured) -* 1999/01/03 acme judicious use of data types (u8, u16, u32, etc) -* 1999/01/03 acme cyx_isr: reset dpmbase to acknowledge -* indication (interrupt from cyclom 2x) -* 1999/01/02 acme cyx_isr: first hackings... -* 1999/01/0203 acme when initializing an array don't give less -* elements than declared... -* example: char send_cmd[6] = "?\xFF\x10"; -* you'll gonna lose a couple hours, 'cause your -* brain won't admit that there's an error in the -* above declaration... the side effect is that -* memset is put into the unresolved symbols -* instead of using the inline memset functions... -* 1999/01/02 acme began chan_connect, chan_send, x25_send -* 1998/12/31 acme x25_configure -* this code can be compiled as non module -* 1998/12/27 acme code cleanup -* IPX code wiped out! let's decrease code -* complexity for now, remember: I'm learning! :) -* bps_to_speed_code OK -* 1998/12/26 acme Minimal debug code cleanup -* 1998/08/08 acme Initial version. -*/ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#define CYCLOMX_X25_DEBUG 1 - -#include /* isdigit() */ -#include /* return codes */ -#include /* ARPHRD_HWX25 */ -#include /* printk(), and other useful stuff */ -#include -#include /* inline memset(), etc. */ -#include -#include /* kmalloc(), kfree() */ -#include /* offsetof(), etc. */ -#include /* WAN router definitions */ - -#include /* htons(), etc. 
*/ - -#include /* Cyclom 2X common user API definitions */ -#include /* X.25 firmware API definitions */ - -#include - -/* Defines & Macros */ -#define CYCX_X25_MAX_CMD_RETRY 5 -#define CYCX_X25_CHAN_MTU 2048 /* unfragmented logical channel MTU */ - -/* Data Structures */ -/* This is an extension of the 'struct net_device' we create for each network - interface to keep the rest of X.25 channel-specific data. */ -struct cycx_x25_channel { - /* This member must be first. */ - struct net_device *slave; /* WAN slave */ - - char name[WAN_IFNAME_SZ+1]; /* interface name, ASCIIZ */ - char addr[WAN_ADDRESS_SZ+1]; /* media address, ASCIIZ */ - char *local_addr; /* local media address, ASCIIZ - - svc thru crossover cable */ - s16 lcn; /* logical channel number/conn.req.key*/ - u8 link; - struct timer_list timer; /* timer used for svc channel disc. */ - u16 protocol; /* ethertype, 0 - multiplexed */ - u8 svc; /* 0 - permanent, 1 - switched */ - u8 state; /* channel state */ - u8 drop_sequence; /* mark sequence for dropping */ - u32 idle_tmout; /* sec, before disconnecting */ - struct sk_buff *rx_skb; /* receive socket buffer */ - struct cycx_device *card; /* -> owner */ - struct net_device_stats ifstats;/* interface statistics */ -}; - -/* Function Prototypes */ -/* WAN link driver entry points. These are called by the WAN router module. 
*/ -static int cycx_wan_update(struct wan_device *wandev), - cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev, - wanif_conf_t *conf), - cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev); - -/* Network device interface */ -static int cycx_netdevice_init(struct net_device *dev); -static int cycx_netdevice_open(struct net_device *dev); -static int cycx_netdevice_stop(struct net_device *dev); -static int cycx_netdevice_hard_header(struct sk_buff *skb, - struct net_device *dev, u16 type, - const void *daddr, const void *saddr, - unsigned len); -static int cycx_netdevice_rebuild_header(struct sk_buff *skb); -static netdev_tx_t cycx_netdevice_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev); - -static struct net_device_stats * - cycx_netdevice_get_stats(struct net_device *dev); - -/* Interrupt handlers */ -static void cycx_x25_irq_handler(struct cycx_device *card), - cycx_x25_irq_tx(struct cycx_device *card, struct cycx_x25_cmd *cmd), - cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd), - cycx_x25_irq_log(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_stat(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_connect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_disconnect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_connect(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_disconnect(struct cycx_device *card, - struct cycx_x25_cmd *cmd), - cycx_x25_irq_spurious(struct cycx_device *card, - struct cycx_x25_cmd *cmd); - -/* X.25 firmware interface functions */ -static int cycx_x25_configure(struct cycx_device *card, - struct cycx_x25_config *conf), - cycx_x25_get_stats(struct cycx_device *card), - cycx_x25_send(struct cycx_device *card, u8 link, u8 lcn, u8 bitm, - int len, void *buf), - cycx_x25_connect_response(struct cycx_device *card, - struct cycx_x25_channel *chan), - 
cycx_x25_disconnect_response(struct cycx_device *card, u8 link, - u8 lcn); - -/* channel functions */ -static int cycx_x25_chan_connect(struct net_device *dev), - cycx_x25_chan_send(struct net_device *dev, struct sk_buff *skb); - -static void cycx_x25_chan_disconnect(struct net_device *dev), - cycx_x25_chan_send_event(struct net_device *dev, u8 event); - -/* Miscellaneous functions */ -static void cycx_x25_set_chan_state(struct net_device *dev, u8 state), - cycx_x25_chan_timer(unsigned long d); - -static void nibble_to_byte(u8 *s, u8 *d, u8 len, u8 nibble), - reset_timer(struct net_device *dev); - -static u8 bps_to_speed_code(u32 bps); -static u8 cycx_log2(u32 n); - -static unsigned dec_to_uint(u8 *str, int len); - -static struct net_device *cycx_x25_get_dev_by_lcn(struct wan_device *wandev, - s16 lcn); -static struct net_device * - cycx_x25_get_dev_by_dte_addr(struct wan_device *wandev, char *dte); - -static void cycx_x25_chan_setup(struct net_device *dev); - -#ifdef CYCLOMX_X25_DEBUG -static void hex_dump(char *msg, unsigned char *p, int len); -static void cycx_x25_dump_config(struct cycx_x25_config *conf); -static void cycx_x25_dump_stats(struct cycx_x25_stats *stats); -static void cycx_x25_dump_devs(struct wan_device *wandev); -#else -#define hex_dump(msg, p, len) -#define cycx_x25_dump_config(conf) -#define cycx_x25_dump_stats(stats) -#define cycx_x25_dump_devs(wandev) -#endif -/* Public Functions */ - -/* X.25 Protocol Initialization routine. - * - * This routine is called by the main Cyclom 2X module during setup. At this - * point adapter is completely initialized and X.25 firmware is running. - * o configure adapter - * o initialize protocol-specific fields of the adapter data space. - * - * Return: 0 o.k. - * < 0 failure. 
*/ -int cycx_x25_wan_init(struct cycx_device *card, wandev_conf_t *conf) -{ - struct cycx_x25_config cfg; - - /* Verify configuration ID */ - if (conf->config_id != WANCONFIG_X25) { - pr_info("%s: invalid configuration ID %u!\n", - card->devname, conf->config_id); - return -EINVAL; - } - - /* Initialize protocol-specific fields */ - card->mbox = card->hw.dpmbase + X25_MBOX_OFFS; - card->u.x.connection_keys = 0; - spin_lock_init(&card->u.x.lock); - - /* Configure adapter. Here we set reasonable defaults, then parse - * device configuration structure and set configuration options. - * Most configuration options are verified and corrected (if - * necessary) since we can't rely on the adapter to do so and don't - * want it to fail either. */ - memset(&cfg, 0, sizeof(cfg)); - cfg.link = 0; - cfg.clock = conf->clocking == WANOPT_EXTERNAL ? 8 : 55; - cfg.speed = bps_to_speed_code(conf->bps); - cfg.n3win = 7; - cfg.n2win = 2; - cfg.n2 = 5; - cfg.nvc = 1; - cfg.npvc = 1; - cfg.flags = 0x02; /* default = V35 */ - cfg.t1 = 10; /* line carrier timeout */ - cfg.t2 = 29; /* tx timeout */ - cfg.t21 = 180; /* CALL timeout */ - cfg.t23 = 180; /* CLEAR timeout */ - - /* adjust MTU */ - if (!conf->mtu || conf->mtu >= 512) - card->wandev.mtu = 512; - else if (conf->mtu >= 256) - card->wandev.mtu = 256; - else if (conf->mtu >= 128) - card->wandev.mtu = 128; - else - card->wandev.mtu = 64; - - cfg.pktlen = cycx_log2(card->wandev.mtu); - - if (conf->station == WANOPT_DTE) { - cfg.locaddr = 3; /* DTE */ - cfg.remaddr = 1; /* DCE */ - } else { - cfg.locaddr = 1; /* DCE */ - cfg.remaddr = 3; /* DTE */ - } - - if (conf->interface == WANOPT_RS232) - cfg.flags = 0; /* FIXME just reset the 2nd bit */ - - if (conf->u.x25.hi_pvc) { - card->u.x.hi_pvc = min_t(unsigned int, conf->u.x25.hi_pvc, 4095); - card->u.x.lo_pvc = min_t(unsigned int, conf->u.x25.lo_pvc, card->u.x.hi_pvc); - } - - if (conf->u.x25.hi_svc) { - card->u.x.hi_svc = min_t(unsigned int, conf->u.x25.hi_svc, 4095); - card->u.x.lo_svc 
= min_t(unsigned int, conf->u.x25.lo_svc, card->u.x.hi_svc); - } - - if (card->u.x.lo_pvc == 255) - cfg.npvc = 0; - else - cfg.npvc = card->u.x.hi_pvc - card->u.x.lo_pvc + 1; - - cfg.nvc = card->u.x.hi_svc - card->u.x.lo_svc + 1 + cfg.npvc; - - if (conf->u.x25.hdlc_window) - cfg.n2win = min_t(unsigned int, conf->u.x25.hdlc_window, 7); - - if (conf->u.x25.pkt_window) - cfg.n3win = min_t(unsigned int, conf->u.x25.pkt_window, 7); - - if (conf->u.x25.t1) - cfg.t1 = min_t(unsigned int, conf->u.x25.t1, 30); - - if (conf->u.x25.t2) - cfg.t2 = min_t(unsigned int, conf->u.x25.t2, 30); - - if (conf->u.x25.t11_t21) - cfg.t21 = min_t(unsigned int, conf->u.x25.t11_t21, 30); - - if (conf->u.x25.t13_t23) - cfg.t23 = min_t(unsigned int, conf->u.x25.t13_t23, 30); - - if (conf->u.x25.n2) - cfg.n2 = min_t(unsigned int, conf->u.x25.n2, 30); - - /* initialize adapter */ - if (cycx_x25_configure(card, &cfg)) - return -EIO; - - /* Initialize protocol-specific fields of adapter data space */ - card->wandev.bps = conf->bps; - card->wandev.interface = conf->interface; - card->wandev.clocking = conf->clocking; - card->wandev.station = conf->station; - card->isr = cycx_x25_irq_handler; - card->exec = NULL; - card->wandev.update = cycx_wan_update; - card->wandev.new_if = cycx_wan_new_if; - card->wandev.del_if = cycx_wan_del_if; - card->wandev.state = WAN_DISCONNECTED; - - return 0; -} - -/* WAN Device Driver Entry Points */ -/* Update device status & statistics. */ -static int cycx_wan_update(struct wan_device *wandev) -{ - /* sanity checks */ - if (!wandev || !wandev->private) - return -EFAULT; - - if (wandev->state == WAN_UNCONFIGURED) - return -ENODEV; - - cycx_x25_get_stats(wandev->private); - - return 0; -} - -/* Create new logical channel. - * This routine is called by the router when ROUTER_IFNEW IOCTL is being - * handled. 
- * o parse media- and hardware-specific configuration - * o make sure that a new channel can be created - * o allocate resources, if necessary - * o prepare network device structure for registration. - * - * Return: 0 o.k. - * < 0 failure (channel will not be created) */ -static int cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev, - wanif_conf_t *conf) -{ - struct cycx_device *card = wandev->private; - struct cycx_x25_channel *chan; - int err = 0; - - if (!conf->name[0] || strlen(conf->name) > WAN_IFNAME_SZ) { - pr_info("%s: invalid interface name!\n", card->devname); - return -EINVAL; - } - - dev = alloc_netdev(sizeof(struct cycx_x25_channel), conf->name, - cycx_x25_chan_setup); - if (!dev) - return -ENOMEM; - - chan = netdev_priv(dev); - strcpy(chan->name, conf->name); - chan->card = card; - chan->link = conf->port; - chan->protocol = conf->protocol ? ETH_P_X25 : ETH_P_IP; - chan->rx_skb = NULL; - /* only used in svc connected thru crossover cable */ - chan->local_addr = NULL; - - if (conf->addr[0] == '@') { /* SVC */ - int len = strlen(conf->local_addr); - - if (len) { - if (len > WAN_ADDRESS_SZ) { - pr_err("%s: %s local addr too long!\n", - wandev->name, chan->name); - err = -EINVAL; - goto error; - } else { - chan->local_addr = kmalloc(len + 1, GFP_KERNEL); - - if (!chan->local_addr) { - err = -ENOMEM; - goto error; - } - } - - strncpy(chan->local_addr, conf->local_addr, - WAN_ADDRESS_SZ); - } - - chan->svc = 1; - strncpy(chan->addr, &conf->addr[1], WAN_ADDRESS_SZ); - init_timer(&chan->timer); - chan->timer.function = cycx_x25_chan_timer; - chan->timer.data = (unsigned long)dev; - - /* Set channel timeouts (default if not specified) */ - chan->idle_tmout = conf->idle_timeout ? 
conf->idle_timeout : 90; - } else if (isdigit(conf->addr[0])) { /* PVC */ - s16 lcn = dec_to_uint(conf->addr, 0); - - if (lcn >= card->u.x.lo_pvc && lcn <= card->u.x.hi_pvc) - chan->lcn = lcn; - else { - pr_err("%s: PVC %u is out of range on interface %s!\n", - wandev->name, lcn, chan->name); - err = -EINVAL; - goto error; - } - } else { - pr_err("%s: invalid media address on interface %s!\n", - wandev->name, chan->name); - err = -EINVAL; - goto error; - } - - return 0; - -error: - free_netdev(dev); - return err; -} - -/* Delete logical channel. */ -static int cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->svc) { - kfree(chan->local_addr); - if (chan->state == WAN_CONNECTED) - del_timer(&chan->timer); - } - - return 0; -} - - -/* Network Device Interface */ - -static const struct header_ops cycx_header_ops = { - .create = cycx_netdevice_hard_header, - .rebuild = cycx_netdevice_rebuild_header, -}; - -static const struct net_device_ops cycx_netdev_ops = { - .ndo_init = cycx_netdevice_init, - .ndo_open = cycx_netdevice_open, - .ndo_stop = cycx_netdevice_stop, - .ndo_start_xmit = cycx_netdevice_hard_start_xmit, - .ndo_get_stats = cycx_netdevice_get_stats, -}; - -static void cycx_x25_chan_setup(struct net_device *dev) -{ - /* Initialize device driver entry points */ - dev->netdev_ops = &cycx_netdev_ops; - dev->header_ops = &cycx_header_ops; - - /* Initialize media-specific parameters */ - dev->mtu = CYCX_X25_CHAN_MTU; - dev->type = ARPHRD_HWX25; /* ARP h/w type */ - dev->hard_header_len = 0; /* media header length */ - dev->addr_len = 0; /* hardware address length */ -} - -/* Initialize Linux network interface. - * - * This routine is called only once for each interface, during Linux network - * interface registration. Returning anything but zero will fail interface - * registration. 
*/ -static int cycx_netdevice_init(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - struct wan_device *wandev = &card->wandev; - - if (!chan->svc) - *(__be16*)dev->dev_addr = htons(chan->lcn); - - /* Initialize hardware parameters (just for reference) */ - dev->irq = wandev->irq; - dev->dma = wandev->dma; - dev->base_addr = wandev->ioport; - dev->mem_start = (unsigned long)wandev->maddr; - dev->mem_end = (unsigned long)(wandev->maddr + - wandev->msize - 1); - dev->flags |= IFF_NOARP; - - /* Set transmit buffer queue length */ - dev->tx_queue_len = 10; - - /* Initialize socket buffers */ - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); - - return 0; -} - -/* Open network interface. - * o prevent module from unloading by incrementing use count - * o if link is disconnected then initiate connection - * - * Return 0 if O.k. or errno. */ -static int cycx_netdevice_open(struct net_device *dev) -{ - if (netif_running(dev)) - return -EBUSY; /* only one open is allowed */ - - netif_start_queue(dev); - return 0; -} - -/* Close network interface. - * o reset flags. - * o if there's no more open channels then disconnect physical link. */ -static int cycx_netdevice_stop(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - netif_stop_queue(dev); - - if (chan->state == WAN_CONNECTED || chan->state == WAN_CONNECTING) - cycx_x25_chan_disconnect(dev); - - return 0; -} - -/* Build media header. - * o encapsulate packet according to encapsulation type. - * - * The trick here is to put packet type (Ethertype) into 'protocol' field of - * the socket buffer, so that we don't forget it. If encapsulation fails, - * set skb->protocol to 0 and discard packet later. - * - * Return: media header length. 
*/ -static int cycx_netdevice_hard_header(struct sk_buff *skb, - struct net_device *dev, u16 type, - const void *daddr, const void *saddr, - unsigned len) -{ - skb->protocol = htons(type); - - return dev->hard_header_len; -} - -/* * Re-build media header. - * Return: 1 physical address resolved. - * 0 physical address not resolved */ -static int cycx_netdevice_rebuild_header(struct sk_buff *skb) -{ - return 1; -} - -/* Send a packet on a network interface. - * o set busy flag (marks start of the transmission). - * o check link state. If link is not up, then drop the packet. - * o check channel status. If it's down then initiate a call. - * o pass a packet to corresponding WAN device. - * o free socket buffer - * - * Return: 0 complete (socket buffer must be freed) - * non-0 packet may be re-transmitted (tbusy must be set) - * - * Notes: - * 1. This routine is called either by the protocol stack or by the "net - * bottom half" (with interrupts enabled). - * 2. Setting tbusy flag will inhibit further transmit requests from the - * protocol stack and can be used for flow control with protocol layer. 
*/ -static netdev_tx_t cycx_netdevice_hard_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - - if (!chan->svc) - chan->protocol = ntohs(skb->protocol); - - if (card->wandev.state != WAN_CONNECTED) - ++chan->ifstats.tx_dropped; - else if (chan->svc && chan->protocol && - chan->protocol != ntohs(skb->protocol)) { - pr_info("%s: unsupported Ethertype 0x%04X on interface %s!\n", - card->devname, ntohs(skb->protocol), dev->name); - ++chan->ifstats.tx_errors; - } else if (chan->protocol == ETH_P_IP) { - switch (chan->state) { - case WAN_DISCONNECTED: - if (cycx_x25_chan_connect(dev)) { - netif_stop_queue(dev); - return NETDEV_TX_BUSY; - } - /* fall thru */ - case WAN_CONNECTED: - reset_timer(dev); - dev->trans_start = jiffies; - netif_stop_queue(dev); - - if (cycx_x25_chan_send(dev, skb)) - return NETDEV_TX_BUSY; - - break; - default: - ++chan->ifstats.tx_dropped; - ++card->wandev.stats.tx_dropped; - } - } else { /* chan->protocol == ETH_P_X25 */ - switch (skb->data[0]) { - case X25_IFACE_DATA: - break; - case X25_IFACE_CONNECT: - cycx_x25_chan_connect(dev); - goto free_packet; - case X25_IFACE_DISCONNECT: - cycx_x25_chan_disconnect(dev); - goto free_packet; - default: - pr_info("%s: unknown %d x25-iface request on %s!\n", - card->devname, skb->data[0], dev->name); - ++chan->ifstats.tx_errors; - goto free_packet; - } - - skb_pull(skb, 1); /* Remove control byte */ - reset_timer(dev); - dev->trans_start = jiffies; - netif_stop_queue(dev); - - if (cycx_x25_chan_send(dev, skb)) { - /* prepare for future retransmissions */ - skb_push(skb, 1); - return NETDEV_TX_BUSY; - } - } - -free_packet: - dev_kfree_skb(skb); - - return NETDEV_TX_OK; -} - -/* Get Ethernet-style interface statistics. 
- * Return a pointer to struct net_device_stats */ -static struct net_device_stats *cycx_netdevice_get_stats(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - return chan ? &chan->ifstats : NULL; -} - -/* Interrupt Handlers */ -/* X.25 Interrupt Service Routine. */ -static void cycx_x25_irq_handler(struct cycx_device *card) -{ - struct cycx_x25_cmd cmd; - u16 z = 0; - - card->in_isr = 1; - card->buff_int_mode_unbusy = 0; - cycx_peek(&card->hw, X25_RXMBOX_OFFS, &cmd, sizeof(cmd)); - - switch (cmd.command) { - case X25_DATA_INDICATION: - cycx_x25_irq_rx(card, &cmd); - break; - case X25_ACK_FROM_VC: - cycx_x25_irq_tx(card, &cmd); - break; - case X25_LOG: - cycx_x25_irq_log(card, &cmd); - break; - case X25_STATISTIC: - cycx_x25_irq_stat(card, &cmd); - break; - case X25_CONNECT_CONFIRM: - cycx_x25_irq_connect_confirm(card, &cmd); - break; - case X25_CONNECT_INDICATION: - cycx_x25_irq_connect(card, &cmd); - break; - case X25_DISCONNECT_INDICATION: - cycx_x25_irq_disconnect(card, &cmd); - break; - case X25_DISCONNECT_CONFIRM: - cycx_x25_irq_disconnect_confirm(card, &cmd); - break; - case X25_LINE_ON: - cycx_set_state(card, WAN_CONNECTED); - break; - case X25_LINE_OFF: - cycx_set_state(card, WAN_DISCONNECTED); - break; - default: - cycx_x25_irq_spurious(card, &cmd); - break; - } - - cycx_poke(&card->hw, 0, &z, sizeof(z)); - cycx_poke(&card->hw, X25_RXMBOX_OFFS, &z, sizeof(z)); - card->in_isr = 0; -} - -/* Transmit interrupt handler. 
- * o Release socket buffer - * o Clear 'tbusy' flag */ -static void cycx_x25_irq_tx(struct cycx_device *card, struct cycx_x25_cmd *cmd) -{ - struct net_device *dev; - struct wan_device *wandev = &card->wandev; - u8 lcn; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - - /* unbusy device and then dev_tint(); */ - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (dev) { - card->buff_int_mode_unbusy = 1; - netif_wake_queue(dev); - } else - pr_err("%s:ackvc for inexistent lcn %d\n", card->devname, lcn); -} - -/* Receive interrupt handler. - * This routine handles fragmented IP packets using M-bit according to the - * RFC1356. - * o map logical channel number to network interface. - * o allocate socket buffer or append received packet to the existing one. - * o if M-bit is reset (i.e. it's the last packet in a sequence) then - * decapsulate packet and pass socket buffer to the protocol stack. - * - * Notes: - * 1. When allocating a socket buffer, if M-bit is set then more data is - * coming and we have to allocate buffer for the maximum IP packet size - * expected on this channel. - * 2. If something goes wrong and X.25 packet has to be dropped (e.g. no - * socket buffers available) the whole packet sequence must be discarded. 
*/ -static void cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - struct cycx_x25_channel *chan; - struct sk_buff *skb; - u8 bitm, lcn; - int pktlen = cmd->len - 5; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - cycx_peek(&card->hw, cmd->buf + 4, &bitm, sizeof(bitm)); - bitm &= 0x10; - - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (!dev) { - /* Invalid channel, discard packet */ - pr_info("%s: receiving on orphaned LCN %d!\n", - card->devname, lcn); - return; - } - - chan = netdev_priv(dev); - reset_timer(dev); - - if (chan->drop_sequence) { - if (!bitm) - chan->drop_sequence = 0; - else - return; - } - - if ((skb = chan->rx_skb) == NULL) { - /* Allocate new socket buffer */ - int bufsize = bitm ? dev->mtu : pktlen; - - if ((skb = dev_alloc_skb((chan->protocol == ETH_P_X25 ? 1 : 0) + - bufsize + - dev->hard_header_len)) == NULL) { - pr_info("%s: no socket buffers available!\n", - card->devname); - chan->drop_sequence = 1; - ++chan->ifstats.rx_dropped; - return; - } - - if (chan->protocol == ETH_P_X25) /* X.25 socket layer control */ - /* 0 = data packet (dev_alloc_skb zeroed skb->data) */ - skb_put(skb, 1); - - skb->dev = dev; - skb->protocol = htons(chan->protocol); - chan->rx_skb = skb; - } - - if (skb_tailroom(skb) < pktlen) { - /* No room for the packet. Call off the whole thing! 
*/ - dev_kfree_skb_irq(skb); - chan->rx_skb = NULL; - - if (bitm) - chan->drop_sequence = 1; - - pr_info("%s: unexpectedly long packet sequence on interface %s!\n", - card->devname, dev->name); - ++chan->ifstats.rx_length_errors; - return; - } - - /* Append packet to the socket buffer */ - cycx_peek(&card->hw, cmd->buf + 5, skb_put(skb, pktlen), pktlen); - - if (bitm) - return; /* more data is coming */ - - chan->rx_skb = NULL; /* dequeue packet */ - - ++chan->ifstats.rx_packets; - chan->ifstats.rx_bytes += pktlen; - - skb_reset_mac_header(skb); - netif_rx(skb); -} - -/* Connect interrupt handler. */ -static void cycx_x25_irq_connect(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev = NULL; - struct cycx_x25_channel *chan; - u8 d[32], - loc[24], - rem[24]; - u8 lcn, sizeloc, sizerem; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - cycx_peek(&card->hw, cmd->buf + 5, &sizeloc, sizeof(sizeloc)); - cycx_peek(&card->hw, cmd->buf + 6, d, cmd->len - 6); - - sizerem = sizeloc >> 4; - sizeloc &= 0x0F; - - loc[0] = rem[0] = '\0'; - - if (sizeloc) - nibble_to_byte(d, loc, sizeloc, 0); - - if (sizerem) - nibble_to_byte(d + (sizeloc >> 1), rem, sizerem, sizeloc & 1); - - dprintk(1, KERN_INFO "%s:lcn=%d, local=%s, remote=%s\n", - __func__, lcn, loc, rem); - - dev = cycx_x25_get_dev_by_dte_addr(wandev, rem); - if (!dev) { - /* Invalid channel, discard packet */ - pr_info("%s: connect not expected: remote %s!\n", - card->devname, rem); - return; - } - - chan = netdev_priv(dev); - chan->lcn = lcn; - cycx_x25_connect_response(card, chan); - cycx_x25_set_chan_state(dev, WAN_CONNECTED); -} - -/* Connect confirm interrupt handler. 
*/ -static void cycx_x25_irq_connect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - struct cycx_x25_channel *chan; - u8 lcn, key; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - cycx_peek(&card->hw, cmd->buf + 1, &key, sizeof(key)); - dprintk(1, KERN_INFO "%s: %s:lcn=%d, key=%d\n", - card->devname, __func__, lcn, key); - - dev = cycx_x25_get_dev_by_lcn(wandev, -key); - if (!dev) { - /* Invalid channel, discard packet */ - clear_bit(--key, (void*)&card->u.x.connection_keys); - pr_info("%s: connect confirm not expected: lcn %d, key=%d!\n", - card->devname, lcn, key); - return; - } - - clear_bit(--key, (void*)&card->u.x.connection_keys); - chan = netdev_priv(dev); - chan->lcn = lcn; - cycx_x25_set_chan_state(dev, WAN_CONNECTED); -} - -/* Disconnect confirm interrupt handler. */ -static void cycx_x25_irq_disconnect_confirm(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - u8 lcn; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - dprintk(1, KERN_INFO "%s: %s:lcn=%d\n", - card->devname, __func__, lcn); - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (!dev) { - /* Invalid channel, discard packet */ - pr_info("%s:disconnect confirm not expected!:lcn %d\n", - card->devname, lcn); - return; - } - - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); -} - -/* disconnect interrupt handler. 
*/ -static void cycx_x25_irq_disconnect(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - struct wan_device *wandev = &card->wandev; - struct net_device *dev; - u8 lcn; - - cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn)); - dprintk(1, KERN_INFO "%s:lcn=%d\n", __func__, lcn); - - dev = cycx_x25_get_dev_by_lcn(wandev, lcn); - if (dev) { - struct cycx_x25_channel *chan = netdev_priv(dev); - - cycx_x25_disconnect_response(card, chan->link, lcn); - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); - } else - cycx_x25_disconnect_response(card, 0, lcn); -} - -/* LOG interrupt handler. */ -static void cycx_x25_irq_log(struct cycx_device *card, struct cycx_x25_cmd *cmd) -{ -#if CYCLOMX_X25_DEBUG - char bf[20]; - u16 size, toread, link, msg_code; - u8 code, routine; - - cycx_peek(&card->hw, cmd->buf, &msg_code, sizeof(msg_code)); - cycx_peek(&card->hw, cmd->buf + 2, &link, sizeof(link)); - cycx_peek(&card->hw, cmd->buf + 4, &size, sizeof(size)); - /* at most 20 bytes are available... thanks to Daniela :) */ - toread = size < 20 ? size : 20; - cycx_peek(&card->hw, cmd->buf + 10, &bf, toread); - cycx_peek(&card->hw, cmd->buf + 10 + toread, &code, 1); - cycx_peek(&card->hw, cmd->buf + 10 + toread + 1, &routine, 1); - - pr_info("cycx_x25_irq_handler: X25_LOG (0x4500) indic.:\n"); - pr_info("cmd->buf=0x%X\n", cmd->buf); - pr_info("Log message code=0x%X\n", msg_code); - pr_info("Link=%d\n", link); - pr_info("log code=0x%X\n", code); - pr_info("log routine=0x%X\n", routine); - pr_info("Message size=%d\n", size); - hex_dump("Message", bf, toread); -#endif -} - -/* STATISTIC interrupt handler. */ -static void cycx_x25_irq_stat(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - cycx_peek(&card->hw, cmd->buf, &card->u.x.stats, - sizeof(card->u.x.stats)); - hex_dump("cycx_x25_irq_stat", (unsigned char*)&card->u.x.stats, - sizeof(card->u.x.stats)); - cycx_x25_dump_stats(&card->u.x.stats); - wake_up_interruptible(&card->wait_stats); -} - -/* Spurious interrupt handler. 
- * o print a warning - * If number of spurious interrupts exceeded some limit, then ??? */ -static void cycx_x25_irq_spurious(struct cycx_device *card, - struct cycx_x25_cmd *cmd) -{ - pr_info("%s: spurious interrupt (0x%X)!\n", - card->devname, cmd->command); -} -#ifdef CYCLOMX_X25_DEBUG -static void hex_dump(char *msg, unsigned char *p, int len) -{ - print_hex_dump(KERN_INFO, msg, DUMP_PREFIX_OFFSET, 16, 1, - p, len, true); -} -#endif - -/* Cyclom 2X Firmware-Specific Functions */ -/* Exec X.25 command. */ -static int x25_exec(struct cycx_device *card, int command, int link, - void *d1, int len1, void *d2, int len2) -{ - struct cycx_x25_cmd c; - unsigned long flags; - u32 addr = 0x1200 + 0x2E0 * link + 0x1E2; - u8 retry = CYCX_X25_MAX_CMD_RETRY; - int err = 0; - - c.command = command; - c.link = link; - c.len = len1 + len2; - - spin_lock_irqsave(&card->u.x.lock, flags); - - /* write command */ - cycx_poke(&card->hw, X25_MBOX_OFFS, &c, sizeof(c) - sizeof(c.buf)); - - /* write X.25 data */ - if (d1) { - cycx_poke(&card->hw, addr, d1, len1); - - if (d2) { - if (len2 > 254) { - u32 addr1 = 0xA00 + 0x400 * link; - - cycx_poke(&card->hw, addr + len1, d2, 249); - cycx_poke(&card->hw, addr1, ((u8*)d2) + 249, - len2 - 249); - } else - cycx_poke(&card->hw, addr + len1, d2, len2); - } - } - - /* generate interruption, executing command */ - cycx_intr(&card->hw); - - /* wait till card->mbox == 0 */ - do { - err = cycx_exec(card->mbox); - } while (retry-- && err); - - spin_unlock_irqrestore(&card->u.x.lock, flags); - - return err; -} - -/* Configure adapter. 
*/ -static int cycx_x25_configure(struct cycx_device *card, - struct cycx_x25_config *conf) -{ - struct { - u16 nlinks; - struct cycx_x25_config conf[2]; - } x25_cmd_conf; - - memset(&x25_cmd_conf, 0, sizeof(x25_cmd_conf)); - x25_cmd_conf.nlinks = 2; - x25_cmd_conf.conf[0] = *conf; - /* FIXME: we need to find a way in the wanrouter framework - to configure the second link, for now lets use it - with the same config from the first link, fixing - the interface type to RS232, the speed in 38400 and - the clock to external */ - x25_cmd_conf.conf[1] = *conf; - x25_cmd_conf.conf[1].link = 1; - x25_cmd_conf.conf[1].speed = 5; /* 38400 */ - x25_cmd_conf.conf[1].clock = 8; - x25_cmd_conf.conf[1].flags = 0; /* default = RS232 */ - - cycx_x25_dump_config(&x25_cmd_conf.conf[0]); - cycx_x25_dump_config(&x25_cmd_conf.conf[1]); - - return x25_exec(card, X25_CONFIG, 0, - &x25_cmd_conf, sizeof(x25_cmd_conf), NULL, 0); -} - -/* Get protocol statistics. */ -static int cycx_x25_get_stats(struct cycx_device *card) -{ - /* the firmware expects 20 in the size field!!! 
- thanks to Daniela */ - int err = x25_exec(card, X25_STATISTIC, 0, NULL, 20, NULL, 0); - - if (err) - return err; - - interruptible_sleep_on(&card->wait_stats); - - if (signal_pending(current)) - return -EINTR; - - card->wandev.stats.rx_packets = card->u.x.stats.n2_rx_frames; - card->wandev.stats.rx_over_errors = card->u.x.stats.rx_over_errors; - card->wandev.stats.rx_crc_errors = card->u.x.stats.rx_crc_errors; - card->wandev.stats.rx_length_errors = 0; /* not available from fw */ - card->wandev.stats.rx_frame_errors = 0; /* not available from fw */ - card->wandev.stats.rx_missed_errors = card->u.x.stats.rx_aborts; - card->wandev.stats.rx_dropped = 0; /* not available from fw */ - card->wandev.stats.rx_errors = 0; /* not available from fw */ - card->wandev.stats.tx_packets = card->u.x.stats.n2_tx_frames; - card->wandev.stats.tx_aborted_errors = card->u.x.stats.tx_aborts; - card->wandev.stats.tx_dropped = 0; /* not available from fw */ - card->wandev.stats.collisions = 0; /* not available from fw */ - card->wandev.stats.tx_errors = 0; /* not available from fw */ - - cycx_x25_dump_devs(&card->wandev); - - return 0; -} - -/* return the number of nibbles */ -static int byte_to_nibble(u8 *s, u8 *d, char *nibble) -{ - int i = 0; - - if (*nibble && *s) { - d[i] |= *s++ - '0'; - *nibble = 0; - ++i; - } - - while (*s) { - d[i] = (*s - '0') << 4; - if (*(s + 1)) - d[i] |= *(s + 1) - '0'; - else { - *nibble = 1; - break; - } - ++i; - s += 2; - } - - return i; -} - -static void nibble_to_byte(u8 *s, u8 *d, u8 len, u8 nibble) -{ - if (nibble) { - *d++ = '0' + (*s++ & 0x0F); - --len; - } - - while (len) { - *d++ = '0' + (*s >> 4); - - if (--len) { - *d++ = '0' + (*s & 0x0F); - --len; - } else break; - - ++s; - } - - *d = '\0'; -} - -/* Place X.25 call. */ -static int x25_place_call(struct cycx_device *card, - struct cycx_x25_channel *chan) -{ - int err = 0, - len; - char d[64], - nibble = 0, - mylen = chan->local_addr ? 
strlen(chan->local_addr) : 0, - remotelen = strlen(chan->addr); - u8 key; - - if (card->u.x.connection_keys == ~0U) { - pr_info("%s: too many simultaneous connection requests!\n", - card->devname); - return -EAGAIN; - } - - key = ffz(card->u.x.connection_keys); - set_bit(key, (void*)&card->u.x.connection_keys); - ++key; - dprintk(1, KERN_INFO "%s:x25_place_call:key=%d\n", card->devname, key); - memset(d, 0, sizeof(d)); - d[1] = key; /* user key */ - d[2] = 0x10; - d[4] = 0x0B; - - len = byte_to_nibble(chan->addr, d + 6, &nibble); - - if (chan->local_addr) - len += byte_to_nibble(chan->local_addr, d + 6 + len, &nibble); - - if (nibble) - ++len; - - d[5] = mylen << 4 | remotelen; - d[6 + len + 1] = 0xCC; /* TCP/IP over X.25, thanks to Daniela :) */ - - if ((err = x25_exec(card, X25_CONNECT_REQUEST, chan->link, - &d, 7 + len + 1, NULL, 0)) != 0) - clear_bit(--key, (void*)&card->u.x.connection_keys); - else - chan->lcn = -key; - - return err; -} - -/* Place X.25 CONNECT RESPONSE. */ -static int cycx_x25_connect_response(struct cycx_device *card, - struct cycx_x25_channel *chan) -{ - u8 d[8]; - - memset(d, 0, sizeof(d)); - d[0] = d[3] = chan->lcn; - d[2] = 0x10; - d[4] = 0x0F; - d[7] = 0xCC; /* TCP/IP over X.25, thanks Daniela */ - - return x25_exec(card, X25_CONNECT_RESPONSE, chan->link, &d, 8, NULL, 0); -} - -/* Place X.25 DISCONNECT RESPONSE. */ -static int cycx_x25_disconnect_response(struct cycx_device *card, u8 link, - u8 lcn) -{ - char d[5]; - - memset(d, 0, sizeof(d)); - d[0] = d[3] = lcn; - d[2] = 0x10; - d[4] = 0x17; - - return x25_exec(card, X25_DISCONNECT_RESPONSE, link, &d, 5, NULL, 0); -} - -/* Clear X.25 call. */ -static int x25_clear_call(struct cycx_device *card, u8 link, u8 lcn, u8 cause, - u8 diagn) -{ - u8 d[7]; - - memset(d, 0, sizeof(d)); - d[0] = d[3] = lcn; - d[2] = 0x10; - d[4] = 0x13; - d[5] = cause; - d[6] = diagn; - - return x25_exec(card, X25_DISCONNECT_REQUEST, link, d, 7, NULL, 0); -} - -/* Send X.25 data packet. 
*/ -static int cycx_x25_send(struct cycx_device *card, u8 link, u8 lcn, u8 bitm, - int len, void *buf) -{ - u8 d[] = "?\xFF\x10??"; - - d[0] = d[3] = lcn; - d[4] = bitm; - - return x25_exec(card, X25_DATA_REQUEST, link, &d, 5, buf, len); -} - -/* Miscellaneous */ -/* Find network device by its channel number. */ -static struct net_device *cycx_x25_get_dev_by_lcn(struct wan_device *wandev, - s16 lcn) -{ - struct net_device *dev = wandev->dev; - struct cycx_x25_channel *chan; - - while (dev) { - chan = netdev_priv(dev); - - if (chan->lcn == lcn) - break; - dev = chan->slave; - } - return dev; -} - -/* Find network device by its remote dte address. */ -static struct net_device * - cycx_x25_get_dev_by_dte_addr(struct wan_device *wandev, char *dte) -{ - struct net_device *dev = wandev->dev; - struct cycx_x25_channel *chan; - - while (dev) { - chan = netdev_priv(dev); - - if (!strcmp(chan->addr, dte)) - break; - dev = chan->slave; - } - return dev; -} - -/* Initiate connection on the logical channel. - * o for PVC we just get channel configuration - * o for SVCs place an X.25 call - * - * Return: 0 connected - * >0 connection in progress - * <0 failure */ -static int cycx_x25_chan_connect(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - - if (chan->svc) { - if (!chan->addr[0]) - return -EINVAL; /* no destination address */ - - dprintk(1, KERN_INFO "%s: placing X.25 call to %s...\n", - card->devname, chan->addr); - - if (x25_place_call(card, chan)) - return -EIO; - - cycx_x25_set_chan_state(dev, WAN_CONNECTING); - return 1; - } else - cycx_x25_set_chan_state(dev, WAN_CONNECTED); - - return 0; -} - -/* Disconnect logical channel. 
- * o if SVC then clear X.25 call */ -static void cycx_x25_chan_disconnect(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->svc) { - x25_clear_call(chan->card, chan->link, chan->lcn, 0, 0); - cycx_x25_set_chan_state(dev, WAN_DISCONNECTING); - } else - cycx_x25_set_chan_state(dev, WAN_DISCONNECTED); -} - -/* Called by kernel timer */ -static void cycx_x25_chan_timer(unsigned long d) -{ - struct net_device *dev = (struct net_device *)d; - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->state == WAN_CONNECTED) - cycx_x25_chan_disconnect(dev); - else - pr_err("%s: %s for svc (%s) not connected!\n", - chan->card->devname, __func__, dev->name); -} - -/* Set logical channel state. */ -static void cycx_x25_set_chan_state(struct net_device *dev, u8 state) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - unsigned long flags; - char *string_state = NULL; - - spin_lock_irqsave(&card->lock, flags); - - if (chan->state != state) { - if (chan->svc && chan->state == WAN_CONNECTED) - del_timer(&chan->timer); - - switch (state) { - case WAN_CONNECTED: - string_state = "connected!"; - *(__be16*)dev->dev_addr = htons(chan->lcn); - netif_wake_queue(dev); - reset_timer(dev); - - if (chan->protocol == ETH_P_X25) - cycx_x25_chan_send_event(dev, - X25_IFACE_CONNECT); - - break; - case WAN_CONNECTING: - string_state = "connecting..."; - break; - case WAN_DISCONNECTING: - string_state = "disconnecting..."; - break; - case WAN_DISCONNECTED: - string_state = "disconnected!"; - - if (chan->svc) { - *(unsigned short*)dev->dev_addr = 0; - chan->lcn = 0; - } - - if (chan->protocol == ETH_P_X25) - cycx_x25_chan_send_event(dev, - X25_IFACE_DISCONNECT); - - netif_wake_queue(dev); - break; - } - - pr_info("%s: interface %s %s\n", - card->devname, dev->name, string_state); - chan->state = state; - } - - spin_unlock_irqrestore(&card->lock, flags); -} - -/* Send packet on a logical channel. 
- * When this function is called, tx_skb field of the channel data space - * points to the transmit socket buffer. When transmission is complete, - * release socket buffer and reset 'tbusy' flag. - * - * Return: 0 - transmission complete - * 1 - busy - * - * Notes: - * 1. If packet length is greater than MTU for this channel, we'll fragment - * the packet into 'complete sequence' using M-bit. - * 2. When transmission is complete, an event notification should be issued - * to the router. */ -static int cycx_x25_chan_send(struct net_device *dev, struct sk_buff *skb) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - struct cycx_device *card = chan->card; - int bitm = 0; /* final packet */ - unsigned len = skb->len; - - if (skb->len > card->wandev.mtu) { - len = card->wandev.mtu; - bitm = 0x10; /* set M-bit (more data) */ - } - - if (cycx_x25_send(card, chan->link, chan->lcn, bitm, len, skb->data)) - return 1; - - if (bitm) { - skb_pull(skb, len); - return 1; - } - - ++chan->ifstats.tx_packets; - chan->ifstats.tx_bytes += len; - - return 0; -} - -/* Send event (connection, disconnection, etc) to X.25 socket layer */ - -static void cycx_x25_chan_send_event(struct net_device *dev, u8 event) -{ - struct sk_buff *skb; - unsigned char *ptr; - - if ((skb = dev_alloc_skb(1)) == NULL) { - pr_err("%s: out of memory\n", __func__); - return; - } - - ptr = skb_put(skb, 1); - *ptr = event; - - skb->protocol = x25_type_trans(skb, dev); - netif_rx(skb); -} - -/* Convert line speed in bps to a number used by cyclom 2x code. 
*/ -static u8 bps_to_speed_code(u32 bps) -{ - u8 number = 0; /* defaults to the lowest (1200) speed ;> */ - - if (bps >= 512000) number = 8; - else if (bps >= 256000) number = 7; - else if (bps >= 64000) number = 6; - else if (bps >= 38400) number = 5; - else if (bps >= 19200) number = 4; - else if (bps >= 9600) number = 3; - else if (bps >= 4800) number = 2; - else if (bps >= 2400) number = 1; - - return number; -} - -/* log base 2 */ -static u8 cycx_log2(u32 n) -{ - u8 log = 0; - - if (!n) - return 0; - - while (n > 1) { - n >>= 1; - ++log; - } - - return log; -} - -/* Convert decimal string to unsigned integer. - * If len != 0 then only 'len' characters of the string are converted. */ -static unsigned dec_to_uint(u8 *str, int len) -{ - unsigned val = 0; - - if (!len) - len = strlen(str); - - for (; len && isdigit(*str); ++str, --len) - val = (val * 10) + (*str - (unsigned) '0'); - - return val; -} - -static void reset_timer(struct net_device *dev) -{ - struct cycx_x25_channel *chan = netdev_priv(dev); - - if (chan->svc) - mod_timer(&chan->timer, jiffies+chan->idle_tmout*HZ); -} -#ifdef CYCLOMX_X25_DEBUG -static void cycx_x25_dump_config(struct cycx_x25_config *conf) -{ - pr_info("X.25 configuration\n"); - pr_info("-----------------\n"); - pr_info("link number=%d\n", conf->link); - pr_info("line speed=%d\n", conf->speed); - pr_info("clock=%sternal\n", conf->clock == 8 ? 
"Ex" : "In"); - pr_info("# level 2 retransm.=%d\n", conf->n2); - pr_info("level 2 window=%d\n", conf->n2win); - pr_info("level 3 window=%d\n", conf->n3win); - pr_info("# logical channels=%d\n", conf->nvc); - pr_info("level 3 pkt len=%d\n", conf->pktlen); - pr_info("my address=%d\n", conf->locaddr); - pr_info("remote address=%d\n", conf->remaddr); - pr_info("t1=%d seconds\n", conf->t1); - pr_info("t2=%d seconds\n", conf->t2); - pr_info("t21=%d seconds\n", conf->t21); - pr_info("# PVCs=%d\n", conf->npvc); - pr_info("t23=%d seconds\n", conf->t23); - pr_info("flags=0x%x\n", conf->flags); -} - -static void cycx_x25_dump_stats(struct cycx_x25_stats *stats) -{ - pr_info("X.25 statistics\n"); - pr_info("--------------\n"); - pr_info("rx_crc_errors=%d\n", stats->rx_crc_errors); - pr_info("rx_over_errors=%d\n", stats->rx_over_errors); - pr_info("n2_tx_frames=%d\n", stats->n2_tx_frames); - pr_info("n2_rx_frames=%d\n", stats->n2_rx_frames); - pr_info("tx_timeouts=%d\n", stats->tx_timeouts); - pr_info("rx_timeouts=%d\n", stats->rx_timeouts); - pr_info("n3_tx_packets=%d\n", stats->n3_tx_packets); - pr_info("n3_rx_packets=%d\n", stats->n3_rx_packets); - pr_info("tx_aborts=%d\n", stats->tx_aborts); - pr_info("rx_aborts=%d\n", stats->rx_aborts); -} - -static void cycx_x25_dump_devs(struct wan_device *wandev) -{ - struct net_device *dev = wandev->dev; - - pr_info("X.25 dev states\n"); - pr_info("name: addr: txoff: protocol:\n"); - pr_info("---------------------------------------\n"); - - while(dev) { - struct cycx_x25_channel *chan = netdev_priv(dev); - - pr_info("%-5.5s %-15.15s %d ETH_P_%s\n", - chan->name, chan->addr, netif_queue_stopped(dev), - chan->protocol == ETH_P_IP ? 
"IP" : "X25"); - dev = chan->slave; - } -} - -#endif /* CYCLOMX_X25_DEBUG */ -/* End */ diff --git a/include/linux/cyclomx.h b/include/linux/cyclomx.h deleted file mode 100644 index b88f7f428e58..000000000000 --- a/include/linux/cyclomx.h +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef _CYCLOMX_H -#define _CYCLOMX_H -/* -* cyclomx.h Cyclom 2X WAN Link Driver. -* User-level API definitions. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on wanpipe.h by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 2000/07/13 acme remove crap #if KERNEL_VERSION > blah -* 2000/01/21 acme rename cyclomx_open to cyclomx_mod_inc_use_count -* and cyclomx_close to cyclomx_mod_dec_use_count -* 1999/05/19 acme wait_queue_head_t wait_stats(support for 2.3.*) -* 1999/01/03 acme judicious use of data types -* 1998/12/27 acme cleanup: PACKED not needed -* 1998/08/08 acme Version 0.0.1 -*/ - -#include -#include - -#ifdef __KERNEL__ -/* Kernel Interface */ - -#include /* Cyclom 2X support module API definitions */ -#include /* Cyclom 2X firmware module definitions */ -#ifdef CONFIG_CYCLOMX_X25 -#include -#endif - -/* Adapter Data Space. - * This structure is needed because we handle multiple cards, otherwise - * static data would do it. 
- */ -struct cycx_device { - char devname[WAN_DRVNAME_SZ + 1];/* card name */ - struct cycx_hw hw; /* hardware configuration */ - struct wan_device wandev; /* WAN device data space */ - u32 state_tick; /* link state timestamp */ - spinlock_t lock; - char in_isr; /* interrupt-in-service flag */ - char buff_int_mode_unbusy; /* flag for carrying out dev_tint */ - wait_queue_head_t wait_stats; /* to wait for the STATS indication */ - void __iomem *mbox; /* -> mailbox */ - void (*isr)(struct cycx_device* card); /* interrupt service routine */ - int (*exec)(struct cycx_device* card, void* u_cmd, void* u_data); - union { -#ifdef CONFIG_CYCLOMX_X25 - struct { /* X.25 specific data */ - u32 lo_pvc; - u32 hi_pvc; - u32 lo_svc; - u32 hi_svc; - struct cycx_x25_stats stats; - spinlock_t lock; - u32 connection_keys; - } x; -#endif - } u; -}; - -/* Public Functions */ -void cycx_set_state(struct cycx_device *card, int state); - -#ifdef CONFIG_CYCLOMX_X25 -int cycx_x25_wan_init(struct cycx_device *card, wandev_conf_t *conf); -#endif -#endif /* __KERNEL__ */ -#endif /* _CYCLOMX_H */ diff --git a/include/linux/cycx_drv.h b/include/linux/cycx_drv.h deleted file mode 100644 index 12fe6b0bfcff..000000000000 --- a/include/linux/cycx_drv.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -* cycx_drv.h CYCX Support Module. Kernel API Definitions. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdladrv.h by Gene Kozin -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 1999/10/23 acme cycxhw_t cleanup -* 1999/01/03 acme more judicious use of data types... -* uclong, ucchar, etc deleted, the u8, u16, u32 -* types are the portable way to go. 
-* 1999/01/03 acme judicious use of data types... u16, u32, etc -* 1998/12/26 acme FIXED_BUFFERS, CONF_OFFSET, -* removal of cy_read{bwl} -* 1998/08/08 acme Initial version. -*/ -#ifndef _CYCX_DRV_H -#define _CYCX_DRV_H - -#define CYCX_WINDOWSIZE 0x4000 /* default dual-port memory window size */ -#define GEN_CYCX_INTR 0x02 -#define RST_ENABLE 0x04 -#define START_CPU 0x06 -#define RST_DISABLE 0x08 -#define FIXED_BUFFERS 0x08 -#define TEST_PATTERN 0xaa55 -#define CMD_OFFSET 0x20 -#define CONF_OFFSET 0x0380 -#define RESET_OFFSET 0x3c00 /* For reset file load */ -#define DATA_OFFSET 0x0100 /* For code and data files load */ -#define START_OFFSET 0x3ff0 /* 80186 starts here */ - -/** - * struct cycx_hw - Adapter hardware configuration - * @fwid - firmware ID - * @irq - interrupt request level - * @dpmbase - dual-port memory base - * @dpmsize - dual-port memory size - * @reserved - reserved for future use - */ -struct cycx_hw { - u32 fwid; - int irq; - void __iomem *dpmbase; - u32 dpmsize; - u32 reserved[5]; -}; - -/* Function Prototypes */ -extern int cycx_setup(struct cycx_hw *hw, void *sfm, u32 len, unsigned long base); -extern int cycx_down(struct cycx_hw *hw); -extern int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len); -extern int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len); -extern int cycx_exec(void __iomem *addr); - -extern void cycx_intr(struct cycx_hw *hw); -#endif /* _CYCX_DRV_H */ diff --git a/include/linux/wanrouter.h b/include/linux/wanrouter.h index cec4b4159767..8198a63cf459 100644 --- a/include/linux/wanrouter.h +++ b/include/linux/wanrouter.h @@ -1,129 +1,10 @@ -/***************************************************************************** -* wanrouter.h Definitions for the WAN Multiprotocol Router Module. -* This module provides API and common services for WAN Link -* Drivers and is completely hardware-independent. 
-* -* Author: Nenad Corbic -* Gideon Hack -* Additions: Arnaldo Melo -* -* Copyright: (c) 1995-2000 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jul 21, 2000 Nenad Corbic Added WAN_FT1_READY State -* Feb 24, 2000 Nenad Corbic Added support for socket based x25api -* Jan 28, 2000 Nenad Corbic Added support for the ASYNC protocol. -* Oct 04, 1999 Nenad Corbic Updated for 2.1.0 release -* Jun 02, 1999 Gideon Hack Added support for the S514 adapter. -* May 23, 1999 Arnaldo Melo Added local_addr to wanif_conf_t -* WAN_DISCONNECTING state added -* Jul 20, 1998 David Fong Added Inverse ARP options to 'wanif_conf_t' -* Jun 12, 1998 David Fong Added Cisco HDLC support. -* Dec 16, 1997 Jaspreet Singh Moved 'enable_IPX' and 'network_number' to -* 'wanif_conf_t' -* Dec 05, 1997 Jaspreet Singh Added 'pap', 'chap' to 'wanif_conf_t' -* Added 'authenticator' to 'wan_ppp_conf_t' -* Nov 06, 1997 Jaspreet Singh Changed Router Driver version to 1.1 from 1.0 -* Oct 20, 1997 Jaspreet Singh Added 'cir','bc','be' and 'mc' to 'wanif_conf_t' -* Added 'enable_IPX' and 'network_number' to -* 'wan_device_t'. Also added defines for -* UDP PACKET TYPE, Interrupt test, critical values -* for RACE conditions. 
-* Oct 05, 1997 Jaspreet Singh Added 'dlci_num' and 'dlci[100]' to -* 'wan_fr_conf_t' to configure a list of dlci(s) -* for a NODE -* Jul 07, 1997 Jaspreet Singh Added 'ttl' to 'wandev_conf_t' & 'wan_device_t' -* May 29, 1997 Jaspreet Singh Added 'tx_int_enabled' to 'wan_device_t' -* May 21, 1997 Jaspreet Singh Added 'udp_port' to 'wan_device_t' -* Apr 25, 1997 Farhan Thawar Added 'udp_port' to 'wandev_conf_t' -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 02, 1997 Gene Kozin Initial version (based on wanpipe.h). -*****************************************************************************/ +/* + * wanrouter.h Legacy declarations kept around until X25 is removed + */ + #ifndef _ROUTER_H #define _ROUTER_H #include -/****** Kernel Interface ****************************************************/ - -#include /* support for device drivers */ -#include /* proc filesystem pragmatics */ -#include /* support for network drivers */ -#include /* Support for SMP Locking */ - -/*---------------------------------------------------------------------------- - * WAN device data space. - */ -struct wan_device { - unsigned magic; /* magic number */ - char* name; /* -> WAN device name (ASCIIZ) */ - void* private; /* -> driver private data */ - unsigned config_id; /* Configuration ID */ - /****** hardware configuration ******/ - unsigned ioport; /* adapter I/O port base #1 */ - char S514_cpu_no[1]; /* PCI CPU Number */ - unsigned char S514_slot_no; /* PCI Slot Number */ - unsigned long maddr; /* dual-port memory address */ - unsigned msize; /* dual-port memory size */ - int irq; /* interrupt request level */ - int dma; /* DMA request level */ - unsigned bps; /* data transfer rate */ - unsigned mtu; /* max physical transmit unit size */ - unsigned udp_port; /* UDP port for management */ - unsigned char ttl; /* Time To Live for UDP security */ - unsigned enable_tx_int; /* Transmit Interrupt enabled or not */ - char interface; /* RS-232/V.35, etc. 
*/ - char clocking; /* external/internal */ - char line_coding; /* NRZ/NRZI/FM0/FM1, etc. */ - char station; /* DTE/DCE, primary/secondary, etc. */ - char connection; /* permanent/switched/on-demand */ - char signalling; /* Signalling RS232 or V35 */ - char read_mode; /* read mode: Polling or interrupt */ - char new_if_cnt; /* Number of interfaces per wanpipe */ - char del_if_cnt; /* Number of times del_if() gets called */ - unsigned char piggyback; /* Piggibacking a port */ - unsigned hw_opt[4]; /* other hardware options */ - /****** status and statistics *******/ - char state; /* device state */ - char api_status; /* device api status */ - struct net_device_stats stats; /* interface statistics */ - unsigned reserved[16]; /* reserved for future use */ - unsigned long critical; /* critical section flag */ - spinlock_t lock; /* Support for SMP Locking */ - - /****** device management methods ***/ - int (*setup) (struct wan_device *wandev, wandev_conf_t *conf); - int (*shutdown) (struct wan_device *wandev); - int (*update) (struct wan_device *wandev); - int (*ioctl) (struct wan_device *wandev, unsigned cmd, - unsigned long arg); - int (*new_if)(struct wan_device *wandev, struct net_device *dev, - wanif_conf_t *conf); - int (*del_if)(struct wan_device *wandev, struct net_device *dev); - /****** maintained by the router ****/ - struct wan_device* next; /* -> next device */ - struct net_device* dev; /* list of network interfaces */ - unsigned ndev; /* number of interfaces */ - struct proc_dir_entry *dent; /* proc filesystem entry */ -}; - -/* Public functions available for device drivers */ -extern int register_wan_device(struct wan_device *wandev); -extern int unregister_wan_device(char *name); - -/* Proc interface functions. These must not be called by the drivers! 
*/ -extern int wanrouter_proc_init(void); -extern void wanrouter_proc_cleanup(void); -extern int wanrouter_proc_add(struct wan_device *wandev); -extern int wanrouter_proc_delete(struct wan_device *wandev); -extern long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg); - -/* Public Data */ -/* list of registered devices */ -extern struct wan_device *wanrouter_router_devlist; - #endif /* _ROUTER_H */ diff --git a/include/uapi/linux/wanrouter.h b/include/uapi/linux/wanrouter.h index 7617df2833d5..498d6c12c666 100644 --- a/include/uapi/linux/wanrouter.h +++ b/include/uapi/linux/wanrouter.h @@ -1,363 +1,9 @@ -/***************************************************************************** -* wanrouter.h Definitions for the WAN Multiprotocol Router Module. -* This module provides API and common services for WAN Link -* Drivers and is completely hardware-independent. -* -* Author: Nenad Corbic -* Gideon Hack -* Additions: Arnaldo Melo -* -* Copyright: (c) 1995-2000 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jul 21, 2000 Nenad Corbic Added WAN_FT1_READY State -* Feb 24, 2000 Nenad Corbic Added support for socket based x25api -* Jan 28, 2000 Nenad Corbic Added support for the ASYNC protocol. -* Oct 04, 1999 Nenad Corbic Updated for 2.1.0 release -* Jun 02, 1999 Gideon Hack Added support for the S514 adapter. -* May 23, 1999 Arnaldo Melo Added local_addr to wanif_conf_t -* WAN_DISCONNECTING state added -* Jul 20, 1998 David Fong Added Inverse ARP options to 'wanif_conf_t' -* Jun 12, 1998 David Fong Added Cisco HDLC support. 
-* Dec 16, 1997 Jaspreet Singh Moved 'enable_IPX' and 'network_number' to -* 'wanif_conf_t' -* Dec 05, 1997 Jaspreet Singh Added 'pap', 'chap' to 'wanif_conf_t' -* Added 'authenticator' to 'wan_ppp_conf_t' -* Nov 06, 1997 Jaspreet Singh Changed Router Driver version to 1.1 from 1.0 -* Oct 20, 1997 Jaspreet Singh Added 'cir','bc','be' and 'mc' to 'wanif_conf_t' -* Added 'enable_IPX' and 'network_number' to -* 'wan_device_t'. Also added defines for -* UDP PACKET TYPE, Interrupt test, critical values -* for RACE conditions. -* Oct 05, 1997 Jaspreet Singh Added 'dlci_num' and 'dlci[100]' to -* 'wan_fr_conf_t' to configure a list of dlci(s) -* for a NODE -* Jul 07, 1997 Jaspreet Singh Added 'ttl' to 'wandev_conf_t' & 'wan_device_t' -* May 29, 1997 Jaspreet Singh Added 'tx_int_enabled' to 'wan_device_t' -* May 21, 1997 Jaspreet Singh Added 'udp_port' to 'wan_device_t' -* Apr 25, 1997 Farhan Thawar Added 'udp_port' to 'wandev_conf_t' -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 02, 1997 Gene Kozin Initial version (based on wanpipe.h). 
-*****************************************************************************/ - -#ifndef _UAPI_ROUTER_H -#define _UAPI_ROUTER_H - -#define ROUTER_NAME "wanrouter" /* in case we ever change it */ -#define ROUTER_VERSION 1 /* version number */ -#define ROUTER_RELEASE 1 /* release (minor version) number */ -#define ROUTER_IOCTL 'W' /* for IOCTL calls */ -#define ROUTER_MAGIC 0x524D4157L /* signature: 'WANR' reversed */ - -/* IOCTL codes for /proc/router/ entries (up to 255) */ -enum router_ioctls -{ - ROUTER_SETUP = ROUTER_IOCTL<<8, /* configure device */ - ROUTER_DOWN, /* shut down device */ - ROUTER_STAT, /* get device status */ - ROUTER_IFNEW, /* add interface */ - ROUTER_IFDEL, /* delete interface */ - ROUTER_IFSTAT, /* get interface status */ - ROUTER_USER = (ROUTER_IOCTL<<8)+16, /* driver-specific calls */ - ROUTER_USER_MAX = (ROUTER_IOCTL<<8)+31 -}; - -/* identifiers for displaying proc file data for dual port adapters */ -#define PROC_DATA_PORT_0 0x8000 /* the data is for port 0 */ -#define PROC_DATA_PORT_1 0x8001 /* the data is for port 1 */ - -/* NLPID for packet encapsulation (ISO/IEC TR 9577) */ -#define NLPID_IP 0xCC /* Internet Protocol Datagram */ -#define NLPID_SNAP 0x80 /* IEEE Subnetwork Access Protocol */ -#define NLPID_CLNP 0x81 /* ISO/IEC 8473 */ -#define NLPID_ESIS 0x82 /* ISO/IEC 9542 */ -#define NLPID_ISIS 0x83 /* ISO/IEC ISIS */ -#define NLPID_Q933 0x08 /* CCITT Q.933 */ - -/* Miscellaneous */ -#define WAN_IFNAME_SZ 15 /* max length of the interface name */ -#define WAN_DRVNAME_SZ 15 /* max length of the link driver name */ -#define WAN_ADDRESS_SZ 31 /* max length of the WAN media address */ -#define USED_BY_FIELD 8 /* max length of the used by field */ - -/* Defines for UDP PACKET TYPE */ -#define UDP_PTPIPE_TYPE 0x01 -#define UDP_FPIPE_TYPE 0x02 -#define UDP_CPIPE_TYPE 0x03 -#define UDP_DRVSTATS_TYPE 0x04 -#define UDP_INVALID_TYPE 0x05 - -/* Command return code */ -#define CMD_OK 0 /* normal firmware return code */ -#define CMD_TIMEOUT 
0xFF /* firmware command timed out */ - -/* UDP Packet Management */ -#define UDP_PKT_FRM_STACK 0x00 -#define UDP_PKT_FRM_NETWORK 0x01 - -/* Maximum interrupt test counter */ -#define MAX_INTR_TEST_COUNTER 100 - -/* Critical Values for RACE conditions*/ -#define CRITICAL_IN_ISR 0xA1 -#define CRITICAL_INTR_HANDLED 0xB1 - -/****** Data Types **********************************************************/ - -/*---------------------------------------------------------------------------- - * X.25-specific link-level configuration. - */ -typedef struct wan_x25_conf -{ - unsigned lo_pvc; /* lowest permanent circuit number */ - unsigned hi_pvc; /* highest permanent circuit number */ - unsigned lo_svc; /* lowest switched circuit number */ - unsigned hi_svc; /* highest switched circuit number */ - unsigned hdlc_window; /* HDLC window size (1..7) */ - unsigned pkt_window; /* X.25 packet window size (1..7) */ - unsigned t1; /* HDLC timer T1, sec (1..30) */ - unsigned t2; /* HDLC timer T2, sec (0..29) */ - unsigned t4; /* HDLC supervisory frame timer = T4 * T1 */ - unsigned n2; /* HDLC retransmission limit (1..30) */ - unsigned t10_t20; /* X.25 RESTART timeout, sec (1..255) */ - unsigned t11_t21; /* X.25 CALL timeout, sec (1..255) */ - unsigned t12_t22; /* X.25 RESET timeout, sec (1..255) */ - unsigned t13_t23; /* X.25 CLEAR timeout, sec (1..255) */ - unsigned t16_t26; /* X.25 INTERRUPT timeout, sec (1..255) */ - unsigned t28; /* X.25 REGISTRATION timeout, sec (1..255) */ - unsigned r10_r20; /* RESTART retransmission limit (0..250) */ - unsigned r12_r22; /* RESET retransmission limit (0..250) */ - unsigned r13_r23; /* CLEAR retransmission limit (0..250) */ - unsigned ccitt_compat; /* compatibility mode: 1988/1984/1980 */ - unsigned x25_conf_opt; /* User defined x25 config optoins */ - unsigned char LAPB_hdlc_only; /* Run in HDLC only mode */ - unsigned char logging; /* Control connection logging */ - unsigned char oob_on_modem; /* Whether to send modem status to the user app */ -} 
wan_x25_conf_t; - -/*---------------------------------------------------------------------------- - * Frame relay specific link-level configuration. - */ -typedef struct wan_fr_conf -{ - unsigned signalling; /* local in-channel signalling type */ - unsigned t391; /* link integrity verification timer */ - unsigned t392; /* polling verification timer */ - unsigned n391; /* full status polling cycle counter */ - unsigned n392; /* error threshold counter */ - unsigned n393; /* monitored events counter */ - unsigned dlci_num; /* number of DLCs (access node) */ - unsigned dlci[100]; /* List of all DLCIs */ -} wan_fr_conf_t; - -/*---------------------------------------------------------------------------- - * PPP-specific link-level configuration. - */ -typedef struct wan_ppp_conf -{ - unsigned restart_tmr; /* restart timer */ - unsigned auth_rsrt_tmr; /* authentication timer */ - unsigned auth_wait_tmr; /* authentication timer */ - unsigned mdm_fail_tmr; /* modem failure timer */ - unsigned dtr_drop_tmr; /* DTR drop timer */ - unsigned connect_tmout; /* connection timeout */ - unsigned conf_retry; /* max. retry */ - unsigned term_retry; /* max. retry */ - unsigned fail_retry; /* max. retry */ - unsigned auth_retry; /* max. retry */ - unsigned auth_options; /* authentication opt. */ - unsigned ip_options; /* IP options */ - char authenticator; /* AUTHENTICATOR or not */ - char ip_mode; /* Static/Host/Peer */ -} wan_ppp_conf_t; - -/*---------------------------------------------------------------------------- - * CHDLC-specific link-level configuration. 
- */ -typedef struct wan_chdlc_conf -{ - unsigned char ignore_dcd; /* Protocol options: */ - unsigned char ignore_cts; /* Ignore these to determine */ - unsigned char ignore_keepalive; /* link status (Yes or No) */ - unsigned char hdlc_streaming; /* hdlc_streaming mode (Y/N) */ - unsigned char receive_only; /* no transmit buffering (Y/N) */ - unsigned keepalive_tx_tmr; /* transmit keepalive timer */ - unsigned keepalive_rx_tmr; /* receive keepalive timer */ - unsigned keepalive_err_margin; /* keepalive_error_tolerance */ - unsigned slarp_timer; /* SLARP request timer */ -} wan_chdlc_conf_t; - - -/*---------------------------------------------------------------------------- - * WAN device configuration. Passed to ROUTER_SETUP IOCTL. - */ -typedef struct wandev_conf -{ - unsigned magic; /* magic number (for verification) */ - unsigned config_id; /* configuration structure identifier */ - /****** hardware configuration ******/ - unsigned ioport; /* adapter I/O port base */ - unsigned long maddr; /* dual-port memory address */ - unsigned msize; /* dual-port memory size */ - int irq; /* interrupt request level */ - int dma; /* DMA request level */ - char S514_CPU_no[1]; /* S514 PCI adapter CPU number ('A' or 'B') */ - unsigned PCI_slot_no; /* S514 PCI adapter slot number */ - char auto_pci_cfg; /* S515 PCI automatic slot detection */ - char comm_port; /* Communication Port (PRI=0, SEC=1) */ - unsigned bps; /* data transfer rate */ - unsigned mtu; /* maximum transmit unit size */ - unsigned udp_port; /* UDP port for management */ - unsigned char ttl; /* Time To Live for UDP security */ - unsigned char ft1; /* FT1 Configurator Option */ - char interface; /* RS-232/V.35, etc. */ - char clocking; /* external/internal */ - char line_coding; /* NRZ/NRZI/FM0/FM1, etc. */ - char station; /* DTE/DCE, primary/secondary, etc. 
*/ - char connection; /* permanent/switched/on-demand */ - char read_mode; /* read mode: Polling or interrupt */ - char receive_only; /* disable tx buffers */ - char tty; /* Create a fake tty device */ - unsigned tty_major; /* Major number for wanpipe tty device */ - unsigned tty_minor; /* Minor number for wanpipe tty device */ - unsigned tty_mode; /* TTY operation mode SYNC or ASYNC */ - char backup; /* Backup Mode */ - unsigned hw_opt[4]; /* other hardware options */ - unsigned reserved[4]; - /****** arbitrary data ***************/ - unsigned data_size; /* data buffer size */ - void* data; /* data buffer, e.g. firmware */ - union /****** protocol-specific ************/ - { - wan_x25_conf_t x25; /* X.25 configuration */ - wan_ppp_conf_t ppp; /* PPP configuration */ - wan_fr_conf_t fr; /* frame relay configuration */ - wan_chdlc_conf_t chdlc; /* Cisco HDLC configuration */ - } u; -} wandev_conf_t; - -/* 'config_id' definitions */ -#define WANCONFIG_X25 101 /* X.25 link */ -#define WANCONFIG_FR 102 /* frame relay link */ -#define WANCONFIG_PPP 103 /* synchronous PPP link */ -#define WANCONFIG_CHDLC 104 /* Cisco HDLC Link */ -#define WANCONFIG_BSC 105 /* BiSync Streaming */ -#define WANCONFIG_HDLC 106 /* HDLC Support */ -#define WANCONFIG_MPPP 107 /* Multi Port PPP over RAW CHDLC */ - /* - * Configuration options defines. 
+ * wanrouter.h Legacy declarations kept around until X25 is removed */ -/* general options */ -#define WANOPT_OFF 0 -#define WANOPT_ON 1 -#define WANOPT_NO 0 -#define WANOPT_YES 1 - -/* intercace options */ -#define WANOPT_RS232 0 -#define WANOPT_V35 1 - -/* data encoding options */ -#define WANOPT_NRZ 0 -#define WANOPT_NRZI 1 -#define WANOPT_FM0 2 -#define WANOPT_FM1 3 - -/* link type options */ -#define WANOPT_POINTTOPOINT 0 /* RTS always active */ -#define WANOPT_MULTIDROP 1 /* RTS is active when transmitting */ - -/* clocking options */ -#define WANOPT_EXTERNAL 0 -#define WANOPT_INTERNAL 1 - -/* station options */ -#define WANOPT_DTE 0 -#define WANOPT_DCE 1 -#define WANOPT_CPE 0 -#define WANOPT_NODE 1 -#define WANOPT_SECONDARY 0 -#define WANOPT_PRIMARY 1 - -/* connection options */ -#define WANOPT_PERMANENT 0 /* DTR always active */ -#define WANOPT_SWITCHED 1 /* use DTR to setup link (dial-up) */ -#define WANOPT_ONDEMAND 2 /* activate DTR only before sending */ - -/* frame relay in-channel signalling */ -#define WANOPT_FR_ANSI 1 /* ANSI T1.617 Annex D */ -#define WANOPT_FR_Q933 2 /* ITU Q.933A */ -#define WANOPT_FR_LMI 3 /* LMI */ - -/* PPP IP Mode Options */ -#define WANOPT_PPP_STATIC 0 -#define WANOPT_PPP_HOST 1 -#define WANOPT_PPP_PEER 2 - -/* ASY Mode Options */ -#define WANOPT_ONE 1 -#define WANOPT_TWO 2 -#define WANOPT_ONE_AND_HALF 3 - -#define WANOPT_NONE 0 -#define WANOPT_ODD 1 -#define WANOPT_EVEN 2 - -/* CHDLC Protocol Options */ -/* DF Commented out for now. - -#define WANOPT_CHDLC_NO_DCD IGNORE_DCD_FOR_LINK_STAT -#define WANOPT_CHDLC_NO_CTS IGNORE_CTS_FOR_LINK_STAT -#define WANOPT_CHDLC_NO_KEEPALIVE IGNORE_KPALV_FOR_LINK_STAT -*/ - -/* Port options */ -#define WANOPT_PRI 0 -#define WANOPT_SEC 1 -/* read mode */ -#define WANOPT_INTR 0 -#define WANOPT_POLL 1 - -#define WANOPT_TTY_SYNC 0 -#define WANOPT_TTY_ASYNC 1 -/*---------------------------------------------------------------------------- - * WAN Link Status Info (for ROUTER_STAT IOCTL). 
- */ -typedef struct wandev_stat -{ - unsigned state; /* link state */ - unsigned ndev; /* number of configured interfaces */ - - /* link/interface configuration */ - unsigned connection; /* permanent/switched/on-demand */ - unsigned media_type; /* Frame relay/PPP/X.25/SDLC, etc. */ - unsigned mtu; /* max. transmit unit for this device */ - - /* physical level statistics */ - unsigned modem_status; /* modem status */ - unsigned rx_frames; /* received frames count */ - unsigned rx_overruns; /* receiver overrun error count */ - unsigned rx_crc_err; /* receive CRC error count */ - unsigned rx_aborts; /* received aborted frames count */ - unsigned rx_bad_length; /* unexpetedly long/short frames count */ - unsigned rx_dropped; /* frames discarded at device level */ - unsigned tx_frames; /* transmitted frames count */ - unsigned tx_underruns; /* aborted transmissions (underruns) count */ - unsigned tx_timeouts; /* transmission timeouts */ - unsigned tx_rejects; /* other transmit errors */ - - /* media level statistics */ - unsigned rx_bad_format; /* frames with invalid format */ - unsigned rx_bad_addr; /* frames with invalid media address */ - unsigned tx_retries; /* frames re-transmitted */ - unsigned reserved[16]; /* reserved for future use */ -} wandev_stat_t; +#ifndef _UAPI_ROUTER_H +#define _UAPI_ROUTER_H /* 'state' defines */ enum wan_states @@ -365,88 +11,7 @@ enum wan_states WAN_UNCONFIGURED, /* link/channel is not configured */ WAN_DISCONNECTED, /* link/channel is disconnected */ WAN_CONNECTING, /* connection is in progress */ - WAN_CONNECTED, /* link/channel is operational */ - WAN_LIMIT, /* for verification only */ - WAN_DUALPORT, /* for Dual Port cards */ - WAN_DISCONNECTING, - WAN_FT1_READY /* FT1 Configurator Ready */ + WAN_CONNECTED /* link/channel is operational */ }; -enum { - WAN_LOCAL_IP, - WAN_POINTOPOINT_IP, - WAN_NETMASK_IP, - WAN_BROADCAST_IP -}; - -/* 'modem_status' masks */ -#define WAN_MODEM_CTS 0x0001 /* CTS line active */ -#define 
WAN_MODEM_DCD 0x0002 /* DCD line active */ -#define WAN_MODEM_DTR 0x0010 /* DTR line active */ -#define WAN_MODEM_RTS 0x0020 /* RTS line active */ - -/*---------------------------------------------------------------------------- - * WAN interface (logical channel) configuration (for ROUTER_IFNEW IOCTL). - */ -typedef struct wanif_conf -{ - unsigned magic; /* magic number */ - unsigned config_id; /* configuration identifier */ - char name[WAN_IFNAME_SZ+1]; /* interface name, ASCIIZ */ - char addr[WAN_ADDRESS_SZ+1]; /* media address, ASCIIZ */ - char usedby[USED_BY_FIELD]; /* used by API or WANPIPE */ - unsigned idle_timeout; /* sec, before disconnecting */ - unsigned hold_timeout; /* sec, before re-connecting */ - unsigned cir; /* Committed Information Rate fwd,bwd*/ - unsigned bc; /* Committed Burst Size fwd, bwd */ - unsigned be; /* Excess Burst Size fwd, bwd */ - unsigned char enable_IPX; /* Enable or Disable IPX */ - unsigned char inarp; /* Send Inverse ARP requests Y/N */ - unsigned inarp_interval; /* sec, between InARP requests */ - unsigned long network_number; /* Network Number for IPX */ - char mc; /* Multicast on or off */ - char local_addr[WAN_ADDRESS_SZ+1];/* local media address, ASCIIZ */ - unsigned char port; /* board port */ - unsigned char protocol; /* prococol used in this channel (TCPOX25 or X25) */ - char pap; /* PAP enabled or disabled */ - char chap; /* CHAP enabled or disabled */ - unsigned char userid[511]; /* List of User Id */ - unsigned char passwd[511]; /* List of passwords */ - unsigned char sysname[31]; /* Name of the system */ - unsigned char ignore_dcd; /* Protocol options: */ - unsigned char ignore_cts; /* Ignore these to determine */ - unsigned char ignore_keepalive; /* link status (Yes or No) */ - unsigned char hdlc_streaming; /* Hdlc streaming mode (Y/N) */ - unsigned keepalive_tx_tmr; /* transmit keepalive timer */ - unsigned keepalive_rx_tmr; /* receive keepalive timer */ - unsigned keepalive_err_margin; /* 
keepalive_error_tolerance */ - unsigned slarp_timer; /* SLARP request timer */ - unsigned char ttl; /* Time To Live for UDP security */ - char interface; /* RS-232/V.35, etc. */ - char clocking; /* external/internal */ - unsigned bps; /* data transfer rate */ - unsigned mtu; /* maximum transmit unit size */ - unsigned char if_down; /* brind down interface when disconnected */ - unsigned char gateway; /* Is this interface a gateway */ - unsigned char true_if_encoding; /* Set the dev->type to true board protocol */ - - unsigned char asy_data_trans; /* async API options */ - unsigned char rts_hs_for_receive; /* async Protocol options */ - unsigned char xon_xoff_hs_for_receive; - unsigned char xon_xoff_hs_for_transmit; - unsigned char dcd_hs_for_transmit; - unsigned char cts_hs_for_transmit; - unsigned char async_mode; - unsigned tx_bits_per_char; - unsigned rx_bits_per_char; - unsigned stop_bits; - unsigned char parity; - unsigned break_timer; - unsigned inter_char_timer; - unsigned rx_complete_length; - unsigned xon_char; - unsigned xoff_char; - unsigned char receive_only; /* no transmit buffering (Y/N) */ -} wanif_conf_t; - #endif /* _UAPI_ROUTER_H */ diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig deleted file mode 100644 index a157a2e64e18..000000000000 --- a/net/wanrouter/Kconfig +++ /dev/null @@ -1,27 +0,0 @@ -# -# Configuration for WAN router -# - -config WAN_ROUTER - tristate "WAN router (DEPRECATED)" - depends on EXPERIMENTAL - ---help--- - Wide Area Networks (WANs), such as X.25, frame relay and leased - lines, are used to interconnect Local Area Networks (LANs) over vast - distances with data transfer rates significantly higher than those - achievable with commonly used asynchronous modem connections. - Usually, a quite expensive external device called a `WAN router' is - needed to connect to a WAN. - - As an alternative, WAN routing can be built into the Linux kernel. 
- With relatively inexpensive WAN interface cards available on the - market, a perfectly usable router can be built for less than half - the price of an external router. If you have one of those cards and - wish to use your Linux box as a WAN router, say Y here and also to - the WAN driver for your card, below. You will then need the - wan-tools package which is available from . - - To compile WAN routing support as a module, choose M here: the - module will be called wanrouter. - - If unsure, say N. diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile deleted file mode 100644 index 4da14bc48078..000000000000 --- a/net/wanrouter/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux WAN router layer. -# - -obj-$(CONFIG_WAN_ROUTER) += wanrouter.o - -wanrouter-y := wanproc.o wanmain.o diff --git a/net/wanrouter/patchlevel b/net/wanrouter/patchlevel deleted file mode 100644 index c043eea7767e..000000000000 --- a/net/wanrouter/patchlevel +++ /dev/null @@ -1 +0,0 @@ -2.2.1 diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c deleted file mode 100644 index 2ab785064b7e..000000000000 --- a/net/wanrouter/wanmain.c +++ /dev/null @@ -1,782 +0,0 @@ -/***************************************************************************** -* wanmain.c WAN Multiprotocol Router Module. Main code. -* -* This module is completely hardware-independent and provides -* the following common services for the WAN Link Drivers: -* o WAN device management (registering, unregistering) -* o Network interface management -* o Physical connection management (dial-up, incoming calls) -* o Logical connection management (switched virtual circuits) -* o Protocol encapsulation/decapsulation -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. 
-* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Nov 24, 2000 Nenad Corbic Updated for 2.4.X kernels -* Nov 07, 2000 Nenad Corbic Fixed the Mulit-Port PPP for kernels 2.2.16 and -* greater. -* Aug 2, 2000 Nenad Corbic Block the Multi-Port PPP from running on -* kernels 2.2.16 or greater. The SyncPPP -* has changed. -* Jul 13, 2000 Nenad Corbic Added SyncPPP support -* Added extra debugging in device_setup(). -* Oct 01, 1999 Gideon Hack Update for s514 PCI card -* Dec 27, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -* Jan 16, 1997 Gene Kozin router_devlist made public -* Jan 31, 1997 Alan Cox Hacked it about a bit for 2.1 -* Jun 27, 1997 Alan Cox realigned with vendor code -* Oct 15, 1997 Farhan Thawar changed wan_encapsulate to add a pad byte of 0 -* Apr 20, 1998 Alan Cox Fixed 2.1 symbols -* May 17, 1998 K. Baranowski Fixed SNAP encapsulation in wan_encapsulate -* Dec 15, 1998 Arnaldo Melo support for firmwares of up to 128000 bytes -* check wandev->setup return value -* Dec 22, 1998 Arnaldo Melo vmalloc/vfree used in device_setup to allocate -* kernel memory and copy configuration data to -* kernel space (for big firmwares) -* Jun 02, 1999 Gideon Hack Updates for Linux 2.0.X and 2.2.X kernels. -*****************************************************************************/ - -#include /* offsetof(), etc. */ -#include -#include /* return codes */ -#include -#include /* support for loadable modules */ -#include /* kmalloc(), kfree() */ -#include -#include -#include /* inline mem*, str* functions */ - -#include /* htons(), etc. 
*/ -#include /* WAN router API definitions */ - -#include /* vmalloc, vfree */ -#include /* copy_to/from_user */ -#include /* __initfunc et al. */ - -#define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) - -/* - * Function Prototypes - */ - -/* - * WAN device IOCTL handlers - */ - -static DEFINE_MUTEX(wanrouter_mutex); -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf); -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat); -static int wanrouter_device_shutdown(struct wan_device *wandev); -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf); -static int wanrouter_device_del_if(struct wan_device *wandev, - char __user *u_name); - -/* - * Miscellaneous - */ - -static struct wan_device *wanrouter_find_device(char *name); -static int wanrouter_delete_interface(struct wan_device *wandev, char *name); -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock); -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock); - - - -/* - * Global Data - */ - -static char wanrouter_fullname[] = "Sangoma WANPIPE Router"; -static char wanrouter_copyright[] = "(c) 1995-2000 Sangoma Technologies Inc."; -static char wanrouter_modname[] = ROUTER_NAME; /* short module name */ -struct wan_device* wanrouter_router_devlist; /* list of registered devices */ - -/* - * Organize Unique Identifiers for encapsulation/decapsulation - */ - -#if 0 -static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 }; -static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 }; -#endif - -static int __init wanrouter_init(void) -{ - int err; - - printk(KERN_INFO "%s v%u.%u %s\n", - wanrouter_fullname, ROUTER_VERSION, ROUTER_RELEASE, - wanrouter_copyright); - - err = wanrouter_proc_init(); - if (err) - printk(KERN_INFO "%s: can't create entry in proc filesystem!\n", - wanrouter_modname); - - 
return err; -} - -static void __exit wanrouter_cleanup (void) -{ - wanrouter_proc_cleanup(); -} - -/* - * This is just plain dumb. We should move the bugger to drivers/net/wan, - * slap it first in directory and make it module_init(). The only reason - * for subsys_initcall() here is that net goes after drivers (why, BTW?) - */ -subsys_initcall(wanrouter_init); -module_exit(wanrouter_cleanup); - -/* - * Kernel APIs - */ - -/* - * Register WAN device. - * o verify device credentials - * o create an entry for the device in the /proc/net/router directory - * o initialize internally maintained fields of the wan_device structure - * o link device data space to a singly-linked list - * o if it's the first device, then start kernel 'thread' - * o increment module use count - * - * Return: - * 0 Ok - * < 0 error. - * - * Context: process - */ - - -int register_wan_device(struct wan_device *wandev) -{ - int err, namelen; - - if ((wandev == NULL) || (wandev->magic != ROUTER_MAGIC) || - (wandev->name == NULL)) - return -EINVAL; - - namelen = strlen(wandev->name); - if (!namelen || (namelen > WAN_DRVNAME_SZ)) - return -EINVAL; - - if (wanrouter_find_device(wandev->name)) - return -EEXIST; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering WAN device %s\n", - wanrouter_modname, wandev->name); -#endif - - /* - * Register /proc directory entry - */ - err = wanrouter_proc_add(wandev); - if (err) { - printk(KERN_INFO - "%s: can't create /proc/net/router/%s entry!\n", - wanrouter_modname, wandev->name); - return err; - } - - /* - * Initialize fields of the wan_device structure maintained by the - * router and update local data. - */ - - wandev->ndev = 0; - wandev->dev = NULL; - wandev->next = wanrouter_router_devlist; - wanrouter_router_devlist = wandev; - return 0; -} - -/* - * Unregister WAN device. 
- * o shut down device - * o unlink device data space from the linked list - * o delete device entry in the /proc/net/router directory - * o decrement module use count - * - * Return: 0 Ok - * <0 error. - * Context: process - */ - - -int unregister_wan_device(char *name) -{ - struct wan_device *wandev, *prev; - - if (name == NULL) - return -EINVAL; - - for (wandev = wanrouter_router_devlist, prev = NULL; - wandev && strcmp(wandev->name, name); - prev = wandev, wandev = wandev->next) - ; - if (wandev == NULL) - return -ENODEV; - -#ifdef WANDEBUG - printk(KERN_INFO "%s: unregistering WAN device %s\n", - wanrouter_modname, name); -#endif - - if (wandev->state != WAN_UNCONFIGURED) - wanrouter_device_shutdown(wandev); - - if (prev) - prev->next = wandev->next; - else - wanrouter_router_devlist = wandev->next; - - wanrouter_proc_delete(wandev); - return 0; -} - -#if 0 - -/* - * Encapsulate packet. - * - * Return: encapsulation header size - * < 0 - unsupported Ethertype - * - * Notes: - * 1. This function may be called on interrupt context. - */ - - -int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev, - unsigned short type) -{ - int hdr_len = 0; - - switch (type) { - case ETH_P_IP: /* IP datagram encapsulation */ - hdr_len += 1; - skb_push(skb, 1); - skb->data[0] = NLPID_IP; - break; - - case ETH_P_IPX: /* SNAP encapsulation */ - case ETH_P_ARP: - hdr_len += 7; - skb_push(skb, 7); - skb->data[0] = 0; - skb->data[1] = NLPID_SNAP; - skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether, - sizeof(wanrouter_oui_ether)); - *((unsigned short*)&skb->data[5]) = htons(type); - break; - - default: /* Unknown packet type */ - printk(KERN_INFO - "%s: unsupported Ethertype 0x%04X on interface %s!\n", - wanrouter_modname, type, dev->name); - hdr_len = -EINVAL; - } - return hdr_len; -} - - -/* - * Decapsulate packet. - * - * Return: Ethertype (in network order) - * 0 unknown encapsulation - * - * Notes: - * 1. This function may be called on interrupt context. 
- */ - - -__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) -{ - int cnt = skb->data[0] ? 0 : 1; /* there may be a pad present */ - __be16 ethertype; - - switch (skb->data[cnt]) { - case NLPID_IP: /* IP datagramm */ - ethertype = htons(ETH_P_IP); - cnt += 1; - break; - - case NLPID_SNAP: /* SNAP encapsulation */ - if (memcmp(&skb->data[cnt + 1], wanrouter_oui_ether, - sizeof(wanrouter_oui_ether))){ - printk(KERN_INFO - "%s: unsupported SNAP OUI %02X-%02X-%02X " - "on interface %s!\n", wanrouter_modname, - skb->data[cnt+1], skb->data[cnt+2], - skb->data[cnt+3], dev->name); - return 0; - } - ethertype = *((__be16*)&skb->data[cnt+4]); - cnt += 6; - break; - - /* add other protocols, e.g. CLNP, ESIS, ISIS, if needed */ - - default: - printk(KERN_INFO - "%s: unsupported NLPID 0x%02X on interface %s!\n", - wanrouter_modname, skb->data[cnt], dev->name); - return 0; - } - skb->protocol = ethertype; - skb->pkt_type = PACKET_HOST; /* Physically point to point */ - skb_pull(skb, cnt); - skb_reset_mac_header(skb); - return ethertype; -} - -#endif /* 0 */ - -/* - * WAN device IOCTL. 
- * o find WAN device associated with this node - * o execute requested action or pass command to the device driver - */ - -long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file->f_path.dentry->d_inode; - int err = 0; - struct proc_dir_entry *dent; - struct wan_device *wandev; - void __user *data = (void __user *)arg; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if ((cmd >> 8) != ROUTER_IOCTL) - return -EINVAL; - - dent = PDE(inode); - if ((dent == NULL) || (dent->data == NULL)) - return -EINVAL; - - wandev = dent->data; - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - mutex_lock(&wanrouter_mutex); - switch (cmd) { - case ROUTER_SETUP: - err = wanrouter_device_setup(wandev, data); - break; - - case ROUTER_DOWN: - err = wanrouter_device_shutdown(wandev); - break; - - case ROUTER_STAT: - err = wanrouter_device_stat(wandev, data); - break; - - case ROUTER_IFNEW: - err = wanrouter_device_new_if(wandev, data); - break; - - case ROUTER_IFDEL: - err = wanrouter_device_del_if(wandev, data); - break; - - case ROUTER_IFSTAT: - break; - - default: - if ((cmd >= ROUTER_USER) && - (cmd <= ROUTER_USER_MAX) && - wandev->ioctl) - err = wandev->ioctl(wandev, cmd, arg); - else err = -EINVAL; - } - mutex_unlock(&wanrouter_mutex); - return err; -} - -/* - * WAN Driver IOCTL Handlers - */ - -/* - * Setup WAN link device. - * o verify user address space - * o allocate kernel memory and copy configuration data to kernel space - * o if configuration data includes extension, copy it to kernel space too - * o call driver's setup() entry point - */ - -static int wanrouter_device_setup(struct wan_device *wandev, - wandev_conf_t __user *u_conf) -{ - void *data = NULL; - wandev_conf_t *conf; - int err = -EINVAL; - - if (wandev->setup == NULL) { /* Nothing to do ? 
*/ - printk(KERN_INFO "%s: ERROR, No setup script: wandev->setup()\n", - wandev->name); - return 0; - } - - conf = kmalloc(sizeof(wandev_conf_t), GFP_KERNEL); - if (conf == NULL){ - printk(KERN_INFO "%s: ERROR, Failed to allocate kernel memory !\n", - wandev->name); - return -ENOBUFS; - } - - if (copy_from_user(conf, u_conf, sizeof(wandev_conf_t))) { - printk(KERN_INFO "%s: Failed to copy user config data to kernel space!\n", - wandev->name); - kfree(conf); - return -EFAULT; - } - - if (conf->magic != ROUTER_MAGIC) { - kfree(conf); - printk(KERN_INFO "%s: ERROR, Invalid MAGIC Number\n", - wandev->name); - return -EINVAL; - } - - if (conf->data_size && conf->data) { - if (conf->data_size > 128000) { - printk(KERN_INFO - "%s: ERROR, Invalid firmware data size %i !\n", - wandev->name, conf->data_size); - kfree(conf); - return -EINVAL; - } - - data = vmalloc(conf->data_size); - if (!data) { - printk(KERN_INFO - "%s: ERROR, Failed allocate kernel memory !\n", - wandev->name); - kfree(conf); - return -ENOBUFS; - } - if (!copy_from_user(data, conf->data, conf->data_size)) { - conf->data = data; - err = wandev->setup(wandev, conf); - } else { - printk(KERN_INFO - "%s: ERROR, Failed to copy from user data !\n", - wandev->name); - err = -EFAULT; - } - vfree(data); - } else { - printk(KERN_INFO - "%s: ERROR, No firmware found ! Firmware size = %i !\n", - wandev->name, conf->data_size); - } - - kfree(conf); - return err; -} - -/* - * Shutdown WAN device. - * o delete all not opened logical channels for this device - * o call driver's shutdown() entry point - */ - -static int wanrouter_device_shutdown(struct wan_device *wandev) -{ - struct net_device *dev; - int err=0; - - if (wandev->state == WAN_UNCONFIGURED) - return 0; - - printk(KERN_INFO "\n%s: Shutting Down!\n",wandev->name); - - for (dev = wandev->dev; dev;) { - err = wanrouter_delete_interface(wandev, dev->name); - if (err) - return err; - /* The above function deallocates the current dev - * structure. 
Therefore, we cannot use netdev_priv(dev) - * as the next element: wandev->dev points to the - * next element */ - dev = wandev->dev; - } - - if (wandev->ndev) - return -EBUSY; /* there are opened interfaces */ - - if (wandev->shutdown) - err=wandev->shutdown(wandev); - - return err; -} - -/* - * Get WAN device status & statistics. - */ - -static int wanrouter_device_stat(struct wan_device *wandev, - wandev_stat_t __user *u_stat) -{ - wandev_stat_t stat; - - memset(&stat, 0, sizeof(stat)); - - /* Ask device driver to update device statistics */ - if ((wandev->state != WAN_UNCONFIGURED) && wandev->update) - wandev->update(wandev); - - /* Fill out structure */ - stat.ndev = wandev->ndev; - stat.state = wandev->state; - - if (copy_to_user(u_stat, &stat, sizeof(stat))) - return -EFAULT; - - return 0; -} - -/* - * Create new WAN interface. - * o verify user address space - * o copy configuration data to kernel address space - * o allocate network interface data space - * o call driver's new_if() entry point - * o make sure there is no interface name conflict - * o register network interface - */ - -static int wanrouter_device_new_if(struct wan_device *wandev, - wanif_conf_t __user *u_conf) -{ - wanif_conf_t *cnf; - struct net_device *dev = NULL; - int err; - - if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL)) - return -ENODEV; - - cnf = kmalloc(sizeof(wanif_conf_t), GFP_KERNEL); - if (!cnf) - return -ENOBUFS; - - err = -EFAULT; - if (copy_from_user(cnf, u_conf, sizeof(wanif_conf_t))) - goto out; - - err = -EINVAL; - if (cnf->magic != ROUTER_MAGIC) - goto out; - - if (cnf->config_id == WANCONFIG_MPPP) { - printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n", - wandev->name); - err = -EPROTONOSUPPORT; - goto out; - } else { - err = wandev->new_if(wandev, dev, cnf); - } - - if (!err) { - /* Register network interface. This will invoke init() - * function supplied by the driver. 
If device registered - * successfully, add it to the interface list. - */ - -#ifdef WANDEBUG - printk(KERN_INFO "%s: registering interface %s...\n", - wanrouter_modname, dev->name); -#endif - - err = register_netdev(dev); - if (!err) { - struct net_device *slave = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - - if (wandev->dev == NULL) { - wandev->dev = dev; - } else { - for (slave=wandev->dev; - DEV_TO_SLAVE(slave); - slave = DEV_TO_SLAVE(slave)) - DEV_TO_SLAVE(slave) = dev; - } - ++wandev->ndev; - - unlock_adapter_irq(&wandev->lock, &smp_flags); - err = 0; /* done !!! */ - goto out; - } - if (wandev->del_if) - wandev->del_if(wandev, dev); - free_netdev(dev); - } - -out: - kfree(cnf); - return err; -} - - -/* - * Delete WAN logical channel. - * o verify user address space - * o copy configuration data to kernel address space - */ - -static int wanrouter_device_del_if(struct wan_device *wandev, char __user *u_name) -{ - char name[WAN_IFNAME_SZ + 1]; - int err = 0; - - if (wandev->state == WAN_UNCONFIGURED) - return -ENODEV; - - memset(name, 0, sizeof(name)); - - if (copy_from_user(name, u_name, WAN_IFNAME_SZ)) - return -EFAULT; - - err = wanrouter_delete_interface(wandev, name); - if (err) - return err; - - /* If last interface being deleted, shutdown card - * This helps with administration at leaf nodes - * (You can tell if the person at the other end of the phone - * has an interface configured) and avoids DoS vulnerabilities - * in binary driver files - this fixes a problem with the current - * Sangoma driver going into strange states when all the network - * interfaces are deleted and the link irrecoverably disconnected. - */ - - if (!wandev->ndev && wandev->shutdown) - err = wandev->shutdown(wandev); - - return err; -} - -/* - * Miscellaneous Functions - */ - -/* - * Find WAN device by name. - * Return pointer to the WAN device data space or NULL if device not found. 
- */ - -static struct wan_device *wanrouter_find_device(char *name) -{ - struct wan_device *wandev; - - for (wandev = wanrouter_router_devlist; - wandev && strcmp(wandev->name, name); - wandev = wandev->next); - return wandev; -} - -/* - * Delete WAN logical channel identified by its name. - * o find logical channel by its name - * o call driver's del_if() entry point - * o unregister network interface - * o unlink channel data space from linked list of channels - * o release channel data space - * - * Return: 0 success - * -ENODEV channel not found. - * -EBUSY interface is open - * - * Note: If (force != 0), then device will be destroyed even if interface - * associated with it is open. It's caller's responsibility to make - * sure that opened interfaces are not removed! - */ - -static int wanrouter_delete_interface(struct wan_device *wandev, char *name) -{ - struct net_device *dev = NULL, *prev = NULL; - unsigned long smp_flags=0; - - lock_adapter_irq(&wandev->lock, &smp_flags); - dev = wandev->dev; - prev = NULL; - while (dev && strcmp(name, dev->name)) { - struct net_device **slave = netdev_priv(dev); - prev = dev; - dev = *slave; - } - unlock_adapter_irq(&wandev->lock, &smp_flags); - - if (dev == NULL) - return -ENODEV; /* interface not found */ - - if (netif_running(dev)) - return -EBUSY; /* interface in use */ - - if (wandev->del_if) - wandev->del_if(wandev, dev); - - lock_adapter_irq(&wandev->lock, &smp_flags); - if (prev) { - struct net_device **prev_slave = netdev_priv(prev); - struct net_device **slave = netdev_priv(dev); - - *prev_slave = *slave; - } else { - struct net_device **slave = netdev_priv(dev); - wandev->dev = *slave; - } - --wandev->ndev; - unlock_adapter_irq(&wandev->lock, &smp_flags); - - printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name); - - unregister_netdev(dev); - - free_netdev(dev); - - return 0; -} - -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __acquires(lock) -{ - 
spin_lock_irqsave(lock, *smp_flags); -} - - -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) - __releases(lock) -{ - spin_unlock_irqrestore(lock, *smp_flags); -} - -EXPORT_SYMBOL(register_wan_device); -EXPORT_SYMBOL(unregister_wan_device); - -MODULE_LICENSE("GPL"); - -/* - * End - */ diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c deleted file mode 100644 index c43612ee96bb..000000000000 --- a/net/wanrouter/wanproc.c +++ /dev/null @@ -1,380 +0,0 @@ -/***************************************************************************** -* wanproc.c WAN Router Module. /proc filesystem interface. -* -* This module is completely hardware-independent and provides -* access to the router using Linux /proc filesystem. -* -* Author: Gideon Hack -* -* Copyright: (c) 1995-1999 Sangoma Technologies Inc. -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* Jun 02, 1999 Gideon Hack Updates for Linux 2.2.X kernels. -* Jun 29, 1997 Alan Cox Merged with 1.0.3 vendor code -* Jan 29, 1997 Gene Kozin v1.0.1. Implemented /proc read routines -* Jan 30, 1997 Alan Cox Hacked around for 2.1 -* Dec 13, 1996 Gene Kozin Initial version (based on Sangoma's WANPIPE) -*****************************************************************************/ - -#include /* __initfunc et al. */ -#include /* offsetof(), etc. */ -#include /* return codes */ -#include -#include -#include /* WAN router API definitions */ -#include -#include - -#include -#include - -#define PROC_STATS_FORMAT "%30s: %12lu\n" - -/****** Defines and Macros **************************************************/ - -#define PROT_DECODE(prot) ((prot == WANCONFIG_FR) ? " FR" :\ - (prot == WANCONFIG_X25) ? 
" X25" : \ - (prot == WANCONFIG_PPP) ? " PPP" : \ - (prot == WANCONFIG_CHDLC) ? " CHDLC": \ - (prot == WANCONFIG_MPPP) ? " MPPP" : \ - " Unknown" ) - -/****** Function Prototypes *************************************************/ - -#ifdef CONFIG_PROC_FS - -/* Miscellaneous */ - -/* - * Structures for interfacing with the /proc filesystem. - * Router creates its own directory /proc/net/router with the following - * entries: - * config device configuration - * status global device statistics - * entry for each WAN device - */ - -/* - * Generic /proc/net/router/ file and inode operations - */ - -/* - * /proc/net/router - */ - -static DEFINE_MUTEX(config_mutex); -static struct proc_dir_entry *proc_router; - -/* Strings */ - -/* - * Interface functions - */ - -/****** Proc filesystem entry points ****************************************/ - -/* - * Iterator - */ -static void *r_start(struct seq_file *m, loff_t *pos) -{ - struct wan_device *wandev; - loff_t l = *pos; - - mutex_lock(&config_mutex); - if (!l--) - return SEQ_START_TOKEN; - for (wandev = wanrouter_router_devlist; l-- && wandev; - wandev = wandev->next) - ; - return wandev; -} - -static void *r_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct wan_device *wandev = v; - (*pos)++; - return (v == SEQ_START_TOKEN) ? 
wanrouter_router_devlist : wandev->next; -} - -static void r_stop(struct seq_file *m, void *v) -{ - mutex_unlock(&config_mutex); -} - -static int config_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name | port |IRQ|DMA| mem.addr |" - "mem.size|option1|option2|option3|option4\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|0x%-4X|%3u|%3u| 0x%-8lX |0x%-6X|%7u|%7u|%7u|%7u\n", - p->name, p->ioport, p->irq, p->dma, p->maddr, p->msize, - p->hw_opt[0], p->hw_opt[1], p->hw_opt[2], p->hw_opt[3]); - return 0; -} - -static int status_show(struct seq_file *m, void *v) -{ - struct wan_device *p = v; - if (v == SEQ_START_TOKEN) { - seq_puts(m, "Device name |protocol|station|interface|" - "clocking|baud rate| MTU |ndev|link state\n"); - return 0; - } - if (!p->state) - return 0; - seq_printf(m, "%-15s|%-8s| %-7s| %-9s|%-8s|%9u|%5u|%3u |", - p->name, - PROT_DECODE(p->config_id), - p->config_id == WANCONFIG_FR ? - (p->station ? "Node" : "CPE") : - (p->config_id == WANCONFIG_X25 ? - (p->station ? "DCE" : "DTE") : - ("N/A")), - p->interface ? "V.35" : "RS-232", - p->clocking ? 
"internal" : "external", - p->bps, - p->mtu, - p->ndev); - - switch (p->state) { - case WAN_UNCONFIGURED: - seq_printf(m, "%-12s\n", "unconfigured"); - break; - case WAN_DISCONNECTED: - seq_printf(m, "%-12s\n", "disconnected"); - break; - case WAN_CONNECTING: - seq_printf(m, "%-12s\n", "connecting"); - break; - case WAN_CONNECTED: - seq_printf(m, "%-12s\n", "connected"); - break; - default: - seq_printf(m, "%-12s\n", "invalid"); - break; - } - return 0; -} - -static const struct seq_operations config_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = config_show, -}; - -static const struct seq_operations status_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = status_show, -}; - -static int config_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &config_op); -} - -static int status_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &status_op); -} - -static const struct file_operations config_fops = { - .owner = THIS_MODULE, - .open = config_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static const struct file_operations status_fops = { - .owner = THIS_MODULE, - .open = status_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int wandev_show(struct seq_file *m, void *v) -{ - struct wan_device *wandev = m->private; - - if (wandev->magic != ROUTER_MAGIC) - return 0; - - if (!wandev->state) { - seq_puts(m, "device is not configured!\n"); - return 0; - } - - /* Update device statistics */ - if (wandev->update) { - int err = wandev->update(wandev); - if (err == -EAGAIN) { - seq_puts(m, "Device is busy!\n"); - return 0; - } - if (err) { - seq_puts(m, "Device is not configured!\n"); - return 0; - } - } - - seq_printf(m, PROC_STATS_FORMAT, - "total packets received", wandev->stats.rx_packets); - seq_printf(m, PROC_STATS_FORMAT, - "total packets transmitted", wandev->stats.tx_packets); - seq_printf(m, 
PROC_STATS_FORMAT, - "total bytes received", wandev->stats.rx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "total bytes transmitted", wandev->stats.tx_bytes); - seq_printf(m, PROC_STATS_FORMAT, - "bad packets received", wandev->stats.rx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "packet transmit problems", wandev->stats.tx_errors); - seq_printf(m, PROC_STATS_FORMAT, - "received frames dropped", wandev->stats.rx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "transmit frames dropped", wandev->stats.tx_dropped); - seq_printf(m, PROC_STATS_FORMAT, - "multicast packets received", wandev->stats.multicast); - seq_printf(m, PROC_STATS_FORMAT, - "transmit collisions", wandev->stats.collisions); - seq_printf(m, PROC_STATS_FORMAT, - "receive length errors", wandev->stats.rx_length_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver overrun errors", wandev->stats.rx_over_errors); - seq_printf(m, PROC_STATS_FORMAT, - "CRC errors", wandev->stats.rx_crc_errors); - seq_printf(m, PROC_STATS_FORMAT, - "frame format errors (aborts)", wandev->stats.rx_frame_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver fifo overrun", wandev->stats.rx_fifo_errors); - seq_printf(m, PROC_STATS_FORMAT, - "receiver missed packet", wandev->stats.rx_missed_errors); - seq_printf(m, PROC_STATS_FORMAT, - "aborted frames transmitted", wandev->stats.tx_aborted_errors); - return 0; -} - -static int wandev_open(struct inode *inode, struct file *file) -{ - return single_open(file, wandev_show, PDE(inode)->data); -} - -static const struct file_operations wandev_fops = { - .owner = THIS_MODULE, - .open = wandev_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .unlocked_ioctl = wanrouter_ioctl, -}; - -/* - * Initialize router proc interface. 
- */ - -int __init wanrouter_proc_init(void) -{ - struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); - if (!proc_router) - goto fail; - - p = proc_create("config", S_IRUGO, proc_router, &config_fops); - if (!p) - goto fail_config; - p = proc_create("status", S_IRUGO, proc_router, &status_fops); - if (!p) - goto fail_stat; - return 0; -fail_stat: - remove_proc_entry("config", proc_router); -fail_config: - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -fail: - return -ENOMEM; -} - -/* - * Clean up router proc interface. - */ - -void wanrouter_proc_cleanup(void) -{ - remove_proc_entry("config", proc_router); - remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, init_net.proc_net); -} - -/* - * Add directory entry for WAN device. - */ - -int wanrouter_proc_add(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - - wandev->dent = proc_create(wandev->name, S_IRUGO, - proc_router, &wandev_fops); - if (!wandev->dent) - return -ENOMEM; - wandev->dent->data = wandev; - return 0; -} - -/* - * Delete directory entry for WAN device. - */ -int wanrouter_proc_delete(struct wan_device* wandev) -{ - if (wandev->magic != ROUTER_MAGIC) - return -EINVAL; - remove_proc_entry(wandev->name, proc_router); - return 0; -} - -#else - -/* - * No /proc - output stubs - */ - -int __init wanrouter_proc_init(void) -{ - return 0; -} - -void wanrouter_proc_cleanup(void) -{ -} - -int wanrouter_proc_add(struct wan_device *wandev) -{ - return 0; -} - -int wanrouter_proc_delete(struct wan_device *wandev) -{ - return 0; -} - -#endif - -/* - * End - */ - -- cgit v1.2.3 From 0415d291022543d83ee799e9ffee08d856bca6e8 Mon Sep 17 00:00:00 2001 From: Enke Chen Date: Mon, 4 Feb 2013 16:14:32 +0100 Subject: fuse: send poll events commit 626cf23660 "poll: add poll_requested_events()..." enabled us to send the requested events to the filesystem. 
Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 1 + include/uapi/linux/fuse.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a010585b0a74..c8071768b950 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2167,6 +2167,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) return DEFAULT_POLLMASK; poll_wait(file, &ff->poll_wait, wait); + inarg.events = (__u32)poll_requested_events(wait); /* * Ask for notification iff there's someone waiting for it. diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 3451b6061e69..68619e9210b9 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -63,6 +63,7 @@ * * 7.21 * - add FUSE_READDIRPLUS + * - send the requested events in POLL request */ #ifndef _LINUX_FUSE_H @@ -585,7 +586,7 @@ struct fuse_poll_in { __u64 fh; __u64 kh; __u32 flags; - __u32 padding; + __u32 events; }; struct fuse_poll_out { -- cgit v1.2.3 From 42745e039312ab4672c60ec584651f0c74e8264f Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Mon, 4 Feb 2013 13:53:11 +0200 Subject: cfg80211: expand per-station byte counters to 64bit In per-station statistics, present 32bit counters are too small for practical purposes - with gigabit speeds, it get overlapped every few seconds. Expand counters in the struct station_info to be 64-bit. Driver can still fill only 32-bit and indicate in @filled only bits like STATION_INFO_[TR]X_BYTES; in case driver provides full 64-bit counter, it should also set in @filled bit STATION_INFO_[TR]RX_BYTES64 Netlink sends both 32-bit and 64-bit counters, if present, to not break userspace. 
Signed-off-by: Vladimir Kondratiev [change to also have 32-bit counters if driver advertises 64-bit] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++---- include/uapi/linux/nl80211.h | 4 ++++ net/wireless/nl80211.c | 16 +++++++++++++--- 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 63599ab6005b..94a0810ef68e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -672,8 +672,10 @@ struct station_parameters { * @STATION_INFO_SIGNAL: @signal filled * @STATION_INFO_TX_BITRATE: @txrate fields are filled * (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs) - * @STATION_INFO_RX_PACKETS: @rx_packets filled - * @STATION_INFO_TX_PACKETS: @tx_packets filled + * @STATION_INFO_RX_PACKETS: @rx_packets filled with 32-bit value + * @STATION_INFO_TX_PACKETS: @tx_packets filled with 32-bit value + * @STATION_INFO_RX_PACKETS64: @rx_packets filled with 64-bit value + * @STATION_INFO_TX_PACKETS64: @tx_packets filled with 64-bit value * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled @@ -714,6 +716,8 @@ enum station_info_flags { STATION_INFO_LOCAL_PM = 1<<21, STATION_INFO_PEER_PM = 1<<22, STATION_INFO_NONPEER_PM = 1<<23, + STATION_INFO_RX_BYTES64 = 1<<24, + STATION_INFO_TX_BYTES64 = 1<<25, }; /** @@ -835,8 +839,8 @@ struct station_info { u32 filled; u32 connected_time; u32 inactive_time; - u32 rx_bytes; - u32 tx_bytes; + u64 rx_bytes; + u64 tx_bytes; u16 llid; u16 plid; u8 plink_state; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 225a65e72219..9a2ecdc4136c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1857,6 +1857,8 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_INACTIVE_TIME: time since last activity (u32, msecs) * @NL80211_STA_INFO_RX_BYTES: total received bytes (u32, from this 
station) * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station) + * @NL80211_STA_INFO_RX_BYTES64: total received bytes (u64, from this station) + * @NL80211_STA_INFO_TX_BYTES64: total transmitted bytes (u64, to this station) * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute * containing info as possible, see &enum nl80211_rate_info @@ -1909,6 +1911,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_LOCAL_PM, NL80211_STA_INFO_PEER_PM, NL80211_STA_INFO_NONPEER_PM, + NL80211_STA_INFO_RX_BYTES64, + NL80211_STA_INFO_TX_BYTES64, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d359734b6972..807d448e702e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3057,12 +3057,22 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_STA_INFO_INACTIVE_TIME, sinfo->inactive_time)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_RX_BYTES) && + if ((sinfo->filled & (STATION_INFO_RX_BYTES | + STATION_INFO_RX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, - sinfo->rx_bytes)) + (u32)sinfo->rx_bytes)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_BYTES) && + if ((sinfo->filled & (STATION_INFO_TX_BYTES | + NL80211_STA_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, + (u32)sinfo->tx_bytes)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_RX_BYTES64) && + nla_put_u64(msg, NL80211_STA_INFO_RX_BYTES64, + sinfo->rx_bytes)) + goto nla_put_failure; + if ((sinfo->filled & STATION_INFO_TX_BYTES64) && + nla_put_u64(msg, NL80211_STA_INFO_TX_BYTES64, sinfo->tx_bytes)) goto nla_put_failure; if ((sinfo->filled & STATION_INFO_LLID) && -- cgit v1.2.3 From 85f024401bf80746ae08b7fd5809a9b16accf0b1 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 29 Jan 2013 17:54:44 -0800 Subject: serial_core: 
Fix type definition for PORT_BRCM_TRUMANAGE. It was mistakenly defined to be 24 instead of the next higher number 25. Reported-by: Alexander Shishkin Cc: Stephen Hurd Signed-off-by: Michael Chan Cc: stable # 3.8 Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 08464ef2c72c..b6a23a483d74 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -50,7 +50,7 @@ #define PORT_LPC3220 22 /* NXP LPC32xx SoC "Standard" UART */ #define PORT_8250_CIR 23 /* CIR infrared port, has its own driver */ #define PORT_XR17V35X 24 /* Exar XR17V35x UARTs */ -#define PORT_BRCM_TRUMANAGE 24 +#define PORT_BRCM_TRUMANAGE 25 #define PORT_MAX_8250 25 /* max port ID */ /* -- cgit v1.2.3 From d1beadd1cb649404bfa2c3d92f77dbcb15b712e5 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 28 Jan 2013 10:44:48 +0000 Subject: netfilter: xt_conntrack: Add flag to support aliases The patch adds the flag to denote the "state" alias as of the subset of the "conntrack" match. 
Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_conntrack.h b/include/uapi/linux/netfilter/xt_conntrack.h index e3c041d54020..e5bd3083a843 100644 --- a/include/uapi/linux/netfilter/xt_conntrack.h +++ b/include/uapi/linux/netfilter/xt_conntrack.h @@ -31,6 +31,7 @@ enum { XT_CONNTRACK_REPLSRC_PORT = 1 << 10, XT_CONNTRACK_REPLDST_PORT = 1 << 11, XT_CONNTRACK_DIRECTION = 1 << 12, + XT_CONNTRACK_STATE_ALIAS = 1 << 13, }; struct xt_conntrack_mtinfo1 { -- cgit v1.2.3 From 5474f57f7d686ac918355419cb71496f835aaf5d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 30 Jan 2013 20:24:22 +0100 Subject: netfilter: xt_CT: add alias flag This patch adds the alias flag to support full NOTRACK target aliasing. Based on initial patch from Jozsef Kadlecsik. Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_CT.h | 6 +++++- net/netfilter/xt_CT.c | 32 +++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_CT.h b/include/uapi/linux/netfilter/xt_CT.h index a064b8af360c..5a688c1ca4d7 100644 --- a/include/uapi/linux/netfilter/xt_CT.h +++ b/include/uapi/linux/netfilter/xt_CT.h @@ -3,7 +3,11 @@ #include -#define XT_CT_NOTRACK 0x1 +enum { + XT_CT_NOTRACK = 1 << 0, + XT_CT_NOTRACK_ALIAS = 1 << 1, + XT_CT_MASK = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS, +}; struct xt_ct_target_info { __u16 flags; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index d69f1c7532f7..a60261cb0e80 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -185,9 +185,6 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct nf_conn *ct; int ret = -EOPNOTSUPP; - if (info->flags & ~XT_CT_NOTRACK) - return -EINVAL; - if (info->flags & XT_CT_NOTRACK) { ct = 
nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); @@ -256,6 +253,9 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) }; int ret; + if (info->flags & ~XT_CT_NOTRACK) + return -EINVAL; + memcpy(info_v1.helper, info->helper, sizeof(info->helper)); ret = xt_ct_tg_check(par, &info_v1); @@ -269,6 +269,21 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) { + struct xt_ct_target_info_v1 *info = par->targinfo; + + if (info->flags & ~XT_CT_NOTRACK) + return -EINVAL; + + return xt_ct_tg_check(par, par->targinfo); +} + +static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info_v1 *info = par->targinfo; + + if (info->flags & ~XT_CT_MASK) + return -EINVAL; + return xt_ct_tg_check(par, par->targinfo); } @@ -350,6 +365,17 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { .table = "raw", .me = THIS_MODULE, }, + { + .name = "CT", + .family = NFPROTO_UNSPEC, + .revision = 2, + .targetsize = sizeof(struct xt_ct_target_info_v1), + .checkentry = xt_ct_tg_check_v2, + .destroy = xt_ct_tg_destroy_v1, + .target = xt_ct_target_v1, + .table = "raw", + .me = THIS_MODULE, + }, }; static unsigned int -- cgit v1.2.3 From 2ccbe779bcdee130ea7f1525670dc9d60318a981 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Sat, 19 Jan 2013 15:51:55 -0300 Subject: [media] v4l2-ctrl: Add helper function for the controls range update This patch adds a helper function that allows to modify range, i.e. minimum, maximum, step and default value of a v4l2 control, after the control has been created and initialized. This is helpful in situations when range of a control depends on user configurable parameters, e.g. camera sensor absolute exposure time depending on an output image resolution and frame rate. v4l2_ctrl_modify_range() function allows to modify range of an INTEGER, BOOL, MENU, INTEGER_MENU and BITMASK type controls. 
Based on a patch from Hans Verkuil http://patchwork.linuxtv.org/patch/8654. Signed-off-by: Sylwester Nawrocki Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/compat.xml | 4 + Documentation/DocBook/media/v4l/v4l2.xml | 4 +- Documentation/DocBook/media/v4l/vidioc-dqevent.xml | 6 + drivers/media/v4l2-core/v4l2-ctrls.c | 143 +++++++++++++++------ include/media/v4l2-ctrls.h | 20 +++ include/uapi/linux/videodev2.h | 1 + 6 files changed, 138 insertions(+), 40 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml index ebd2bfd1ee8e..104a1a2b8849 100644 --- a/Documentation/DocBook/media/v4l/compat.xml +++ b/Documentation/DocBook/media/v4l/compat.xml @@ -2486,6 +2486,10 @@ that used it. It was originally scheduled for removal in 2.6.35. v4l2_buffer. See . + + Added V4L2_EVENT_CTRL_CH_RANGE control event + changes flag. See . +
diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml index 8fe29427c8e4..c3851a2fb50d 100644 --- a/Documentation/DocBook/media/v4l/v4l2.xml +++ b/Documentation/DocBook/media/v4l/v4l2.xml @@ -142,10 +142,12 @@ applications. --> 3.9 2012-12-03 - sa + sa, sn Added timestamp types to v4l2_buffer, see . + Added V4L2_EVENT_CTRL_CH_RANGE control + event changes flag, see . diff --git a/Documentation/DocBook/media/v4l/vidioc-dqevent.xml b/Documentation/DocBook/media/v4l/vidioc-dqevent.xml index 98a856f9ec30..89891adb928a 100644 --- a/Documentation/DocBook/media/v4l/vidioc-dqevent.xml +++ b/Documentation/DocBook/media/v4l/vidioc-dqevent.xml @@ -261,6 +261,12 @@ This control event was triggered because the control flags changed.
+ + V4L2_EVENT_CTRL_CH_RANGE + 0x0004 + This control event was triggered because the minimum, + maximum, step or the default value of the control changed. + diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index 7b486ac3f4d9..3f27571b814d 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -1158,8 +1158,7 @@ static int new_to_user(struct v4l2_ext_control *c, } /* Copy the new value to the current value. */ -static void new_to_cur(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, - bool update_inactive) +static void new_to_cur(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, u32 ch_flags) { bool changed = false; @@ -1183,8 +1182,8 @@ static void new_to_cur(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, ctrl->cur.val = ctrl->val; break; } - if (update_inactive) { - /* Note: update_inactive can only be true for auto clusters. */ + if (ch_flags & V4L2_EVENT_CTRL_CH_FLAGS) { + /* Note: CH_FLAGS is only set for auto clusters. */ ctrl->flags &= ~(V4L2_CTRL_FLAG_INACTIVE | V4L2_CTRL_FLAG_VOLATILE); if (!is_cur_manual(ctrl->cluster[0])) { @@ -1194,14 +1193,13 @@ static void new_to_cur(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, } fh = NULL; } - if (changed || update_inactive) { + if (changed || ch_flags) { /* If a control was changed that was not one of the controls modified by the application, then send the event to all. */ if (!ctrl->is_new) fh = NULL; send_event(fh, ctrl, - (changed ? V4L2_EVENT_CTRL_CH_VALUE : 0) | - (update_inactive ? V4L2_EVENT_CTRL_CH_FLAGS : 0)); + (changed ? 
V4L2_EVENT_CTRL_CH_VALUE : 0) | ch_flags); if (ctrl->call_notify && changed && ctrl->handler->notify) ctrl->handler->notify(ctrl, ctrl->handler->notify_priv); } @@ -1257,6 +1255,41 @@ static int cluster_changed(struct v4l2_ctrl *master) return diff; } +/* Control range checking */ +static int check_range(enum v4l2_ctrl_type type, + s32 min, s32 max, u32 step, s32 def) +{ + switch (type) { + case V4L2_CTRL_TYPE_BOOLEAN: + if (step != 1 || max > 1 || min < 0) + return -ERANGE; + /* fall through */ + case V4L2_CTRL_TYPE_INTEGER: + if (step <= 0 || min > max || def < min || def > max) + return -ERANGE; + return 0; + case V4L2_CTRL_TYPE_BITMASK: + if (step || min || !max || (def & ~max)) + return -ERANGE; + return 0; + case V4L2_CTRL_TYPE_MENU: + case V4L2_CTRL_TYPE_INTEGER_MENU: + if (min > max || def < min || def > max) + return -ERANGE; + /* Note: step == menu_skip_mask for menu controls. + So here we check if the default value is masked out. */ + if (step && ((1 << def) & step)) + return -EINVAL; + return 0; + case V4L2_CTRL_TYPE_STRING: + if (min > max || min < 0 || step < 1 || def) + return -ERANGE; + return 0; + default: + return 0; + } +} + /* Validate a new control */ static int validate_new(const struct v4l2_ctrl *ctrl, struct v4l2_ext_control *c) @@ -1529,30 +1562,21 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl, { struct v4l2_ctrl *ctrl; unsigned sz_extra = 0; + int err; if (hdl->error) return NULL; /* Sanity checks */ if (id == 0 || name == NULL || id >= V4L2_CID_PRIVATE_BASE || - (type == V4L2_CTRL_TYPE_INTEGER && step == 0) || - (type == V4L2_CTRL_TYPE_BITMASK && max == 0) || (type == V4L2_CTRL_TYPE_MENU && qmenu == NULL) || - (type == V4L2_CTRL_TYPE_INTEGER_MENU && qmenu_int == NULL) || - (type == V4L2_CTRL_TYPE_STRING && max == 0)) { - handler_set_err(hdl, -ERANGE); - return NULL; - } - if (type != V4L2_CTRL_TYPE_BITMASK && max < min) { + (type == V4L2_CTRL_TYPE_INTEGER_MENU && qmenu_int == NULL)) { handler_set_err(hdl, 
-ERANGE); return NULL; } - if ((type == V4L2_CTRL_TYPE_INTEGER || - type == V4L2_CTRL_TYPE_MENU || - type == V4L2_CTRL_TYPE_INTEGER_MENU || - type == V4L2_CTRL_TYPE_BOOLEAN) && - (def < min || def > max)) { - handler_set_err(hdl, -ERANGE); + err = check_range(type, min, max, step, def); + if (err) { + handler_set_err(hdl, err); return NULL; } if (type == V4L2_CTRL_TYPE_BITMASK && ((def & ~max) || min || step)) { @@ -2426,8 +2450,8 @@ EXPORT_SYMBOL(v4l2_ctrl_g_ctrl_int64); /* Core function that calls try/s_ctrl and ensures that the new value is copied to the current value on a set. Must be called with ctrl->handler->lock held. */ -static int try_or_set_cluster(struct v4l2_fh *fh, - struct v4l2_ctrl *master, bool set) +static int try_or_set_cluster(struct v4l2_fh *fh, struct v4l2_ctrl *master, + bool set, u32 ch_flags) { bool update_flag; int ret; @@ -2465,7 +2489,8 @@ static int try_or_set_cluster(struct v4l2_fh *fh, /* If OK, then make the new values permanent. */ update_flag = is_cur_manual(master) != is_new_manual(master); for (i = 0; i < master->ncontrols; i++) - new_to_cur(fh, master->cluster[i], update_flag && i > 0); + new_to_cur(fh, master->cluster[i], ch_flags | + ((update_flag && i > 0) ? V4L2_EVENT_CTRL_CH_FLAGS : 0)); return 0; } @@ -2592,7 +2617,7 @@ static int try_set_ext_ctrls(struct v4l2_fh *fh, struct v4l2_ctrl_handler *hdl, } while (!ret && idx); if (!ret) - ret = try_or_set_cluster(fh, master, set); + ret = try_or_set_cluster(fh, master, set, 0); /* Copy the new values back to userspace. */ if (!ret) { @@ -2638,10 +2663,9 @@ EXPORT_SYMBOL(v4l2_subdev_s_ext_ctrls); /* Helper function for VIDIOC_S_CTRL compatibility */ static int set_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, - struct v4l2_ext_control *c) + struct v4l2_ext_control *c, u32 ch_flags) { struct v4l2_ctrl *master = ctrl->cluster[0]; - int ret; int i; /* String controls are not supported. 
The user_to_new() and @@ -2651,12 +2675,6 @@ static int set_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, if (ctrl->type == V4L2_CTRL_TYPE_STRING) return -EINVAL; - ret = validate_new(ctrl, c); - if (ret) - return ret; - - v4l2_ctrl_lock(ctrl); - /* Reset the 'is_new' flags of the cluster */ for (i = 0; i < master->ncontrols; i++) if (master->cluster[i]) @@ -2670,10 +2688,22 @@ static int set_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, update_from_auto_cluster(master); user_to_new(c, ctrl); - ret = try_or_set_cluster(fh, master, true); - cur_to_user(c, ctrl); + return try_or_set_cluster(fh, master, true, ch_flags); +} - v4l2_ctrl_unlock(ctrl); +/* Helper function for VIDIOC_S_CTRL compatibility */ +static int set_ctrl_lock(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, + struct v4l2_ext_control *c) +{ + int ret = validate_new(ctrl, c); + + if (!ret) { + v4l2_ctrl_lock(ctrl); + ret = set_ctrl(fh, ctrl, c, 0); + if (!ret) + cur_to_user(c, ctrl); + v4l2_ctrl_unlock(ctrl); + } return ret; } @@ -2691,7 +2721,7 @@ int v4l2_s_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl_handler *hdl, return -EACCES; c.value = control->value; - ret = set_ctrl(fh, ctrl, &c); + ret = set_ctrl_lock(fh, ctrl, &c); control->value = c.value; return ret; } @@ -2710,7 +2740,7 @@ int v4l2_ctrl_s_ctrl(struct v4l2_ctrl *ctrl, s32 val) /* It's a driver bug if this happens. */ WARN_ON(!type_is_int(ctrl)); c.value = val; - return set_ctrl(NULL, ctrl, &c); + return set_ctrl_lock(NULL, ctrl, &c); } EXPORT_SYMBOL(v4l2_ctrl_s_ctrl); @@ -2721,7 +2751,7 @@ int v4l2_ctrl_s_ctrl_int64(struct v4l2_ctrl *ctrl, s64 val) /* It's a driver bug if this happens. 
*/ WARN_ON(ctrl->type != V4L2_CTRL_TYPE_INTEGER64); c.value64 = val; - return set_ctrl(NULL, ctrl, &c); + return set_ctrl_lock(NULL, ctrl, &c); } EXPORT_SYMBOL(v4l2_ctrl_s_ctrl_int64); @@ -2741,6 +2771,41 @@ void v4l2_ctrl_notify(struct v4l2_ctrl *ctrl, v4l2_ctrl_notify_fnc notify, void } EXPORT_SYMBOL(v4l2_ctrl_notify); +int v4l2_ctrl_modify_range(struct v4l2_ctrl *ctrl, + s32 min, s32 max, u32 step, s32 def) +{ + int ret = check_range(ctrl->type, min, max, step, def); + struct v4l2_ext_control c; + + switch (ctrl->type) { + case V4L2_CTRL_TYPE_INTEGER: + case V4L2_CTRL_TYPE_BOOLEAN: + case V4L2_CTRL_TYPE_MENU: + case V4L2_CTRL_TYPE_INTEGER_MENU: + case V4L2_CTRL_TYPE_BITMASK: + if (ret) + return ret; + break; + default: + return -EINVAL; + } + v4l2_ctrl_lock(ctrl); + ctrl->minimum = min; + ctrl->maximum = max; + ctrl->step = step; + ctrl->default_value = def; + c.value = ctrl->cur.val; + if (validate_new(ctrl, &c)) + c.value = def; + if (c.value != ctrl->cur.val) + ret = set_ctrl(NULL, ctrl, &c, V4L2_EVENT_CTRL_CH_RANGE); + else + send_event(NULL, ctrl, V4L2_EVENT_CTRL_CH_RANGE); + v4l2_ctrl_unlock(ctrl); + return ret; +} +EXPORT_SYMBOL(v4l2_ctrl_modify_range); + static int v4l2_ctrl_add_event(struct v4l2_subscribed_event *sev, unsigned elems) { struct v4l2_ctrl *ctrl = v4l2_ctrl_find(sev->fh->ctrl_handler, sev->id); diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index c4cc04136074..91125b6f05a5 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -518,6 +518,26 @@ void v4l2_ctrl_activate(struct v4l2_ctrl *ctrl, bool active); */ void v4l2_ctrl_grab(struct v4l2_ctrl *ctrl, bool grabbed); +/** v4l2_ctrl_modify_range() - Update the range of a control. + * @ctrl: The control to update. + * @min: The control's minimum value. + * @max: The control's maximum value. + * @step: The control's step value + * @def: The control's default value. + * + * Update the range of a control on the fly. 
This works for control types + * INTEGER, BOOLEAN, MENU, INTEGER MENU and BITMASK. For menu controls the + * @step value is interpreted as a menu_skip_mask. + * + * An error is returned if one of the range arguments is invalid for this + * control type. + * + * This function assumes that the control handler is not locked and will + * take the lock itself. + */ +int v4l2_ctrl_modify_range(struct v4l2_ctrl *ctrl, + s32 min, s32 max, u32 step, s32 def); + /** v4l2_ctrl_lock() - Helper function to lock the handler * associated with the control. * @ctrl: The control to lock. diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 94cbe26e9f00..928799c2e2d9 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1822,6 +1822,7 @@ struct v4l2_event_vsync { /* Payload for V4L2_EVENT_CTRL */ #define V4L2_EVENT_CTRL_CH_VALUE (1 << 0) #define V4L2_EVENT_CTRL_CH_FLAGS (1 << 1) +#define V4L2_EVENT_CTRL_CH_RANGE (1 << 2) struct v4l2_event_ctrl { __u32 changes; -- cgit v1.2.3 From 28718152e0a78085297ec7705f53869e41d1ae73 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 24 Jan 2013 04:42:05 -0300 Subject: [media] Move DV-class control IDs from videodev2.h to v4l2-controls.h When the control IDs were split off from videodev2.h to v4l2-controls.h these new Digital Video controls were forgotten (the two patches may have crossed one another). Move these controls to their proper place in v4l2-controls.h. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 24 ++++++++++++++++++++++++ include/uapi/linux/videodev2.h | 22 ---------------------- 2 files changed, 24 insertions(+), 22 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 4dc0822700fe..0bece06792d7 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -778,6 +778,7 @@ enum v4l2_jpeg_chroma_subsampling { #define V4L2_JPEG_ACTIVE_MARKER_DQT (1 << 17) #define V4L2_JPEG_ACTIVE_MARKER_DHT (1 << 18) + /* Image source controls */ #define V4L2_CID_IMAGE_SOURCE_CLASS_BASE (V4L2_CTRL_CLASS_IMAGE_SOURCE | 0x900) #define V4L2_CID_IMAGE_SOURCE_CLASS (V4L2_CTRL_CLASS_IMAGE_SOURCE | 1) @@ -796,4 +797,27 @@ enum v4l2_jpeg_chroma_subsampling { #define V4L2_CID_PIXEL_RATE (V4L2_CID_IMAGE_PROC_CLASS_BASE + 2) #define V4L2_CID_TEST_PATTERN (V4L2_CID_IMAGE_PROC_CLASS_BASE + 3) + +/* DV-class control IDs defined by V4L2 */ +#define V4L2_CID_DV_CLASS_BASE (V4L2_CTRL_CLASS_DV | 0x900) +#define V4L2_CID_DV_CLASS (V4L2_CTRL_CLASS_DV | 1) + +#define V4L2_CID_DV_TX_HOTPLUG (V4L2_CID_DV_CLASS_BASE + 1) +#define V4L2_CID_DV_TX_RXSENSE (V4L2_CID_DV_CLASS_BASE + 2) +#define V4L2_CID_DV_TX_EDID_PRESENT (V4L2_CID_DV_CLASS_BASE + 3) +#define V4L2_CID_DV_TX_MODE (V4L2_CID_DV_CLASS_BASE + 4) +enum v4l2_dv_tx_mode { + V4L2_DV_TX_MODE_DVI_D = 0, + V4L2_DV_TX_MODE_HDMI = 1, +}; +#define V4L2_CID_DV_TX_RGB_RANGE (V4L2_CID_DV_CLASS_BASE + 5) +enum v4l2_dv_rgb_range { + V4L2_DV_RGB_RANGE_AUTO = 0, + V4L2_DV_RGB_RANGE_LIMITED = 1, + V4L2_DV_RGB_RANGE_FULL = 2, +}; + +#define V4L2_CID_DV_RX_POWER_PRESENT (V4L2_CID_DV_CLASS_BASE + 100) +#define V4L2_CID_DV_RX_RGB_RANGE (V4L2_CID_DV_CLASS_BASE + 101) + #endif diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 928799c2e2d9..234d1d870914 100644 --- a/include/uapi/linux/videodev2.h +++ 
b/include/uapi/linux/videodev2.h @@ -1354,28 +1354,6 @@ struct v4l2_querymenu { #define V4L2_CID_PRIVATE_BASE 0x08000000 -/* DV-class control IDs defined by V4L2 */ -#define V4L2_CID_DV_CLASS_BASE (V4L2_CTRL_CLASS_DV | 0x900) -#define V4L2_CID_DV_CLASS (V4L2_CTRL_CLASS_DV | 1) - -#define V4L2_CID_DV_TX_HOTPLUG (V4L2_CID_DV_CLASS_BASE + 1) -#define V4L2_CID_DV_TX_RXSENSE (V4L2_CID_DV_CLASS_BASE + 2) -#define V4L2_CID_DV_TX_EDID_PRESENT (V4L2_CID_DV_CLASS_BASE + 3) -#define V4L2_CID_DV_TX_MODE (V4L2_CID_DV_CLASS_BASE + 4) -enum v4l2_dv_tx_mode { - V4L2_DV_TX_MODE_DVI_D = 0, - V4L2_DV_TX_MODE_HDMI = 1, -}; -#define V4L2_CID_DV_TX_RGB_RANGE (V4L2_CID_DV_CLASS_BASE + 5) -enum v4l2_dv_rgb_range { - V4L2_DV_RGB_RANGE_AUTO = 0, - V4L2_DV_RGB_RANGE_LIMITED = 1, - V4L2_DV_RGB_RANGE_FULL = 2, -}; - -#define V4L2_CID_DV_RX_POWER_PRESENT (V4L2_CID_DV_CLASS_BASE + 100) -#define V4L2_CID_DV_RX_RGB_RANGE (V4L2_CID_DV_CLASS_BASE + 101) - /* * T U N I N G */ -- cgit v1.2.3 From ed986d1fee77bbbb62291a1db1c7edbb00d99515 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 29 Jan 2013 07:21:02 -0300 Subject: [media] meye: convert to the control framework Convert the meye driver to the control framework. Some private controls have been replaced with standardized controls (SHARPNESS and JPEGQUAL). The AGC control looks like it can be replaced by the AUTOGAIN control, but it isn't a boolean so I do not know how to interpret it. The FRAMERATE control looks like it can be replaced by S_PARM, but again, without knowing how to interpret it I decided to leave it alone. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/pci/meye/meye.c | 278 ++++++++++++------------------------- drivers/media/pci/meye/meye.h | 2 + include/uapi/linux/meye.h | 8 +- include/uapi/linux/v4l2-controls.h | 5 + 4 files changed, 99 insertions(+), 194 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c index 3b39deaa8f6b..7859c43479d7 100644 --- a/drivers/media/pci/meye/meye.c +++ b/drivers/media/pci/meye/meye.c @@ -35,6 +35,8 @@ #include #include #include +#include +#include #include #include #include @@ -865,7 +867,7 @@ static int meye_open(struct file *file) meye.grab_buffer[i].state = MEYE_BUF_UNUSED; kfifo_reset(&meye.grabq); kfifo_reset(&meye.doneq); - return 0; + return v4l2_fh_open(file); } static int meye_release(struct file *file) @@ -873,7 +875,7 @@ static int meye_release(struct file *file) mchip_hic_stop(); mchip_dma_free(); clear_bit(0, &meye.in_use); - return 0; + return v4l2_fh_release(file); } static int meyeioc_g_params(struct meye_params *p) @@ -1032,8 +1034,9 @@ static int vidioc_querycap(struct file *file, void *fh, cap->version = (MEYE_DRIVER_MAJORVERSION << 8) + MEYE_DRIVER_MINORVERSION; - cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | + cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING; + cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS; return 0; } @@ -1063,191 +1066,50 @@ static int vidioc_s_input(struct file *file, void *fh, unsigned int i) return 0; } -static int vidioc_queryctrl(struct file *file, void *fh, - struct v4l2_queryctrl *c) -{ - switch (c->id) { - - case V4L2_CID_BRIGHTNESS: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Brightness"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 32; - c->flags = 0; - break; - case V4L2_CID_HUE: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Hue"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 
32; - c->flags = 0; - break; - case V4L2_CID_CONTRAST: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Contrast"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 32; - c->flags = 0; - break; - case V4L2_CID_SATURATION: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Saturation"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 32; - c->flags = 0; - break; - case V4L2_CID_AGC: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Agc"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 48; - c->flags = 0; - break; - case V4L2_CID_MEYE_SHARPNESS: - case V4L2_CID_SHARPNESS: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Sharpness"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 32; - - /* Continue to report legacy private SHARPNESS ctrl but - * say it is disabled in preference to ctrl in the spec - */ - c->flags = (c->id == V4L2_CID_SHARPNESS) ? 0 : - V4L2_CTRL_FLAG_DISABLED; - break; - case V4L2_CID_PICTURE: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Picture"); - c->minimum = 0; - c->maximum = 63; - c->step = 1; - c->default_value = 0; - c->flags = 0; - break; - case V4L2_CID_JPEGQUAL: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "JPEG quality"); - c->minimum = 0; - c->maximum = 10; - c->step = 1; - c->default_value = 8; - c->flags = 0; - break; - case V4L2_CID_FRAMERATE: - c->type = V4L2_CTRL_TYPE_INTEGER; - strcpy(c->name, "Framerate"); - c->minimum = 0; - c->maximum = 31; - c->step = 1; - c->default_value = 0; - c->flags = 0; - break; - default: - return -EINVAL; - } - - return 0; -} - -static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *c) +static int meye_s_ctrl(struct v4l2_ctrl *ctrl) { mutex_lock(&meye.lock); - switch (c->id) { + switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERABRIGHTNESS, c->value); - meye.brightness = c->value << 10; + 
SONY_PIC_COMMAND_SETCAMERABRIGHTNESS, ctrl->val); + meye.brightness = ctrl->val << 10; break; case V4L2_CID_HUE: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERAHUE, c->value); - meye.hue = c->value << 10; + SONY_PIC_COMMAND_SETCAMERAHUE, ctrl->val); + meye.hue = ctrl->val << 10; break; case V4L2_CID_CONTRAST: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERACONTRAST, c->value); - meye.contrast = c->value << 10; + SONY_PIC_COMMAND_SETCAMERACONTRAST, ctrl->val); + meye.contrast = ctrl->val << 10; break; case V4L2_CID_SATURATION: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERACOLOR, c->value); - meye.colour = c->value << 10; + SONY_PIC_COMMAND_SETCAMERACOLOR, ctrl->val); + meye.colour = ctrl->val << 10; break; - case V4L2_CID_AGC: + case V4L2_CID_MEYE_AGC: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERAAGC, c->value); - meye.params.agc = c->value; + SONY_PIC_COMMAND_SETCAMERAAGC, ctrl->val); + meye.params.agc = ctrl->val; break; case V4L2_CID_SHARPNESS: - case V4L2_CID_MEYE_SHARPNESS: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERASHARPNESS, c->value); - meye.params.sharpness = c->value; + SONY_PIC_COMMAND_SETCAMERASHARPNESS, ctrl->val); + meye.params.sharpness = ctrl->val; break; - case V4L2_CID_PICTURE: + case V4L2_CID_MEYE_PICTURE: sony_pic_camera_command( - SONY_PIC_COMMAND_SETCAMERAPICTURE, c->value); - meye.params.picture = c->value; + SONY_PIC_COMMAND_SETCAMERAPICTURE, ctrl->val); + meye.params.picture = ctrl->val; break; - case V4L2_CID_JPEGQUAL: - meye.params.quality = c->value; + case V4L2_CID_JPEG_COMPRESSION_QUALITY: + meye.params.quality = ctrl->val; break; - case V4L2_CID_FRAMERATE: - meye.params.framerate = c->value; - break; - default: - mutex_unlock(&meye.lock); - return -EINVAL; - } - mutex_unlock(&meye.lock); - - return 0; -} - -static int vidioc_g_ctrl(struct file *file, void *fh, struct v4l2_control *c) -{ - mutex_lock(&meye.lock); - switch (c->id) { - case V4L2_CID_BRIGHTNESS: - c->value = meye.brightness >> 10; - 
break; - case V4L2_CID_HUE: - c->value = meye.hue >> 10; - break; - case V4L2_CID_CONTRAST: - c->value = meye.contrast >> 10; - break; - case V4L2_CID_SATURATION: - c->value = meye.colour >> 10; - break; - case V4L2_CID_AGC: - c->value = meye.params.agc; - break; - case V4L2_CID_SHARPNESS: - case V4L2_CID_MEYE_SHARPNESS: - c->value = meye.params.sharpness; - break; - case V4L2_CID_PICTURE: - c->value = meye.params.picture; - break; - case V4L2_CID_JPEGQUAL: - c->value = meye.params.quality; - break; - case V4L2_CID_FRAMERATE: - c->value = meye.params.framerate; + case V4L2_CID_MEYE_FRAMERATE: + meye.params.framerate = ctrl->val; break; default: mutex_unlock(&meye.lock); @@ -1577,12 +1439,12 @@ static long vidioc_default(struct file *file, void *fh, bool valid_prio, static unsigned int meye_poll(struct file *file, poll_table *wait) { - unsigned int res = 0; + unsigned int res = v4l2_ctrl_poll(file, wait); mutex_lock(&meye.lock); poll_wait(file, &meye.proc_list, wait); if (kfifo_len(&meye.doneq)) - res = POLLIN | POLLRDNORM; + res |= POLLIN | POLLRDNORM; mutex_unlock(&meye.lock); return res; } @@ -1669,9 +1531,6 @@ static const struct v4l2_ioctl_ops meye_ioctl_ops = { .vidioc_enum_input = vidioc_enum_input, .vidioc_g_input = vidioc_g_input, .vidioc_s_input = vidioc_s_input, - .vidioc_queryctrl = vidioc_queryctrl, - .vidioc_s_ctrl = vidioc_s_ctrl, - .vidioc_g_ctrl = vidioc_g_ctrl, .vidioc_enum_fmt_vid_cap = vidioc_enum_fmt_vid_cap, .vidioc_try_fmt_vid_cap = vidioc_try_fmt_vid_cap, .vidioc_g_fmt_vid_cap = vidioc_g_fmt_vid_cap, @@ -1682,6 +1541,9 @@ static const struct v4l2_ioctl_ops meye_ioctl_ops = { .vidioc_dqbuf = vidioc_dqbuf, .vidioc_streamon = vidioc_streamon, .vidioc_streamoff = vidioc_streamoff, + .vidioc_log_status = v4l2_ctrl_log_status, + .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, + .vidioc_unsubscribe_event = v4l2_event_unsubscribe, .vidioc_default = vidioc_default, }; @@ -1692,6 +1554,10 @@ static struct video_device meye_template = { .release = 
video_device_release, }; +static const struct v4l2_ctrl_ops meye_ctrl_ops = { + .s_ctrl = meye_s_ctrl, +}; + #ifdef CONFIG_PM static int meye_suspend(struct pci_dev *pdev, pm_message_t state) { @@ -1730,6 +1596,32 @@ static int meye_resume(struct pci_dev *pdev) static int meye_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) { + static const struct v4l2_ctrl_config ctrl_agc = { + .id = V4L2_CID_MEYE_AGC, + .type = V4L2_CTRL_TYPE_INTEGER, + .ops = &meye_ctrl_ops, + .name = "AGC", + .max = 63, + .step = 1, + .def = 48, + .flags = V4L2_CTRL_FLAG_SLIDER, + }; + static const struct v4l2_ctrl_config ctrl_picture = { + .id = V4L2_CID_MEYE_PICTURE, + .type = V4L2_CTRL_TYPE_INTEGER, + .ops = &meye_ctrl_ops, + .name = "Picture", + .max = 63, + .step = 1, + }; + static const struct v4l2_ctrl_config ctrl_framerate = { + .id = V4L2_CID_MEYE_FRAMERATE, + .type = V4L2_CTRL_TYPE_INTEGER, + .ops = &meye_ctrl_ops, + .name = "Framerate", + .max = 31, + .step = 1, + }; struct v4l2_device *v4l2_dev = &meye.v4l2_dev; int ret = -EBUSY; unsigned long mchip_adr; @@ -1833,24 +1725,31 @@ static int meye_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) mutex_init(&meye.lock); init_waitqueue_head(&meye.proc_list); - meye.brightness = 32 << 10; - meye.hue = 32 << 10; - meye.colour = 32 << 10; - meye.contrast = 32 << 10; - meye.params.subsample = 0; - meye.params.quality = 8; - meye.params.sharpness = 32; - meye.params.agc = 48; - meye.params.picture = 0; - meye.params.framerate = 0; - - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERABRIGHTNESS, 32); - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERAHUE, 32); - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERACOLOR, 32); - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERACONTRAST, 32); - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERASHARPNESS, 32); - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERAPICTURE, 0); - sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERAAGC, 48); + + 
v4l2_ctrl_handler_init(&meye.hdl, 3); + v4l2_ctrl_new_std(&meye.hdl, &meye_ctrl_ops, + V4L2_CID_BRIGHTNESS, 0, 63, 1, 32); + v4l2_ctrl_new_std(&meye.hdl, &meye_ctrl_ops, + V4L2_CID_HUE, 0, 63, 1, 32); + v4l2_ctrl_new_std(&meye.hdl, &meye_ctrl_ops, + V4L2_CID_CONTRAST, 0, 63, 1, 32); + v4l2_ctrl_new_std(&meye.hdl, &meye_ctrl_ops, + V4L2_CID_SATURATION, 0, 63, 1, 32); + v4l2_ctrl_new_custom(&meye.hdl, &ctrl_agc, NULL); + v4l2_ctrl_new_std(&meye.hdl, &meye_ctrl_ops, + V4L2_CID_SHARPNESS, 0, 63, 1, 32); + v4l2_ctrl_new_custom(&meye.hdl, &ctrl_picture, NULL); + v4l2_ctrl_new_std(&meye.hdl, &meye_ctrl_ops, + V4L2_CID_JPEG_COMPRESSION_QUALITY, 0, 10, 1, 8); + v4l2_ctrl_new_custom(&meye.hdl, &ctrl_framerate, NULL); + if (meye.hdl.error) { + v4l2_err(v4l2_dev, "couldn't register controls\n"); + goto outvideoreg; + } + + v4l2_ctrl_handler_setup(&meye.hdl); + meye.vdev->ctrl_handler = &meye.hdl; + set_bit(V4L2_FL_USE_FH_PRIO, &meye.vdev->flags); if (video_register_device(meye.vdev, VFL_TYPE_GRABBER, video_nr) < 0) { @@ -1866,6 +1765,7 @@ static int meye_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) return 0; outvideoreg: + v4l2_ctrl_handler_free(&meye.hdl); free_irq(meye.mchip_irq, meye_irq); outreqirq: iounmap(meye.mchip_mmregs); diff --git a/drivers/media/pci/meye/meye.h b/drivers/media/pci/meye/meye.h index 4bdeb03f1644..6fed9274cfa5 100644 --- a/drivers/media/pci/meye/meye.h +++ b/drivers/media/pci/meye/meye.h @@ -39,6 +39,7 @@ #include #include #include +#include /****************************************************************************/ /* Motion JPEG chip registers */ @@ -290,6 +291,7 @@ struct meye_grab_buffer { /* Motion Eye device structure */ struct meye { struct v4l2_device v4l2_dev; /* Main v4l2_device struct */ + struct v4l2_ctrl_handler hdl; struct pci_dev *mchip_dev; /* pci device */ u8 mchip_irq; /* irq */ u8 mchip_mode; /* actual mchip mode: HIC_MODE... 
*/ diff --git a/include/uapi/linux/meye.h b/include/uapi/linux/meye.h index 0dd49954f746..8ff50fe9e481 100644 --- a/include/uapi/linux/meye.h +++ b/include/uapi/linux/meye.h @@ -57,10 +57,8 @@ struct meye_params { #define MEYEIOC_STILLJCAPT _IOR ('v', BASE_VIDIOC_PRIVATE+5, int) /* V4L2 private controls */ -#define V4L2_CID_AGC V4L2_CID_PRIVATE_BASE -#define V4L2_CID_MEYE_SHARPNESS (V4L2_CID_PRIVATE_BASE + 1) -#define V4L2_CID_PICTURE (V4L2_CID_PRIVATE_BASE + 2) -#define V4L2_CID_JPEGQUAL (V4L2_CID_PRIVATE_BASE + 3) -#define V4L2_CID_FRAMERATE (V4L2_CID_PRIVATE_BASE + 4) +#define V4L2_CID_MEYE_AGC (V4L2_CID_USER_MEYE_BASE + 0) +#define V4L2_CID_MEYE_PICTURE (V4L2_CID_USER_MEYE_BASE + 1) +#define V4L2_CID_MEYE_FRAMERATE (V4L2_CID_USER_MEYE_BASE + 2) #endif diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 0bece06792d7..dcd63745e83a 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -140,6 +140,11 @@ enum v4l2_colorfx { /* last CID + 1 */ #define V4L2_CID_LASTP1 (V4L2_CID_BASE+43) +/* USER-class private control IDs */ + +/* The base for the meye driver controls. See linux/meye.h for the list + * of controls. We reserve 16 controls for this driver. */ +#define V4L2_CID_USER_MEYE_BASE (V4L2_CID_USER_BASE + 0x1000) /* MPEG-class control IDs */ -- cgit v1.2.3 From 7e98d53086d18c877cb44e9065219335184024de Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Feb 2013 11:58:12 +0100 Subject: Synchronize fuse header with one used in library The library one has provisions for use in *BSD, add them to the kernel one too. They don't hurt and ease maintenance. 
Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 68619e9210b9..baee03e90438 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -1,9 +1,35 @@ /* - FUSE: Filesystem in Userspace + This file defines the kernel interface of FUSE Copyright (C) 2001-2008 Miklos Szeredi This program can be distributed under the terms of the GNU GPL. See the file COPYING. + + This -- and only this -- header file may also be distributed under + the terms of the BSD Licence as follows: + + Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. */ /* @@ -69,7 +95,16 @@ #ifndef _LINUX_FUSE_H #define _LINUX_FUSE_H +#ifdef __linux__ #include <linux/types.h> +#else +#include <stdint.h> +#define __u64 uint64_t +#define __s64 int64_t +#define __u32 uint32_t +#define __s32 int32_t +#define __u16 uint16_t +#endif /* * Version negotiation: -- cgit v1.2.3 From 634734b63ac39e137a1c623ba74f3e062b6577db Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 6 Feb 2013 22:29:01 +0000 Subject: fuse: allow control of adaptive readdirplus use For some filesystems (e.g. GlusterFS), the cost of performing a normal readdir and readdirplus are identical. Since adaptively using readdirplus has no benefit for those systems, give users/filesystems the option to control adaptive readdirplus use. v2 of this patch incorporates Miklos's suggestion to simplify the code, as well as improving consistency of macro names and documentation.
Signed-off-by: Eric Wong Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 2 ++ fs/fuse/fuse_i.h | 5 ++++- fs/fuse/inode.c | 4 +++- include/uapi/linux/fuse.h | 3 +++ 4 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 2b112d978e9f..85065221a58a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -21,6 +21,8 @@ static bool fuse_use_readdirplus(struct inode *dir, struct file *filp) if (!fc->do_readdirplus) return false; + if (!fc->readdirplus_auto) + return true; if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) return true; if (filp->f_pos == 0) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fc55dd33c1e2..6aeba864f070 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -514,9 +514,12 @@ struct fuse_conn { /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; - /** Does the filesystem support readdir-plus? */ + /** Does the filesystem support readdirplus? */ unsigned do_readdirplus:1; + /** Does the filesystem want adaptive readdirplus? 
*/ + unsigned readdirplus_auto:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9876a87255fe..01353ed75750 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -866,6 +866,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->auto_inval_data = 1; if (arg->flags & FUSE_DO_READDIRPLUS) fc->do_readdirplus = 1; + if (arg->flags & FUSE_READDIRPLUS_AUTO) + fc->readdirplus_auto = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -893,7 +895,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | - FUSE_DO_READDIRPLUS; + FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index baee03e90438..4c43b4448792 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -218,6 +218,8 @@ struct fuse_file_lock { * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages + * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) + * FUSE_READDIRPLUS_AUTO: adaptive readdirplus */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -233,6 +235,7 @@ struct fuse_file_lock { #define FUSE_HAS_IOCTL_DIR (1 << 11) #define FUSE_AUTO_INVAL_DATA (1 << 12) #define FUSE_DO_READDIRPLUS (1 << 13) +#define FUSE_READDIRPLUS_AUTO (1 << 14) /** * CUSE INIT request/reply flags -- cgit v1.2.3 From 4f4ffc3a5398ef9bdbb32db04756d7d34e356fcf Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 4 Feb 2013 19:39:52 +0000 Subject: unbreak automounter support on 64-bit kernel with 
32-bit userspace (v2) automount-support is broken on the parisc architecture, because the existing #if list does not include a check for defined(__hppa__). The HPPA (parisc) architecture is similar to other 64bit Linux targets where we have to define autofs_wqt_t (which is passed back and forth to user space) as int type which has a size of 32bit across 32 and 64bit kernels. During the discussion on the mailing list, H. Peter Anvin suggested to invert the #if list since only specific platforms (specifically those who do not have a 32bit userspace, like IA64 and Alpha) should have autofs_wqt_t as unsigned long type. This suggestion is probably the best way to go, since Arm64 (and maybe others?) seems to have a non-working automounter. So in the long run even for other new upcoming architectures this inverted check seems to be the best solution, since it will not require them to change this #if again (unless they are 64bit only). Signed-off-by: Helge Deller Acked-by: H. Peter Anvin Acked-by: Ian Kent Acked-by: Catalin Marinas CC: James Bottomley CC: Rolf Eike Beer --- include/uapi/linux/auto_fs.h | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index 77cdba9df274..bb991dfe134f 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -28,25 +28,16 @@ #define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION /* - * Architectures where both 32- and 64-bit binaries can be executed - * on 64-bit kernels need this. This keeps the structure format - * uniform, and makes sure the wait_queue_token isn't too big to be - * passed back down to the kernel. - * - * This assumes that on these architectures: - * mode 32 bit 64 bit - * ------------------------- - * int 32 bit 32 bit - * long 32 bit 64 bit - * - * If so, 32-bit user-space code should be backwards compatible.
+ * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed + * back to the kernel via ioctl from userspace. On architectures where 32- and + * 64-bit userspace binaries can be executed it's important that the size of + * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we + * do not break the binary ABI interface by changing the structure size. */ - -#if defined(__sparc__) || defined(__mips__) || defined(__x86_64__) \ - || defined(__powerpc__) || defined(__s390__) -typedef unsigned int autofs_wqt_t; -#else +#if defined(__ia64__) || defined(__alpha__) /* pure 64bit architectures */ typedef unsigned long autofs_wqt_t; +#else +typedef unsigned int autofs_wqt_t; #endif /* Packet types */ -- cgit v1.2.3 From d021c344051af91f42c5ba9fdedc176740cbd238 Mon Sep 17 00:00:00 2001 From: Andy King Date: Wed, 6 Feb 2013 14:23:56 +0000 Subject: VSOCK: Introduce VM Sockets VM Sockets allows communication between virtual machines and the hypervisor. User level applications both in a virtual machine and on the host can use the VM Sockets API, which facilitates fast and efficient communication between guest virtual machines and their host. A socket address family, designed to be compatible with UDP and TCP at the interface level, is provided. Today, VM Sockets is used by various VMware Tools components inside the guest for zero-config, network-less access to VMware host services. In addition to this, VMware's users are using VM Sockets for various applications, where network access of the virtual machine is restricted or non-existent. Examples of this are VMs communicating with device proxies for proprietary hardware running as host applications and automated testing of applications running within virtual machines. The VMware VM Sockets are similar to other socket types, like Berkeley UNIX socket interface. The VM Sockets module supports both connection-oriented stream sockets like TCP, and connectionless datagram sockets like UDP. 
The VM Sockets protocol family is defined as "AF_VSOCK" and the socket operations split for SOCK_DGRAM and SOCK_STREAM. For additional information about the use of VM Sockets, please refer to the VM Sockets Programming Guide available at: https://www.vmware.com/support/developer/vmci-sdk/ Signed-off-by: George Zhang Signed-off-by: Dmitry Torokhov Signed-off-by: Andy king Signed-off-by: David S. Miller --- include/linux/socket.h | 4 +- include/uapi/linux/vm_sockets.h | 171 ++ net/Kconfig | 1 + net/Makefile | 1 + net/vmw_vsock/Kconfig | 28 + net/vmw_vsock/Makefile | 7 + net/vmw_vsock/af_vsock.c | 2015 ++++++++++++++++++++++++ net/vmw_vsock/af_vsock.h | 175 +++ net/vmw_vsock/vmci_transport.c | 2157 ++++++++++++++++++++++++++ net/vmw_vsock/vmci_transport.h | 139 ++ net/vmw_vsock/vmci_transport_notify.c | 680 ++++++++ net/vmw_vsock/vmci_transport_notify.h | 83 + net/vmw_vsock/vmci_transport_notify_qstate.c | 438 ++++++ net/vmw_vsock/vsock_addr.c | 86 + net/vmw_vsock/vsock_addr.h | 32 + net/vmw_vsock/vsock_version.h | 22 + 16 files changed, 6038 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/vm_sockets.h create mode 100644 net/vmw_vsock/Kconfig create mode 100644 net/vmw_vsock/Makefile create mode 100644 net/vmw_vsock/af_vsock.c create mode 100644 net/vmw_vsock/af_vsock.h create mode 100644 net/vmw_vsock/vmci_transport.c create mode 100644 net/vmw_vsock/vmci_transport.h create mode 100644 net/vmw_vsock/vmci_transport_notify.c create mode 100644 net/vmw_vsock/vmci_transport_notify.h create mode 100644 net/vmw_vsock/vmci_transport_notify_qstate.c create mode 100644 net/vmw_vsock/vsock_addr.c create mode 100644 net/vmw_vsock/vsock_addr.h create mode 100644 net/vmw_vsock/vsock_version.h (limited to 'include/uapi/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 9a546ff853dc..2b9f74b0ffea 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -178,7 +178,8 @@ struct ucred { #define AF_CAIF 37 /* CAIF sockets */ #define 
AF_ALG 38 /* Algorithm sockets */ #define AF_NFC 39 /* NFC sockets */ -#define AF_MAX 40 /* For now.. */ +#define AF_VSOCK 40 /* vSockets */ +#define AF_MAX 41 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -221,6 +222,7 @@ struct ucred { #define PF_CAIF AF_CAIF #define PF_ALG AF_ALG #define PF_NFC AF_NFC +#define PF_VSOCK AF_VSOCK #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h new file mode 100644 index 000000000000..f7f2e99dec84 --- /dev/null +++ b/include/uapi/linux/vm_sockets.h @@ -0,0 +1,171 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VM_SOCKETS_H_ +#define _VM_SOCKETS_H_ + +#if !defined(__KERNEL__) +#include <sys/socket.h> +#endif + +/* Option name for STREAM socket buffer size. Use as the option name in + * setsockopt(3) or getsockopt(3) to set or get an unsigned long long that + * specifies the size of the buffer underlying a vSockets STREAM socket. + * Value is clamped to the MIN and MAX. + */ + +#define SO_VM_SOCKETS_BUFFER_SIZE 0 + +/* Option name for STREAM socket minimum buffer size. Use as the option name + * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long that + * specifies the minimum size allowed for the buffer underlying a vSockets + * STREAM socket. + */ + +#define SO_VM_SOCKETS_BUFFER_MIN_SIZE 1 + +/* Option name for STREAM socket maximum buffer size.
Use as the option name + * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long + * that specifies the maximum size allowed for the buffer underlying a + * vSockets STREAM socket. + */ + +#define SO_VM_SOCKETS_BUFFER_MAX_SIZE 2 + +/* Option name for socket peer's host-specific VM ID. Use as the option name + * in getsockopt(3) to get a host-specific identifier for the peer endpoint's + * VM. The identifier is a signed integer. + * Only available for hypervisor endpoints. + */ + +#define SO_VM_SOCKETS_PEER_HOST_VM_ID 3 + +/* Option name for socket's service label. Use as the option name in + * setsockopt(3) or getsockopt(3) to set or get the service label for a socket. + * The service label is a C-style NUL-terminated string. Only available for + * hypervisor endpoints. + */ + +#define SO_VM_SOCKETS_SERVICE_LABEL 4 + +/* Option name for determining if a socket is trusted. Use as the option name + * in getsockopt(3) to determine if a socket is trusted. The value is a + * signed integer. + */ + +#define SO_VM_SOCKETS_TRUSTED 5 + +/* Option name for STREAM socket connection timeout. Use as the option name + * in setsockopt(3) or getsockopt(3) to set or get the connection + * timeout for a STREAM socket. + */ + +#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6 + +/* Option name for using non-blocking send/receive. Use as the option name + * for setsockopt(3) or getsockopt(3) to set or get the non-blocking + * transmit/receive flag for a STREAM socket. This flag determines whether + * send() and recv() can be called in non-blocking contexts for the given + * socket. The value is a signed integer. + * + * This option is only relevant to kernel endpoints, where descheduling the + * thread of execution is not allowed, for example, while holding a spinlock. + * It is not to be confused with conventional non-blocking socket operations. + * + * Only available for hypervisor endpoints. 
+ */ + +#define SO_VM_SOCKETS_NONBLOCK_TXRX 7 + +/* The vSocket equivalent of INADDR_ANY. This works for the svm_cid field of + * sockaddr_vm and indicates the context ID of the current endpoint. + */ + +#define VMADDR_CID_ANY -1U + +/* Bind to any available port. Works for the svm_port field of + * sockaddr_vm. + */ + +#define VMADDR_PORT_ANY -1U + +/* Use this as the destination CID in an address when referring to the + * hypervisor. VMCI relies on it being 0, but this would be useful for other + * transports too. + */ + +#define VMADDR_CID_HYPERVISOR 0 + +/* This CID is specific to VMCI and can be considered reserved (even VMCI + * doesn't use it anymore, it's a legacy value from an older release). + */ + +#define VMADDR_CID_RESERVED 1 + +/* Use this as the destination CID in an address when referring to the host + * (any process other than the hypervisor). VMCI relies on it being 2, but + * this would be useful for other transports too. + */ + +#define VMADDR_CID_HOST 2 + +/* Invalid vSockets version. */ + +#define VM_SOCKETS_INVALID_VERSION -1U + +/* The epoch (first) component of the vSockets version. A single byte + * representing the epoch component of the vSockets version. + */ + +#define VM_SOCKETS_VERSION_EPOCH(_v) (((_v) & 0xFF000000) >> 24) + +/* The major (second) component of the vSockets version. A single byte + * representing the major component of the vSockets version. Typically + * changes for every major release of a product. + */ + +#define VM_SOCKETS_VERSION_MAJOR(_v) (((_v) & 0x00FF0000) >> 16) + +/* The minor (third) component of the vSockets version. Two bytes representing + * the minor component of the vSockets version. + */ + +#define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF)) + +/* Address structure for vSockets. The address family should be set to + * whatever vmci_sock_get_af_value_fd() returns. The structure members should + * all align on their natural boundaries without resorting to compiler packing + * directives. 
The total size of this structure should be exactly the same as + * that of struct sockaddr. + */ + +struct sockaddr_vm { + sa_family_t svm_family; + unsigned short svm_reserved1; + unsigned int svm_port; + unsigned int svm_cid; + unsigned char svm_zero[sizeof(struct sockaddr) - + sizeof(sa_family_t) - + sizeof(unsigned short) - + sizeof(unsigned int) - sizeof(unsigned int)]; +}; + +#define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) + +#if defined(__KERNEL__) +int vm_sockets_get_local_cid(void); +#endif + +#endif diff --git a/net/Kconfig b/net/Kconfig index c31348e70aad..5a1888bb036d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -217,6 +217,7 @@ source "net/dcb/Kconfig" source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" +source "net/vmw_vsock/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index c5aa8b3b49dc..091e7b04f301 100644 --- a/net/Makefile +++ b/net/Makefile @@ -69,3 +69,4 @@ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ +obj-$(CONFIG_VSOCKETS) += vmw_vsock/ diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig new file mode 100644 index 000000000000..b5fa7e40cdcb --- /dev/null +++ b/net/vmw_vsock/Kconfig @@ -0,0 +1,28 @@ +# +# Vsock protocol +# + +config VSOCKETS + tristate "Virtual Socket protocol" + help + Virtual Socket Protocol is a socket protocol similar to TCP/IP + allowing comunication between Virtual Machines and hypervisor + or host. + + You should also select one or more hypervisor-specific transports + below. + + To compile this driver as a module, choose M here: the module + will be called vsock. If unsure, say N. + +config VMWARE_VMCI_VSOCKETS + tristate "VMware VMCI transport for Virtual Sockets" + depends on VSOCKETS && VMWARE_VMCI + help + This module implements a VMCI transport for Virtual Sockets. 
+ + Enable this transport if your Virtual Machine runs on a VMware + hypervisor. + + To compile this driver as a module, choose M here: the module + will be called vmw_vsock_vmci_transport. If unsure, say N. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile new file mode 100644 index 000000000000..2ce52d70f224 --- /dev/null +++ b/net/vmw_vsock/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_VSOCKETS) += vsock.o +obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o + +vsock-y += af_vsock.o vsock_addr.o + +vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ + vmci_transport_notify_qstate.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c new file mode 100644 index 000000000000..54bb7bdf92d3 --- /dev/null +++ b/net/vmw_vsock/af_vsock.c @@ -0,0 +1,2015 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +/* Implementation notes: + * + * - There are two kinds of sockets: those created by user action (such as + * calling socket(2)) and those created by incoming connection request packets. + * + * - There are two "global" tables, one for bound sockets (sockets that have + * specified an address that they are responsible for) and one for connected + * sockets (sockets that have established a connection with another socket). + * These tables are "global" in that all sockets on the system are placed + * within them. 
- Note, though, that the bound table contains an extra entry + * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in + * that list. The bound table is used solely for lookup of sockets when packets + * are received and that's not necessary for SOCK_DGRAM sockets since we create + * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM + * sockets out of the bound hash buckets will reduce the chance of collisions + * when looking for SOCK_STREAM sockets and prevents us from having to check the + * socket type in the hash table lookups. + * + * - Sockets created by user action will either be "client" sockets that + * initiate a connection or "server" sockets that listen for connections; we do + * not support simultaneous connects (two "client" sockets connecting). + * + * - "Server" sockets are referred to as listener sockets throughout this + * implementation because they are in the SS_LISTEN state. When a connection + * request is received (the second kind of socket mentioned above), we create a + * new socket and refer to it as a pending socket. These pending sockets are + * placed on the pending connection list of the listener socket. When future + * packets are received for the address the listener socket is bound to, we + * check if the source of the packet is from one that has an existing pending + * connection. If it does, we process the packet for the pending socket. When + * that socket reaches the connected state, it is removed from the listener + * socket's pending list and enqueued in the listener socket's accept queue. + * Callers of accept(2) will accept connected sockets from the listener socket's + * accept queue. If the socket cannot be accepted for some reason then it is + * marked rejected. Once the connection is accepted, it is owned by the user + * process and the responsibility for cleanup falls with that user process. 
+ * + * - It is possible that these pending sockets will never reach the connected + * state; in fact, we may never receive another packet after the connection + * request. Because of this, we must schedule a cleanup function to run in the + * future, after some amount of time passes where a connection should have been + * established. This function ensures that the socket is off all lists so it + * cannot be retrieved, then drops all references to the socket so it is cleaned + * up (sock_put() -> sk_free() -> our sk_destruct implementation). Note this + * function will also cleanup rejected sockets, those that reach the connected + * state but leave it before they have been accepted. + * + * - Sockets created by user action will be cleaned up when the user process + * calls close(2), causing our release implementation to be called. Our release + * implementation will perform some cleanup then drop the last reference so our + * sk_destruct implementation is invoked. Our sk_destruct implementation will + * perform additional cleanup that's common for both types of sockets. + * + * - A socket's reference count is what ensures that the structure won't be + * freed. Each entry in a list (such as the "global" bound and connected tables + * and the listener socket's pending list and connected queue) ensures a + * reference. When we defer work until process context and pass a socket as our + * argument, we must ensure the reference count is increased to ensure the + * socket isn't freed before the function is run; the deferred function will + * then drop the reference. 
+ */ + +#include + +#define EXPORT_SYMTAB +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "af_vsock.h" +#include "vsock_version.h" + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); +static void vsock_sk_destruct(struct sock *sk); +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +/* Protocol family. */ +static struct proto vsock_proto = { + .name = "AF_VSOCK", + .owner = THIS_MODULE, + .obj_size = sizeof(struct vsock_sock), +}; + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +static const struct vsock_transport *transport; +static DEFINE_MUTEX(vsock_register_mutex); + +/**** EXPORTS ****/ + +/* Get the ID of the local context. This is transport dependent. */ + +int vm_sockets_get_local_cid(void) +{ + return transport->get_local_cid(); +} +EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); + +/**** UTILS ****/ + +/* Each bound VSocket is stored in the bind hash table and each connected + * VSocket is stored in the connected hash table. + * + * Unbound sockets are all put on the same list attached to the end of the hash + * table (vsock_unbound_sockets). Bound sockets are added to the hash table in + * the bucket that their local address hashes to (vsock_bound_sockets(addr) + * represents the list that addr hashes to). + * + * Specifically, we initialize the vsock_bind_table array to a size of + * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through + * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and + * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function + * mods with VSOCK_HASH_SIZE - 1 to ensure this. 
+ */ +#define VSOCK_HASH_SIZE 251 +#define MAX_PORT_RETRIES 24 + +#define VSOCK_HASH(addr) ((addr)->svm_port % (VSOCK_HASH_SIZE - 1)) +#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) +#define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) + +/* XXX This can probably be implemented in a better way. */ +#define VSOCK_CONN_HASH(src, dst) \ + (((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1)) +#define vsock_connected_sockets(src, dst) \ + (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) +#define vsock_connected_sockets_vsk(vsk) \ + vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr) + +static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +static DEFINE_SPINLOCK(vsock_table_lock); + +static __init void vsock_init_tables(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++) + INIT_LIST_HEAD(&vsock_bind_table[i]); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) + INIT_LIST_HEAD(&vsock_connected_table[i]); +} + +static void __vsock_insert_bound(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->bound_table, list); +} + +static void __vsock_insert_connected(struct list_head *list, + struct vsock_sock *vsk) +{ + sock_hold(&vsk->sk); + list_add(&vsk->connected_table, list); +} + +static void __vsock_remove_bound(struct vsock_sock *vsk) +{ + list_del_init(&vsk->bound_table); + sock_put(&vsk->sk); +} + +static void __vsock_remove_connected(struct vsock_sock *vsk) +{ + list_del_init(&vsk->connected_table); + sock_put(&vsk->sk); +} + +static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) + if (vsock_addr_equals_addr_any(addr, &vsk->local_addr)) + return sk_vsock(vsk); + + return NULL; +} + +static struct sock *__vsock_find_connected_socket(struct sockaddr_vm 
*src, + struct sockaddr_vm *dst) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_connected_sockets(src, dst), + connected_table) { + if (vsock_addr_equals_addr(src, &vsk->remote_addr) + && vsock_addr_equals_addr(dst, &vsk->local_addr)) { + return sk_vsock(vsk); + } + } + + return NULL; +} + +static bool __vsock_in_bound_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->bound_table); +} + +static bool __vsock_in_connected_table(struct vsock_sock *vsk) +{ + return !list_empty(&vsk->connected_table); +} + +static void vsock_insert_unbound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_insert_bound(vsock_unbound_sockets, vsk); + spin_unlock_bh(&vsock_table_lock); +} + +void vsock_insert_connected(struct vsock_sock *vsk) +{ + struct list_head *list = vsock_connected_sockets( + &vsk->remote_addr, &vsk->local_addr); + + spin_lock_bh(&vsock_table_lock); + __vsock_insert_connected(list, vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_insert_connected); + +void vsock_remove_bound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_bound(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_bound); + +void vsock_remove_connected(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_table_lock); + __vsock_remove_connected(vsk); + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_remove_connected); + +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_bound_socket(addr); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_bound_socket); + +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_connected_socket(src, dst); + if (sk) + sock_hold(sk); + + 
spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_connected_socket); + +static bool vsock_in_bound_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_bound_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +static bool vsock_in_connected_table(struct vsock_sock *vsk) +{ + bool ret; + + spin_lock_bh(&vsock_table_lock); + ret = __vsock_in_connected_table(vsk); + spin_unlock_bh(&vsock_table_lock); + + return ret; +} + +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +{ + int i; + + spin_lock_bh(&vsock_table_lock); + + for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { + struct vsock_sock *vsk; + list_for_each_entry(vsk, &vsock_connected_table[i], + connected_table); + fn(sk_vsock(vsk)); + } + + spin_unlock_bh(&vsock_table_lock); +} +EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket); + +void vsock_add_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + + vlistener = vsock_sk(listener); + vpending = vsock_sk(pending); + + sock_hold(pending); + sock_hold(listener); + list_add_tail(&vpending->pending_links, &vlistener->pending_links); +} +EXPORT_SYMBOL_GPL(vsock_add_pending); + +void vsock_remove_pending(struct sock *listener, struct sock *pending) +{ + struct vsock_sock *vpending = vsock_sk(pending); + + list_del_init(&vpending->pending_links); + sock_put(listener); + sock_put(pending); +} +EXPORT_SYMBOL_GPL(vsock_remove_pending); + +void vsock_enqueue_accept(struct sock *listener, struct sock *connected) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + vconnected = vsock_sk(connected); + + sock_hold(connected); + sock_hold(listener); + list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue); +} +EXPORT_SYMBOL_GPL(vsock_enqueue_accept); + +static struct sock *vsock_dequeue_accept(struct sock *listener) +{ + 
struct vsock_sock *vlistener; + struct vsock_sock *vconnected; + + vlistener = vsock_sk(listener); + + if (list_empty(&vlistener->accept_queue)) + return NULL; + + vconnected = list_entry(vlistener->accept_queue.next, + struct vsock_sock, accept_queue); + + list_del_init(&vconnected->accept_queue); + sock_put(listener); + /* The caller will need a reference on the connected socket so we let + * it call sock_put(). + */ + + return sk_vsock(vconnected); +} + +static bool vsock_is_accept_queue_empty(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return list_empty(&vsk->accept_queue); +} + +static bool vsock_is_pending(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + return !list_empty(&vsk->pending_links); +} + +static int vsock_send_shutdown(struct sock *sk, int mode) +{ + return transport->shutdown(vsock_sk(sk), mode); +} + +void vsock_pending_work(struct work_struct *work) +{ + struct sock *sk; + struct sock *listener; + struct vsock_sock *vsk; + bool cleanup; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + listener = vsk->listener; + cleanup = true; + + lock_sock(listener); + lock_sock(sk); + + if (vsock_is_pending(sk)) { + vsock_remove_pending(listener, sk); + } else if (!vsk->rejected) { + /* We are not on the pending list and accept() did not reject + * us, so we must have been accepted by our user process. We + * just need to drop our references to the sockets and be on + * our way. + */ + cleanup = false; + goto out; + } + + listener->sk_ack_backlog--; + + /* We need to remove ourself from the global connected sockets list so + * incoming packets can't find this socket, and to reduce the reference + * count. 
+ */ + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + sk->sk_state = SS_FREE; + +out: + release_sock(sk); + release_sock(listener); + if (cleanup) + sock_put(sk); + + sock_put(sk); + sock_put(listener); +} +EXPORT_SYMBOL_GPL(vsock_pending_work); + +/**** SOCKET OPERATIONS ****/ + +static int __vsock_bind_stream(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + static u32 port = LAST_RESERVED_PORT + 1; + struct sockaddr_vm new_addr; + + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); + + if (addr->svm_port == VMADDR_PORT_ANY) { + bool found = false; + unsigned int i; + + for (i = 0; i < MAX_PORT_RETRIES; i++) { + if (port <= LAST_RESERVED_PORT) + port = LAST_RESERVED_PORT + 1; + + new_addr.svm_port = port++; + + if (!__vsock_find_bound_socket(&new_addr)) { + found = true; + break; + } + } + + if (!found) + return -EADDRNOTAVAIL; + } else { + /* If port is in reserved range, ensure caller + * has necessary privileges. + */ + if (addr->svm_port <= LAST_RESERVED_PORT && + !capable(CAP_NET_BIND_SERVICE)) { + return -EACCES; + } + + if (__vsock_find_bound_socket(&new_addr)) + return -EADDRINUSE; + } + + vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); + + /* Remove stream sockets from the unbound list and add them to the hash + * table for easy lookup by its address. The unbound list is simply an + * extra entry at the end of the hash table, a trick used by AF_UNIX. + */ + __vsock_remove_bound(vsk); + __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk); + + return 0; +} + +static int __vsock_bind_dgram(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + return transport->dgram_bind(vsk, addr); +} + +static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk = vsock_sk(sk); + u32 cid; + int retval; + + /* First ensure this socket isn't already bound. 
*/ + if (vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + /* Now bind to the provided address or select appropriate values if + * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that + * like AF_INET prevents binding to a non-local IP address (in most + * cases), we only allow binding to the local CID. + */ + cid = transport->get_local_cid(); + if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY) + return -EADDRNOTAVAIL; + + switch (sk->sk_socket->type) { + case SOCK_STREAM: + spin_lock_bh(&vsock_table_lock); + retval = __vsock_bind_stream(vsk, addr); + spin_unlock_bh(&vsock_table_lock); + break; + + case SOCK_DGRAM: + retval = __vsock_bind_dgram(vsk, addr); + break; + + default: + retval = -EINVAL; + break; + } + + return retval; +} + +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, + unsigned short type) +{ + struct sock *sk; + struct vsock_sock *psk; + struct vsock_sock *vsk; + + sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + + /* sk->sk_type is normally set in sock_init_data, but only if sock is + * non-NULL. We make sure that our sockets always have a type by + * setting it here if needed. + */ + if (!sock) + sk->sk_type = type; + + vsk = vsock_sk(sk); + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + sk->sk_destruct = vsock_sk_destruct; + sk->sk_backlog_rcv = vsock_queue_rcv_skb; + sk->sk_state = 0; + sock_reset_flag(sk, SOCK_DONE); + + INIT_LIST_HEAD(&vsk->bound_table); + INIT_LIST_HEAD(&vsk->connected_table); + vsk->listener = NULL; + INIT_LIST_HEAD(&vsk->pending_links); + INIT_LIST_HEAD(&vsk->accept_queue); + vsk->rejected = false; + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + vsk->peer_shutdown = 0; + + psk = parent ? 
vsock_sk(parent) : NULL; + if (parent) { + vsk->trusted = psk->trusted; + vsk->owner = get_cred(psk->owner); + vsk->connect_timeout = psk->connect_timeout; + } else { + vsk->trusted = capable(CAP_NET_ADMIN); + vsk->owner = get_current_cred(); + vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; + } + + if (transport->init(vsk, psk) < 0) { + sk_free(sk); + return NULL; + } + + if (sock) + vsock_insert_unbound(vsk); + + return sk; +} +EXPORT_SYMBOL_GPL(__vsock_create); + +static void __vsock_release(struct sock *sk) +{ + if (sk) { + struct sk_buff *skb; + struct sock *pending; + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + pending = NULL; /* Compiler warning. */ + + if (vsock_in_bound_table(vsk)) + vsock_remove_bound(vsk); + + if (vsock_in_connected_table(vsk)) + vsock_remove_connected(vsk); + + transport->release(vsk); + + lock_sock(sk); + sock_orphan(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + + while ((skb = skb_dequeue(&sk->sk_receive_queue))) + kfree_skb(skb); + + /* Clean up any sockets that never were accepted. */ + while ((pending = vsock_dequeue_accept(sk)) != NULL) { + __vsock_release(pending); + sock_put(pending); + } + + release_sock(sk); + sock_put(sk); + } +} + +static void vsock_sk_destruct(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + transport->destruct(vsk); + + /* When clearing these addresses, there's no need to set the family and + * possibly register the address family with the kernel. 
+ */ + vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + put_cred(vsk->owner); +} + +static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int err; + + err = sock_queue_rcv_skb(sk, skb); + if (err) + kfree_skb(skb); + + return err; +} + +s64 vsock_stream_has_data(struct vsock_sock *vsk) +{ + return transport->stream_has_data(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_data); + +s64 vsock_stream_has_space(struct vsock_sock *vsk) +{ + return transport->stream_has_space(vsk); +} +EXPORT_SYMBOL_GPL(vsock_stream_has_space); + +static int vsock_release(struct socket *sock) +{ + __vsock_release(sock->sk); + sock->sk = NULL; + sock->state = SS_FREE; + + return 0; +} + +static int +vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +{ + int err; + struct sock *sk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + + if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0) + return -EINVAL; + + lock_sock(sk); + err = __vsock_bind(sk, vm_addr); + release_sock(sk); + + return err; +} + +static int vsock_getname(struct socket *sock, + struct sockaddr *addr, int *addr_len, int peer) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *vm_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (peer) { + if (sock->state != SS_CONNECTED) { + err = -ENOTCONN; + goto out; + } + vm_addr = &vsk->remote_addr; + } else { + vm_addr = &vsk->local_addr; + } + + if (!vm_addr) { + err = -EINVAL; + goto out; + } + + /* sys_getsockname() and sys_getpeername() pass us a + * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately + * that macro is defined in socket.c instead of .h, so we hardcode its + * value here. 
+ */ + BUILD_BUG_ON(sizeof(*vm_addr) > 128); + memcpy(addr, vm_addr, sizeof(*vm_addr)); + *addr_len = sizeof(*vm_addr); + +out: + release_sock(sk); + return err; +} + +static int vsock_shutdown(struct socket *sock, int mode) +{ + int err; + struct sock *sk; + + /* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses + * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode + * here like the other address families do. Note also that the + * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), + * which is what we want. + */ + mode++; + + if ((mode & ~SHUTDOWN_MASK) || !mode) + return -EINVAL; + + /* If this is a STREAM socket and it is not connected then bail out + * immediately. If it is a DGRAM socket then we must first kick the + * socket so that it wakes up from any sleeping calls, for example + * recv(), and then afterwards return the error. + */ + + sk = sock->sk; + if (sock->state == SS_UNCONNECTED) { + err = -ENOTCONN; + if (sk->sk_type == SOCK_STREAM) + return err; + } else { + sock->state = SS_DISCONNECTING; + err = 0; + } + + /* Receive and send shutdowns are treated alike. */ + mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); + if (mode) { + lock_sock(sk); + sk->sk_shutdown |= mode; + sk->sk_state_change(sk); + release_sock(sk); + + if (sk->sk_type == SOCK_STREAM) { + sock_reset_flag(sk, SOCK_DONE); + vsock_send_shutdown(sk, mode); + } + } + + return err; +} + +static unsigned int vsock_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk; + unsigned int mask; + struct vsock_sock *vsk; + + sk = sock->sk; + vsk = vsock_sk(sk); + + poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + if (sk->sk_err) + /* Signify that there has been an error on this socket. */ + mask |= POLLERR; + + /* INET sockets treat local write shutdown and peer write shutdown as a + * case of POLLHUP set. 
+ */ + if ((sk->sk_shutdown == SHUTDOWN_MASK) || + ((sk->sk_shutdown & SEND_SHUTDOWN) && + (vsk->peer_shutdown & SEND_SHUTDOWN))) { + mask |= POLLHUP; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLRDHUP; + } + + if (sock->type == SOCK_DGRAM) { + /* For datagram sockets we can read if there is something in + * the queue and write as long as the socket isn't shutdown for + * sending. + */ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) { + mask |= POLLIN | POLLRDNORM; + } + + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + } else if (sock->type == SOCK_STREAM) { + lock_sock(sk); + + /* Listening sockets that have connections in their accept + * queue can be read. + */ + if (sk->sk_state == SS_LISTEN + && !vsock_is_accept_queue_empty(sk)) + mask |= POLLIN | POLLRDNORM; + + /* If there is something in the queue then we can read. */ + if (transport->stream_is_active(vsk) && + !(sk->sk_shutdown & RCV_SHUTDOWN)) { + bool data_ready_now = false; + int ret = transport->notify_poll_in( + vsk, 1, &data_ready_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (data_ready_now) + mask |= POLLIN | POLLRDNORM; + + } + } + + /* Sockets whose connections have been closed, reset, or + * terminated should also be considered read, and we check the + * shutdown flag for that. + */ + if (sk->sk_shutdown & RCV_SHUTDOWN || + vsk->peer_shutdown & SEND_SHUTDOWN) { + mask |= POLLIN | POLLRDNORM; + } + + /* Connected sockets that can produce data can be written. */ + if (sk->sk_state == SS_CONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + bool space_avail_now = false; + int ret = transport->notify_poll_out( + vsk, 1, &space_avail_now); + if (ret < 0) { + mask |= POLLERR; + } else { + if (space_avail_now) + /* Remove POLLWRBAND since INET + * sockets are not setting it. 
+ */ + mask |= POLLOUT | POLLWRNORM; + + } + } + } + + /* Simulate INET socket poll behaviors, which sets + * POLLOUT|POLLWRNORM when peer is closed and nothing to read, + * but local send is not shutdown. + */ + if (sk->sk_state == SS_UNCONNECTED) { + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) + mask |= POLLOUT | POLLWRNORM; + + } + + release_sock(sk); + } + + return mask; +} + +static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + /* For now, MSG_DONTWAIT is always assumed... */ + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + /* If the provided message contains an address, use that. Otherwise + * fall back on the socket's remote handle (if it has been connected). + */ + if (msg->msg_name && + vsock_addr_cast(msg->msg_name, msg->msg_namelen, + &remote_addr) == 0) { + /* Ensure this address is of the right type and is a valid + * destination. + */ + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + if (!vsock_addr_bound(remote_addr)) { + err = -EINVAL; + goto out; + } + } else if (sock->state == SS_CONNECTED) { + remote_addr = &vsk->remote_addr; + + if (remote_addr->svm_cid == VMADDR_CID_ANY) + remote_addr->svm_cid = transport->get_local_cid(); + + /* XXX Should connect() or this function ensure remote_addr is + * bound? 
+ */ + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EINVAL; + goto out; + } + } else { + err = -EINVAL; + goto out; + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + err = transport->dgram_enqueue(vsk, remote_addr, msg->msg_iov, len); + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_connect(struct socket *sock, + struct sockaddr *addr, int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + + sk = sock->sk; + vsk = vsock_sk(sk); + + err = vsock_addr_cast(addr, addr_len, &remote_addr); + if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) { + lock_sock(sk); + vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + sock->state = SS_UNCONNECTED; + release_sock(sk); + return 0; + } else if (err != 0) + return -EINVAL; + + lock_sock(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + if (!transport->dgram_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -EINVAL; + goto out; + } + + memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); + sock->state = SS_CONNECTED; + +out: + release_sock(sk); + return err; +} + +static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len, + flags); +} + +static const struct proto_ops vsock_dgram_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_dgram_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = 
vsock_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = vsock_dgram_sendmsg, + .recvmsg = vsock_dgram_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static void vsock_connect_timeout(struct work_struct *work) +{ + struct sock *sk; + struct vsock_sock *vsk; + + vsk = container_of(work, struct vsock_sock, dwork.work); + sk = sk_vsock(vsk); + + lock_sock(sk); + if (sk->sk_state == SS_CONNECTING && + (sk->sk_shutdown != SHUTDOWN_MASK)) { + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ETIMEDOUT; + sk->sk_error_report(sk); + } + release_sock(sk); + + sock_put(sk); +} + +static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, + int addr_len, int flags) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + struct sockaddr_vm *remote_addr; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + /* XXX AF_UNSPEC should make us disconnect like AF_INET. */ + switch (sock->state) { + case SS_CONNECTED: + err = -EISCONN; + goto out; + case SS_DISCONNECTING: + err = -EINVAL; + goto out; + case SS_CONNECTING: + /* This continues on so we can move sock into the SS_CONNECTED + * state once the connection has completed (at which point err + * will be set to zero also). Otherwise, we will either wait + * for the connection or return -EALREADY should this be a + * non-blocking call. + */ + err = -EALREADY; + break; + default: + if ((sk->sk_state == SS_LISTEN) || + vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { + err = -EINVAL; + goto out; + } + + /* The hypervisor and well-known contexts do not have socket + * endpoints. + */ + if (!transport->stream_allow(remote_addr->svm_cid, + remote_addr->svm_port)) { + err = -ENETUNREACH; + goto out; + } + + /* Set the remote address that we are connecting to. 
*/ + memcpy(&vsk->remote_addr, remote_addr, + sizeof(vsk->remote_addr)); + + /* Autobind this socket to the local address if necessary. */ + if (!vsock_addr_bound(&vsk->local_addr)) { + struct sockaddr_vm local_addr; + + vsock_addr_init(&local_addr, VMADDR_CID_ANY, + VMADDR_PORT_ANY); + err = __vsock_bind(sk, &local_addr); + if (err != 0) + goto out; + + } + + sk->sk_state = SS_CONNECTING; + + err = transport->connect(vsk); + if (err < 0) + goto out; + + /* Mark sock as connecting and set the error code to in + * progress in case this is a non-blocking connect. + */ + sock->state = SS_CONNECTING; + err = -EINPROGRESS; + } + + /* The receive path will handle all communication until we are able to + * enter the connected state. Here we wait for the connection to be + * completed or a notification of an error. + */ + timeout = vsk->connect_timeout; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { + if (flags & O_NONBLOCK) { + /* If we're not going to block, we schedule a timeout + * function to generate a timeout on the connection + * attempt, in case the peer doesn't respond in a + * timely manner. We hold on to the socket until the + * timeout fires. + */ + sock_hold(sk); + INIT_DELAYED_WORK(&vsk->dwork, + vsock_connect_timeout); + schedule_delayed_work(&vsk->dwork, timeout); + + /* Skip ahead to preserve error code set above. 
*/ + goto out_wait; + } + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait_error; + } else if (timeout == 0) { + err = -ETIMEDOUT; + goto out_wait_error; + } + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + } + + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait_error; + } else + err = 0; + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; + +out_wait_error: + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; +} + +static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *listener; + int err; + struct sock *connected; + struct vsock_sock *vconnected; + long timeout; + DEFINE_WAIT(wait); + + err = 0; + listener = sock->sk; + + lock_sock(listener); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (listener->sk_state != SS_LISTEN) { + err = -EINVAL; + goto out; + } + + /* Wait for children sockets to appear; these are the new sockets + * created upon connection establishment. + */ + timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + + while ((connected = vsock_dequeue_accept(listener)) == NULL && + listener->sk_err == 0) { + release_sock(listener); + timeout = schedule_timeout(timeout); + lock_sock(listener); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); + } + + if (listener->sk_err) + err = -listener->sk_err; + + if (connected) { + listener->sk_ack_backlog--; + + lock_sock(connected); + vconnected = vsock_sk(connected); + + /* If the listener socket has received an error, then we should + * reject this socket and return. 
Note that we simply mark the + * socket rejected, drop our reference, and let the cleanup + * function handle the cleanup; the fact that we found it in + * the listener's accept queue guarantees that the cleanup + * function hasn't run yet. + */ + if (err) { + vconnected->rejected = true; + release_sock(connected); + sock_put(connected); + goto out_wait; + } + + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); + release_sock(connected); + sock_put(connected); + } + +out_wait: + finish_wait(sk_sleep(listener), &wait); +out: + release_sock(listener); + return err; +} + +static int vsock_listen(struct socket *sock, int backlog) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + + sk = sock->sk; + + lock_sock(sk); + + if (sock->type != SOCK_STREAM) { + err = -EOPNOTSUPP; + goto out; + } + + if (sock->state != SS_UNCONNECTED) { + err = -EINVAL; + goto out; + } + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) { + err = -EINVAL; + goto out; + } + + sk->sk_max_ack_backlog = backlog; + sk->sk_state = SS_LISTEN; + + err = 0; + +out: + release_sock(sk); + return err; +} + +static int vsock_stream_setsockopt(struct socket *sock, + int level, + int optname, + char __user *optval, + unsigned int optlen) +{ + int err; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + +#define COPY_IN(_v) \ + do { \ + if (optlen < sizeof(_v)) { \ + err = -EINVAL; \ + goto exit; \ + } \ + if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \ + err = -EFAULT; \ + goto exit; \ + } \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + lock_sock(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + COPY_IN(val); + transport->set_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + COPY_IN(val); + transport->set_max_buffer_size(vsk, val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + COPY_IN(val); + transport->set_min_buffer_size(vsk, val); + 
break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + COPY_IN(tv); + if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && + tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { + vsk->connect_timeout = tv.tv_sec * HZ + + DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ)); + if (vsk->connect_timeout == 0) + vsk->connect_timeout = + VSOCK_DEFAULT_CONNECT_TIMEOUT; + + } else { + err = -ERANGE; + } + break; + } + + default: + err = -ENOPROTOOPT; + break; + } + +#undef COPY_IN + +exit: + release_sock(sk); + return err; +} + +static int vsock_stream_getsockopt(struct socket *sock, + int level, int optname, + char __user *optval, + int __user *optlen) +{ + int err; + int len; + struct sock *sk; + struct vsock_sock *vsk; + u64 val; + + if (level != AF_VSOCK) + return -ENOPROTOOPT; + + err = get_user(len, optlen); + if (err != 0) + return err; + +#define COPY_OUT(_v) \ + do { \ + if (len < sizeof(_v)) \ + return -EINVAL; \ + \ + len = sizeof(_v); \ + if (copy_to_user(optval, &_v, len) != 0) \ + return -EFAULT; \ + \ + } while (0) + + err = 0; + sk = sock->sk; + vsk = vsock_sk(sk); + + switch (optname) { + case SO_VM_SOCKETS_BUFFER_SIZE: + val = transport->get_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MAX_SIZE: + val = transport->get_max_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_BUFFER_MIN_SIZE: + val = transport->get_min_buffer_size(vsk); + COPY_OUT(val); + break; + + case SO_VM_SOCKETS_CONNECT_TIMEOUT: { + struct timeval tv; + tv.tv_sec = vsk->connect_timeout / HZ; + tv.tv_usec = + (vsk->connect_timeout - + tv.tv_sec * HZ) * (1000000 / HZ); + COPY_OUT(tv); + break; + } + default: + return -ENOPROTOOPT; + } + + err = put_user(len, optlen); + if (err != 0) + return -EFAULT; + +#undef COPY_OUT + + return 0; +} + +static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, + struct msghdr *msg, size_t len) +{ + struct sock *sk; + struct vsock_sock *vsk; + ssize_t total_written; + long timeout; + int err; + 
struct vsock_transport_send_notify_data send_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + total_written = 0; + err = 0; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + /* Callers should not provide a destination with stream sockets. */ + if (msg->msg_namelen) { + err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP; + goto out; + } + + /* Send data only if both sides are not shutdown in the direction. */ + if (sk->sk_shutdown & SEND_SHUTDOWN || + vsk->peer_shutdown & RCV_SHUTDOWN) { + err = -EPIPE; + goto out; + } + + if (sk->sk_state != SS_CONNECTED || + !vsock_addr_bound(&vsk->local_addr)) { + err = -ENOTCONN; + goto out; + } + + if (!vsock_addr_bound(&vsk->remote_addr)) { + err = -EDESTADDRREQ; + goto out; + } + + /* Wait for room in the produce queue to enqueue our user's data. */ + timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + err = transport->notify_send_init(vsk, &send_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (total_written < len) { + ssize_t written; + + while (vsock_stream_has_space(vsk) == 0 && + sk->sk_err == 0 && + !(sk->sk_shutdown & SEND_SHUTDOWN) && + !(vsk->peer_shutdown & RCV_SHUTDOWN)) { + + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + err = transport->notify_send_pre_block(vsk, &send_data); + if (err < 0) + goto out_wait; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_wait; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_wait; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + + /* These checks occur both as part of and after the loop + * conditional since we need to check before and after + * sleeping. 
+ */ + if (sk->sk_err) { + err = -sk->sk_err; + goto out_wait; + } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || + (vsk->peer_shutdown & RCV_SHUTDOWN)) { + err = -EPIPE; + goto out_wait; + } + + err = transport->notify_send_pre_enqueue(vsk, &send_data); + if (err < 0) + goto out_wait; + + /* Note that enqueue will only write as many bytes as are free + * in the produce queue, so we don't need to ensure len is + * smaller than the queue size. It is the caller's + * responsibility to check how many bytes we were able to send. + */ + + written = transport->stream_enqueue( + vsk, msg->msg_iov, + len - total_written); + if (written < 0) { + err = -ENOMEM; + goto out_wait; + } + + total_written += written; + + err = transport->notify_send_post_enqueue( + vsk, written, &send_data); + if (err < 0) + goto out_wait; + + } + +out_wait: + if (total_written > 0) + err = total_written; + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + + +static int +vsock_stream_recvmsg(struct kiocb *kiocb, + struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk; + struct vsock_sock *vsk; + int err; + size_t target; + ssize_t copied; + long timeout; + struct vsock_transport_recv_notify_data recv_data; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + if (sk->sk_state != SS_CONNECTED) { + /* Recvmsg is supposed to return 0 if a peer performs an + * orderly shutdown. Differentiate between that case and when a + * peer has not connected or a local shutdown occured with the + * SOCK_DONE flag. + */ + if (sock_flag(sk, SOCK_DONE)) + err = 0; + else + err = -ENOTCONN; + + goto out; + } + + if (flags & MSG_OOB) { + err = -EOPNOTSUPP; + goto out; + } + + /* We don't check peer_shutdown flag here since peer may actually shut + * down, but there can be data in the queue that a local socket can + * receive. 
+ */ + if (sk->sk_shutdown & RCV_SHUTDOWN) { + err = 0; + goto out; + } + + /* It is valid on Linux to pass in a zero-length receive buffer. This + * is not an error. We may as well bail out now. + */ + if (!len) { + err = 0; + goto out; + } + + /* We must not copy less than target bytes into the user's buffer + * before returning successfully, so we wait for the consume queue to + * have that much data to consume before dequeueing. Note that this + * makes it impossible to handle cases where target is greater than the + * queue size. + */ + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + if (target >= transport->stream_rcvhiwat(vsk)) { + err = -ENOMEM; + goto out; + } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + copied = 0; + + err = transport->notify_recv_init(vsk, target, &recv_data); + if (err < 0) + goto out; + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + + while (1) { + s64 ready = vsock_stream_has_data(vsk); + + if (ready < 0) { + /* Invalid queue pair content. XXX This should be + * changed to a connection reset in a later change. + */ + + err = -ENOMEM; + goto out_wait; + } else if (ready > 0) { + ssize_t read; + + err = transport->notify_recv_pre_dequeue( + vsk, target, &recv_data); + if (err < 0) + break; + + read = transport->stream_dequeue( + vsk, msg->msg_iov, + len - copied, flags); + if (read < 0) { + err = -ENOMEM; + break; + } + + copied += read; + + err = transport->notify_recv_post_dequeue( + vsk, target, read, + !(flags & MSG_PEEK), &recv_data); + if (err < 0) + goto out_wait; + + if (read >= target || flags & MSG_PEEK) + break; + + target -= read; + } else { + if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) + || (vsk->peer_shutdown & SEND_SHUTDOWN)) { + break; + } + /* Don't wait for non-blocking sockets. 
*/ + if (timeout == 0) { + err = -EAGAIN; + break; + } + + err = transport->notify_recv_pre_block( + vsk, target, &recv_data); + if (err < 0) + break; + + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + break; + } else if (timeout == 0) { + err = -EAGAIN; + break; + } + + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + } + } + + if (sk->sk_err) + err = -sk->sk_err; + else if (sk->sk_shutdown & RCV_SHUTDOWN) + err = 0; + + if (copied > 0) { + /* We only do these additional bookkeeping/notification steps + * if we actually copied something out of the queue pair + * instead of just peeking ahead. + */ + + if (!(flags & MSG_PEEK)) { + /* If the other side has shutdown for sending and there + * is nothing more to read, then modify the socket + * state. + */ + if (vsk->peer_shutdown & SEND_SHUTDOWN) { + if (vsock_stream_has_data(vsk) <= 0) { + sk->sk_state = SS_UNCONNECTED; + sock_set_flag(sk, SOCK_DONE); + sk->sk_state_change(sk); + } + } + } + err = copied; + } + +out_wait: + finish_wait(sk_sleep(sk), &wait); +out: + release_sock(sk); + return err; +} + +static const struct proto_ops vsock_stream_ops = { + .family = PF_VSOCK, + .owner = THIS_MODULE, + .release = vsock_release, + .bind = vsock_bind, + .connect = vsock_stream_connect, + .socketpair = sock_no_socketpair, + .accept = vsock_accept, + .getname = vsock_getname, + .poll = vsock_poll, + .ioctl = sock_no_ioctl, + .listen = vsock_listen, + .shutdown = vsock_shutdown, + .setsockopt = vsock_stream_setsockopt, + .getsockopt = vsock_stream_getsockopt, + .sendmsg = vsock_stream_sendmsg, + .recvmsg = vsock_stream_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static int vsock_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + if (!sock) + return -EINVAL; + + if (protocol) + return -EPROTONOSUPPORT; + + switch (sock->type) { + case SOCK_DGRAM: + sock->ops = 
&vsock_dgram_ops; + break; + case SOCK_STREAM: + sock->ops = &vsock_stream_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } + + sock->state = SS_UNCONNECTED; + + return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM; +} + +static const struct net_proto_family vsock_family_ops = { + .family = AF_VSOCK, + .create = vsock_create, + .owner = THIS_MODULE, +}; + +static long vsock_dev_do_ioctl(struct file *filp, + unsigned int cmd, void __user *ptr) +{ + u32 __user *p = ptr; + int retval = 0; + + switch (cmd) { + case IOCTL_VM_SOCKETS_GET_LOCAL_CID: + if (put_user(transport->get_local_cid(), p) != 0) + retval = -EFAULT; + break; + + default: + pr_err("Unknown ioctl %d\n", cmd); + retval = -EINVAL; + } + + return retval; +} + +static long vsock_dev_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg); +} + +#ifdef CONFIG_COMPAT +static long vsock_dev_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg)); +} +#endif + +static const struct file_operations vsock_device_ops = { + .owner = THIS_MODULE, + .unlocked_ioctl = vsock_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vsock_dev_compat_ioctl, +#endif + .open = nonseekable_open, +}; + +static struct miscdevice vsock_device = { + .name = "vsock", + .minor = MISC_DYNAMIC_MINOR, + .fops = &vsock_device_ops, +}; + +static int __vsock_core_init(void) +{ + int err; + + vsock_init_tables(); + + err = misc_register(&vsock_device); + if (err) { + pr_err("Failed to register misc device\n"); + return -ENOENT; + } + + err = proto_register(&vsock_proto, 1); /* we want our slab */ + if (err) { + pr_err("Cannot register vsock protocol\n"); + goto err_misc_deregister; + } + + err = sock_register(&vsock_family_ops); + if (err) { + pr_err("could not register af_vsock (%d) address family: %d\n", + AF_VSOCK, err); + goto err_unregister_proto; + } + + return 0; + 
+err_unregister_proto: + proto_unregister(&vsock_proto); +err_misc_deregister: + misc_deregister(&vsock_device); + return err; +} + +int vsock_core_init(const struct vsock_transport *t) +{ + int retval = mutex_lock_interruptible(&vsock_register_mutex); + if (retval) + return retval; + + if (transport) { + retval = -EBUSY; + goto out; + } + + transport = t; + retval = __vsock_core_init(); + if (retval) + transport = NULL; + +out: + mutex_unlock(&vsock_register_mutex); + return retval; +} +EXPORT_SYMBOL_GPL(vsock_core_init); + +void vsock_core_exit(void) +{ + mutex_lock(&vsock_register_mutex); + + misc_deregister(&vsock_device); + sock_unregister(AF_VSOCK); + proto_unregister(&vsock_proto); + + /* We do not want the assignment below re-ordered. */ + mb(); + transport = NULL; + + mutex_unlock(&vsock_register_mutex); +} +EXPORT_SYMBOL_GPL(vsock_core_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Virtual Socket Family"); +MODULE_VERSION(VSOCK_DRIVER_VERSION_STRING); +MODULE_LICENSE("GPL v2"); diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h new file mode 100644 index 000000000000..7d64d3609ec9 --- /dev/null +++ b/net/vmw_vsock/af_vsock.h @@ -0,0 +1,175 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef __AF_VSOCK_H__ +#define __AF_VSOCK_H__ + +#include +#include +#include + +#include "vsock_addr.h" + +#define LAST_RESERVED_PORT 1023 + +/* Convert between a struct sock pointer and the struct vsock_sock that embeds + * it as its first member. The macro argument is parenthesized so that an + * expression argument is cast as a whole. + */ +#define vsock_sk(__sk) ((struct vsock_sock *)(__sk)) +#define sk_vsock(__vsk) (&(__vsk)->sk) + +struct vsock_sock { + /* sk must be the first member. */ + struct sock sk; + struct sockaddr_vm local_addr; + struct sockaddr_vm remote_addr; + /* Links for the global tables of bound and connected sockets. */ + struct list_head bound_table; + struct list_head connected_table; + /* Accessed without the socket lock held. This means it can never be + * modified outside of socket create or destruct. + */ + bool trusted; + bool cached_peer_allow_dgram; /* Dgram communication allowed to + * cached peer? + */ + u32 cached_peer; /* Context ID of last dgram destination check. */ + const struct cred *owner; + /* Rest are SOCK_STREAM only. */ + long connect_timeout; + /* Listening socket that this came from. */ + struct sock *listener; + /* Used for pending list and accept queue during connection handshake. + * The listening socket is the head for both lists. Sockets created + * for connection requests are placed in the pending list until they + * are connected, at which point they are put in the accept queue list + * so they can be accepted in accept(). If accept() cannot accept the + * connection, it is marked as rejected so the cleanup function knows + * to clean up the socket. + */ + struct list_head pending_links; + struct list_head accept_queue; + bool rejected; + struct delayed_work dwork; + u32 peer_shutdown; + bool sent_request; + bool ignore_connecting_rst; + + /* Private to transport.
*/ + void *trans; +}; + +s64 vsock_stream_has_data(struct vsock_sock *vsk); +s64 vsock_stream_has_space(struct vsock_sock *vsk); +void vsock_pending_work(struct work_struct *work); +struct sock *__vsock_create(struct net *net, + struct socket *sock, + struct sock *parent, + gfp_t priority, unsigned short type); + +/**** TRANSPORT ****/ + +struct vsock_transport_recv_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ + bool notify_on_block; +}; + +struct vsock_transport_send_notify_data { + u64 data1; /* Transport-defined. */ + u64 data2; /* Transport-defined. */ +}; + +struct vsock_transport { + /* Initialize/tear-down socket. */ + int (*init)(struct vsock_sock *, struct vsock_sock *); + void (*destruct)(struct vsock_sock *); + void (*release)(struct vsock_sock *); + + /* Connections. */ + int (*connect)(struct vsock_sock *); + + /* DGRAM. */ + int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *); + int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk, + struct msghdr *msg, size_t len, int flags); + int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, + struct iovec *, size_t len); + bool (*dgram_allow)(u32 cid, u32 port); + + /* STREAM. */ + /* TODO: stream_bind() */ + ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *, + size_t len, int flags); + ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *, + size_t len); + s64 (*stream_has_data)(struct vsock_sock *); + s64 (*stream_has_space)(struct vsock_sock *); + u64 (*stream_rcvhiwat)(struct vsock_sock *); + bool (*stream_is_active)(struct vsock_sock *); + bool (*stream_allow)(u32 cid, u32 port); + + /* Notification. 
*/ + int (*notify_poll_in)(struct vsock_sock *, size_t, bool *); + int (*notify_poll_out)(struct vsock_sock *, size_t, bool *); + int (*notify_recv_init)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_block)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t, + struct vsock_transport_recv_notify_data *); + int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t, + ssize_t, bool, struct vsock_transport_recv_notify_data *); + int (*notify_send_init)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_block)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_pre_enqueue)(struct vsock_sock *, + struct vsock_transport_send_notify_data *); + int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t, + struct vsock_transport_send_notify_data *); + + /* Shutdown. */ + int (*shutdown)(struct vsock_sock *, int); + + /* Buffer sizes. */ + void (*set_buffer_size)(struct vsock_sock *, u64); + void (*set_min_buffer_size)(struct vsock_sock *, u64); + void (*set_max_buffer_size)(struct vsock_sock *, u64); + u64 (*get_buffer_size)(struct vsock_sock *); + u64 (*get_min_buffer_size)(struct vsock_sock *); + u64 (*get_max_buffer_size)(struct vsock_sock *); + + /* Addressing. 
*/ + u32 (*get_local_cid)(void); +}; + +/**** CORE ****/ + +int vsock_core_init(const struct vsock_transport *t); +void vsock_core_exit(void); + +/**** UTILS ****/ + +void vsock_release_pending(struct sock *pending); +void vsock_add_pending(struct sock *listener, struct sock *pending); +void vsock_remove_pending(struct sock *listener, struct sock *pending); +void vsock_enqueue_accept(struct sock *listener, struct sock *connected); +void vsock_insert_connected(struct vsock_sock *vsk); +void vsock_remove_bound(struct vsock_sock *vsk); +void vsock_remove_connected(struct vsock_sock *vsk); +struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); +struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst); +void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); + +#endif /* __AF_VSOCK_H__ */ diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c new file mode 100644 index 000000000000..e8a87cf37072 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.c @@ -0,0 +1,2157 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include + +#define EXPORT_SYMTAB +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "af_vsock.h" +#include "vmci_transport_notify.h" + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg); +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg); +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *ed, + void *client_data); +static void vmci_transport_recv_pkt_work(struct work_struct *work); +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_server( + struct sock *sk, + struct sock *pending, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connecting_client_invalid( + struct sock *sk, + struct vmci_transport_packet *pkt); +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt); +static bool vmci_transport_old_proto_override(bool *old_pkt_proto); +static u16 vmci_transport_new_proto_supported_versions(void); +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto, + bool old_pkt_proto); + +struct vmci_transport_recv_pkt_info { + struct work_struct work; + struct sock *sk; + struct vmci_transport_packet pkt; +}; + +static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID, + VMCI_INVALID_ID }; +static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + +static int PROTOCOL_OVERRIDE = -1; + 
+#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN 128 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE 262144 +#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX 262144 + +/* The default peer timeout indicates how long we will wait for a peer response + * to a control message. + */ +#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) + +#define SS_LISTEN 255 + +/* Helper function to convert from a VMCI error code to a VSock error code. */ + +static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) +{ + int err; + + switch (vmci_error) { + case VMCI_ERROR_NO_MEM: + err = ENOMEM; + break; + case VMCI_ERROR_DUPLICATE_ENTRY: + case VMCI_ERROR_ALREADY_EXISTS: + err = EADDRINUSE; + break; + case VMCI_ERROR_NO_ACCESS: + err = EPERM; + break; + case VMCI_ERROR_NO_RESOURCES: + err = ENOBUFS; + break; + case VMCI_ERROR_INVALID_RESOURCE: + err = EHOSTUNREACH; + break; + case VMCI_ERROR_INVALID_ARGS: + default: + err = EINVAL; + } + + return err > 0 ? -err : err; +} + +static inline void +vmci_transport_packet_init(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + u8 type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + /* We register the stream control handler as an any cid handle so we + * must always send from a source address of VMADDR_CID_ANY + */ + pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.dst = vmci_make_handle(dst->svm_cid, + VMCI_TRANSPORT_PACKET_RID); + pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg); + pkt->version = VMCI_TRANSPORT_PACKET_VERSION; + pkt->type = type; + pkt->src_port = src->svm_port; + pkt->dst_port = dst->svm_port; + memset(&pkt->proto, 0, sizeof(pkt->proto)); + memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + pkt->u.size = 
size; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + pkt->u.handle = handle; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + case VMCI_TRANSPORT_PACKET_TYPE_READ: + case VMCI_TRANSPORT_PACKET_TYPE_RST: + pkt->u.size = 0; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + pkt->u.mode = mode; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait)); + break; + + case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + pkt->u.size = size; + pkt->proto = proto; + break; + } +} + +static inline void +vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt, + struct sockaddr_vm *local, + struct sockaddr_vm *remote) +{ + vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port); + vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port); +} + +static int +__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt, + struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle, + bool convert_error) +{ + int err; + + vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait, + proto, handle); + err = vmci_datagram_send(&pkt->dg); + if (convert_error && (err < 0)) + return vmci_transport_error_to_vsock_error(err); + + return err; +} + +static int +vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + struct vmci_transport_packet reply; + struct sockaddr_vm src, dst; + + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) { + return 0; + } else { + vmci_transport_packet_get_addresses(pkt, &src, &dst); + return __vmci_transport_send_control_pkt(&reply, &src, 
&dst, + type, + size, mode, wait, + VSOCK_PROTO_INVALID, + handle, true); + } +} + +static int +vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src, + struct sockaddr_vm *dst, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + struct vmci_handle handle) +{ + /* Note that it is safe to use a single packet across all CPUs since + * two tasklets of the same type are guaranteed to not ever run + * simultaneously. If that ever changes, or VMCI stops using tasklets, + * we can use per-cpu packets. + */ + static struct vmci_transport_packet pkt; + + return __vmci_transport_send_control_pkt(&pkt, src, dst, type, + size, mode, wait, + VSOCK_PROTO_INVALID, handle, + false); +} + +static int +vmci_transport_send_control_pkt(struct sock *sk, + enum vmci_transport_packet_type type, + u64 size, + u64 mode, + struct vmci_transport_waiting_info *wait, + u16 proto, + struct vmci_handle handle) +{ + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + + if (!vsock_addr_bound(&vsk->local_addr)) + return -EINVAL; + + if (!vsock_addr_bound(&vsk->remote_addr)) + return -EINVAL; + + pkt = kmalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr, + &vsk->remote_addr, type, size, + mode, wait, proto, handle, + true); + kfree(pkt); + + return err; +} + +static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_RST, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_reset(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) + return 0; + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 
0, 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_negotiate2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_qp_offer(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0, + 0, NULL, + VSOCK_PROTO_INVALID, handle); +} + +static int vmci_transport_send_attach(struct sock *sk, + struct vmci_handle handle) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + 0, 0, NULL, VSOCK_PROTO_INVALID, + handle); +} + +static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt) +{ + return vmci_transport_reply_control_pkt_fast( + pkt, + VMCI_TRANSPORT_PACKET_TYPE_RST, + 0, 0, NULL, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_INVALID, + 0, 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ + return vmci_transport_send_control_pkt_bh( + dst, src, + VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_wrote(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0, + 
0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_read(struct sock *sk) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0, + 0, NULL, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + 0, 0, wait, VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode) +{ + return vmci_transport_send_control_pkt( + &vsk->sk, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + 0, mode, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request(struct sock *sk, size_t size) +{ + return vmci_transport_send_control_pkt(sk, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + size, 0, NULL, + VSOCK_PROTO_INVALID, + VMCI_INVALID_HANDLE); +} + +static int vmci_transport_send_conn_request2(struct sock *sk, size_t size, + u16 version) +{ + return vmci_transport_send_control_pkt( + sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + size, 0, NULL, version, + VMCI_INVALID_HANDLE); +} + +static struct sock *vmci_transport_get_pending( + struct sock *listener, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vlistener; + struct vsock_sock *vpending; + struct sock *pending; + + vlistener = vsock_sk(listener); + + list_for_each_entry(vpending, &vlistener->pending_links, + pending_links) { + struct sockaddr_vm src; + struct sockaddr_vm dst; + + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + if (vsock_addr_equals_addr(&src, 
&vpending->remote_addr) && + vsock_addr_equals_addr(&dst, &vpending->local_addr)) { + pending = sk_vsock(vpending); + sock_hold(pending); + goto found; + } + } + + pending = NULL; +found: + return pending; + +} + +static void vmci_transport_release_pending(struct sock *pending) +{ + sock_put(pending); +} + +/* We allow two kinds of sockets to communicate with a restricted VM: 1) + * trusted sockets 2) sockets from applications running as the same user as the + * VM (this is only true for the host side and only when using hosted products) + */ + +static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid) +{ + return vsock->trusted || + vmci_is_context_owner(peer_cid, vsock->owner->uid); +} + +/* We allow sending datagrams to and receiving datagrams from a restricted VM + * only if it is trusted as described in vmci_transport_is_trusted. + */ + +static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid) +{ + if (vsock->cached_peer != peer_cid) { + vsock->cached_peer = peer_cid; + if (!vmci_transport_is_trusted(vsock, peer_cid) && + (vmci_context_get_priv_flags(peer_cid) & + VMCI_PRIVILEGE_FLAG_RESTRICTED)) { + vsock->cached_peer_allow_dgram = false; + } else { + vsock->cached_peer_allow_dgram = true; + } + } + + return vsock->cached_peer_allow_dgram; +} + +static int +vmci_transport_queue_pair_alloc(struct vmci_qp **qpair, + struct vmci_handle *handle, + u64 produce_size, + u64 consume_size, + u32 peer, u32 flags, bool trusted) +{ + int err = 0; + + if (trusted) { + /* Try to allocate our queue pair as trusted. This will only + * work if vsock is running in the host. 
+ */ + + err = vmci_qpair_alloc(qpair, handle, produce_size, + consume_size, + peer, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED); + if (err != VMCI_ERROR_NO_ACCESS) + goto out; + + } + + err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size, + peer, flags, VMCI_NO_PRIVILEGE_FLAGS); +out: + if (err < 0) { + pr_err("Could not attach to queue pair with %d\n", + err); + err = vmci_transport_error_to_vsock_error(err); + } + + return err; +} + +static int +vmci_transport_datagram_create_hnd(u32 resource_id, + u32 flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, + struct vmci_handle *out_handle) +{ + int err = 0; + + /* Try to allocate our datagram handler as trusted. This will only work + * if vsock is running in the host. + */ + + err = vmci_datagram_create_handle_priv(resource_id, flags, + VMCI_PRIVILEGE_FLAG_TRUSTED, + recv_cb, + client_data, out_handle); + + if (err == VMCI_ERROR_NO_ACCESS) + err = vmci_datagram_create_handle(resource_id, flags, + recv_cb, client_data, + out_handle); + + return err; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context and if it ever needs to + * sleep it should defer that work to a work queue. + */ + +static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + size_t size; + struct sk_buff *skb; + struct vsock_sock *vsk; + + sk = (struct sock *)data; + + /* This handler is privileged when this module is running on the host. + * We will get datagrams from all endpoints (even VMs that are in a + * restricted context). If we get one from a restricted context then + * the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. 
+ */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, dg->src.context)) + return VMCI_ERROR_NO_ACCESS; + + size = VMCI_DG_SIZE(dg); + + /* Attach the packet to the socket's receive queue as an sk_buff. */ + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) { + /* The datagram cannot be queued; report the failure instead + * of returning VMCI_SUCCESS for a packet that was dropped. + */ + return VMCI_ERROR_NO_MEM; + } + + /* sk_receive_skb() will do a sock_put(), so hold here. */ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, dg, size); + sk_receive_skb(sk, skb, 0); + + return VMCI_SUCCESS; +} + +static bool vmci_transport_stream_allow(u32 cid, u32 port) +{ + static const u32 non_socket_contexts[] = { + VMADDR_CID_HYPERVISOR, + VMADDR_CID_RESERVED, + }; + int i; + + BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts)); + + for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) { + if (cid == non_socket_contexts[i]) + return false; + } + + return true; +} + +/* This is invoked as part of a tasklet that's scheduled when the VMCI + * interrupt fires. This is run in bottom-half context but it defers most of + * its work to the packet handling work queue. + */ + +static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) +{ + struct sock *sk; + struct sockaddr_vm dst; + struct sockaddr_vm src; + struct vmci_transport_packet *pkt; + struct vsock_sock *vsk; + bool bh_process_pkt; + int err; + + sk = NULL; + err = VMCI_SUCCESS; + bh_process_pkt = false; + + /* Ignore incoming packets from contexts without sockets, or resources + * that aren't vsock implementations. + */ + + if (!vmci_transport_stream_allow(dg->src.context, -1) + || VMCI_TRANSPORT_PACKET_RID != dg->src.resource) + return VMCI_ERROR_NO_ACCESS; + + if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) + /* Drop datagrams that do not contain full VSock packets. */ + return VMCI_ERROR_INVALID_ARGS; + + pkt = (struct vmci_transport_packet *)dg; + + /* Find the socket that should handle this packet. First we look for a + * connected socket and if there is none we look for a socket bound to + * the destination address.
+ */ + vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port); + vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port); + + sk = vsock_find_connected_socket(&src, &dst); + if (!sk) { + sk = vsock_find_bound_socket(&dst); + if (!sk) { + /* We could not find a socket for this specified + * address. If this packet is a RST, we just drop it. + * If it is another packet, we send a RST. Note that + * we do not send a RST reply to RSTs so that we do not + * continually send RSTs between two endpoints. + * + * Note that since this is a reply, dst is src and src + * is dst. + */ + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NOT_FOUND; + goto out; + } + } + + /* If the received packet type is beyond all types known to this + * implementation, reply with an invalid message. Hopefully this will + * help when implementing backwards compatibility in the future. + */ + if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) { + vmci_transport_send_invalid_bh(&dst, &src); + err = VMCI_ERROR_INVALID_ARGS; + goto out; + } + + /* This handler is privileged when this module is running on the host. + * We will get datagram connect requests from all endpoints (even VMs + * that are in a restricted context). If we get one from a restricted + * context then the destination socket must be trusted. + * + * NOTE: We access the socket struct without holding the lock here. + * This is ok because the field we are interested is never modified + * outside of the create and destruct socket functions. + */ + vsk = vsock_sk(sk); + if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) { + err = VMCI_ERROR_NO_ACCESS; + goto out; + } + + /* We do most everything in a work queue, but let's fast path the + * notification of reads and writes to help data transfer performance. + * We can only do this if there is no process context code executing + * for this socket since that may change the state. 
+ */ + bh_lock_sock(sk); + + if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED) + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, true, &dst, &src, + &bh_process_pkt); + + bh_unlock_sock(sk); + + if (!bh_process_pkt) { + struct vmci_transport_recv_pkt_info *recv_pkt_info; + + recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC); + if (!recv_pkt_info) { + if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0) + pr_err("unable to send reset\n"); + + err = VMCI_ERROR_NO_MEM; + goto out; + } + + recv_pkt_info->sk = sk; + memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt)); + INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work); + + schedule_work(&recv_pkt_info->work); + /* Clear sk so that the reference count incremented by one of + * the Find functions above is not decremented below. We need + * that reference count for the packet handler we've scheduled + * to run. + */ + sk = NULL; + } + +out: + if (sk) + sock_put(sk); + + return err; +} + +static void vmci_transport_peer_attach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + + vsk = vsock_sk(sk); + + /* We don't ask for delayed CBs when we subscribe to this event (we + * pass 0 as flags to vmci_event_subscribe()). VMCI makes no + * guarantees in that case about what context we might be running in, + * so it could be BH or process, blockable or non-blockable. So we + * need to account for all possible contexts here. + */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. 
+ */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) { + /* XXX This doesn't do anything, but in the future we may want + * to set a flag here to verify the attach really did occur and + * we weren't just sent a datagram claiming it was. + */ + goto out; + } + +out: + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_handle_detach(struct sock *sk) +{ + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + sock_set_flag(sk, SOCK_DONE); + + /* On a detach the peer will not be sending or receiving + * anymore. + */ + vsk->peer_shutdown = SHUTDOWN_MASK; + + /* We should not be sending anymore since the peer won't be + * there to receive, but we can still receive if there is data + * left in our consume queue. + */ + if (vsock_stream_has_data(vsk) <= 0) { + if (sk->sk_state == SS_CONNECTING) { + /* The peer may detach from a queue pair while + * we are still in the connecting state, i.e., + * if the peer VM is killed after attaching to + * a queue pair, but before we complete the + * handshake. In that case, we treat the detach + * event like a reset. + */ + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = ECONNRESET; + sk->sk_error_report(sk); + return; + } + sk->sk_state = SS_UNCONNECTED; + } + sk->sk_state_change(sk); + } +} + +static void vmci_transport_peer_detach_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + struct sock *sk = client_data; + const struct vmci_event_payload_qp *e_payload; + struct vsock_sock *vsk; + + e_payload = vmci_event_data_const_payload(e_data); + vsk = vsock_sk(sk); + if (vmci_handle_is_invalid(e_payload->handle)) + return; + + /* Same rules for locking as for peer_attach_cb(). */ + local_bh_disable(); + bh_lock_sock(sk); + + /* XXX This is lame, we should provide a way to lookup sockets by + * qp_handle. 
+ */ + if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle, + e_payload->handle)) + vmci_transport_handle_detach(sk); + + bh_unlock_sock(sk); + local_bh_enable(); +} + +static void vmci_transport_qp_resumed_cb(u32 sub_id, + const struct vmci_event_data *e_data, + void *client_data) +{ + vsock_for_each_connected_socket(vmci_transport_handle_detach); +} + +static void vmci_transport_recv_pkt_work(struct work_struct *work) +{ + struct vmci_transport_recv_pkt_info *recv_pkt_info; + struct vmci_transport_packet *pkt; + struct sock *sk; + + recv_pkt_info = + container_of(work, struct vmci_transport_recv_pkt_info, work); + sk = recv_pkt_info->sk; + pkt = &recv_pkt_info->pkt; + + lock_sock(sk); + + switch (sk->sk_state) { + case SS_LISTEN: + vmci_transport_recv_listen(sk, pkt); + break; + case SS_CONNECTING: + /* Processing of pending connections for servers goes through + * the listening socket, so see vmci_transport_recv_listen() + * for that path. + */ + vmci_transport_recv_connecting_client(sk, pkt); + break; + case SS_CONNECTED: + vmci_transport_recv_connected(sk, pkt); + break; + default: + /* Because this function does not run in the same context as + * vmci_transport_recv_stream_cb it is possible that the + * socket has closed. We need to let the other side know or it + * could be sitting in a connect and hang forever. Send a + * reset to prevent that. + */ + vmci_transport_send_reset(sk, pkt); + goto out; + } + +out: + release_sock(sk); + kfree(recv_pkt_info); + /* Release reference obtained in the stream callback when we fetched + * this socket out of the bound or connected list. 
+ */ + sock_put(sk); +} + +static int vmci_transport_recv_listen(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct sock *pending; + struct vsock_sock *vpending; + int err; + u64 qp_size; + bool old_request = false; + bool old_pkt_proto = false; + + err = 0; + + /* Because we are in the listen state, we could be receiving a packet + * for ourself or any previous connection requests that we received. + * If it's the latter, we try to find a socket in our list of pending + * connections and, if we do, call the appropriate handler for the + * state that that socket is in. Otherwise we try to service the + * connection request. + */ + pending = vmci_transport_get_pending(sk, pkt); + if (pending) { + lock_sock(pending); + switch (pending->sk_state) { + case SS_CONNECTING: + err = vmci_transport_recv_connecting_server(sk, + pending, + pkt); + break; + default: + vmci_transport_send_reset(pending, pkt); + err = -EINVAL; + } + + if (err < 0) + vsock_remove_pending(sk, pending); + + release_sock(pending); + vmci_transport_release_pending(pending); + + return err; + } + + /* The listen state only accepts connection requests. Reply with a + * reset unless we received a reset. + */ + + if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST || + pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + if (pkt->u.size == 0) { + vmci_transport_reply_reset(pkt); + return -EINVAL; + } + + /* If this socket can't accommodate this connection request, we send a + * reset. Otherwise we create and initialize a child socket and reply + * with a connection negotiation. 
+ */ + if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) { + vmci_transport_reply_reset(pkt); + return -ECONNREFUSED; + } + + pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type); + if (!pending) { + vmci_transport_send_reset(sk, pkt); + return -ENOMEM; + } + + vpending = vsock_sk(pending); + + vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context, + pkt->dst_port); + vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context, + pkt->src_port); + + /* If the proposed size fits within our min/max, accept it. Otherwise + * propose our own size. + */ + if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size && + pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) { + qp_size = pkt->u.size; + } else { + qp_size = vmci_trans(vpending)->queue_pair_size; + } + + /* Figure out if we are using old or new requests based on the + * overrides pkt types sent by our peer. + */ + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_request = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST) + old_request = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2) + old_request = false; + + } + + if (old_request) { + /* Handle a REQUEST (or override) */ + u16 version = VSOCK_PROTO_INVALID; + if (vmci_transport_proto_to_notify_struct( + pending, &version, true)) + err = vmci_transport_send_negotiate(pending, qp_size); + else + err = -EINVAL; + + } else { + /* Handle a REQUEST2 (or override) */ + int proto_int = pkt->proto; + int pos; + u16 active_proto_version = 0; + + /* The list of possible protocols is the intersection of all + * protocols the client supports ... plus all the protocols we + * support. + */ + proto_int &= vmci_transport_new_proto_supported_versions(); + + /* We choose the highest possible protocol version and use that + * one. 
+ */ + pos = fls(proto_int); + if (pos) { + active_proto_version = (1 << (pos - 1)); + if (vmci_transport_proto_to_notify_struct( + pending, &active_proto_version, false)) + err = vmci_transport_send_negotiate2(pending, + qp_size, + active_proto_version); + else + err = -EINVAL; + + } else { + err = -EINVAL; + } + } + + if (err < 0) { + vmci_transport_send_reset(sk, pkt); + sock_put(pending); + err = vmci_transport_error_to_vsock_error(err); + goto out; + } + + vsock_add_pending(sk, pending); + sk->sk_ack_backlog++; + + pending->sk_state = SS_CONNECTING; + vmci_trans(vpending)->produce_size = + vmci_trans(vpending)->consume_size = qp_size; + vmci_trans(vpending)->queue_pair_size = qp_size; + + vmci_trans(vpending)->notify_ops->process_request(pending); + + /* We might never receive another message for this socket and it's not + * connected to any process, so we have to ensure it gets cleaned up + * ourself. Our delayed work function will take care of that. Note + * that we do not ever cancel this function since we have few + * guarantees about its state when calling cancel_delayed_work(). + * Instead we hold a reference on the socket for that function and make + * it capable of handling cases where it needs to do nothing but + * release that reference. 
+ */ + vpending->listener = sk; + sock_hold(sk); + sock_hold(pending); + INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); + schedule_delayed_work(&vpending->dwork, HZ); + +out: + return err; +} + +static int +vmci_transport_recv_connecting_server(struct sock *listener, + struct sock *pending, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vpending; + struct vmci_handle handle; + struct vmci_qp *qpair; + bool is_local; + u32 flags; + u32 detach_sub_id; + int err; + int skerr; + + vpending = vsock_sk(pending); + detach_sub_id = VMCI_INVALID_ID; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_OFFER: + if (vmci_handle_is_invalid(pkt->u.handle)) { + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + break; + default: + /* Close and cleanup the connection. */ + vmci_transport_send_reset(pending, pkt); + skerr = EPROTO; + err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL; + goto destroy; + } + + /* In order to complete the connection we need to attach to the offered + * queue pair and send an attach notification. We also subscribe to the + * detach event so we know when our peer goes away, and we do that + * before attaching so we don't miss an event. If all this succeeds, + * we update our state and wakeup anything waiting in accept() for a + * connection. + */ + + /* We don't care about attach since we ensure the other side has + * attached by specifying the ATTACH_ONLY flag below. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + pending, &detach_sub_id); + if (err < VMCI_SUCCESS) { + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->detach_sub_id = detach_sub_id; + + /* Now attach to the queue pair the client created. 
*/ + handle = pkt->u.handle; + + /* vpending->local_addr always has a context id so we do not need to + * worry about VMADDR_CID_ANY in this case. + */ + is_local = + vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid; + flags = VMCI_QPFLAG_ATTACH_ONLY; + flags |= is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc( + &qpair, + &handle, + vmci_trans(vpending)->produce_size, + vmci_trans(vpending)->consume_size, + pkt->dg.src.context, + flags, + vmci_transport_is_trusted( + vpending, + vpending->remote_addr.svm_cid)); + if (err < 0) { + vmci_transport_send_reset(pending, pkt); + skerr = -err; + goto destroy; + } + + vmci_trans(vpending)->qp_handle = handle; + vmci_trans(vpending)->qpair = qpair; + + /* When we send the attach message, we must be ready to handle incoming + * control messages on the newly connected socket. So we move the + * pending socket to the connected state before sending the attach + * message. Otherwise, an incoming packet triggered by the attach being + * received by the peer may be processed concurrently with what happens + * below after sending the attach message, and that incoming packet + * will find the listening socket instead of the (currently) pending + * socket. Note that enqueueing the socket increments the reference + * count, so even if a reset comes before the connection is accepted, + * the socket will be valid until it is removed from the queue. + * + * If we fail sending the attach below, we remove the socket from the + * connected list and move the socket to SS_UNCONNECTED before + * releasing the lock, so a pending slow path processing of an incoming + * packet will not see the socket in the connected state in that case. + */ + pending->sk_state = SS_CONNECTED; + + vsock_insert_connected(vpending); + + /* Notify our peer of our attach. 
*/ + err = vmci_transport_send_attach(pending, handle); + if (err < 0) { + vsock_remove_connected(vpending); + pr_err("Could not send attach\n"); + vmci_transport_send_reset(pending, pkt); + err = vmci_transport_error_to_vsock_error(err); + skerr = -err; + goto destroy; + } + + /* We have a connection. Move the now connected socket from the + * listener's pending list to the accept queue so callers of accept() + * can find it. + */ + vsock_remove_pending(listener, pending); + vsock_enqueue_accept(listener, pending); + + /* Callers of accept() will be waiting on the listening socket, not + * the pending socket. + */ + listener->sk_state_change(listener); + + return 0; + +destroy: + pending->sk_err = skerr; + pending->sk_state = SS_UNCONNECTED; + /* As long as we drop our reference, all necessary cleanup will happen + * when the cleanup function drops its reference and our destruct + * implementation is called. Note that since the listen handler will + * remove pending from the pending list upon our failure, the cleanup + * function won't drop the additional reference, which is why we do it + * here. + */ + sock_put(pending); + + return err; +} + +static int +vmci_transport_recv_connecting_client(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + int err; + int skerr; + + vsk = vsock_sk(sk); + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_ATTACH: + if (vmci_handle_is_invalid(pkt->u.handle) || + !vmci_handle_is_equal(pkt->u.handle, + vmci_trans(vsk)->qp_handle)) { + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + /* Signify the socket is connected and wakeup the waiter in + * connect(). Also place the socket in the connected table for + * accounting (it can already be found since it's in the bound + * table). 
+ */ + sk->sk_state = SS_CONNECTED; + sk->sk_socket->state = SS_CONNECTED; + vsock_insert_connected(vsk); + sk->sk_state_change(sk); + + break; + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE: + case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2: + if (pkt->u.size == 0 + || pkt->dg.src.context != vsk->remote_addr.svm_cid + || pkt->src_port != vsk->remote_addr.svm_port + || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle) + || vmci_trans(vsk)->qpair + || vmci_trans(vsk)->produce_size != 0 + || vmci_trans(vsk)->consume_size != 0 + || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID + || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + skerr = EPROTO; + err = -EINVAL; + + goto destroy; + } + + err = vmci_transport_recv_connecting_client_negotiate(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_INVALID: + err = vmci_transport_recv_connecting_client_invalid(sk, pkt); + if (err) { + skerr = -err; + goto destroy; + } + + break; + case VMCI_TRANSPORT_PACKET_TYPE_RST: + /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to + * continue processing here after they sent an INVALID packet. + * This meant that we got a RST after the INVALID. We ignore a + * RST after an INVALID. The common code doesn't send the RST + * ... so we can hang if an old version of the common code + * fails between getting a REQUEST and sending an OFFER back. + * Not much we can do about it... except hope that it doesn't + * happen. + */ + if (vsk->ignore_connecting_rst) { + vsk->ignore_connecting_rst = false; + } else { + skerr = ECONNRESET; + err = 0; + goto destroy; + } + + break; + default: + /* Close and cleanup the connection. 
*/ + skerr = EPROTO; + err = -EINVAL; + goto destroy; + } + + return 0; + +destroy: + vmci_transport_send_reset(sk, pkt); + + sk->sk_state = SS_UNCONNECTED; + sk->sk_err = skerr; + sk->sk_error_report(sk); + return err; +} + +static int vmci_transport_recv_connecting_client_negotiate( + struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err; + struct vsock_sock *vsk; + struct vmci_handle handle; + struct vmci_qp *qpair; + u32 attach_sub_id; + u32 detach_sub_id; + bool is_local; + u32 flags; + bool old_proto = true; + bool old_pkt_proto; + u16 version; + + vsk = vsock_sk(sk); + handle = VMCI_INVALID_HANDLE; + attach_sub_id = VMCI_INVALID_ID; + detach_sub_id = VMCI_INVALID_ID; + + /* If we have gotten here then we should be past the point where old + * linux vsock could have sent the bogus rst. + */ + vsk->sent_request = false; + vsk->ignore_connecting_rst = false; + + /* Verify that we're OK with the proposed queue pair size */ + if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size || + pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) { + err = -EINVAL; + goto destroy; + } + + /* At this point we know the CID the peer is using to talk to us. */ + + if (vsk->local_addr.svm_cid == VMADDR_CID_ANY) + vsk->local_addr.svm_cid = pkt->dg.dst.context; + + /* Setup the notify ops to be the highest supported version that both + * the server and the client support. + */ + + if (vmci_transport_old_proto_override(&old_pkt_proto)) { + old_proto = old_pkt_proto; + } else { + if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE) + old_proto = true; + else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2) + old_proto = false; + + } + + if (old_proto) + version = VSOCK_PROTO_INVALID; + else + version = pkt->proto; + + if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) { + err = -EINVAL; + goto destroy; + } + + /* Subscribe to attach and detach events first. 
+ * + * XXX We attach once for each queue pair created for now so it is easy + * to find the socket (it's provided), but later we should only + * subscribe once and add a way to lookup sockets by queue pair handle. + */ + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH, + vmci_transport_peer_attach_cb, + sk, &attach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH, + vmci_transport_peer_detach_cb, + sk, &detach_sub_id); + if (err < VMCI_SUCCESS) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + /* Make VMCI select the handle for us. */ + handle = VMCI_INVALID_HANDLE; + is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid; + flags = is_local ? VMCI_QPFLAG_LOCAL : 0; + + err = vmci_transport_queue_pair_alloc(&qpair, + &handle, + pkt->u.size, + pkt->u.size, + vsk->remote_addr.svm_cid, + flags, + vmci_transport_is_trusted( + vsk, + vsk-> + remote_addr.svm_cid)); + if (err < 0) + goto destroy; + + err = vmci_transport_send_qp_offer(sk, handle); + if (err < 0) { + err = vmci_transport_error_to_vsock_error(err); + goto destroy; + } + + vmci_trans(vsk)->qp_handle = handle; + vmci_trans(vsk)->qpair = qpair; + + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = + pkt->u.size; + + vmci_trans(vsk)->attach_sub_id = attach_sub_id; + vmci_trans(vsk)->detach_sub_id = detach_sub_id; + + vmci_trans(vsk)->notify_ops->process_negotiate(sk); + + return 0; + +destroy: + if (attach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(attach_sub_id); + + if (detach_sub_id != VMCI_INVALID_ID) + vmci_event_unsubscribe(detach_sub_id); + + if (!vmci_handle_is_invalid(handle)) + vmci_qpair_detach(&qpair); + + return err; +} + +static int +vmci_transport_recv_connecting_client_invalid(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + int err = 0; + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsk->sent_request) { + 
vsk->sent_request = false; + vsk->ignore_connecting_rst = true; + + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) + err = vmci_transport_error_to_vsock_error(err); + else + err = 0; + + } + + return err; +} + +static int vmci_transport_recv_connected(struct sock *sk, + struct vmci_transport_packet *pkt) +{ + struct vsock_sock *vsk; + bool pkt_processed = false; + + /* In cases where we are closing the connection, it's sufficient to + * mark the state change (and maybe error) and wake up any waiting + * threads. Since this is a connected socket, it's owned by a user + * process and will be cleaned up when the failure is passed back on + * the current or next system call. Our system call implementations + * must therefore check for error and state changes on entry and when + * being awoken. + */ + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN: + if (pkt->u.mode) { + vsk = vsock_sk(sk); + + vsk->peer_shutdown |= pkt->u.mode; + sk->sk_state_change(sk); + } + break; + + case VMCI_TRANSPORT_PACKET_TYPE_RST: + vsk = vsock_sk(sk); + /* It is possible that we sent our peer a message (e.g a + * WAITING_READ) right before we got notified that the peer had + * detached. If that happens then we can get a RST pkt back + * from our peer even though there is data available for us to + * read. In that case, don't shutdown the socket completely but + * instead allow the local client to finish reading data off + * the queuepair. Always treat a RST pkt in connected mode like + * a clean shutdown. 
+ */ + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = SS_DISCONNECTING; + + sk->sk_state_change(sk); + break; + + default: + vsk = vsock_sk(sk); + vmci_trans(vsk)->notify_ops->handle_notify_pkt( + sk, pkt, false, NULL, NULL, + &pkt_processed); + if (!pkt_processed) + return -EINVAL; + + break; + } + + return 0; +} + +static int vmci_transport_socket_init(struct vsock_sock *vsk, + struct vsock_sock *psk) +{ + vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL); + if (!vsk->trans) + return -ENOMEM; + + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE; + vmci_trans(vsk)->qpair = NULL; + vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0; + vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id = + VMCI_INVALID_ID; + vmci_trans(vsk)->notify_ops = NULL; + if (psk) { + vmci_trans(vsk)->queue_pair_size = + vmci_trans(psk)->queue_pair_size; + vmci_trans(vsk)->queue_pair_min_size = + vmci_trans(psk)->queue_pair_min_size; + vmci_trans(vsk)->queue_pair_max_size = + vmci_trans(psk)->queue_pair_max_size; + } else { + vmci_trans(vsk)->queue_pair_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE; + vmci_trans(vsk)->queue_pair_min_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN; + vmci_trans(vsk)->queue_pair_max_size = + VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX; + } + + return 0; +} + +static void vmci_transport_destruct(struct vsock_sock *vsk) +{ + if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id); + vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID; + } + + if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id); + vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID; + } + + if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { + vmci_qpair_detach(&vmci_trans(vsk)->qpair); + vmci_trans(vsk)->qp_handle = 
VMCI_INVALID_HANDLE; + vmci_trans(vsk)->produce_size = 0; + vmci_trans(vsk)->consume_size = 0; + } + + if (vmci_trans(vsk)->notify_ops) + vmci_trans(vsk)->notify_ops->socket_destruct(vsk); + + kfree(vsk->trans); + vsk->trans = NULL; +} + +static void vmci_transport_release(struct vsock_sock *vsk) +{ + if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { + vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); + vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; + } +} + +static int vmci_transport_dgram_bind(struct vsock_sock *vsk, + struct sockaddr_vm *addr) +{ + u32 port; + u32 flags; + int err; + + /* VMCI will select a resource ID for us if we provide + * VMCI_INVALID_ID. + */ + port = addr->svm_port == VMADDR_PORT_ANY ? + VMCI_INVALID_ID : addr->svm_port; + + if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE)) + return -EACCES; + + flags = addr->svm_cid == VMADDR_CID_ANY ? + VMCI_FLAG_ANYCID_DG_HND : 0; + + err = vmci_transport_datagram_create_hnd(port, flags, + vmci_transport_recv_dgram_cb, + &vsk->sk, + &vmci_trans(vsk)->dg_handle); + if (err < VMCI_SUCCESS) + return vmci_transport_error_to_vsock_error(err); + vsock_addr_init(&vsk->local_addr, addr->svm_cid, + vmci_trans(vsk)->dg_handle.resource); + + return 0; +} + +static int vmci_transport_dgram_enqueue( + struct vsock_sock *vsk, + struct sockaddr_vm *remote_addr, + struct iovec *iov, + size_t len) +{ + int err; + struct vmci_datagram *dg; + + if (len > VMCI_MAX_DG_PAYLOAD_SIZE) + return -EMSGSIZE; + + if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid)) + return -EPERM; + + /* Allocate a buffer for the user's message and our packet header. 
*/ + dg = kmalloc(len + sizeof(*dg), GFP_KERNEL); + if (!dg) + return -ENOMEM; + + memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len); + + dg->dst = vmci_make_handle(remote_addr->svm_cid, + remote_addr->svm_port); + dg->src = vmci_make_handle(vsk->local_addr.svm_cid, + vsk->local_addr.svm_port); + dg->payload_size = len; + + err = vmci_datagram_send(dg); + kfree(dg); + if (err < 0) + return vmci_transport_error_to_vsock_error(err); + + return err - sizeof(*dg); +} + +static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, + struct vsock_sock *vsk, + struct msghdr *msg, size_t len, + int flags) +{ + int err; + int noblock; + struct vmci_datagram *dg; + size_t payload_len; + struct sk_buff *skb; + + noblock = flags & MSG_DONTWAIT; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) + return -EOPNOTSUPP; + + /* Retrieve the head sk_buff from the socket's receive queue. */ + err = 0; + skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + if (err) + return err; + + if (!skb) + return -EAGAIN; + + dg = (struct vmci_datagram *)skb->data; + if (!dg) + /* err is 0, meaning we read zero bytes. */ + goto out; + + payload_len = dg->payload_size; + /* Ensure the sk_buff matches the payload size claimed in the packet. */ + if (payload_len != skb->len - sizeof(*dg)) { + err = -EINVAL; + goto out; + } + + if (payload_len > len) { + payload_len = len; + msg->msg_flags |= MSG_TRUNC; + } + + /* Place the datagram payload in the user's iovec. */ + err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov, + payload_len); + if (err) + goto out; + + msg->msg_namelen = 0; + if (msg->msg_name) { + struct sockaddr_vm *vm_addr; + + /* Provide the address of the sender. 
*/ + vm_addr = (struct sockaddr_vm *)msg->msg_name; + vsock_addr_init(vm_addr, dg->src.context, dg->src.resource); + msg->msg_namelen = sizeof(*vm_addr); + } + err = payload_len; + +out: + skb_free_datagram(&vsk->sk, skb); + return err; +} + +static bool vmci_transport_dgram_allow(u32 cid, u32 port) +{ + if (cid == VMADDR_CID_HYPERVISOR) { + /* Registrations of PBRPC Servers do not modify VMX/Hypervisor + * state and are allowed. + */ + return port == VMCI_UNITY_PBRPC_REGISTER; + } + + return true; +} + +static int vmci_transport_connect(struct vsock_sock *vsk) +{ + int err; + bool old_pkt_proto = false; + struct sock *sk = &vsk->sk; + + if (vmci_transport_old_proto_override(&old_pkt_proto) && + old_pkt_proto) { + err = vmci_transport_send_conn_request( + sk, vmci_trans(vsk)->queue_pair_size); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + } else { + int supported_proto_versions = + vmci_transport_new_proto_supported_versions(); + err = vmci_transport_send_conn_request2( + sk, vmci_trans(vsk)->queue_pair_size, + supported_proto_versions); + if (err < 0) { + sk->sk_state = SS_UNCONNECTED; + return err; + } + + vsk->sent_request = true; + } + + return err; +} + +static ssize_t vmci_transport_stream_dequeue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len, + int flags) +{ + if (flags & MSG_PEEK) + return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0); + else + return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static ssize_t vmci_transport_stream_enqueue( + struct vsock_sock *vsk, + struct iovec *iov, + size_t len) +{ + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0); +} + +static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) +{ + return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair); +} + +static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk) +{ + return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair); +} + +static u64 vmci_transport_stream_rcvhiwat(struct 
vsock_sock *vsk) +{ + return vmci_trans(vsk)->consume_size; +} + +static bool vmci_transport_stream_is_active(struct vsock_sock *vsk) +{ + return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle); +} + +static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_size; +} + +static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_min_size; +} + +static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk) +{ + return vmci_trans(vsk)->queue_pair_max_size; +} + +static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_min_size) + vmci_trans(vsk)->queue_pair_min_size = val; + if (val > vmci_trans(vsk)->queue_pair_max_size) + vmci_trans(vsk)->queue_pair_max_size = val; + vmci_trans(vsk)->queue_pair_size = val; +} + +static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val > vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_min_size = val; +} + +static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk, + u64 val) +{ + if (val < vmci_trans(vsk)->queue_pair_size) + vmci_trans(vsk)->queue_pair_size = val; + vmci_trans(vsk)->queue_pair_max_size = val; +} + +static int vmci_transport_notify_poll_in( + struct vsock_sock *vsk, + size_t target, + bool *data_ready_now) +{ + return vmci_trans(vsk)->notify_ops->poll_in( + &vsk->sk, target, data_ready_now); +} + +static int vmci_transport_notify_poll_out( + struct vsock_sock *vsk, + size_t target, + bool *space_available_now) +{ + return vmci_trans(vsk)->notify_ops->poll_out( + &vsk->sk, target, space_available_now); +} + +static int vmci_transport_notify_recv_init( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_init( + &vsk->sk, target, + (struct 
vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_block( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_block( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_pre_dequeue( + struct vsock_sock *vsk, + size_t target, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_pre_dequeue( + &vsk->sk, target, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_recv_post_dequeue( + struct vsock_sock *vsk, + size_t target, + ssize_t copied, + bool data_read, + struct vsock_transport_recv_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->recv_post_dequeue( + &vsk->sk, target, copied, data_read, + (struct vmci_transport_recv_notify_data *)data); +} + +static int vmci_transport_notify_send_init( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_init( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_block( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_block( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_pre_enqueue( + struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_pre_enqueue( + &vsk->sk, + (struct vmci_transport_send_notify_data *)data); +} + +static int vmci_transport_notify_send_post_enqueue( + struct vsock_sock *vsk, + ssize_t written, + struct vsock_transport_send_notify_data *data) +{ + return vmci_trans(vsk)->notify_ops->send_post_enqueue( + &vsk->sk, written, + (struct vmci_transport_send_notify_data *)data); +} + +static bool 
vmci_transport_old_proto_override(bool *old_pkt_proto) +{ + if (PROTOCOL_OVERRIDE != -1) { + if (PROTOCOL_OVERRIDE == 0) + *old_pkt_proto = true; + else + *old_pkt_proto = false; + + pr_info("Proto override in use\n"); + return true; + } + + return false; +} + +static bool vmci_transport_proto_to_notify_struct(struct sock *sk, + u16 *proto, + bool old_pkt_proto) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (old_pkt_proto) { + if (*proto != VSOCK_PROTO_INVALID) { + pr_err("Can't set both an old and new protocol\n"); + return false; + } + vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops; + goto exit; + } + + switch (*proto) { + case VSOCK_PROTO_PKT_ON_NOTIFY: + vmci_trans(vsk)->notify_ops = + &vmci_transport_notify_pkt_q_state_ops; + break; + default: + pr_err("Unknown notify protocol version\n"); + return false; + } + +exit: + vmci_trans(vsk)->notify_ops->socket_init(sk); + return true; +} + +static u16 vmci_transport_new_proto_supported_versions(void) +{ + if (PROTOCOL_OVERRIDE != -1) + return PROTOCOL_OVERRIDE; + + return VSOCK_PROTO_ALL_SUPPORTED; +} + +static u32 vmci_transport_get_local_cid(void) +{ + return vmci_get_context_id(); +} + +static struct vsock_transport vmci_transport = { + .init = vmci_transport_socket_init, + .destruct = vmci_transport_destruct, + .release = vmci_transport_release, + .connect = vmci_transport_connect, + .dgram_bind = vmci_transport_dgram_bind, + .dgram_dequeue = vmci_transport_dgram_dequeue, + .dgram_enqueue = vmci_transport_dgram_enqueue, + .dgram_allow = vmci_transport_dgram_allow, + .stream_dequeue = vmci_transport_stream_dequeue, + .stream_enqueue = vmci_transport_stream_enqueue, + .stream_has_data = vmci_transport_stream_has_data, + .stream_has_space = vmci_transport_stream_has_space, + .stream_rcvhiwat = vmci_transport_stream_rcvhiwat, + .stream_is_active = vmci_transport_stream_is_active, + .stream_allow = vmci_transport_stream_allow, + .notify_poll_in = vmci_transport_notify_poll_in, + .notify_poll_out = 
vmci_transport_notify_poll_out, + .notify_recv_init = vmci_transport_notify_recv_init, + .notify_recv_pre_block = vmci_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue, + .notify_send_init = vmci_transport_notify_send_init, + .notify_send_pre_block = vmci_transport_notify_send_pre_block, + .notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue, + .shutdown = vmci_transport_shutdown, + .set_buffer_size = vmci_transport_set_buffer_size, + .set_min_buffer_size = vmci_transport_set_min_buffer_size, + .set_max_buffer_size = vmci_transport_set_max_buffer_size, + .get_buffer_size = vmci_transport_get_buffer_size, + .get_min_buffer_size = vmci_transport_get_min_buffer_size, + .get_max_buffer_size = vmci_transport_get_max_buffer_size, + .get_local_cid = vmci_transport_get_local_cid, +}; + +static int __init vmci_transport_init(void) +{ + int err; + + /* Create the datagram handle that we will use to send and receive all + * VSocket control messages for this context. + */ + err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID, + VMCI_FLAG_ANYCID_DG_HND, + vmci_transport_recv_stream_cb, + NULL, + &vmci_transport_stream_handle); + if (err < VMCI_SUCCESS) { + pr_err("Unable to create datagram handle. (%d)\n", err); + return vmci_transport_error_to_vsock_error(err); + } + + err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED, + vmci_transport_qp_resumed_cb, + NULL, &vmci_transport_qp_resumed_sub_id); + if (err < VMCI_SUCCESS) { + pr_err("Unable to subscribe to resumed event. 
(%d)\n", err); + err = vmci_transport_error_to_vsock_error(err); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + goto err_destroy_stream_handle; + } + + err = vsock_core_init(&vmci_transport); + if (err < 0) + goto err_unsubscribe; + + return 0; + +err_unsubscribe: + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); +err_destroy_stream_handle: + vmci_datagram_destroy_handle(vmci_transport_stream_handle); + return err; +} +module_init(vmci_transport_init); + +static void __exit vmci_transport_exit(void) +{ + if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) { + if (vmci_datagram_destroy_handle( + vmci_transport_stream_handle) != VMCI_SUCCESS) + pr_err("Couldn't destroy datagram handle\n"); + vmci_transport_stream_handle = VMCI_INVALID_HANDLE; + } + + if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) { + vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id); + vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; + } + + vsock_core_exit(); +} +module_exit(vmci_transport_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("vmware_vsock"); +MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h new file mode 100644 index 000000000000..1bf991803ec0 --- /dev/null +++ b/net/vmw_vsock/vmci_transport.h @@ -0,0 +1,139 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _VMCI_TRANSPORT_H_ +#define _VMCI_TRANSPORT_H_ + +#include +#include + +#include "vsock_addr.h" +#include "af_vsock.h" + +/* If the packet format changes in a release then this should change too. */ +#define VMCI_TRANSPORT_PACKET_VERSION 1 + +/* The resource ID on which control packets are sent. */ +#define VMCI_TRANSPORT_PACKET_RID 1 + +#define VSOCK_PROTO_INVALID 0 +#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0) +#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY) + +#define vmci_trans(_vsk) ((struct vmci_transport *)((_vsk)->trans)) + +enum vmci_transport_packet_type { + VMCI_TRANSPORT_PACKET_TYPE_INVALID = 0, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE, + VMCI_TRANSPORT_PACKET_TYPE_OFFER, + VMCI_TRANSPORT_PACKET_TYPE_ATTACH, + VMCI_TRANSPORT_PACKET_TYPE_WROTE, + VMCI_TRANSPORT_PACKET_TYPE_READ, + VMCI_TRANSPORT_PACKET_TYPE_RST, + VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE, + VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ, + VMCI_TRANSPORT_PACKET_TYPE_REQUEST2, + VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2, + VMCI_TRANSPORT_PACKET_TYPE_MAX +}; + +struct vmci_transport_waiting_info { + u64 generation; + u64 offset; +}; + +/* Control packet type for STREAM sockets. DGRAMs have no control packets nor + * special packet header for data packets, they are just raw VMCI DGRAM + * messages. For STREAMs, control packets are sent over the control channel + * while data is written and read directly from queue pairs with no packet + * format. 
+ */ +struct vmci_transport_packet { + struct vmci_datagram dg; + u8 version; + u8 type; + u16 proto; + u32 src_port; + u32 dst_port; + u32 _reserved2; + union { + u64 size; + u64 mode; + struct vmci_handle handle; + struct vmci_transport_waiting_info wait; + } u; +}; + +struct vmci_transport_notify_pkt { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_read; + bool peer_waiting_write; + bool peer_waiting_write_detected; + bool sent_waiting_read; + bool sent_waiting_write; + struct vmci_transport_waiting_info peer_waiting_read_info; + struct vmci_transport_waiting_info peer_waiting_write_info; + u64 produce_q_generation; + u64 consume_q_generation; +}; + +struct vmci_transport_notify_pkt_q_state { + u64 write_notify_window; + u64 write_notify_min_window; + bool peer_waiting_write; + bool peer_waiting_write_detected; +}; + +union vmci_transport_notify { + struct vmci_transport_notify_pkt pkt; + struct vmci_transport_notify_pkt_q_state pkt_q_state; +}; + +/* Our transport-specific data. */ +struct vmci_transport { + /* For DGRAMs. */ + struct vmci_handle dg_handle; + /* For STREAMs. 
*/ + struct vmci_handle qp_handle; + struct vmci_qp *qpair; + u64 produce_size; + u64 consume_size; + u64 queue_pair_size; + u64 queue_pair_min_size; + u64 queue_pair_max_size; + u32 attach_sub_id; + u32 detach_sub_id; + union vmci_transport_notify notify; + struct vmci_transport_notify_ops *notify_ops; +}; + +int vmci_transport_register(void); +void vmci_transport_unregister(void); + +int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_read_bh(struct sockaddr_vm *dst, + struct sockaddr_vm *src); +int vmci_transport_send_wrote(struct sock *sk); +int vmci_transport_send_read(struct sock *sk); +int vmci_transport_send_waiting_write(struct sock *sk, + struct vmci_transport_waiting_info *wait); +int vmci_transport_send_waiting_read(struct sock *sk, + struct vmci_transport_waiting_info *wait); + +#endif diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c new file mode 100644 index 000000000000..9a730744e7bc --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -0,0 +1,680 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include +#include + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. + */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); +#else + notify_limit = 0; +#endif + + /* For now we ignore the wait information and just see if the free + * space exceeds the notify limit. Note that improving this function + * to be more intelligent will not require a protocol change and will + * retain compatibility between endpoints with mixed versions of this + * function. + * + * The notify_limit is used to delay notifications in the case where + * flow control is enabled. 
Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (retval) { + /* + * Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. + */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } +#endif + return retval; +#else + return true; +#endif +} + +static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + if (!PKT_FIELD(vsk, peer_waiting_read)) + return false; + + /* For now we ignore the wait information and just see if there is any + * data for our peer to read. Note that improving this function to be + * more intelligent will not require a protocol change and will retain + * compatibility between endpoints with mixed versions of this + * function. 
+ */ + return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0; +#else + return true; +#endif +} + +static void +vmci_transport_handle_waiting_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_read) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + + if (vmci_transport_notify_waiting_read(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_wrote_bh(dst, src) > 0; + else + sent = vmci_transport_send_wrote(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_read) = false; + } +#endif +} + +static void +vmci_transport_handle_waiting_write(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + + PKT_FIELD(vsk, peer_waiting_write) = true; + memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); + + if (vmci_transport_notify_waiting_write(vsk)) { + bool sent; + + if (bottom_half) + sent = vmci_transport_send_read_bh(dst, src) > 0; + else + sent = vmci_transport_send_read(sk) > 0; + + if (sent) + PKT_FIELD(vsk, peer_waiting_write) = false; + } +#endif +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + + vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_write) = false; +#endif + + sk->sk_write_space(sk); +} + +static bool send_waiting_read(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct 
vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_read)) + return true; + + if (PKT_FIELD(vsk, write_notify_window) < + vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); + + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->consume_size - head; + if (room_needed >= room_left) { + waiting_info.offset = room_needed - room_left; + waiting_info.generation = + PKT_FIELD(vsk, consume_q_generation) + 1; + } else { + waiting_info.offset = head + room_needed; + waiting_info.generation = PKT_FIELD(vsk, consume_q_generation); + } + + ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_read) = true; + + return ret; +#else + return true; +#endif +} + +static bool send_waiting_write(struct sock *sk, u64 room_needed) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk; + struct vmci_transport_waiting_info waiting_info; + u64 tail; + u64 head; + u64 room_left; + bool ret; + + vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, sent_waiting_write)) + return true; + + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head); + room_left = vmci_trans(vsk)->produce_size - tail; + if (room_needed + 1 >= room_left) { + /* Wraps around to current generation. 
*/ + waiting_info.offset = room_needed + 1 - room_left; + waiting_info.generation = PKT_FIELD(vsk, produce_q_generation); + } else { + waiting_info.offset = tail + room_needed + 1; + waiting_info.generation = + PKT_FIELD(vsk, produce_q_generation) - 1; + } + + ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0; + if (ret) + PKT_FIELD(vsk, sent_waiting_write) = true; + + return ret; +#else + return true; +#endif +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. 
+ */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) + pr_err("%p unable to send read notify to peer\n", sk); + else +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_write) = false; +#endif + + } + return err; +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + struct vsock_sock *vsk = vsock_sk(sk); + PKT_FIELD(vsk, sent_waiting_read) = false; +#endif + sk->sk_data_ready(sk, 0); +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_read) = false; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + PKT_FIELD(vsk, sent_waiting_read) = false; + PKT_FIELD(vsk, sent_waiting_write) = false; + PKT_FIELD(vsk, produce_q_generation) = 0; + PKT_FIELD(vsk, consume_q_generation) = 0; + + memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_read_info))); + memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0, + sizeof(PKT_FIELD(vsk, peer_waiting_write_info))); +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. 
Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) { + if (!send_waiting_read(sk, 1)) + return -1; + + } + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Notify the peer that we are waiting if the queue is full. We + * only send a waiting write if the queue is full because + * otherwise we end up in an infinite WAITING_WRITE, READ, + * WAITING_WRITE, READ, etc. loop. Treat failing to send the + * notification as a socket error, passing that back through + * the mask. + */ + if (!send_waiting_write(sk, 1)) + return -1; + + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. 
+ */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } +#endif +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + /* Notify our peer that we are waiting for data to read. */ + if (!send_waiting_read(sk, target)) { + err = -EHOSTUNREACH; + return err; + } +#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + data->notify_on_block = false; + } +#endif + + return err; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + /* Now consume up to len bytes from the queue. Note that since we have + * the socket locked we should copy at least ready bytes. + */ +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note + * that this is safe since we hold the socket lock across the + * two queue pair operations. 
+ */ + if (copied >= + vmci_trans(vsk)->consume_size - data->consume_head) + PKT_FIELD(vsk, consume_q_generation)++; +#endif + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + } + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ +#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY + data->consume_head = 0; + data->produce_tail = 0; +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + /* Notify our peer that we are waiting for room to write. */ + if (!send_waiting_write(sk, 1)) + return -EHOSTUNREACH; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, + &data->produce_tail, + &data->consume_head); +#endif + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + int retries = 0; + + vsk = vsock_sk(sk); + +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + /* Detect a wrap-around to maintain queue generation. Note that this + * is safe since we hold the socket lock across the two queue pair + * operations. + */ + if (written >= vmci_trans(vsk)->produce_size - data->produce_tail) + PKT_FIELD(vsk, produce_q_generation)++; + +#endif + + if (vmci_transport_notify_waiting_read(vsk)) { + /* Notify the peer that we have written, retrying the send on + * failure up to our maximum value. See the XXX comment for the + * corresponding piece of code in StreamRecvmsg() for potential + * improvements. 
+ */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + pr_err("%p unable to send wrote notify to peer\n", sk); + return err; + } else { +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) + PKT_FIELD(vsk, peer_waiting_read) = false; +#endif + } + } + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE: + vmci_transport_handle_waiting_write(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + + case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ: + vmci_transport_handle_waiting_read(sk, pkt, bottom_half, + dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + 
PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +/* Socket control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h new file mode 100644 index 000000000000..7df793249b6c --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify.h @@ -0,0 +1,83 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __VMCI_TRANSPORT_NOTIFY_H__ +#define __VMCI_TRANSPORT_NOTIFY_H__ + +#include +#include +#include +#include + +#include "vmci_transport.h" + +/* Comment this out to compare with old protocol. 
*/ +#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1 +#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) +/* Comment this out to remove flow control for "new" protocol */ +#define VSOCK_OPTIMIZATION_FLOW_CONTROL 1 +#endif + +#define VMCI_TRANSPORT_MAX_DGRAM_RESENDS 10 + +struct vmci_transport_recv_notify_data { + u64 consume_head; + u64 produce_tail; + bool notify_on_block; +}; + +struct vmci_transport_send_notify_data { + u64 consume_head; + u64 produce_tail; +}; + +/* Socket notification callbacks. */ +struct vmci_transport_notify_ops { + void (*socket_init) (struct sock *sk); + void (*socket_destruct) (struct vsock_sock *vsk); + int (*poll_in) (struct sock *sk, size_t target, + bool *data_ready_now); + int (*poll_out) (struct sock *sk, size_t target, + bool *space_avail_now); + void (*handle_notify_pkt) (struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, struct sockaddr_vm *dst, + struct sockaddr_vm *src, + bool *pkt_processed); + int (*recv_init) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_block) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_pre_dequeue) (struct sock *sk, size_t target, + struct vmci_transport_recv_notify_data *data); + int (*recv_post_dequeue) (struct sock *sk, size_t target, + ssize_t copied, bool data_read, + struct vmci_transport_recv_notify_data *data); + int (*send_init) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_block) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_pre_enqueue) (struct sock *sk, + struct vmci_transport_send_notify_data *data); + int (*send_post_enqueue) (struct sock *sk, ssize_t written, + struct vmci_transport_send_notify_data *data); + void (*process_request) (struct sock *sk); + void (*process_negotiate) (struct sock *sk); +}; + +extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops; +extern struct 
vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops; + +#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */ diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c new file mode 100644 index 000000000000..622bd7aa1016 --- /dev/null +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -0,0 +1,438 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2009-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +#include "vmci_transport_notify.h" + +#define PKT_FIELD(vsk, field_name) \ + (vmci_trans(vsk)->notify.pkt_q_state.field_name) + +static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk) +{ + bool retval; + u64 notify_limit; + + if (!PKT_FIELD(vsk, peer_waiting_write)) + return false; + + /* When the sender blocks, we take that as a sign that the sender is + * faster than the receiver. To reduce the transmit rate of the sender, + * we delay the sending of the read notification by decreasing the + * write_notify_window. The notification is delayed until the number of + * bytes used in the queue drops below the write_notify_window. 
+ */ + + if (!PKT_FIELD(vsk, peer_waiting_write_detected)) { + PKT_FIELD(vsk, peer_waiting_write_detected) = true; + if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) { + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + } else { + PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + + } + } + notify_limit = vmci_trans(vsk)->consume_size - + PKT_FIELD(vsk, write_notify_window); + + /* The notify_limit is used to delay notifications in the case where + * flow control is enabled. Below the test is expressed in terms of + * free space in the queue: if free_space > ConsumeSize - + * write_notify_window then notify An alternate way of expressing this + * is to rewrite the expression to use the data ready in the receive + * queue: if write_notify_window > bufferReady then notify as + * free_space == ConsumeSize - bufferReady. + */ + + retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) > + notify_limit; + + if (retval) { + /* Once we notify the peer, we reset the detected flag so the + * next wait will again cause a decrease in the window size. 
+ */ + + PKT_FIELD(vsk, peer_waiting_write_detected) = false; + } + return retval; +} + +static void +vmci_transport_handle_read(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_write_space(sk); +} + +static void +vmci_transport_handle_wrote(struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, struct sockaddr_vm *src) +{ + sk->sk_data_ready(sk, 0); +} + +static void vsock_block_update_write_window(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size) + PKT_FIELD(vsk, write_notify_window) = + min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE, + vmci_trans(vsk)->consume_size); +} + +static int vmci_transport_send_read_notification(struct sock *sk) +{ + struct vsock_sock *vsk; + bool sent_read; + unsigned int retries; + int err; + + vsk = vsock_sk(sk); + sent_read = false; + retries = 0; + err = 0; + + if (vmci_transport_notify_waiting_write(vsk)) { + /* Notify the peer that we have read, retrying the send on + * failure up to our maximum value. XXX For now we just log + * the failure, but later we should schedule a work item to + * handle the resend until it succeeds. That would require + * keeping track of work items in the vsk and cleaning them up + * upon socket close. 
+ */ + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_read && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_read(sk); + if (err >= 0) + sent_read = true; + + retries++; + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read) + pr_err("%p unable to send read notification to peer\n", + sk); + else + PKT_FIELD(vsk, peer_waiting_write) = false; + + } + return err; +} + +static void vmci_transport_notify_pkt_socket_init(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk) +{ + PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE; + PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE; + PKT_FIELD(vsk, peer_waiting_write) = false; + PKT_FIELD(vsk, peer_waiting_write_detected) = false; +} + +static int +vmci_transport_notify_pkt_poll_in(struct sock *sk, + size_t target, bool *data_ready_now) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + if (vsock_stream_has_data(vsk)) { + *data_ready_now = true; + } else { + /* We can't read right now because there is nothing in the + * queue. Ask for notifications when there is something to + * read. + */ + if (sk->sk_state == SS_CONNECTED) + vsock_block_update_write_window(sk); + *data_ready_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_poll_out(struct sock *sk, + size_t target, bool *space_avail_now) +{ + s64 produce_q_free_space; + struct vsock_sock *vsk = vsock_sk(sk); + + produce_q_free_space = vsock_stream_has_space(vsk); + if (produce_q_free_space > 0) { + *space_avail_now = true; + return 0; + } else if (produce_q_free_space == 0) { + /* This is a connected socket but we can't currently send data. + * Nothing else to do. 
+ */ + *space_avail_now = false; + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_init( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + data->consume_head = 0; + data->produce_tail = 0; + data->notify_on_block = false; + + if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) { + PKT_FIELD(vsk, write_notify_min_window) = target + 1; + if (PKT_FIELD(vsk, write_notify_window) < + PKT_FIELD(vsk, write_notify_min_window)) { + /* If the current window is smaller than the new + * minimal window size, we need to reevaluate whether + * we need to notify the sender. If the number of ready + * bytes are smaller than the new window, we need to + * send a notification to the sender before we block. + */ + + PKT_FIELD(vsk, write_notify_window) = + PKT_FIELD(vsk, write_notify_min_window); + data->notify_on_block = true; + } + } + + return 0; +} + +static int +vmci_transport_notify_pkt_recv_pre_block( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + int err = 0; + + vsock_block_update_write_window(sk); + + if (data->notify_on_block) { + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + data->notify_on_block = false; + } + + return err; +} + +static int +vmci_transport_notify_pkt_recv_post_dequeue( + struct sock *sk, + size_t target, + ssize_t copied, + bool data_read, + struct vmci_transport_recv_notify_data *data) +{ + struct vsock_sock *vsk; + int err; + bool was_full = false; + u64 free_space; + + vsk = vsock_sk(sk); + err = 0; + + if (data_read) { + smp_mb(); + + free_space = + vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair); + was_full = free_space == copied; + + if (was_full) + PKT_FIELD(vsk, peer_waiting_write) = true; + + err = vmci_transport_send_read_notification(sk); + if (err < 0) + return err; + + /* See the comment in + * vmci_transport_notify_pkt_send_post_enqueue(). 
+ */ + sk->sk_data_ready(sk, 0); + } + + return err; +} + +static int +vmci_transport_notify_pkt_send_init( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + data->consume_head = 0; + data->produce_tail = 0; + + return 0; +} + +static int +vmci_transport_notify_pkt_send_post_enqueue( + struct sock *sk, + ssize_t written, + struct vmci_transport_send_notify_data *data) +{ + int err = 0; + struct vsock_sock *vsk; + bool sent_wrote = false; + bool was_empty; + int retries = 0; + + vsk = vsock_sk(sk); + + smp_mb(); + + was_empty = + vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written; + if (was_empty) { + while (!(vsk->peer_shutdown & RCV_SHUTDOWN) && + !sent_wrote && + retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) { + err = vmci_transport_send_wrote(sk); + if (err >= 0) + sent_wrote = true; + + retries++; + } + } + + if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) { + pr_err("%p unable to send wrote notification to peer\n", + sk); + return err; + } + + return err; +} + +static void +vmci_transport_notify_pkt_handle_pkt( + struct sock *sk, + struct vmci_transport_packet *pkt, + bool bottom_half, + struct sockaddr_vm *dst, + struct sockaddr_vm *src, bool *pkt_processed) +{ + bool processed = false; + + switch (pkt->type) { + case VMCI_TRANSPORT_PACKET_TYPE_WROTE: + vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src); + processed = true; + break; + case VMCI_TRANSPORT_PACKET_TYPE_READ: + vmci_transport_handle_read(sk, pkt, bottom_half, dst, src); + processed = true; + break; + } + + if (pkt_processed) + *pkt_processed = processed; +} + +static void vmci_transport_notify_pkt_process_request(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static void 
vmci_transport_notify_pkt_process_negotiate(struct sock *sk) +{ + struct vsock_sock *vsk = vsock_sk(sk); + + PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size; + if (vmci_trans(vsk)->consume_size < + PKT_FIELD(vsk, write_notify_min_window)) + PKT_FIELD(vsk, write_notify_min_window) = + vmci_trans(vsk)->consume_size; +} + +static int +vmci_transport_notify_pkt_recv_pre_dequeue( + struct sock *sk, + size_t target, + struct vmci_transport_recv_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_block( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +static int +vmci_transport_notify_pkt_send_pre_enqueue( + struct sock *sk, + struct vmci_transport_send_notify_data *data) +{ + return 0; /* NOP for QState. */ +} + +/* Socket always on control packet based operations. */ +struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = { + vmci_transport_notify_pkt_socket_init, + vmci_transport_notify_pkt_socket_destruct, + vmci_transport_notify_pkt_poll_in, + vmci_transport_notify_pkt_poll_out, + vmci_transport_notify_pkt_handle_pkt, + vmci_transport_notify_pkt_recv_init, + vmci_transport_notify_pkt_recv_pre_block, + vmci_transport_notify_pkt_recv_pre_dequeue, + vmci_transport_notify_pkt_recv_post_dequeue, + vmci_transport_notify_pkt_send_init, + vmci_transport_notify_pkt_send_pre_block, + vmci_transport_notify_pkt_send_pre_enqueue, + vmci_transport_notify_pkt_send_post_enqueue, + vmci_transport_notify_pkt_process_request, + vmci_transport_notify_pkt_process_negotiate, +}; diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c new file mode 100644 index 000000000000..b7df1aea7c59 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.c @@ -0,0 +1,86 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2012 VMware, Inc. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +#include "vsock_addr.h" + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port) +{ + memset(addr, 0, sizeof(*addr)); + addr->svm_family = AF_VSOCK; + addr->svm_cid = cid; + addr->svm_port = port; +} +EXPORT_SYMBOL_GPL(vsock_addr_init); + +int vsock_addr_validate(const struct sockaddr_vm *addr) +{ + if (!addr) + return -EFAULT; + + if (addr->svm_family != AF_VSOCK) + return -EAFNOSUPPORT; + + if (addr->svm_zero[0] != 0) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_addr_validate); + +bool vsock_addr_bound(const struct sockaddr_vm *addr) +{ + return addr->svm_port != VMADDR_PORT_ANY; +} +EXPORT_SYMBOL_GPL(vsock_addr_bound); + +void vsock_addr_unbind(struct sockaddr_vm *addr) +{ + vsock_addr_init(addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); +} +EXPORT_SYMBOL_GPL(vsock_addr_unbind); + +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return addr->svm_cid == other->svm_cid && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr); + +bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other) +{ + return (addr->svm_cid == VMADDR_CID_ANY || + other->svm_cid == VMADDR_CID_ANY || + addr->svm_cid == other->svm_cid) && + addr->svm_port == other->svm_port; +} +EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any); + +int vsock_addr_cast(const struct sockaddr *addr, + size_t len, struct sockaddr_vm **out_addr) +{ + if (len < 
sizeof(**out_addr)) + return -EFAULT; + + *out_addr = (struct sockaddr_vm *)addr; + return vsock_addr_validate(*out_addr); +} +EXPORT_SYMBOL_GPL(vsock_addr_cast); diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h new file mode 100644 index 000000000000..cdfbcefdf843 --- /dev/null +++ b/net/vmw_vsock/vsock_addr.h @@ -0,0 +1,32 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_ADDR_H_ +#define _VSOCK_ADDR_H_ + +#include + +void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port); +int vsock_addr_validate(const struct sockaddr_vm *addr); +bool vsock_addr_bound(const struct sockaddr_vm *addr); +void vsock_addr_unbind(struct sockaddr_vm *addr); +bool vsock_addr_equals_addr(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr, + const struct sockaddr_vm *other); +int vsock_addr_cast(const struct sockaddr *addr, size_t len, + struct sockaddr_vm **out_addr); + +#endif diff --git a/net/vmw_vsock/vsock_version.h b/net/vmw_vsock/vsock_version.h new file mode 100644 index 000000000000..4df7f5e2151c --- /dev/null +++ b/net/vmw_vsock/vsock_version.h @@ -0,0 +1,22 @@ +/* + * VMware vSockets Driver + * + * Copyright (C) 2011-2012 VMware, Inc. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _VSOCK_VERSION_H_ +#define _VSOCK_VERSION_H_ + +#define VSOCK_DRIVER_VERSION_PARTS { 1, 0, 0, 0 } +#define VSOCK_DRIVER_VERSION_STRING "1.0.0.0-k" + +#endif /* _VSOCK_VERSION_H_ */ -- cgit v1.2.3 From 86fbe9bb599fcaf7e92e38dbfdad0414a2d68f7d Mon Sep 17 00:00:00 2001 From: David Ward Date: Fri, 8 Feb 2013 17:17:07 +0000 Subject: net/8021q: Implement Multiple VLAN Registration Protocol (MVRP) Initial implementation of the Multiple VLAN Registration Protocol (MVRP) from IEEE 802.1Q-2011, based on the existing implementation of the GARP VLAN Registration Protocol (GVRP). Signed-off-by: David Ward Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + include/uapi/linux/if_vlan.h | 1 + net/8021q/Kconfig | 11 +++++++ net/8021q/Makefile | 1 + net/8021q/vlan.c | 27 +++++++++++++--- net/8021q/vlan.h | 16 ++++++++++ net/8021q/vlan_dev.c | 12 +++++++- net/8021q/vlan_mvrp.c | 72 +++++++++++++++++++++++++++++++++++++++++++ net/8021q/vlan_netlink.c | 2 +- 9 files changed, 136 insertions(+), 7 deletions(-) create mode 100644 net/8021q/vlan_mvrp.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 67fb87ca1094..798032d01112 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -83,6 +83,7 @@ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. 
*/ #define ETH_P_TIPC 0x88CA /* TIPC */ #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ +#define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */ #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETH_P_TDLS 0x890D /* TDLS */ diff --git a/include/uapi/linux/if_vlan.h b/include/uapi/linux/if_vlan.h index 0744f8e65d15..7e5e6b397332 100644 --- a/include/uapi/linux/if_vlan.h +++ b/include/uapi/linux/if_vlan.h @@ -34,6 +34,7 @@ enum vlan_flags { VLAN_FLAG_REORDER_HDR = 0x1, VLAN_FLAG_GVRP = 0x2, VLAN_FLAG_LOOSE_BINDING = 0x4, + VLAN_FLAG_MVRP = 0x8, }; enum vlan_name_types { diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index fa073a54963e..8f7517df41a5 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -27,3 +27,14 @@ config VLAN_8021Q_GVRP automatic propagation of registered VLANs to switches. If unsure, say N. + +config VLAN_8021Q_MVRP + bool "MVRP (Multiple VLAN Registration Protocol) support" + depends on VLAN_8021Q + select MRP + help + Select this to enable MVRP end-system support. MVRP is used for + automatic propagation of registered VLANs to switches; it + supersedes GVRP and is not backwards-compatible. + + If unsure, say N. 
diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 9f4f174ead1c..7bc8db08d7ef 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -6,5 +6,6 @@ obj-$(CONFIG_VLAN_8021Q) += 8021q.o 8021q-y := vlan.o vlan_dev.o vlan_netlink.o 8021q-$(CONFIG_VLAN_8021Q_GVRP) += vlan_gvrp.o +8021q-$(CONFIG_VLAN_8021Q_MVRP) += vlan_mvrp.o 8021q-$(CONFIG_PROC_FS) += vlanproc.o diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index addc578d5443..a18714469bf7 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -95,6 +95,8 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp->nr_vlan_devs--; + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_leave(dev); if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_leave(dev); @@ -107,8 +109,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) netdev_upper_dev_unlink(real_dev, dev); - if (grp->nr_vlan_devs == 0) + if (grp->nr_vlan_devs == 0) { + vlan_mvrp_uninit_applicant(real_dev); vlan_gvrp_uninit_applicant(real_dev); + } /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); @@ -151,15 +155,18 @@ int register_vlan_dev(struct net_device *dev) err = vlan_gvrp_init_applicant(real_dev); if (err < 0) goto out_vid_del; + err = vlan_mvrp_init_applicant(real_dev); + if (err < 0) + goto out_uninit_gvrp; } err = vlan_group_prealloc_vid(grp, vlan_id); if (err < 0) - goto out_uninit_applicant; + goto out_uninit_mvrp; err = netdev_upper_dev_link(real_dev, dev); if (err) - goto out_uninit_applicant; + goto out_uninit_mvrp; err = register_netdevice(dev); if (err < 0) @@ -181,7 +188,10 @@ int register_vlan_dev(struct net_device *dev) out_upper_dev_unlink: netdev_upper_dev_unlink(real_dev, dev); -out_uninit_applicant: +out_uninit_mvrp: + if (grp->nr_vlan_devs == 0) + vlan_mvrp_uninit_applicant(real_dev); +out_uninit_gvrp: if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); out_vid_del: @@ -655,13 +665,19 @@ static int __init vlan_proto_init(void) if (err < 0) 
goto err3; - err = vlan_netlink_init(); + err = vlan_mvrp_init(); if (err < 0) goto err4; + err = vlan_netlink_init(); + if (err < 0) + goto err5; + vlan_ioctl_set(vlan_ioctl_handler); return 0; +err5: + vlan_mvrp_uninit(); err4: vlan_gvrp_uninit(); err3: @@ -682,6 +698,7 @@ static void __exit vlan_cleanup_module(void) unregister_pernet_subsys(&vlan_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ + vlan_mvrp_uninit(); vlan_gvrp_uninit(); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index a4886d94c40c..670f1e8cfc0f 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -171,6 +171,22 @@ static inline int vlan_gvrp_init(void) { return 0; } static inline void vlan_gvrp_uninit(void) {} #endif +#ifdef CONFIG_VLAN_8021Q_MVRP +extern int vlan_mvrp_request_join(const struct net_device *dev); +extern void vlan_mvrp_request_leave(const struct net_device *dev); +extern int vlan_mvrp_init_applicant(struct net_device *dev); +extern void vlan_mvrp_uninit_applicant(struct net_device *dev); +extern int vlan_mvrp_init(void); +extern void vlan_mvrp_uninit(void); +#else +static inline int vlan_mvrp_request_join(const struct net_device *dev) { return 0; } +static inline void vlan_mvrp_request_leave(const struct net_device *dev) {} +static inline int vlan_mvrp_init_applicant(struct net_device *dev) { return 0; } +static inline void vlan_mvrp_uninit_applicant(struct net_device *dev) {} +static inline int vlan_mvrp_init(void) { return 0; } +static inline void vlan_mvrp_uninit(void) {} +#endif + extern const char vlan_fullname[]; extern const char vlan_version[]; extern int vlan_netlink_init(void); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 09f9108d4688..34df5b3c9b75 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -261,7 +261,7 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) u32 old_flags = vlan->flags; if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + 
VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; vlan->flags = (old_flags & ~mask) | (flags & mask); @@ -272,6 +272,13 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) else vlan_gvrp_request_leave(dev); } + + if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_MVRP) { + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + else + vlan_mvrp_request_leave(dev); + } return 0; } @@ -312,6 +319,9 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); + if (vlan->flags & VLAN_FLAG_MVRP) + vlan_mvrp_request_join(dev); + if (netif_carrier_ok(real_dev)) netif_carrier_on(dev); return 0; diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c new file mode 100644 index 000000000000..d9ec1d5964aa --- /dev/null +++ b/net/8021q/vlan_mvrp.c @@ -0,0 +1,72 @@ +/* + * IEEE 802.1Q Multiple VLAN Registration Protocol (MVRP) + * + * Copyright (c) 2012 Massachusetts Institute of Technology + * + * Adapted from code in net/8021q/vlan_gvrp.c + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include "vlan.h" + +#define MRP_MVRP_ADDRESS { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 } + +enum mvrp_attributes { + MVRP_ATTR_INVALID, + MVRP_ATTR_VID, + __MVRP_ATTR_MAX +}; +#define MVRP_ATTR_MAX (__MVRP_ATTR_MAX - 1) + +static struct mrp_application vlan_mrp_app __read_mostly = { + .type = MRP_APPLICATION_MVRP, + .maxattr = MVRP_ATTR_MAX, + .pkttype.type = htons(ETH_P_MVRP), + .group_address = MRP_MVRP_ADDRESS, + .version = 0, +}; + +int vlan_mvrp_request_join(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + return mrp_request_join(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +void vlan_mvrp_request_leave(const struct net_device *dev) +{ + const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + __be16 vlan_id = htons(vlan->vlan_id); + + mrp_request_leave(vlan->real_dev, &vlan_mrp_app, + &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID); +} + +int vlan_mvrp_init_applicant(struct net_device *dev) +{ + return mrp_init_applicant(dev, &vlan_mrp_app); +} + +void vlan_mvrp_uninit_applicant(struct net_device *dev) +{ + mrp_uninit_applicant(dev, &vlan_mrp_app); +} + +int __init vlan_mvrp_init(void) +{ + return mrp_register_application(&vlan_mrp_app); +} + +void vlan_mvrp_uninit(void) +{ + mrp_unregister_application(&vlan_mrp_app); +} diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 708c80ea1874..1789658b7cd7 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -62,7 +62,7 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) flags = nla_data(data[IFLA_VLAN_FLAGS]); if ((flags->flags & flags->mask) & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | - VLAN_FLAG_LOOSE_BINDING)) + VLAN_FLAG_LOOSE_BINDING | VLAN_FLAG_MVRP)) return -EINVAL; } -- cgit v1.2.3 From 3401d54696f992edf036f00f46c8c399d1b75c2a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 23 Jan 
2013 13:18:04 -0500 Subject: KVM: ARM: Introduce KVM_ARM_SET_DEVICE_ADDR ioctl On ARM some bits are specific to the model being emulated for the guest and user space needs a way to tell the kernel about those bits. An example is mmio device base addresses, where KVM must know the base address for a given device to properly emulate mmio accesses within a certain address range or directly map a device with virtualization extensions into the guest address space. We make this API ARM-specific as we haven't yet reached a consensus for a generic API for all KVM architectures that will allow us to do something like this. [add missing break so KVM_CAP_ARM_SET_DEVICE_ADDR reports 1 instead of falling through to KVM_CAP_NR_VCPUS] Reviewed-by: Will Deacon Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 37 +++++++++++++++++++++++++++++++++++++ arch/arm/include/uapi/asm/kvm.h | 13 +++++++++++++ arch/arm/kvm/arm.c | 24 +++++++++++++++++++++++- include/uapi/linux/kvm.h | 8 ++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c25439a58274..4505f869e450 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2210,6 +2210,43 @@ This ioctl returns the guest registers that are supported for the KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. +4.80 KVM_ARM_SET_DEVICE_ADDR + +Capability: KVM_CAP_ARM_SET_DEVICE_ADDR +Architectures: arm +Type: vm ioctl +Parameters: struct kvm_arm_device_addr (in) +Returns: 0 on success, -1 on error +Errors: + ENODEV: The device id is unknown + ENXIO: Device not supported on current system + EEXIST: Address already set + E2BIG: Address outside guest physical address space + +struct kvm_arm_device_addr { + __u64 id; + __u64 addr; +}; + +Specify a device address in the guest's physical address space where guests +can access emulated or directly exposed devices, which the host kernel needs +to know about.
The id field is an architecture specific identifier for a +specific device. + +ARM divides the id field into two parts, a device id and an address type id +specific to the individual device. + +  bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 | + field: | 0x00000000 | device id | addr type id | + +ARM currently only requires this when using the in-kernel GIC support for the +hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 as the device id. When +setting the base address for the guest's mapping of the VGIC virtual CPU +and distributor interface, the ioctl must be called after calling +KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling +this ioctl twice for any of the base addresses will return -EEXIST. + + 5. The kvm_run structure ------------------------ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 3303ff5adbf3..346ac3f4a2b8 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -65,6 +65,19 @@ struct kvm_regs { #define KVM_ARM_TARGET_CORTEX_A15 0 #define KVM_ARM_NUM_TARGETS 1 +/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ +#define KVM_ARM_DEVICE_TYPE_SHIFT 0 +#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_ID_SHIFT 16 +#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) + +/* Supported device IDs */ +#define KVM_ARM_DEVICE_VGIC_V2 0 + +/* Supported VGIC address types */ +#define KVM_VGIC_V2_ADDR_TYPE_DIST 0 +#define KVM_VGIC_V2_ADDR_TYPE_CPU 1 + #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ struct kvm_vcpu_init { diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 2d30e3afdaf9..523f77a44e44 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -167,6 +167,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; + case KVM_CAP_ARM_SET_DEVICE_ADDR: + r = 1; + break; case KVM_CAP_NR_VCPUS: r = num_online_cpus(); break; @@
-827,10 +830,29 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -EINVAL; } +static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, + struct kvm_arm_device_addr *dev_addr) +{ + return -ENODEV; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - return -EINVAL; + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case KVM_ARM_SET_DEVICE_ADDR: { + struct kvm_arm_device_addr dev_addr; + + if (copy_from_user(&dev_addr, argp, sizeof(dev_addr))) + return -EFAULT; + return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); + } + default: + return -EINVAL; + } } static void cpu_init_hyp_mode(void *vector) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7f2360a46fc2..c70577cf67bc 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -637,6 +637,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 #define KVM_CAP_ARM_PSCI 87 +#define KVM_CAP_ARM_SET_DEVICE_ADDR 88 #ifdef KVM_CAP_IRQ_ROUTING @@ -784,6 +785,11 @@ struct kvm_msi { __u8 pad[16]; }; +struct kvm_arm_device_addr { + __u64 id; + __u64 addr; +}; + /* * ioctls for VM fds */ @@ -869,6 +875,8 @@ struct kvm_s390_ucas_mapping { #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) /* Available with KVM_CAP_PPC_HTAB_FD */ #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) +/* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */ +#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) /* * ioctls for vcpu fds -- cgit v1.2.3 From b6a7bceb3b9315478657bc55884dfdcd104c9864 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Mon, 11 Feb 2013 23:56:40 -0800 Subject: nl80211: minor correction in sample wowlan mask calculation The match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx (where xx indicates "don't care") should be represented by a pattern of twelve zero bytes, and a mask of
"0xed,0x01", not "0xed,0x07". mask_len = (pat_len + 7) / 8 = (12 + 7) / 8 = 2 Hence the mask will be of 2 bytes. Replace each valid byte in pattern by 1 and don't care byte by 0: 10110111 1000 (0000) 1st byte of pattern corresponds to lower order bit in first byte of mask. And 9th byte of pattern corresponds to lower order bit in second byte of mask. With this logic the mask will be 11101101 00000001 = 0xed 0x01 Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9a2ecdc4136c..b23321154e8b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2906,7 +2906,7 @@ enum nl80211_tx_power_setting { * corresponds to the lowest-order bit in the second byte of the mask. * For example: The match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx (where * xx indicates "don't care") would be represented by a pattern of - * twelve zero bytes, and a mask of "0xed,0x07". + * twelve zero bytes, and a mask of "0xed,0x01". * Note that the pattern matching is done as though frames were not * 802.11 frames but 802.3 frames, i.e. the frame is fully unpacked * first (including SNAP header unpacking) and then matched. -- cgit v1.2.3 From bb92d19983a4b54be3e3b83441a8076d92cd04bc Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 12 Feb 2013 12:16:26 -0800 Subject: nl80211: add packet offset information for wowlan pattern If user knows the location of a wowlan pattern to be matched in Rx packet, he can provide an offset with the pattern. This will help drivers to ignore initial bytes and match the pattern efficiently. 
Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao [refactor pattern sending] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 +++ include/uapi/linux/nl80211.h | 12 +++++-- net/wireless/nl80211.c | 74 ++++++++++++++++++++++++++++---------------- 3 files changed, 60 insertions(+), 30 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 77686ca28948..d3a73818e44c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1576,6 +1576,7 @@ struct cfg80211_pmksa { * one bit per byte, in same format as nl80211 * @pattern: bytes to match where bitmask is 1 * @pattern_len: length of pattern (in bytes) + * @pkt_offset: packet offset (in bytes) * * Internal note: @mask and @pattern are allocated in one chunk of * memory, free @mask only! @@ -1583,6 +1584,7 @@ struct cfg80211_pmksa { struct cfg80211_wowlan_trig_pkt_pattern { u8 *mask, *pattern; int pattern_len; + int pkt_offset; }; /** @@ -2290,12 +2292,14 @@ enum wiphy_wowlan_support_flags { * (see nl80211.h for the pattern definition) * @pattern_max_len: maximum length of each pattern * @pattern_min_len: minimum length of each pattern + * @max_pkt_offset: maximum Rx packet offset */ struct wiphy_wowlan_support { u32 flags; int n_patterns; int pattern_max_len; int pattern_min_len; + int max_pkt_offset; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b23321154e8b..eb7b32247ec5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2910,6 +2910,8 @@ enum nl80211_tx_power_setting { * Note that the pattern matching is done as though frames were not * 802.11 frames but 802.3 frames, i.e. the frame is fully unpacked * first (including SNAP header unpacking) and then matched. 
+ * @NL80211_WOWLAN_PKTPAT_OFFSET: packet offset, pattern is matched after + * this fixed number of bytes of received packet * @NUM_NL80211_WOWLAN_PKTPAT: number of attributes * @MAX_NL80211_WOWLAN_PKTPAT: max attribute number */ @@ -2917,6 +2919,7 @@ enum nl80211_wowlan_packet_pattern_attr { __NL80211_WOWLAN_PKTPAT_INVALID, NL80211_WOWLAN_PKTPAT_MASK, NL80211_WOWLAN_PKTPAT_PATTERN, + NL80211_WOWLAN_PKTPAT_OFFSET, NUM_NL80211_WOWLAN_PKTPAT, MAX_NL80211_WOWLAN_PKTPAT = NUM_NL80211_WOWLAN_PKTPAT - 1, @@ -2927,6 +2930,7 @@ enum nl80211_wowlan_packet_pattern_attr { * @max_patterns: maximum number of patterns supported * @min_pattern_len: minimum length of each pattern * @max_pattern_len: maximum length of each pattern + * @max_pkt_offset: maximum Rx packet offset * * This struct is carried in %NL80211_WOWLAN_TRIG_PKT_PATTERN when * that is part of %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED in the @@ -2936,6 +2940,7 @@ struct nl80211_wowlan_pattern_support { __u32 max_patterns; __u32 min_pattern_len; __u32 max_pattern_len; + __u32 max_pkt_offset; } __attribute__((packed)); /** @@ -2951,9 +2956,10 @@ struct nl80211_wowlan_pattern_support { * @NL80211_WOWLAN_TRIG_PKT_PATTERN: wake up on the specified packet patterns * which are passed in an array of nested attributes, each nested attribute * defining a with attributes from &struct nl80211_wowlan_trig_pkt_pattern. - * Each pattern defines a wakeup packet. The matching is done on the MSDU, - * i.e. as though the packet was an 802.3 packet, so the pattern matching - * is done after the packet is converted to the MSDU. + * Each pattern defines a wakeup packet. Packet offset is associated with + * each pattern which is used while matching the pattern. The matching is + * done on the MSDU, i.e. as though the packet was an 802.3 packet, so the + * pattern matching is done after the packet is converted to the MSDU.
* * In %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, it is a binary attribute * carrying a &struct nl80211_wowlan_pattern_support. diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 93bc63eae076..cc0fad30b8c9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1238,6 +1238,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.wowlan.pattern_min_len, .max_pattern_len = dev->wiphy.wowlan.pattern_max_len, + .max_pkt_offset = + dev->wiphy.wowlan.max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, sizeof(pat), &pat)) @@ -6895,6 +6897,39 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) } #ifdef CONFIG_PM +static int nl80211_send_wowlan_patterns(struct sk_buff *msg, + struct cfg80211_registered_device *rdev) +{ + struct nlattr *nl_pats, *nl_pat; + int i, pat_len; + + if (!rdev->wowlan->n_patterns) + return 0; + + nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); + if (!nl_pats) + return -ENOBUFS; + + for (i = 0; i < rdev->wowlan->n_patterns; i++) { + nl_pat = nla_nest_start(msg, i + 1); + if (!nl_pat) + return -ENOBUFS; + pat_len = rdev->wowlan->patterns[i].pattern_len; + if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, + DIV_ROUND_UP(pat_len, 8), + rdev->wowlan->patterns[i].mask) || + nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, + pat_len, rdev->wowlan->patterns[i].pattern) || + nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET, + rdev->wowlan->patterns[i].pkt_offset)) + return -ENOBUFS; + nla_nest_end(msg, nl_pat); + } + nla_nest_end(msg, nl_pats); + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6935,32 +6970,8 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) (rdev->wowlan->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; - if (rdev->wowlan->n_patterns) { - struct 
nlattr *nl_pats, *nl_pat; - int i, pat_len; - - nl_pats = nla_nest_start(msg, - NL80211_WOWLAN_TRIG_PKT_PATTERN); - if (!nl_pats) - goto nla_put_failure; - - for (i = 0; i < rdev->wowlan->n_patterns; i++) { - nl_pat = nla_nest_start(msg, i + 1); - if (!nl_pat) - goto nla_put_failure; - pat_len = rdev->wowlan->patterns[i].pattern_len; - if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, - DIV_ROUND_UP(pat_len, 8), - rdev->wowlan->patterns[i].mask) || - nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, - pat_len, - rdev->wowlan->patterns[i].pattern)) - goto nla_put_failure; - nla_nest_end(msg, nl_pat); - } - nla_nest_end(msg, nl_pats); - } - + if (nl80211_send_wowlan_patterns(msg, rdev)) + goto nla_put_failure; nla_nest_end(msg, nl_wowlan); } @@ -7046,7 +7057,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) { struct nlattr *pat; int n_patterns = 0; - int rem, pat_len, mask_len; + int rem, pat_len, mask_len, pkt_offset; struct nlattr *pat_tb[NUM_NL80211_WOWLAN_PKTPAT]; nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN], @@ -7081,6 +7092,15 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) pat_len < wowlan->pattern_min_len) goto error; + if (!pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]) + pkt_offset = 0; + else + pkt_offset = nla_get_u32( + pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]); + if (pkt_offset > wowlan->max_pkt_offset) + goto error; + new_triggers.patterns[i].pkt_offset = pkt_offset; + new_triggers.patterns[i].mask = kmalloc(mask_len + pat_len, GFP_KERNEL); if (!new_triggers.patterns[i].mask) { -- cgit v1.2.3 From 2a0e047ed62f20664005881b8e7f9328f910316a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Jan 2013 22:57:40 +0100 Subject: cfg80211: configuration for WoWLAN over TCP Intel Wireless devices are able to make a TCP connection after suspending, sending some data and waking up when the connection receives wakeup data (or breaks). 
Add the WoWLAN configuration and feature advertising API for it. Acked-by: David S. Miller Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 55 +++++++- include/uapi/linux/nl80211.h | 125 ++++++++++++++++++ net/wireless/core.h | 3 + net/wireless/nl80211.c | 295 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 474 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d3a73818e44c..7e6569e1f16f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -19,6 +19,7 @@ #include #include #include +#include #include /** @@ -1587,6 +1588,41 @@ struct cfg80211_wowlan_trig_pkt_pattern { int pkt_offset; }; +/** + * struct cfg80211_wowlan_tcp - TCP connection parameters + * + * @sock: (internal) socket for source port allocation + * @src: source IP address + * @dst: destination IP address + * @dst_mac: destination MAC address + * @src_port: source port + * @dst_port: destination port + * @payload_len: data payload length + * @payload: data payload buffer + * @payload_seq: payload sequence stamping configuration + * @data_interval: interval at which to send data packets + * @wake_len: wakeup payload match length + * @wake_data: wakeup payload match data + * @wake_mask: wakeup payload match mask + * @tokens_size: length of the tokens buffer + * @payload_tok: payload token usage configuration + */ +struct cfg80211_wowlan_tcp { + struct socket *sock; + __be32 src, dst; + u16 src_port, dst_port; + u8 dst_mac[ETH_ALEN]; + int payload_len; + const u8 *payload; + struct nl80211_wowlan_tcp_data_seq payload_seq; + u32 data_interval; + u32 wake_len; + const u8 *wake_data, *wake_mask; + u32 tokens_size; + /* must be last, variable member */ + struct nl80211_wowlan_tcp_data_token payload_tok; +}; + /** * struct cfg80211_wowlan - Wake on Wireless-LAN support info * @@ -1601,12 +1637,15 @@ struct cfg80211_wowlan_trig_pkt_pattern { * @eap_identity_req: wake up on EAP identity request 
packet * @four_way_handshake: wake up on 4-way handshake * @rfkill_release: wake up when rfkill is released + * @tcp: TCP connection establishment/wakeup parameters, see nl80211.h. + * NULL if not configured. */ struct cfg80211_wowlan { bool any, disconnect, magic_pkt, gtk_rekey_failure, eap_identity_req, four_way_handshake, rfkill_release; struct cfg80211_wowlan_trig_pkt_pattern *patterns; + struct cfg80211_wowlan_tcp *tcp; int n_patterns; }; @@ -1626,11 +1665,15 @@ struct cfg80211_wowlan { * frame triggers an 802.3 frame should be reported, for * disconnect due to deauth 802.11 frame. This indicates which * it is. + * @tcp_match: TCP wakeup packet received + * @tcp_connlost: TCP connection lost or failed to establish + * @tcp_nomoretokens: TCP data ran out of tokens */ struct cfg80211_wowlan_wakeup { bool disconnect, magic_pkt, gtk_rekey_failure, eap_identity_req, four_way_handshake, - rfkill_release, packet_80211; + rfkill_release, packet_80211, + tcp_match, tcp_connlost, tcp_nomoretokens; s32 pattern_idx; u32 packet_present_len, packet_len; const void *packet; @@ -2285,6 +2328,14 @@ enum wiphy_wowlan_support_flags { WIPHY_WOWLAN_RFKILL_RELEASE = BIT(7), }; +struct wiphy_wowlan_tcp_support { + const struct nl80211_wowlan_tcp_data_token_feature *tok; + u32 data_payload_max; + u32 data_interval_max; + u32 wake_payload_max; + bool seq; +}; + /** * struct wiphy_wowlan_support - WoWLAN support data * @flags: see &enum wiphy_wowlan_support_flags @@ -2293,6 +2344,7 @@ enum wiphy_wowlan_support_flags { * @pattern_max_len: maximum length of each pattern * @pattern_min_len: minimum length of each pattern * @max_pkt_offset: maximum Rx packet offset + * @tcp: TCP wakeup support information */ struct wiphy_wowlan_support { u32 flags; @@ -2300,6 +2352,7 @@ struct wiphy_wowlan_support { int pattern_max_len; int pattern_min_len; int max_pkt_offset; + const struct wiphy_wowlan_tcp_support *tcp; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 
eb7b32247ec5..5309b34930ea 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2991,6 +2991,17 @@ struct nl80211_wowlan_pattern_support { * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN: Original length of the 802.3 * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023 * attribute if the packet was truncated somewhere. + * @NL80211_WOWLAN_TRIG_TCP_CONNECTION: TCP connection wake, see DOC section + * "TCP connection wakeup" for more details. This is a nested attribute + * containing the exact information for establishing and keeping alive + * the TCP connection. + * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH: For wakeup reporting only, the + * wakeup packet was received on the TCP connection + * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST: For wakeup reporting only, the + * TCP connection was lost or failed to be established + * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS: For wakeup reporting only, + * the TCP connection ran out of tokens to use for data to send to the + * service * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number * @@ -3012,12 +3023,126 @@ enum nl80211_wowlan_triggers { NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN, NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023, NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN, + NL80211_WOWLAN_TRIG_TCP_CONNECTION, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS, /* keep last */ NUM_NL80211_WOWLAN_TRIG, MAX_NL80211_WOWLAN_TRIG = NUM_NL80211_WOWLAN_TRIG - 1 }; +/** + * DOC: TCP connection wakeup + * + * Some devices can establish a TCP connection in order to be woken up by a + * packet coming in from outside their network segment, or behind NAT. If + * configured, the device will establish a TCP connection to the given + * service, and periodically send data to that service.
The first data + * packet is usually transmitted after SYN/ACK, also ACKing the SYN/ACK. + * The data packets can optionally include a (little endian) sequence + * number (in the TCP payload!) that is generated by the device, and, also + * optionally, a token from a list of tokens. This serves as a keep-alive + * with the service, and for NATed connections, etc. + * + * During this keep-alive period, the server doesn't send any data to the + * client. When receiving data, it is compared against the wakeup pattern + * (and mask) and if it matches, the host is woken up. Similarly, if the + * connection breaks or cannot be established to start with, the host is + * also woken up. + * + * Developer's note: ARP offload is required for this, otherwise TCP + * response packets might not go through correctly. + */ + +/** + * struct nl80211_wowlan_tcp_data_seq - WoWLAN TCP data sequence + * @start: starting value + * @offset: offset of sequence number in packet + * @len: length of the sequence value to write, 1 through 4 + * + * Note: don't confuse with the TCP sequence number(s), this is for the + * keepalive packet payload. The actual value is written into the packet + * in little endian. 
+ */ +struct nl80211_wowlan_tcp_data_seq { + __u32 start, offset, len; +}; + +/** + * struct nl80211_wowlan_tcp_data_token - WoWLAN TCP data token config + * @offset: offset of token in packet + * @len: length of each token + * @token_stream: stream of data to be used for the tokens, the length must + * be a multiple of @len for this to make sense + */ +struct nl80211_wowlan_tcp_data_token { + __u32 offset, len; + __u8 token_stream[]; +}; + +/** + * struct nl80211_wowlan_tcp_data_token_feature - data token features + * @min_len: minimum token length + * @max_len: maximum token length + * @bufsize: total available token buffer size (max size of @token_stream) + */ +struct nl80211_wowlan_tcp_data_token_feature { + __u32 min_len, max_len, bufsize; +}; + +/** + * enum nl80211_wowlan_tcp_attrs - WoWLAN TCP connection parameters + * @__NL80211_WOWLAN_TCP_INVALID: invalid number for nested attributes + * @NL80211_WOWLAN_TCP_SRC_IPV4: source IPv4 address (in network byte order) + * @NL80211_WOWLAN_TCP_DST_IPV4: destination IPv4 address + * (in network byte order) + * @NL80211_WOWLAN_TCP_DST_MAC: destination MAC address, this is given because + * route lookup when configured might be invalid by the time we suspend, + * and doing a route lookup when suspending is no longer possible as it + * might require ARP querying. + * @NL80211_WOWLAN_TCP_SRC_PORT: source port (u16); optional, if not given a + * socket and port will be allocated + * @NL80211_WOWLAN_TCP_DST_PORT: destination port (u16) + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD: data packet payload, at least one byte. + * For feature advertising, a u32 attribute holding the maximum length + * of the data payload. + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ: data packet sequence configuration + * (if desired), a &struct nl80211_wowlan_tcp_data_seq. 
For feature + * advertising it is just a flag + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN: data packet token configuration, + * see &struct nl80211_wowlan_tcp_data_token and for advertising see + * &struct nl80211_wowlan_tcp_data_token_feature. + * @NL80211_WOWLAN_TCP_DATA_INTERVAL: data interval in seconds, maximum + * interval in feature advertising (u32) + * @NL80211_WOWLAN_TCP_WAKE_PAYLOAD: wake packet payload, for advertising a + * u32 attribute holding the maximum length + * @NL80211_WOWLAN_TCP_WAKE_MASK: Wake packet payload mask, not used for + * feature advertising. The mask works like @NL80211_WOWLAN_PKTPAT_MASK + * but on the TCP payload only. + * @NUM_NL80211_WOWLAN_TCP: number of TCP attributes + * @MAX_NL80211_WOWLAN_TCP: highest attribute number + */ +enum nl80211_wowlan_tcp_attrs { + __NL80211_WOWLAN_TCP_INVALID, + NL80211_WOWLAN_TCP_SRC_IPV4, + NL80211_WOWLAN_TCP_DST_IPV4, + NL80211_WOWLAN_TCP_DST_MAC, + NL80211_WOWLAN_TCP_SRC_PORT, + NL80211_WOWLAN_TCP_DST_PORT, + NL80211_WOWLAN_TCP_DATA_PAYLOAD, + NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, + NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + NL80211_WOWLAN_TCP_DATA_INTERVAL, + NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + NL80211_WOWLAN_TCP_WAKE_MASK, + + /* keep last */ + NUM_NL80211_WOWLAN_TCP, + MAX_NL80211_WOWLAN_TCP = NUM_NL80211_WOWLAN_TCP - 1 +}; + /** * enum nl80211_iface_limit_attrs - limit attributes * @NL80211_IFACE_LIMIT_UNSPEC: (reserved) diff --git a/net/wireless/core.h b/net/wireless/core.h index 37d70dc2fe82..949c9573d8d7 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -108,6 +108,9 @@ cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) for (i = 0; i < rdev->wowlan->n_patterns; i++) kfree(rdev->wowlan->patterns[i].mask); kfree(rdev->wowlan->patterns); + if (rdev->wowlan->tcp && rdev->wowlan->tcp->sock) + sock_release(rdev->wowlan->tcp->sock); + kfree(rdev->wowlan->tcp); kfree(rdev->wowlan); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 
cc0fad30b8c9..d29a461b4981 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "core.h" #include "nl80211.h" #include "reg.h" @@ -399,6 +400,26 @@ nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = { [NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST] = { .type = NLA_FLAG }, [NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE] = { .type = NLA_FLAG }, [NL80211_WOWLAN_TRIG_RFKILL_RELEASE] = { .type = NLA_FLAG }, + [NL80211_WOWLAN_TRIG_TCP_CONNECTION] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = { + [NL80211_WOWLAN_TCP_SRC_IPV4] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_DST_IPV4] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_DST_MAC] = { .len = ETH_ALEN }, + [NL80211_WOWLAN_TCP_SRC_PORT] = { .type = NLA_U16 }, + [NL80211_WOWLAN_TCP_DST_PORT] = { .type = NLA_U16 }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD] = { .len = 1 }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ] = { + .len = sizeof(struct nl80211_wowlan_tcp_data_seq) + }, + [NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN] = { + .len = sizeof(struct nl80211_wowlan_tcp_data_token) + }, + [NL80211_WOWLAN_TCP_DATA_INTERVAL] = { .type = NLA_U32 }, + [NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 }, + [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 }, }; /* policy for GTK rekey offload attributes */ @@ -872,6 +893,48 @@ nla_put_failure: return -ENOBUFS; } +#ifdef CONFIG_PM +static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) +{ + const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp; + struct nlattr *nl_tcp; + + if (!tcp) + return 0; + + nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + if (!nl_tcp) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->data_payload_max)) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->data_payload_max)) + return -ENOBUFS; + + if (tcp->seq && 
nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ)) + return -ENOBUFS; + + if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + sizeof(*tcp->tok), tcp->tok)) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, + tcp->data_interval_max)) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + tcp->wake_payload_max)) + return -ENOBUFS; + + nla_nest_end(msg, nl_tcp); + return 0; +} +#endif + static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *dev) { @@ -1246,6 +1309,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } + if (nl80211_send_wowlan_tcp_caps(dev, msg)) + goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); } #endif @@ -6930,16 +6996,67 @@ static int nl80211_send_wowlan_patterns(struct sk_buff *msg, return 0; } +static int nl80211_send_wowlan_tcp(struct sk_buff *msg, + struct cfg80211_wowlan_tcp *tcp) +{ + struct nlattr *nl_tcp; + + if (!tcp) + return 0; + + nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + if (!nl_tcp) + return -ENOBUFS; + + if (nla_put_be32(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) || + nla_put_be32(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) || + nla_put(msg, NL80211_WOWLAN_TCP_DST_MAC, ETH_ALEN, tcp->dst_mac) || + nla_put_u16(msg, NL80211_WOWLAN_TCP_SRC_PORT, tcp->src_port) || + nla_put_u16(msg, NL80211_WOWLAN_TCP_DST_PORT, tcp->dst_port) || + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->payload_len, tcp->payload) || + nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, + tcp->data_interval) || + nla_put(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + tcp->wake_len, tcp->wake_data) || + nla_put(msg, NL80211_WOWLAN_TCP_WAKE_MASK, + DIV_ROUND_UP(tcp->wake_len, 8), tcp->wake_mask)) + return -ENOBUFS; + + if (tcp->payload_seq.len && + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, + sizeof(tcp->payload_seq), &tcp->payload_seq)) + 
return -ENOBUFS; + + if (tcp->payload_tok.len && + nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + sizeof(tcp->payload_tok) + tcp->tokens_size, + &tcp->payload_tok)) + return -ENOBUFS; + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct sk_buff *msg; void *hdr; + u32 size = NLMSG_DEFAULT_SIZE; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns) + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && + !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (rdev->wowlan && rdev->wowlan->tcp) { + /* adjust size to have room for all the data */ + size += rdev->wowlan->tcp->tokens_size + + rdev->wowlan->tcp->payload_len + + rdev->wowlan->tcp->wake_len + + rdev->wowlan->tcp->wake_len / 8; + } + + msg = nlmsg_new(size, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -6970,8 +7087,13 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) (rdev->wowlan->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; + if (nl80211_send_wowlan_patterns(msg, rdev)) goto nla_put_failure; + + if (nl80211_send_wowlan_tcp(msg, rdev->wowlan->tcp)) + goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); } @@ -6983,6 +7105,150 @@ nla_put_failure: return -ENOBUFS; } +static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, + struct nlattr *attr, + struct cfg80211_wowlan *trig) +{ + struct nlattr *tb[NUM_NL80211_WOWLAN_TCP]; + struct cfg80211_wowlan_tcp *cfg; + struct nl80211_wowlan_tcp_data_token *tok = NULL; + struct nl80211_wowlan_tcp_data_seq *seq = NULL; + u32 size; + u32 data_size, wake_size, tokens_size = 0, wake_mask_size; + int err, port; + + if (!rdev->wiphy.wowlan.tcp) + return -EINVAL; + + err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP, + nla_data(attr), nla_len(attr), + nl80211_wowlan_tcp_policy); + if (err) + return 
err; + + if (!tb[NL80211_WOWLAN_TCP_SRC_IPV4] || + !tb[NL80211_WOWLAN_TCP_DST_IPV4] || + !tb[NL80211_WOWLAN_TCP_DST_MAC] || + !tb[NL80211_WOWLAN_TCP_DST_PORT] || + !tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD] || + !tb[NL80211_WOWLAN_TCP_DATA_INTERVAL] || + !tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] || + !tb[NL80211_WOWLAN_TCP_WAKE_MASK]) + return -EINVAL; + + data_size = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]); + if (data_size > rdev->wiphy.wowlan.tcp->data_payload_max) + return -EINVAL; + + if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > + rdev->wiphy.wowlan.tcp->data_interval_max) + return -EINVAL; + + wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); + if (wake_size > rdev->wiphy.wowlan.tcp->wake_payload_max) + return -EINVAL; + + wake_mask_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_MASK]); + if (wake_mask_size != DIV_ROUND_UP(wake_size, 8)) + return -EINVAL; + + if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]) { + u32 tokln = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); + + tok = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); + tokens_size = tokln - sizeof(*tok); + + if (!tok->len || tokens_size % tok->len) + return -EINVAL; + if (!rdev->wiphy.wowlan.tcp->tok) + return -EINVAL; + if (tok->len > rdev->wiphy.wowlan.tcp->tok->max_len) + return -EINVAL; + if (tok->len < rdev->wiphy.wowlan.tcp->tok->min_len) + return -EINVAL; + if (tokens_size > rdev->wiphy.wowlan.tcp->tok->bufsize) + return -EINVAL; + if (tok->offset + tok->len > data_size) + return -EINVAL; + } + + if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]) { + seq = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]); + if (!rdev->wiphy.wowlan.tcp->seq) + return -EINVAL; + if (seq->len == 0 || seq->len > 4) + return -EINVAL; + if (seq->len + seq->offset > data_size) + return -EINVAL; + } + + size = sizeof(*cfg); + size += data_size; + size += wake_size + wake_mask_size; + size += tokens_size; + + cfg = kzalloc(size, GFP_KERNEL); + if (!cfg) + return -ENOMEM; + cfg->src = 
nla_get_be32(tb[NL80211_WOWLAN_TCP_SRC_IPV4]); + cfg->dst = nla_get_be32(tb[NL80211_WOWLAN_TCP_DST_IPV4]); + memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]), + ETH_ALEN); + if (tb[NL80211_WOWLAN_TCP_SRC_PORT]) + port = nla_get_u16(tb[NL80211_WOWLAN_TCP_SRC_PORT]); + else + port = 0; +#ifdef CONFIG_INET + /* allocate a socket and port for it and use it */ + err = __sock_create(wiphy_net(&rdev->wiphy), PF_INET, SOCK_STREAM, + IPPROTO_TCP, &cfg->sock, 1); + if (err) { + kfree(cfg); + return err; + } + if (inet_csk_get_port(cfg->sock->sk, port)) { + sock_release(cfg->sock); + kfree(cfg); + return -EADDRINUSE; + } + cfg->src_port = inet_sk(cfg->sock->sk)->inet_num; +#else + if (!port) { + kfree(cfg); + return -EINVAL; + } + cfg->src_port = port; +#endif + + cfg->dst_port = nla_get_u16(tb[NL80211_WOWLAN_TCP_DST_PORT]); + cfg->payload_len = data_size; + cfg->payload = (u8 *)cfg + sizeof(*cfg) + tokens_size; + memcpy((void *)cfg->payload, + nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]), + data_size); + if (seq) + cfg->payload_seq = *seq; + cfg->data_interval = nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]); + cfg->wake_len = wake_size; + cfg->wake_data = (u8 *)cfg + sizeof(*cfg) + tokens_size + data_size; + memcpy((void *)cfg->wake_data, + nla_data(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]), + wake_size); + cfg->wake_mask = (u8 *)cfg + sizeof(*cfg) + tokens_size + + data_size + wake_size; + memcpy((void *)cfg->wake_mask, + nla_data(tb[NL80211_WOWLAN_TCP_WAKE_MASK]), + wake_mask_size); + if (tok) { + cfg->tokens_size = tokens_size; + memcpy(&cfg->payload_tok, tok, sizeof(*tok) + tokens_size); + } + + trig->tcp = cfg; + + return 0; +} + static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6993,7 +7259,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) int err, i; bool prev_enabled = rdev->wowlan; - if (!rdev->wiphy.wowlan.flags && 
!rdev->wiphy.wowlan.n_patterns) + if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && + !rdev->wiphy.wowlan.tcp) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { @@ -7120,6 +7387,14 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) } } + if (tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION]) { + err = nl80211_parse_wowlan_tcp( + rdev, tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION], + &new_triggers); + if (err) + goto error; + } + ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL); if (!ntrig) { err = -ENOMEM; @@ -7137,6 +7412,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) for (i = 0; i < new_triggers.n_patterns; i++) kfree(new_triggers.patterns[i].mask); kfree(new_triggers.patterns); + if (new_triggers.tcp && new_triggers.tcp->sock) + sock_release(new_triggers.tcp->sock); + kfree(new_triggers.tcp); return err; } #endif @@ -9418,6 +9696,17 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, wakeup->pattern_idx)) goto free_msg; + if (wakeup->tcp_match) + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH); + + if (wakeup->tcp_connlost) + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST); + + if (wakeup->tcp_nomoretokens) + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS); + if (wakeup->packet) { u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN; -- cgit v1.2.3 From 93be6ce0e91b6a94783e012b1857a347a5e6e9f2 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 11 Feb 2013 05:50:18 +0000 Subject: tcp: set and get per-socket timestamp A timestamp can be set, only if a socket is in the repair mode. This patch adds a new socket option TCP_TIMESTAMP, which allows to get and set current tcp times stamp. Cc: "David S. 
Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Eric Dumazet Cc: Pavel Emelyanov Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 9 +++++++++ 2 files changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index e962faa5ab0d..6b1ead0b0c9d 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -111,6 +111,7 @@ enum { #define TCP_QUEUE_SEQ 21 #define TCP_REPAIR_OPTIONS 22 #define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ +#define TCP_TIMESTAMP 24 struct tcp_repair_opt { __u32 opt_code; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8a90bda96038..801b07b796f0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2714,6 +2714,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else err = -EINVAL; break; + case TCP_TIMESTAMP: + if (!tp->repair) + err = -EPERM; + else + tp->tsoffset = val - tcp_time_stamp; + break; default: err = -ENOPROTOOPT; break; @@ -2962,6 +2968,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_USER_TIMEOUT: val = jiffies_to_msecs(icsk->icsk_user_timeout); break; + case TCP_TIMESTAMP: + val = tcp_time_stamp + tp->tsoffset; + break; default: return -ENOPROTOOPT; } -- cgit v1.2.3 From 407af3299ef1ac7e87ce3fb530e32a009d1a9efd Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:12 +0000 Subject: bridge: Add netlink interface to configure vlans on bridge ports Add a netlink interface to add and remove vlan configuration on bridge port. The interface uses the RTM_SETLINK message and encodes the vlan configuration inside the IFLA_AF_SPEC. It is possible to include multiple vlans to either add or remove in a single message. Signed-off-by: Vlad Yasevich Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 2 + include/uapi/linux/if_bridge.h | 9 +++ net/bridge/br_device.c | 1 + net/bridge/br_if.c | 1 + net/bridge/br_netlink.c | 139 +++++++++++++++++++++++++++++++++++------ net/bridge/br_private.h | 1 + net/core/rtnetlink.c | 72 +++++++++++++++++++++ 7 files changed, 207 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 25bd46f52877..1b90f9401000 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1020,6 +1020,8 @@ struct net_device_ops { int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev); + int (*ndo_bridge_dellink)(struct net_device *dev, + struct nlmsghdr *nlh); int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); }; diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 5db297514aec..3ca9817ca7e8 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -108,15 +108,24 @@ struct __fdb_entry { * [IFLA_AF_SPEC] = { * [IFLA_BRIDGE_FLAGS] * [IFLA_BRIDGE_MODE] + * [IFLA_BRIDGE_VLAN_INFO] * } */ enum { IFLA_BRIDGE_FLAGS, IFLA_BRIDGE_MODE, + IFLA_BRIDGE_VLAN_INFO, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) +#define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ + +struct bridge_vlan_info { + u16 flags; + u16 vid; +}; + /* Bridge multicast database attributes * [MDBA_MDB] = { * [MDBA_MDB_ENTRY] = { diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 35a2c2c84f33..091bedf266a0 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -316,6 +316,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_dump = br_fdb_dump, .ndo_bridge_getlink = br_getlink, .ndo_bridge_setlink = br_setlink, + .ndo_bridge_dellink = br_dellink, }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 
af9d65ab4001..335c60cebfd1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "br_private.h" diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 39ca9796f3f7..534a9f4587a9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "br_private.h" #include "br_private_stp.h" @@ -119,10 +120,14 @@ nla_put_failure: */ void br_ifinfo_notify(int event, struct net_bridge_port *port) { - struct net *net = dev_net(port->dev); + struct net *net; struct sk_buff *skb; int err = -ENOBUFS; + if (!port) + return; + + net = dev_net(port->dev); br_debug(port->br, "port %u(%s) event %d\n", (unsigned int)port->port_no, port->dev->name, event); @@ -144,6 +149,7 @@ errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } + /* * Dump information about all ports, in response to GETLINK */ @@ -162,6 +168,64 @@ out: return err; } +const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { + [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, + [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, + .len = sizeof(struct bridge_vlan_info), }, +}; + +static int br_afspec(struct net_bridge *br, + struct net_bridge_port *p, + struct nlattr *af_spec, + int cmd) +{ + struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + int err = 0; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); + if (err) + return err; + + if (tb[IFLA_BRIDGE_VLAN_INFO]) { + struct bridge_vlan_info *vinfo; + + vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + + if (vinfo->vid >= VLAN_N_VID) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid); + } else + err = br_vlan_add(br, vinfo->vid); + + if (err) + break; + + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + 
if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else + br_vlan_delete(br, vinfo->vid); + break; + } + } + + return err; +} + static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, [IFLA_BRPORT_COST] = { .type = NLA_U32 }, @@ -241,6 +305,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { struct ifinfomsg *ifm; struct nlattr *protinfo; + struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; int err; @@ -248,38 +313,76 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - if (!protinfo) + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!protinfo && !afspec) return 0; p = br_port_get_rtnl(dev); - if (!p) + /* We want to accept dev as bridge itself if the AF_SPEC + * is set to see if someone is setting vlan info on the brigde + */ + if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec)) return -EINVAL; - if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, - protinfo, ifla_brport_policy); + if (p && protinfo) { + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, + protinfo, ifla_brport_policy); + if (err) + return err; + + spin_lock_bh(&p->br->lock); + err = br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatability with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } if (err) - return err; - - spin_lock_bh(&p->br->lock); - err = br_setport(p, tb); - spin_unlock_bh(&p->br->lock); - } else { - /* Binary compatability with old RSTP */ - if (nla_len(protinfo) < sizeof(u8)) - return -EINVAL; + goto out; + } - spin_lock_bh(&p->br->lock); - err = br_set_port_state(p, 
nla_get_u8(protinfo)); - spin_unlock_bh(&p->br->lock); + if (afspec) { + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_SETLINK); } if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); +out: return err; } +/* Delete port information */ +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +{ + struct ifinfomsg *ifm; + struct nlattr *afspec; + struct net_bridge_port *p; + int err; + + ifm = nlmsg_data(nlh); + + afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + if (!afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself as well */ + if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + return -EINVAL; + + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_DELLINK); + + return err; +} static int br_validate(struct nlattr *tb[], struct nlattr *data[]) { if (tb[IFLA_ADDRESS]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f0f24610d111..a42f9d49a64e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -713,6 +713,7 @@ extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c1e4db60eeca..2c9ccbfbd93c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2464,6 +2464,77 @@ out: return err; } +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 oflags, flags = 0; + bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = 
nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; + flags = nla_get_u16(attr); + break; + } + } + } + + oflags = flags; + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + + if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) { + err = -EOPNOTSUPP; + goto out; + } + + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + if (err) + goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; + } + + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_dellink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + + if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, oflags); +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2647,6 +2718,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } -- cgit v1.2.3 From 6cbdceeb1cb12c7d620161925a8c3e81daadb2e4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:13 +0000 Subject: bridge: Dump vlan information from a bridge port Using the RTM_GETLINK dump the vlan filter list of a given bridge port. 
The information depends on setting the filter flag similar to how nic VF info is dumped. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +- include/linux/netdevice.h | 3 +- include/uapi/linux/rtnetlink.h | 1 + net/bridge/br_netlink.c | 94 +++++++++++++++++++++++---- net/bridge/br_private.h | 3 +- net/bridge/br_vlan.c | 2 + net/core/rtnetlink.c | 16 +++-- 7 files changed, 104 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6999269b3a4a..4e2aa47193cb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7079,7 +7079,8 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, } static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, + u32 filter_mask) { struct ixgbe_adapter *adapter = netdev_priv(dev); u16 mode; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1b90f9401000..1964ca66df56 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1019,7 +1019,8 @@ struct net_device_ops { struct nlmsghdr *nlh); int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, + u32 filter_mask); int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh); int (*ndo_change_carrier)(struct net_device *dev, diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 7a5eb196ade9..7a2144e1afae 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -630,6 +630,7 @@ struct tcamsg { /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) +#define RTEXT_FILTER_BRVLAN (1 << 1) /* End of information exported to user level */ diff --git a/net/bridge/br_netlink.c 
b/net/bridge/br_netlink.c index 534a9f4587a9..fe1980d5a7e4 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -65,15 +65,21 @@ static int br_port_fill_attrs(struct sk_buff *skb, * Create one netlink message for one interface * Contains port and master info as well as carrier and bridge state. */ -static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port, - u32 pid, u32 seq, int event, unsigned int flags) +static int br_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + u32 pid, u32 seq, int event, unsigned int flags, + u32 filter_mask, const struct net_device *dev) { - const struct net_bridge *br = port->br; - const struct net_device *dev = port->dev; + const struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + if (port) + br = port->br; + else + br = netdev_priv(dev); + br_debug(br, "br_fill_info event %d port %s master %s\n", event, dev->name, br->dev->name); @@ -99,7 +105,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_put_u32(skb, IFLA_LINK, dev->iflink))) goto nla_put_failure; - if (event == RTM_NEWLINK) { + if (event == RTM_NEWLINK && port) { struct nlattr *nest = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); @@ -108,6 +114,40 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por nla_nest_end(skb, nest); } + /* Check if the VID information is requested */ + if (filter_mask & RTEXT_FILTER_BRVLAN) { + struct nlattr *af; + const struct net_port_vlans *pv; + struct bridge_vlan_info vinfo; + u16 vid; + + if (port) + pv = nbp_get_vlan_info(port); + else + pv = br_get_vlan_info(br); + + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + goto done; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + + for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + vid < BR_VLAN_BITMAP_LEN; + vid = 
find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1)) { + vinfo.vid = vid; + vinfo.flags = 0; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + nla_nest_end(skb, af); + } + +done: return nlmsg_end(skb, nlh); nla_put_failure: @@ -135,7 +175,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -154,16 +194,17 @@ errout: * Dump information about all ports, in response to GETLINK */ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev) + struct net_device *dev, u32 filter_mask) { int err = 0; struct net_bridge_port *port = br_port_get_rcu(dev); - /* not a bridge port */ - if (!port) + /* not a bridge port and */ + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) goto out; - err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI); + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); out: return err; } @@ -395,6 +436,29 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static size_t br_get_link_af_size(const struct net_device *dev) +{ + struct net_port_vlans *pv; + + if (br_port_exists(dev)) + pv = nbp_get_vlan_info(br_port_get_rcu(dev)); + else if (dev->priv_flags & IFF_EBRIDGE) + pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); + else + return 0; + + if (!pv) + return 0; + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); +} + +struct rtnl_af_ops br_af_ops = { + .family = AF_BRIDGE, + .get_link_af_size = br_get_link_af_size, +}; + struct rtnl_link_ops br_link_ops __read_mostly = { .kind = "bridge", .priv_size = sizeof(struct net_bridge), @@ -408,11 
+472,18 @@ int __init br_netlink_init(void) int err; br_mdb_init(); - err = rtnl_link_register(&br_link_ops); + err = rtnl_af_register(&br_af_ops); if (err) goto out; + err = rtnl_link_register(&br_link_ops); + if (err) + goto out_af; + return 0; + +out_af: + rtnl_af_unregister(&br_af_ops); out: br_mdb_uninit(); return err; @@ -421,5 +492,6 @@ out: void __exit br_netlink_fini(void) { br_mdb_uninit(); + rtnl_af_unregister(&br_af_ops); rtnl_link_unregister(&br_link_ops); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a42f9d49a64e..ce2235255c2f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -73,6 +73,7 @@ struct net_port_vlans { } parent; struct rcu_head rcu; unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + u16 num_vlans; }; struct net_bridge_fdb_entry @@ -715,7 +716,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port); extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev); + struct net_device *dev, u32 filter_mask); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index d8690bfe63d4..f2bf5a197ea3 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -28,6 +28,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid) } set_bit(vid, v->vlan_bitmap); + v->num_vlans++; return 0; } @@ -44,6 +45,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) } clear_bit(vid, v->vlan_bitmap); + v->num_vlans--; if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2c9ccbfbd93c..f3a112ec86d5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2315,6 +2315,13 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct 
netlink_callback *cb) int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; + struct nlattr *extfilt; + u32 filter_mask = 0; + + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + IFLA_EXT_MASK); + if (extfilt) + filter_mask = nla_get_u32(extfilt); rcu_read_lock(); for_each_netdev_rcu(net, dev) { @@ -2324,14 +2331,15 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && br_dev->netdev_ops->ndo_bridge_getlink( - skb, portid, seq, dev) < 0) + skb, portid, seq, dev, filter_mask) < 0) break; idx++; } if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0] && - ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) + ops->ndo_bridge_getlink(skb, portid, seq, dev, + filter_mask) < 0) break; idx++; } @@ -2372,14 +2380,14 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { - err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } if ((flags & BRIDGE_FLAGS_SELF) && dev->netdev_ops->ndo_bridge_getlink) { - err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0); if (err < 0) goto errout; } -- cgit v1.2.3 From 552406c488ec2cf1aaf8b5bd24d1750c9fd6d8cc Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:15 +0000 Subject: bridge: Add the ability to configure pvid A user may designate a certain vlan as PVID. This means that any ingress frame that does not contain a vlan tag is assigned to this vlan and any forwarding decisions are made with this vlan in mind. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- include/uapi/linux/if_bridge.h | 1 + net/bridge/br_netlink.c | 11 +++++++--- net/bridge/br_private.h | 8 +++---- net/bridge/br_vlan.c | 47 +++++++++++++++++++++++++++++++++--------- 4 files changed, 50 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 3ca9817ca7e8..c6c30e28f396 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -120,6 +120,7 @@ enum { #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) #define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ +#define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */ struct bridge_vlan_info { u16 flags; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fe1980d5a7e4..e044cc0b5650 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -120,6 +120,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_port_vlans *pv; struct bridge_vlan_info vinfo; u16 vid; + u16 pvid; if (port) pv = nbp_get_vlan_info(port); @@ -133,12 +134,15 @@ static int br_fill_ifinfo(struct sk_buff *skb, if (!af) goto nla_put_failure; + pvid = br_get_pvid(pv); for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); vid < BR_VLAN_BITMAP_LEN; vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) { vinfo.vid = vid; vinfo.flags = 0; + if (vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, sizeof(vinfo), &vinfo)) goto nla_put_failure; @@ -239,14 +243,15 @@ static int br_afspec(struct net_bridge *br, switch (cmd) { case RTM_SETLINK: if (p) { - err = nbp_vlan_add(p, vinfo->vid); + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); if (err) break; if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - err = br_vlan_add(p->br, vinfo->vid); + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); } else - err = br_vlan_add(br, vinfo->vid); + err = br_vlan_add(br, vinfo->vid, vinfo->flags); 
if (err) break; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index ea8e7efd9137..1ae6395a0369 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -562,11 +562,11 @@ extern bool br_allowed_egress(struct net_bridge *br, extern struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *v, struct sk_buff *skb); -extern int br_vlan_add(struct net_bridge *br, u16 vid); +extern int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); extern int br_vlan_delete(struct net_bridge *br, u16 vid); extern void br_vlan_flush(struct net_bridge *br); extern int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); -extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid); +extern int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); extern int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); extern void nbp_vlan_flush(struct net_bridge_port *port); @@ -633,7 +633,7 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, return skb; } -static inline int br_vlan_add(struct net_bridge *br, u16 vid) +static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) { return -EOPNOTSUPP; } @@ -647,7 +647,7 @@ static inline void br_vlan_flush(struct net_bridge *br) { } -static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid) +static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { return -EOPNOTSUPP; } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 20057de56db0..c79940cff3a1 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -5,12 +5,33 @@ #include "br_private.h" -static int __vlan_add(struct net_port_vlans *v, u16 vid) +static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid == vid) + return; + + smp_wmb(); + v->pvid = vid; +} + +static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid != vid) + return; + + smp_wmb(); + v->pvid = 0; +} + +static 
int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) { int err; - if (test_bit(vid, v->vlan_bitmap)) - return -EEXIST; + if (test_bit(vid, v->vlan_bitmap)) { + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + return 0; + } if (v->port_idx && vid) { struct net_device *dev = v->parent.port->dev; @@ -29,6 +50,9 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid) set_bit(vid, v->vlan_bitmap); v->num_vlans++; + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + return 0; } @@ -37,6 +61,8 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) if (!test_bit(vid, v->vlan_bitmap)) return -EINVAL; + __vlan_delete_pvid(v, vid); + if (v->port_idx && vid) { struct net_device *dev = v->parent.port->dev; @@ -58,6 +84,8 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) static void __vlan_flush(struct net_port_vlans *v) { + smp_wmb(); + v->pvid = 0; bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN); if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); @@ -185,7 +213,7 @@ bool br_allowed_egress(struct net_bridge *br, } /* Must be protected by RTNL */ -int br_vlan_add(struct net_bridge *br, u16 vid) +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) { struct net_port_vlans *pv = NULL; int err; @@ -194,7 +222,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid) pv = rtnl_dereference(br->vlan_info); if (pv) - return __vlan_add(pv, vid); + return __vlan_add(pv, vid, flags); /* Create port vlan infomration */ @@ -203,7 +231,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid) return -ENOMEM; pv->parent.br = br; - err = __vlan_add(pv, vid); + err = __vlan_add(pv, vid, flags); if (err) goto out; @@ -234,7 +262,6 @@ void br_vlan_flush(struct net_bridge *br) struct net_port_vlans *pv; ASSERT_RTNL(); - pv = rtnl_dereference(br->vlan_info); if (!pv) return; @@ -258,7 +285,7 @@ unlock: } /* Must be protected by RTNL */ -int nbp_vlan_add(struct net_bridge_port *port, u16 vid) +int nbp_vlan_add(struct 
net_bridge_port *port, u16 vid, u16 flags) { struct net_port_vlans *pv = NULL; int err; @@ -267,7 +294,7 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid) pv = rtnl_dereference(port->vlan_info); if (pv) - return __vlan_add(pv, vid); + return __vlan_add(pv, vid, flags); /* Create port vlan infomration */ @@ -279,7 +306,7 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid) pv->port_idx = port->port_no; pv->parent.port = port; - err = __vlan_add(pv, vid); + err = __vlan_add(pv, vid, flags); if (err) goto clean_up; -- cgit v1.2.3 From 1690be63a27b20ae65c792729a44f5970561ffa4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:18 +0000 Subject: bridge: Add vlan support to static neighbors When a user adds bridge neighbors, allow him to specify VLAN id. If the VLAN id is not specified, the neighbor will be added for VLANs currently in the ports filter list. If no VLANs are configured on the port, we use vlan 0 and only add 1 entry. Signed-off-by: Vlad Yasevich Acked-by: Jitendra Kalsaria Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 4 +- drivers/net/macvlan.c | 2 +- drivers/net/vxlan.c | 3 +- include/linux/netdevice.h | 4 +- include/uapi/linux/neighbour.h | 1 + net/bridge/br_fdb.c | 148 ++++++++++++++++++++--- net/bridge/br_private.h | 6 +- net/core/rtnetlink.c | 26 ++-- 10 files changed, 162 insertions(+), 35 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4e2aa47193cb..1c0efcb7920f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7002,7 +7002,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; } -static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, +static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 937bcc3d3212..5088dc5c3d1a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1959,6 +1959,7 @@ static int mlx4_en_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } static int mlx4_en_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index b745194391a1..b95316831587 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -247,8 +247,8 @@ static int qlcnic_set_mac(struct net_device *netdev, void *p) return 0; } -static int qlcnic_fdb_del(struct ndmsg *ndm, struct net_device *netdev, - const unsigned char *addr) +static int qlcnic_fdb_del(struct ndmsg *ndm, 
struct nlattr *tb[], + struct net_device *netdev, const unsigned char *addr) { struct qlcnic_adapter *adapter = netdev_priv(netdev); int err = -EOPNOTSUPP; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index e4b8078e88a9..defcd8a85744 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -599,7 +599,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return err; } -static int macvlan_fdb_del(struct ndmsg *ndm, +static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr) { diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 72485b9b9005..9d70421cf3a0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -393,7 +393,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* Delete entry (via netlink) */ -static int vxlan_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct vxlan_dev *vxlan = netdev_priv(dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1964ca66df56..9deb672d999f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -884,7 +884,8 @@ struct netdev_fcoe_hbainfo { * struct net_device *dev, * const unsigned char *addr, u16 flags) * Adds an FDB entry to dev for addr. - * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev, + * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], + * struct net_device *dev, * const unsigned char *addr) * Deletes the FDB entry from dev coresponding to addr. 
* int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -1008,6 +1009,7 @@ struct net_device_ops { const unsigned char *addr, u16 flags); int (*ndo_fdb_del)(struct ndmsg *ndm, + struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); int (*ndo_fdb_dump)(struct sk_buff *skb, diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 275e5d65dcb2..adb068c53c4e 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -20,6 +20,7 @@ enum { NDA_LLADDR, NDA_CACHEINFO, NDA_PROBES, + NDA_VLAN, __NDA_MAX }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 276a52254606..4b75ad43aa85 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -505,6 +505,10 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; + + if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -516,6 +520,7 @@ static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)); } @@ -617,6 +622,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, return 0; } +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, + const unsigned char *addr, u16 nlh_flags, u16 vid) +{ + int err = 0; + + if (ndm->ndm_flags & NTF_USE) { + rcu_read_lock(); + br_fdb_update(p->br, p, addr, vid); + rcu_read_unlock(); + } else { + spin_lock_bh(&p->br->hash_lock); + err = fdb_add_entry(p, addr, ndm->ndm_state, + nlh_flags, vid); + spin_unlock_bh(&p->br->hash_lock); + } + + return err; +} + /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, @@ -624,12 +648,29 @@ int 
br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { struct net_bridge_port *p; int err = 0; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); return -EINVAL; } + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", @@ -637,41 +678,90 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (ndm->ndm_flags & NTF_USE) { - rcu_read_lock(); - br_fdb_update(p->br, p, addr, 0); - rcu_read_unlock(); + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_NEWNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + /* VID was specified, so use it. */ + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags, - 0); - spin_unlock_bh(&p->br->hash_lock); + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. 
+ */ + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + if (err) + goto out; + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } } +out: return err; } -static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr) +static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, + u16 vlan) { - struct net_bridge *br = p->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr, 0); + fdb = fdb_find(head, addr, vlan); if (!fdb) return -ENOENT; - fdb_delete(p->br, fdb); + fdb_delete(br, fdb); return 0; } +static int __br_fdb_delete(struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + int err; + + spin_lock_bh(&p->br->hash_lock); + err = fdb_delete_by_addr(p->br, addr, vid); + spin_unlock_bh(&p->br->hash_lock); + + return err; +} + /* Remove neighbor entry with RTM_DELNEIGH */ -int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr) { struct net_bridge_port *p; int err; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (vid >= VLAN_N_VID) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", @@ -679,9 +769,33 @@ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev, return -EINVAL; } - spin_lock_bh(&p->br->hash_lock); - err = fdb_delete_by_addr(p, addr); - spin_unlock_bh(&p->br->hash_lock); + pv = 
nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_DELNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + err = __br_fdb_delete(p, addr, vid); + } else { + if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + err = __br_fdb_delete(p, addr, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + err = -ENOENT; + vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); + while (vid < BR_VLAN_BITMAP_LEN) { + err &= __br_fdb_delete(p, addr, vid); + vid = find_next_bit(pv->vlan_bitmap, + BR_VLAN_BITMAP_LEN, vid+1); + } + } +out: return err; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 22915c8e9961..799dbb37e5a2 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -388,7 +388,7 @@ extern void br_fdb_update(struct net_bridge *br, const unsigned char *addr, u16 vid); -extern int br_fdb_delete(struct ndmsg *ndm, +extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr); extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], @@ -577,13 +577,13 @@ extern void nbp_vlan_flush(struct net_bridge_port *port); static inline struct net_port_vlans *br_get_vlan_info( const struct net_bridge *br) { - return rcu_dereference(br->vlan_info); + return rcu_dereference_rtnl(br->vlan_info); } static inline struct net_port_vlans *nbp_get_vlan_info( const struct net_bridge_port *p) { - return rcu_dereference(p->vlan_info); + return rcu_dereference_rtnl(p->vlan_info); } /* Since bridge now depends on 8021Q module, but the time bridge sees the diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f3a112ec86d5..d8aa20f6a46e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2119,13 +2119,17 @@ static int rtnl_fdb_del(struct sk_buff 
*skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; - struct nlattr *llattr; + struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; int err = -EINVAL; __u8 *addr; - if (nlmsg_len(nlh) < sizeof(*ndm)) - return -EINVAL; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) + return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { @@ -2139,13 +2143,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; } - llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR); - if (llattr == NULL || nla_len(llattr) != ETH_ALEN) { - pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n"); + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); + return -EINVAL; + } + + addr = nla_data(tb[NDA_LLADDR]); + if (!is_valid_ether_addr(addr)) { + pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); return -EINVAL; } - addr = nla_data(llattr); err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ @@ -2155,7 +2163,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) const struct net_device_ops *ops = br_dev->netdev_ops; if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, dev, addr); + err = ops->ndo_fdb_del(ndm, tb, dev, addr); if (err) goto out; @@ -2165,7 +2173,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr); + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); -- cgit v1.2.3 From 35e03f3a0275a1ba57e432d7c948cf6f70fbb37a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Feb 2013 12:00:20 +0000 Subject: 
bridge: Separate egress policy bitmap Add an ability to configure a separate "untagged" egress policy to the VLAN information of the bridge. This supersedes PVID policy and makes PVID ingress-only. The policy is configured with a new flag and is represented as a port bitmap per vlan. Egress frames with a VLAN id in "untagged" policy bitmap would egress the port without VLAN header. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + net/bridge/br_netlink.c | 4 ++++ net/bridge/br_private.h | 1 + net/bridge/br_vlan.c | 20 ++++++++++++++------ 4 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index c6c30e28f396..f1bf8d34ac9f 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -121,6 +121,7 @@ enum { #define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ #define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */ +#define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */ struct bridge_vlan_info { u16 flags; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e044cc0b5650..d1dda476d743 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -143,6 +143,10 @@ static int br_fill_ifinfo(struct sk_buff *skb, vinfo.flags = 0; if (vid == pvid) vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, sizeof(vinfo), &vinfo)) goto nla_put_failure; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 32ecfa4ef47f..6d314c4e6bcb 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -75,6 +75,7 @@ struct net_port_vlans { } parent; struct rcu_head rcu; unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN]; u16 num_vlans; }; diff 
--git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 9ea358fbbf78..93dde75923f0 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -23,6 +23,15 @@ static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) v->pvid = 0; } +static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) +{ + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) + set_bit(vid, v->untagged_bitmap); +} + static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) { struct net_bridge_port *p = NULL; @@ -31,8 +40,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) int err; if (test_bit(vid, v->vlan_bitmap)) { - if (flags & BRIDGE_VLAN_INFO_PVID) - __vlan_add_pvid(v, vid); + __vlan_add_flags(v, vid, flags); return 0; } @@ -69,8 +77,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) set_bit(vid, v->vlan_bitmap); v->num_vlans++; - if (flags & BRIDGE_VLAN_INFO_PVID) - __vlan_add_pvid(v, vid); + __vlan_add_flags(v, vid, flags); return 0; @@ -86,6 +93,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) return -EINVAL; __vlan_delete_pvid(v, vid); + clear_bit(vid, v->untagged_bitmap); if (v->port_idx && vid) { struct net_device *dev = v->parent.port->dev; @@ -144,11 +152,11 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, goto out; /* At this point, we know that the frame was filtered and contains - * a valid vlan id. If the vlan id matches the pvid of current port + * a valid vlan id. If the vlan id is set in the untagged bitmap, * send untagged; otherwise, send taged. */ br_vlan_get_tag(skb, &vid); - if (vid == br_get_pvid(pv)) + if (test_bit(vid, pv->untagged_bitmap)) skb = br_vlan_untag(skb); else { /* Egress policy says "send tagged". 
If output device -- cgit v1.2.3 From 9f89ec82521957de807dc0d56264ee226bbe9b98 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 14 Feb 2013 13:32:31 +0800 Subject: bridge: use __u16 in if_bridge.h We should use "__u16" instead of "u16" in the user-space visible header. Cc: Vlad Yasevich Cc: Stephen Hemminger Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index f1bf8d34ac9f..2d70d79ce2fd 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -124,8 +124,8 @@ enum { #define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */ struct bridge_vlan_info { - u16 flags; - u16 vid; + __u16 flags; + __u16 vid; }; /* Bridge multicast database attributes -- cgit v1.2.3 From 04f39047af2a6df64b763ea5a271db24879d0391 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Fri, 8 Feb 2013 18:16:19 +0100 Subject: nl80211/cfg80211: add radar detection command/event Add new NL80211_CMD_RADAR_DETECT, which starts the Channel Availability Check (CAC). This command will also notify the usermode about events (CAC finished, CAC aborted, radar detected, NOP finished). Once radar detection has started it should continuously monitor for radars as long as the channel is active. This patch enables DFS for AP mode in nl80211/cfg80211. 
Based on original patch by Victor Goldenshtein Signed-off-by: Simon Wunderlich [remove WIPHY_FLAG_HAS_RADAR_DETECT again -- my mistake] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 46 +++++++++++++++ include/uapi/linux/nl80211.h | 61 +++++++++++++++++++ net/wireless/chan.c | 129 ++++++++++++++++++++++++++++++++++++++++- net/wireless/core.c | 3 + net/wireless/core.h | 28 +++++++++ net/wireless/mlme.c | 120 ++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 135 +++++++++++++++++++++++++++++++++++++++++-- net/wireless/nl80211.h | 7 +++ net/wireless/reg.c | 3 + net/wireless/scan.c | 10 ---- net/wireless/trace.h | 45 +++++++++++++++ 11 files changed, 570 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7e6569e1f16f..ee11a3db730b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -114,6 +114,9 @@ enum ieee80211_channel_flags { #define IEEE80211_CHAN_NO_HT40 \ (IEEE80211_CHAN_NO_HT40PLUS | IEEE80211_CHAN_NO_HT40MINUS) +#define IEEE80211_DFS_MIN_CAC_TIME_MS 60000 +#define IEEE80211_DFS_MIN_NOP_TIME_MS (30 * 60 * 1000) + /** * struct ieee80211_channel - channel definition * @@ -134,6 +137,9 @@ enum ieee80211_channel_flags { * to enable this, this is useful only on 5 GHz band. * @orig_mag: internal use * @orig_mpwr: internal use + * @dfs_state: current state of this channel. Only relevant if radar is required + * on this channel. + * @dfs_state_entered: timestamp (jiffies) when the dfs state was entered. 
*/ struct ieee80211_channel { enum ieee80211_band band; @@ -146,6 +152,8 @@ struct ieee80211_channel { bool beacon_found; u32 orig_flags; int orig_mag, orig_mpwr; + enum nl80211_dfs_state dfs_state; + unsigned long dfs_state_entered; }; /** @@ -569,6 +577,7 @@ struct cfg80211_acl_data { * @p2p_opp_ps: P2P opportunistic PS * @acl: ACL configuration used by the drivers which has support for * MAC address based access control + * @radar_required: set if radar detection is required */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -586,6 +595,7 @@ struct cfg80211_ap_settings { u8 p2p_ctwindow; bool p2p_opp_ps; const struct cfg80211_acl_data *acl; + bool radar_required; }; /** @@ -1909,6 +1919,8 @@ struct cfg80211_gtk_rekey_data { * this new list replaces the existing one. Driver has to clear its ACL * when number of MAC addresses entries is passed as 0. Drivers which * advertise the support for MAC based ACL have to implement this callback. + * + * @start_radar_detection: Start radar detection in the driver. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2132,6 +2144,10 @@ struct cfg80211_ops { int (*set_mac_acl)(struct wiphy *wiphy, struct net_device *dev, const struct cfg80211_acl_data *params); + + int (*start_radar_detection)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_chan_def *chandef); }; /* @@ -2715,6 +2731,8 @@ struct cfg80211_cached_keys; * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL * @p2p_started: true if this is a P2P Device that has been started + * @cac_started: true if DFS channel availability check has been started + * @cac_start_time: timestamp (jiffies) when the dfs state was entered. 
*/ struct wireless_dev { struct wiphy *wiphy; @@ -2766,6 +2784,9 @@ struct wireless_dev { u32 ap_unexpected_nlportid; + bool cac_started; + unsigned long cac_start_time; + #ifdef CONFIG_CFG80211_WEXT /* wext data */ struct { @@ -3754,6 +3775,31 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); +/** + * cfg80211_radar_event - radar detection event + * @wiphy: the wiphy + * @chandef: chandef for the current channel + * @gfp: context flags + * + * This function is called when a radar is detected on the current chanenl. + */ +void cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, gfp_t gfp); + +/** + * cfg80211_cac_event - Channel availability check (CAC) event + * @netdev: network device + * @event: type of event + * @gfp: context flags + * + * This function is called when a Channel availability check (CAC) is finished + * or aborted. This must be called to notify the completion of a CAC process, + * also by full-MAC drivers. + */ +void cfg80211_cac_event(struct net_device *netdev, + enum nl80211_radar_event event, gfp_t gfp); + + /** * cfg80211_cqm_pktloss_notify - notify userspace about packetloss to peer * @dev: network device diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5309b34930ea..90b7af86f392 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -603,6 +603,14 @@ * command is used in AP/P2P GO mode. Driver has to make sure to clear its * ACL list during %NL80211_CMD_STOP_AP. * + * @NL80211_CMD_RADAR_DETECT: Start a Channel availability check (CAC). Once + * a radar is detected or the channel availability scan (CAC) has finished + * or was aborted, or a radar was detected, usermode will be notified with + * this event. This command is also used to notify userspace about radars + * while operating on this channel. + * %NL80211_ATTR_RADAR_EVENT is used to inform about the type of the + * event. 
+ * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -755,6 +763,8 @@ enum nl80211_commands { NL80211_CMD_SET_MAC_ACL, + NL80211_CMD_RADAR_DETECT, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1342,6 +1352,9 @@ enum nl80211_commands { * number of MAC addresses that a device can support for MAC * ACL. * + * @NL80211_ATTR_RADAR_EVENT: Type of radar event for notification to userspace, + * contains a value of enum nl80211_radar_event (u32). + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1620,6 +1633,8 @@ enum nl80211_attrs { NL80211_ATTR_MAC_ACL_MAX, + NL80211_ATTR_RADAR_EVENT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2022,6 +2037,10 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm * (100 * dBm). + * @NL80211_FREQUENCY_ATTR_DFS_STATE: current state for DFS + * (enum nl80211_dfs_state) + * @NL80211_FREQUENCY_ATTR_DFS_TIME: time in miliseconds for how long + * this channel is in this DFS state. 
* @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -2034,6 +2053,8 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_IBSS, NL80211_FREQUENCY_ATTR_RADAR, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, + NL80211_FREQUENCY_ATTR_DFS_STATE, + NL80211_FREQUENCY_ATTR_DFS_TIME, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -3489,4 +3510,44 @@ enum nl80211_acl_policy { NL80211_ACL_POLICY_DENY_UNLESS_LISTED, }; +/** + * enum nl80211_radar_event - type of radar event for DFS operation + * + * Type of event to be used with NL80211_ATTR_RADAR_EVENT to inform userspace + * about detected radars or success of the channel available check (CAC) + * + * @NL80211_RADAR_DETECTED: A radar pattern has been detected. The channel is + * now unusable. + * @NL80211_RADAR_CAC_FINISHED: Channel Availability Check has been finished, + * the channel is now available. + * @NL80211_RADAR_CAC_ABORTED: Channel Availability Check has been aborted, no + * change to the channel status. + * @NL80211_RADAR_NOP_FINISHED: The Non-Occupancy Period for this channel is + * over, channel becomes usable. + */ +enum nl80211_radar_event { + NL80211_RADAR_DETECTED, + NL80211_RADAR_CAC_FINISHED, + NL80211_RADAR_CAC_ABORTED, + NL80211_RADAR_NOP_FINISHED, +}; + +/** + * enum nl80211_dfs_state - DFS states for channels + * + * Channel states used by the DFS code. + * + * @IEEE80211_DFS_USABLE: The channel can be used, but channel availability + * check (CAC) must be performed before using it for AP or IBSS. + * @IEEE80211_DFS_UNAVAILABLE: A radar has been detected on this channel, it + * is therefore marked as not available. + * @IEEE80211_DFS_AVAILABLE: The channel has been CAC checked and is available. 
+ */ + +enum nl80211_dfs_state { + NL80211_DFS_USABLE, + NL80211_DFS_UNAVAILABLE, + NL80211_DFS_AVAILABLE, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 396373f3ec26..810c23cfb894 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -147,6 +147,32 @@ static void chandef_primary_freqs(const struct cfg80211_chan_def *c, } } +static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) +{ + int width; + + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + width = 20; + break; + case NL80211_CHAN_WIDTH_40: + width = 40; + break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_80: + width = 80; + break; + case NL80211_CHAN_WIDTH_160: + width = 160; + break; + default: + WARN_ON_ONCE(1); + return -1; + } + return width; +} + const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2) @@ -192,6 +218,93 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, } EXPORT_SYMBOL(cfg80211_chandef_compatible); +static void cfg80211_set_chans_dfs_state(struct wiphy *wiphy, u32 center_freq, + u32 bandwidth, + enum nl80211_dfs_state dfs_state) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c || !(c->flags & IEEE80211_CHAN_RADAR)) + continue; + + c->dfs_state = dfs_state; + c->dfs_state_entered = jiffies; + } +} + +void cfg80211_set_dfs_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state) +{ + int width; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return; + + cfg80211_set_chans_dfs_state(wiphy, chandef->center_freq1, + width, dfs_state); + + if (!chandef->center_freq2) + return; + 
cfg80211_set_chans_dfs_state(wiphy, chandef->center_freq2, + width, dfs_state); +} + +static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, + u32 center_freq, + u32 bandwidth) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c) + return -EINVAL; + + if (c->flags & IEEE80211_CHAN_RADAR) + return 1; + } + return 0; +} + + +int cfg80211_chandef_dfs_required(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef) +{ + int width; + int r; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return -EINVAL; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return -EINVAL; + + r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1, + width); + if (r) + return r; + + if (!chandef->center_freq2) + return 0; + + return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2, + width); +} + static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 center_freq, u32 bandwidth, u32 prohibited_flags) @@ -203,7 +316,16 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, freq <= center_freq + bandwidth/2 - 10; freq += 20) { c = ieee80211_get_channel(wiphy, freq); - if (!c || c->flags & prohibited_flags) + if (!c) + return false; + + /* check for radar flags */ + if ((prohibited_flags & c->flags & IEEE80211_CHAN_RADAR) && + (c->dfs_state != NL80211_DFS_AVAILABLE)) + return false; + + /* check for the other flags */ + if (c->flags & prohibited_flags & ~IEEE80211_CHAN_RADAR) return false; } @@ -344,7 +466,10 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (wdev->beacon_interval) { + if (wdev->cac_started) { + *chan = wdev->channel; + *chanmode = CHAN_MODE_SHARED; + } else if (wdev->beacon_interval) { *chan = wdev->channel; *chanmode = CHAN_MODE_SHARED; } diff --git a/net/wireless/core.c 
b/net/wireless/core.c index f0a1bbe95cff..922002105062 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -324,6 +324,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results); + INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk, + cfg80211_dfs_channels_update_work); #ifdef CONFIG_CFG80211_WEXT rdev->wiphy.wext = &cfg80211_wext_handler; #endif @@ -695,6 +697,7 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->scan_done_wk); cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); + cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); if (rdev->wowlan && rdev->ops->set_wakeup) rdev_set_wakeup(rdev, false); diff --git a/net/wireless/core.h b/net/wireless/core.h index 949c9573d8d7..3aec0e429d8a 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -86,6 +86,8 @@ struct cfg80211_registered_device { struct cfg80211_wowlan *wowlan; + struct delayed_work dfs_update_channels_wk; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); @@ -431,6 +433,22 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, enum cfg80211_chan_mode chanmode, u8 radar_detect); +/** + * cfg80211_chandef_dfs_required - checks if radar detection is required + * @wiphy: the wiphy to validate against + * @chandef: the channel definition to check + * Return: 1 if radar detection is required, 0 if it is not, < 0 on error + */ +int cfg80211_chandef_dfs_required(struct wiphy *wiphy, + const struct cfg80211_chan_def *c); + +void cfg80211_set_dfs_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state); + +void cfg80211_dfs_channels_update_work(struct work_struct *work); + + static inline int 
cfg80211_can_change_interface(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, @@ -457,6 +475,16 @@ cfg80211_can_use_chan(struct cfg80211_registered_device *rdev, chan, chanmode, 0); } +static inline unsigned int elapsed_jiffies_msecs(unsigned long start) +{ + unsigned long end = jiffies; + + if (end >= start) + return jiffies_to_msecs(end - start); + + return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); +} + void cfg80211_get_chan_state(struct wireless_dev *wdev, struct ieee80211_channel **chan, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 8e6920728c43..caddca35d686 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -987,3 +987,123 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); } EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); + +void cfg80211_dfs_channels_update_work(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct cfg80211_registered_device *rdev; + struct cfg80211_chan_def chandef; + struct ieee80211_supported_band *sband; + struct ieee80211_channel *c; + struct wiphy *wiphy; + bool check_again = false; + unsigned long timeout, next_time = 0; + int bandid, i; + + delayed_work = container_of(work, struct delayed_work, work); + rdev = container_of(delayed_work, struct cfg80211_registered_device, + dfs_update_channels_wk); + wiphy = &rdev->wiphy; + + mutex_lock(&cfg80211_mutex); + for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { + sband = wiphy->bands[bandid]; + if (!sband) + continue; + + for (i = 0; i < sband->n_channels; i++) { + c = &sband->channels[i]; + + if (c->dfs_state != NL80211_DFS_UNAVAILABLE) + continue; + + timeout = c->dfs_state_entered + + IEEE80211_DFS_MIN_NOP_TIME_MS; + + if (time_after_eq(jiffies, timeout)) { + c->dfs_state = NL80211_DFS_USABLE; + cfg80211_chandef_create(&chandef, c, + NL80211_CHAN_NO_HT); + + nl80211_radar_notify(rdev, &chandef, + 
NL80211_RADAR_NOP_FINISHED, + NULL, GFP_ATOMIC); + continue; + } + + if (!check_again) + next_time = timeout - jiffies; + else + next_time = min(next_time, timeout - jiffies); + check_again = true; + } + } + mutex_unlock(&cfg80211_mutex); + + /* reschedule if there are other channels waiting to be cleared again */ + if (check_again) + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, + next_time); +} + + +void cfg80211_radar_event(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + unsigned long timeout; + + trace_cfg80211_radar_event(wiphy, chandef); + + /* only set the chandef supplied channel to unavailable, in + * case the radar is detected on only one of multiple channels + * spanned by the chandef. + */ + cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); + + timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_NOP_TIME_MS); + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, + timeout); + + nl80211_radar_notify(rdev, chandef, NL80211_RADAR_DETECTED, NULL, gfp); +} +EXPORT_SYMBOL(cfg80211_radar_event); + +void cfg80211_cac_event(struct net_device *netdev, + enum nl80211_radar_event event, gfp_t gfp) +{ + struct wireless_dev *wdev = netdev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_chan_def chandef; + unsigned long timeout; + + trace_cfg80211_cac_event(netdev, event); + + if (WARN_ON(!wdev->cac_started)) + return; + + if (WARN_ON(!wdev->channel)) + return; + + cfg80211_chandef_create(&chandef, wdev->channel, NL80211_CHAN_NO_HT); + + switch (event) { + case NL80211_RADAR_CAC_FINISHED: + timeout = wdev->cac_start_time + + msecs_to_jiffies(IEEE80211_DFS_MIN_CAC_TIME_MS); + WARN_ON(!time_after_eq(jiffies, timeout)); + cfg80211_set_dfs_state(wiphy, &chandef, NL80211_DFS_AVAILABLE); + break; + case NL80211_RADAR_CAC_ABORTED: + break; + default: + 
WARN_ON(1); + return; + } + wdev->cac_started = false; + + nl80211_radar_notify(rdev, &chandef, event, netdev, gfp); +} +EXPORT_SYMBOL(cfg80211_cac_event); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d29a461b4981..c1e18ccf4049 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -552,9 +552,16 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_RADAR) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) - goto nla_put_failure; + if (chan->flags & IEEE80211_CHAN_RADAR) { + u32 time = elapsed_jiffies_msecs(chan->dfs_state_entered); + if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) + goto nla_put_failure; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE, + chan->dfs_state)) + goto nla_put_failure; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, time)) + goto nla_put_failure; + } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, DBM_TO_MBM(chan->max_power))) @@ -2775,6 +2782,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings params; int err; + u8 radar_detect_width = 0; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) @@ -2893,9 +2901,19 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, ¶ms.chandef)) return -EINVAL; + err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef); + if (err < 0) + return err; + if (err) { + radar_detect_width = BIT(params.chandef.width); + params.radar_required = true; + } + mutex_lock(&rdev->devlist_mtx); - err = cfg80211_can_use_chan(rdev, wdev, params.chandef.chan, - CHAN_MODE_SHARED); + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + params.chandef.chan, + 
CHAN_MODE_SHARED, + radar_detect_width); mutex_unlock(&rdev->devlist_mtx); if (err) @@ -5055,6 +5073,54 @@ static int nl80211_stop_sched_scan(struct sk_buff *skb, return err; } +static int nl80211_start_radar_detection(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_chan_def chandef; + int err; + + err = nl80211_parse_chandef(rdev, info, &chandef); + if (err) + return err; + + if (wdev->cac_started) + return -EBUSY; + + err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef); + if (err < 0) + return err; + + if (err == 0) + return -EINVAL; + + if (chandef.chan->dfs_state != NL80211_DFS_USABLE) + return -EINVAL; + + if (!rdev->ops->start_radar_detection) + return -EOPNOTSUPP; + + mutex_lock(&rdev->devlist_mtx); + err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, + chandef.chan, CHAN_MODE_SHARED, + BIT(chandef.width)); + if (err) + goto err_locked; + + err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef); + if (!err) { + wdev->channel = chandef.chan; + wdev->cac_started = true; + wdev->cac_start_time = jiffies; + } +err_locked: + mutex_unlock(&rdev->devlist_mtx); + + return err; +} + static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -8305,6 +8371,14 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_RADAR_DETECT, + .doit = nl80211_start_radar_detection, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -9501,6 +9575,57 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void 
+nl80211_radar_notify(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event, + struct net_device *netdev, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_RADAR_DETECT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) + goto nla_put_failure; + + /* NOP and radar events don't need a netdev parameter */ + if (netdev) { + struct wireless_dev *wdev = netdev->ieee80211_ptr; + + if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) + goto nla_put_failure; + } + + if (nla_put_u32(msg, NL80211_ATTR_RADAR_EVENT, event)) + goto nla_put_failure; + + if (nl80211_send_chandef(msg, chandef)) + goto nla_put_failure; + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + void nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 2acba8477e9d..b061da4919e1 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -108,6 +108,13 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); + +void +nl80211_radar_notify(struct cfg80211_registered_device *rdev, + struct cfg80211_chan_def *chandef, + enum nl80211_radar_event event, + struct net_device *netdev, gfp_t gfp); + void nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 
08d3da2c70ab..e97d5b071ab6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -884,6 +884,9 @@ static void handle_channel(struct wiphy *wiphy, return; } + chan->dfs_state = NL80211_DFS_USABLE; + chan->dfs_state_entered = jiffies; + chan->beacon_found = false; chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d0fc6da2d097..f0d9b5154bab 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1210,16 +1210,6 @@ static void ieee80211_scan_add_ies(struct iw_request_info *info, } } -static inline unsigned int elapsed_jiffies_msecs(unsigned long start) -{ - unsigned long end = jiffies; - - if (end >= start) - return jiffies_to_msecs(end - start); - - return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); -} - static char * ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, struct cfg80211_internal_bss *bss, char *current_ev, diff --git a/net/wireless/trace.h b/net/wireless/trace.h index c9cafb0ea95f..b7a531380e19 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2051,6 +2051,21 @@ TRACE_EVENT(cfg80211_reg_can_beacon, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_chandef_dfs_required, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + TRACE_EVENT(cfg80211_ch_switch_notify, TP_PROTO(struct net_device *netdev, struct cfg80211_chan_def *chandef), @@ -2067,6 +2082,36 @@ TRACE_EVENT(cfg80211_ch_switch_notify, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(cfg80211_radar_event, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, chandef), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WIPHY_ASSIGN; + 
CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); + +TRACE_EVENT(cfg80211_cac_event, + TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt), + TP_ARGS(netdev, evt), + TP_STRUCT__entry( + NETDEV_ENTRY + __field(enum nl80211_radar_event, evt) + ), + TP_fast_assign( + NETDEV_ASSIGN; + __entry->evt = evt; + ), + TP_printk(NETDEV_PR_FMT ", event: %d", + NETDEV_PR_ARG, __entry->evt) +); + DECLARE_EVENT_CLASS(cfg80211_rx_evt, TP_PROTO(struct net_device *netdev, const u8 *addr), TP_ARGS(netdev, addr), -- cgit v1.2.3 From 50640f169372b9977487a328dedf13a8debedff7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Dec 2012 17:59:39 +0100 Subject: nl80211: advertise HT/VHT channel limitations When drivers or regulatory have limitations on 40, 80 or 160 MHz channels, advertise these to userspace via nl80211. Also add a new feature flag to let userspace know this is supported. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 17 +++++++++++++++++ net/wireless/core.c | 3 ++- net/wireless/nl80211.c | 12 ++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 90b7af86f392..3880f6ad7ed1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2041,6 +2041,16 @@ enum nl80211_band_attr { * (enum nl80211_dfs_state) * @NL80211_FREQUENCY_ATTR_DFS_TIME: time in miliseconds for how long * this channel is in this DFS state. 
+ * @NL80211_FREQUENCY_ATTR_NO_HT40_MINUS: HT40- isn't possible with this + * channel as the control channel + * @NL80211_FREQUENCY_ATTR_NO_HT40_PLUS: HT40+ isn't possible with this + * channel as the control channel + * @NL80211_FREQUENCY_ATTR_NO_80MHZ: any 80 MHz channel using this channel + * as the primary or any of the secondary channels isn't possible, + * this includes 80+80 channels + * @NL80211_FREQUENCY_ATTR_NO_160MHZ: any 160 MHz (but not 80+80) channel + * using this channel as the primary or any of the secondary channels + * isn't possible * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -2055,6 +2065,10 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_MAX_TX_POWER, NL80211_FREQUENCY_ATTR_DFS_STATE, NL80211_FREQUENCY_ATTR_DFS_TIME, + NL80211_FREQUENCY_ATTR_NO_HT40_MINUS, + NL80211_FREQUENCY_ATTR_NO_HT40_PLUS, + NL80211_FREQUENCY_ATTR_NO_80MHZ, + NL80211_FREQUENCY_ATTR_NO_160MHZ, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -3421,6 +3435,8 @@ enum nl80211_ap_sme_features { * Note that even for drivers that support this, the default is to add * stations in authenticated/associated state, so to add unauthenticated * stations the authenticated/associated bits have to be set in the mask. 
+ * @NL80211_FEATURE_ADVERTISE_CHAN_LIMITS: cfg80211 advertises channel limits + * (HT40, VHT 80/160 MHz) if this flag is set */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3437,6 +3453,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11, NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12, NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 13, + NL80211_FEATURE_ADVERTISE_CHAN_LIMITS = 1 << 14, }; /** diff --git a/net/wireless/core.c b/net/wireless/core.c index 922002105062..33b75b9b8efa 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -367,7 +367,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; - rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH; + rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH | + NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; return &rdev->wiphy; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c1e18ccf4049..7e40b9e82b45 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -562,6 +562,18 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, time)) goto nla_put_failure; } + if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ)) + goto nla_put_failure; if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, DBM_TO_MBM(chan->max_power))) -- cgit v1.2.3 From a50df0c4c0d97170a6c43573612acacc43e62fe7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Feb 2013 14:20:05 +0100 
Subject: cfg80211: advertise extended capabilities to userspace In many cases, userspace may need to know which of the 802.11 extended capabilities ("Extended Capabilities element") are implemented in the driver or device, to include them e.g. in beacons, assoc request/response or other frames. Add a new nl80211 attribute to hold the extended capabilities bitmap for this. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ include/uapi/linux/nl80211.h | 9 +++++++++ net/wireless/nl80211.c | 9 +++++++++ 3 files changed, 29 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8a9200f2f4a4..a229046d86d4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2480,6 +2480,14 @@ struct wiphy_wowlan_support { * * @max_acl_mac_addrs: Maximum number of MAC addresses that the device * supports for ACL. + * + * @extended_capabilities: extended capabilities supported by the driver, + * additional capabilities might be supported by userspace; these are + * the 802.11 extended capabilities ("Extended Capabilities element") + * and are in the same format as in the information element. See + * 802.11-2012 8.4.2.29 for the defined fields. + * @extended_capabilities_mask: mask of the valid values + * @extended_capabilities_len: length of the extended capabilities */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -2546,6 +2554,9 @@ struct wiphy { */ u32 probe_resp_offload; + const u8 *extended_capabilities, *extended_capabilities_mask; + u8 extended_capabilities_len; + /* If multiple wiphys are registered and you're handed e.g. 
* a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3880f6ad7ed1..1fd6e5611896 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1355,6 +1355,12 @@ enum nl80211_commands { * @NL80211_ATTR_RADAR_EVENT: Type of radar event for notification to userspace, * contains a value of enum nl80211_radar_event (u32). * + * @NL80211_ATTR_EXT_CAPA: 802.11 extended capabilities that the kernel driver + * has and handles. The format is the same as the IE contents. See + * 802.11-2012 8.4.2.29 for more information. + * @NL80211_ATTR_EXT_CAPA_MASK: Extended capabilities that the kernel driver + * has set in the %NL80211_ATTR_EXT_CAPA value, for multibit fields. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1635,6 +1641,9 @@ enum nl80211_attrs { NL80211_ATTR_RADAR_EVENT, + NL80211_ATTR_EXT_CAPA, + NL80211_ATTR_EXT_CAPA_MASK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7e40b9e82b45..1237431c3efa 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1363,6 +1363,15 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag dev->wiphy.max_acl_mac_addrs)) goto nla_put_failure; + if (dev->wiphy.extended_capabilities && + (nla_put(msg, NL80211_ATTR_EXT_CAPA, + dev->wiphy.extended_capabilities_len, + dev->wiphy.extended_capabilities) || + nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, + dev->wiphy.extended_capabilities_len, + dev->wiphy.extended_capabilities_mask))) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: -- cgit v1.2.3 From 9d62a98617298c1da288f50e84c5dd67732e79b7 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 14 Feb 2013 21:10:13 +0200 Subject: cfg80211: Pass 
station (extended) capability info to kernel The information of the peer's capabilities and extended capabilities are required for the driver to perform TDLS Peer UAPSD operations and off channel operations. This information of the peer is passed from user space using NL80211_CMD_SET_STATION command. This commit enhances the function nl80211_set_station to pass the capability information of the peer to the driver. Similarly, there may be need for capability information for other modes, so allow this to be provided with both add_station and change_station. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++++++ include/uapi/linux/nl80211.h | 10 ++++++++++ net/wireless/nl80211.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a229046d86d4..fa2612952c19 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -626,12 +626,14 @@ enum plink_actions { /** * enum station_parameters_apply_mask - station parameter values to apply * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp) + * @STATION_PARAM_APPLY_CAPABILITY: apply new capability * * Not all station parameters have in-band "no change" signalling, * for those that don't these flags will are used. 
*/ enum station_parameters_apply_mask { STATION_PARAM_APPLY_UAPSD = BIT(0), + STATION_PARAM_APPLY_CAPABILITY = BIT(1), }; /** @@ -662,6 +664,9 @@ enum station_parameters_apply_mask { * see &enum station_parameters_apply_mask * @local_pm: local link-specific mesh power save mode (no change when set * to unknown) + * @capability: station capability + * @ext_capab: extended capabilities of the station + * @ext_capab_len: number of extended capabilities */ struct station_parameters { u8 *supported_rates; @@ -678,6 +683,9 @@ struct station_parameters { u8 uapsd_queues; u8 max_sp; enum nl80211_mesh_power_mode local_pm; + u16 capability; + u8 *ext_capab; + u8 ext_capab_len; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1fd6e5611896..f7c35ca01efc 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1361,6 +1361,13 @@ enum nl80211_commands { * @NL80211_ATTR_EXT_CAPA_MASK: Extended capabilities that the kernel driver * has set in the %NL80211_ATTR_EXT_CAPA value, for multibit fields. * + * @NL80211_ATTR_STA_CAPABILITY: Station capabilities (u16) are advertised to + * the driver, e.g., to enable TDLS power save (PU-APSD). + * + * @NL80211_ATTR_STA_EXT_CAPABILITY: Station extended capabilities are + * advertised to the driver, e.g., to enable TDLS off channel operations + * and PU-APSD. 
+ * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1644,6 +1651,9 @@ enum nl80211_attrs { NL80211_ATTR_EXT_CAPA, NL80211_ATTR_EXT_CAPA_MASK, + NL80211_ATTR_STA_CAPABILITY, + NL80211_ATTR_STA_EXT_CAPABILITY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1237431c3efa..be9f2b5a403f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -368,6 +368,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, [NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 }, [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, + [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, + [NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, }, }; /* policy for the key attributes */ @@ -3435,6 +3437,19 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); } + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { + params.capability = + nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]); + params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY; + } + + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) { + params.ext_capab = + nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + params.ext_capab_len = + nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + } + if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL] || info->attrs[NL80211_ATTR_HT_CAPABILITY]) return -EINVAL; @@ -3505,6 +3520,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) /* reject other things that can't change */ if (params.supported_rates) return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) + return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) + return -EINVAL; /* must be last in here for error handling */ params.vlan = get_vlan(info, 
rdev); @@ -3537,6 +3556,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (params.supported_rates) return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) + return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) + return -EINVAL; /* * No special handling for TDLS here -- the userspace * mesh code doesn't have this bug. @@ -3601,6 +3624,19 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!params.aid || params.aid > IEEE80211_MAX_AID) return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { + params.capability = + nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]); + params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY; + } + + if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) { + params.ext_capab = + nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + params.ext_capab_len = + nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); + } + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); -- cgit v1.2.3 From 932dd97c5fef091dd6f605fb1d40143d67d91e09 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Feb 2013 11:56:13 +0100 Subject: nl80211: renumber NL80211_FEATURE_FULL_AP_CLIENT_STATE Adding the flag to mac80211 already without testing was clearly a mistake, one that we now pay for by having to reserve bit 13 forever. The problem is cfg80211 doesn't allow capability/rate changes for station entries that were added unassociated, so the station entries cannot be set up properly when marked associated. Change the NL80211_FEATURE_FULL_AP_CLIENT_STATE value to make it clear to userspace implementations that all current kernels don't actually support it, even though the previous bit is set, and of course also remove the flag from mac80211 until we test and fix the issues. 
Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 ++- net/mac80211/main.c | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f7c35ca01efc..c46bb016f4e4 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3471,8 +3471,9 @@ enum nl80211_feature_flags { NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10, NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11, NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12, - NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 13, + /* bit 13 is reserved */ NL80211_FEATURE_ADVERTISE_CHAN_LIMITS = 1 << 14, + NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 15, }; /** diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 035344bc6b9c..f9747689d604 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -572,8 +572,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | NL80211_FEATURE_SAE | NL80211_FEATURE_HT_IBSS | - NL80211_FEATURE_VIF_TXPOWER | - NL80211_FEATURE_FULL_AP_CLIENT_STATE; + NL80211_FEATURE_VIF_TXPOWER; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | -- cgit v1.2.3 From 84237a826b261de7ddd3d09ee53ee68cb4138937 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 18 Feb 2013 10:11:13 -0700 Subject: vfio-pci: Add support for VGA region access PCI defines display class VGA regions at I/O port address 0x3b0, 0x3c0 and MMIO address 0xa0000. As these are non-overlapping, we can ignore the I/O port vs MMIO difference and expose them both in a single region. We make use of the VGA arbiter around each access to configure chipset access as necessary. 
Signed-off-by: Alex Williamson --- drivers/vfio/pci/Kconfig | 10 ++++++ drivers/vfio/pci/vfio_pci.c | 18 +++++++++++ drivers/vfio/pci/vfio_pci_private.h | 4 +++ drivers/vfio/pci/vfio_pci_rdwr.c | 61 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 9 ++++++ 5 files changed, 102 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 5980758563eb..e84300b268b6 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -6,3 +6,13 @@ config VFIO_PCI use of PCI drivers using the VFIO framework. If you don't know what to do here, say N. + +config VFIO_PCI_VGA + bool "VFIO PCI support for VGA devices" + depends on VFIO_PCI && X86 && VGA_ARB && EXPERIMENTAL + help + Support for VGA extension to VFIO PCI. This exposes an additional + region on VGA devices for accessing legacy VGA addresses used by + BIOS and generic video drivers. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index bb8c8c2be960..8189cb6a86af 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -84,6 +84,11 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } else vdev->msix_bar = 0xFF; +#ifdef CONFIG_VFIO_PCI_VGA + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + vdev->has_vga = true; +#endif + return 0; } @@ -285,6 +290,16 @@ static long vfio_pci_ioctl(void *device_data, info.flags = VFIO_REGION_INFO_FLAG_READ; break; } + case VFIO_PCI_VGA_REGION_INDEX: + if (!vdev->has_vga) + return -EINVAL; + + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = 0xc0000; + info.flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE; + + break; default: return -EINVAL; } @@ -386,6 +401,9 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf, case VFIO_PCI_BAR0_REGION_INDEX ... 
VFIO_PCI_BAR5_REGION_INDEX: return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite); + + case VFIO_PCI_VGA_REGION_INDEX: + return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite); } return -EINVAL; diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 00d19b953ce4..d7e55d03f49e 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -53,6 +53,7 @@ struct vfio_pci_device { bool reset_works; bool extended_caps; bool bardirty; + bool has_vga; struct pci_saved_state *pci_saved_state; atomic_t refcnt; }; @@ -77,6 +78,9 @@ extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); +extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); + extern int vfio_pci_init_perm_bits(void); extern void vfio_pci_uninit_perm_bits(void); diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index e9d78eb91ed7..210db24d2204 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "vfio_pci_private.h" @@ -175,3 +176,63 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, return done; } + +ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + int ret; + loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK; + void __iomem *iomem = NULL; + unsigned int rsrc; + bool is_ioport; + ssize_t done; + + if (!vdev->has_vga) + return -EINVAL; + + switch (pos) { + case 0xa0000 ... 0xbffff: + count = min(count, (size_t)(0xc0000 - pos)); + iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1); + off = pos - 0xa0000; + rsrc = VGA_RSRC_LEGACY_MEM; + is_ioport = false; + break; + case 0x3b0 ... 
0x3bb: + count = min(count, (size_t)(0x3bc - pos)); + iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1); + off = pos - 0x3b0; + rsrc = VGA_RSRC_LEGACY_IO; + is_ioport = true; + break; + case 0x3c0 ... 0x3df: + count = min(count, (size_t)(0x3e0 - pos)); + iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1); + off = pos - 0x3c0; + rsrc = VGA_RSRC_LEGACY_IO; + is_ioport = true; + break; + default: + return -EINVAL; + } + + if (!iomem) + return -ENOMEM; + + ret = vga_get_interruptible(vdev->pdev, rsrc); + if (ret) { + is_ioport ? ioport_unmap(iomem) : iounmap(iomem); + return ret; + } + + done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite); + + vga_put(vdev->pdev, rsrc); + + is_ioport ? ioport_unmap(iomem) : iounmap(iomem); + + if (done >= 0) + *ppos += done; + + return done; +} diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 4758d1bfcf41..4f41f309911e 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -303,6 +303,15 @@ enum { VFIO_PCI_BAR5_REGION_INDEX, VFIO_PCI_ROM_REGION_INDEX, VFIO_PCI_CONFIG_REGION_INDEX, + /* + * Expose VGA regions defined for PCI base class 03, subclass 00. + * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df + * as well as the MMIO range 0xa0000 to 0xbffff. Each implemented + * range is found at it's identity mapped offset from the region + * offset, for example 0x3b0 is region_info.offset + 0x3b0. Areas + * between described ranges are unimplemented. + */ + VFIO_PCI_VGA_REGION_INDEX, VFIO_PCI_NUM_REGIONS }; -- cgit v1.2.3 From 5b8ca5344f82e594e21c9fbbdf3b13507ecdb5a2 Mon Sep 17 00:00:00 2001 From: Andy King Date: Mon, 18 Feb 2013 06:04:12 +0000 Subject: VSOCK: Remove hypervisor-only socket option Remove hypervisor-only socket option. Reported-by: Gerd Hoffmann Acked-by: Dmitry Torokhov Signed-off-by: Andy King Signed-off-by: David S. 
Miller --- include/uapi/linux/vm_sockets.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index f7f2e99dec84..df91301847ec 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -52,14 +52,6 @@ #define SO_VM_SOCKETS_PEER_HOST_VM_ID 3 -/* Option name for socket's service label. Use as the option name in - * setsockopt(3) or getsockopt(3) to set or get the service label for a socket. - * The service label is a C-style NUL-terminated string. Only available for - * hypervisor endpoints. - */ - -#define SO_VM_SOCKETS_SERVICE_LABEL 4 - /* Option name for determining if a socket is trusted. Use as the option name * in getsockopt(3) to determine if a socket is trusted. The value is a * signed integer. -- cgit v1.2.3 From 55e301fd57a6239ec14b91a1cf2e70b3dd135194 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Tue, 29 Jan 2013 06:04:50 +0000 Subject: Btrfs: move fs/btrfs/ioctl.h to include/uapi/linux/btrfs.h The header file will then be installed under /usr/include/linux so that userspace applications can refer to Btrfs ioctls by name and use the same structs used internally in the kernel. 
Signed-off-by: Filipe Brandenburger Signed-off-by: Josef Bacik --- fs/btrfs/backref.h | 2 +- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 2 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/ioctl.h | 502 -------------------------------------------- fs/btrfs/qgroup.c | 2 +- fs/btrfs/super.c | 2 +- fs/btrfs/volumes.h | 2 +- include/linux/btrfs.h | 6 + include/uapi/linux/Kbuild | 1 + include/uapi/linux/btrfs.h | 503 +++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 518 insertions(+), 510 deletions(-) delete mode 100644 fs/btrfs/ioctl.h create mode 100644 include/linux/btrfs.h create mode 100644 include/uapi/linux/btrfs.h (limited to 'include/uapi/linux') diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index d61feca79455..310a7f6d09b1 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -19,7 +19,7 @@ #ifndef __BTRFS_BACKREF__ #define __BTRFS_BACKREF__ -#include "ioctl.h" +#include #include "ulist.h" #include "extent_io.h" diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 541ce9a9949e..69321013683c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -31,10 +31,10 @@ #include #include #include +#include #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" -#include "ioctl.h" struct btrfs_trans_handle; struct btrfs_transaction; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 083abca56055..13c78ea3ebce 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -30,11 +30,11 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "tree-log.h" #include "locking.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 60ec7589900c..fc8aa8bf80a1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -39,12 +39,12 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include 
"print-tree.h" #include "ordered-data.h" #include "xattr.h" diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1b554b47e814..96ecefc1724f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -42,12 +42,12 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "volumes.h" #include "locking.h" diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h deleted file mode 100644 index dabca9cc8c2e..000000000000 --- a/fs/btrfs/ioctl.h +++ /dev/null @@ -1,502 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- */ - -#ifndef __IOCTL_ -#define __IOCTL_ -#include - -#define BTRFS_IOCTL_MAGIC 0x94 -#define BTRFS_VOL_NAME_MAX 255 - -/* this should be 4k */ -#define BTRFS_PATH_NAME_MAX 4087 -struct btrfs_ioctl_vol_args { - __s64 fd; - char name[BTRFS_PATH_NAME_MAX + 1]; -}; - -#define BTRFS_DEVICE_PATH_NAME_MAX 1024 - -#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) -#define BTRFS_SUBVOL_RDONLY (1ULL << 1) -#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) -#define BTRFS_FSID_SIZE 16 -#define BTRFS_UUID_SIZE 16 - -#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) - -struct btrfs_qgroup_limit { - __u64 flags; - __u64 max_rfer; - __u64 max_excl; - __u64 rsv_rfer; - __u64 rsv_excl; -}; - -struct btrfs_qgroup_inherit { - __u64 flags; - __u64 num_qgroups; - __u64 num_ref_copies; - __u64 num_excl_copies; - struct btrfs_qgroup_limit lim; - __u64 qgroups[0]; -}; - -struct btrfs_ioctl_qgroup_limit_args { - __u64 qgroupid; - struct btrfs_qgroup_limit lim; -}; - -#define BTRFS_SUBVOL_NAME_MAX 4039 -struct btrfs_ioctl_vol_args_v2 { - __s64 fd; - __u64 transid; - __u64 flags; - union { - struct { - __u64 size; - struct btrfs_qgroup_inherit __user *qgroup_inherit; - }; - __u64 unused[4]; - }; - char name[BTRFS_SUBVOL_NAME_MAX + 1]; -}; - -/* - * structure to report errors and progress to userspace, either as a - * result of a finished scrub, a canceled scrub or a progress inquiry - */ -struct btrfs_scrub_progress { - __u64 data_extents_scrubbed; /* # of data extents scrubbed */ - __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */ - __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */ - __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ - __u64 read_errors; /* # of read errors encountered (EIO) */ - __u64 csum_errors; /* # of failed csum checks */ - __u64 verify_errors; /* # of occurences, where the metadata - * of a tree block did not match the - * expected values, like generation or - * logical */ - __u64 no_csum; /* # of 4k data block for which no csum - * is 
present, probably the result of - * data written with nodatasum */ - __u64 csum_discards; /* # of csum for which no data was found - * in the extent tree. */ - __u64 super_errors; /* # of bad super blocks encountered */ - __u64 malloc_errors; /* # of internal kmalloc errors. These - * will likely cause an incomplete - * scrub */ - __u64 uncorrectable_errors; /* # of errors where either no intact - * copy was found or the writeback - * failed */ - __u64 corrected_errors; /* # of errors corrected */ - __u64 last_physical; /* last physical address scrubbed. In - * case a scrub was aborted, this can - * be used to restart the scrub */ - __u64 unverified_errors; /* # of occurences where a read for a - * full (64k) bio failed, but the re- - * check succeeded for each 4k piece. - * Intermittent error. */ -}; - -#define BTRFS_SCRUB_READONLY 1 -struct btrfs_ioctl_scrub_args { - __u64 devid; /* in */ - __u64 start; /* in */ - __u64 end; /* in */ - __u64 flags; /* in */ - struct btrfs_scrub_progress progress; /* out */ - /* pad to 1k */ - __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; -}; - -#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 -#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 -struct btrfs_ioctl_dev_replace_start_params { - __u64 srcdevid; /* in, if 0, use srcdev_name instead */ - __u64 cont_reading_from_srcdev_mode; /* in, see #define - * above */ - __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ - __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ -}; - -#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 -#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 -struct btrfs_ioctl_dev_replace_status_params { - __u64 replace_state; /* out, see #define above */ - __u64 progress_1000; /* out, 0 <= x <= 1000 */ - __u64 time_started; /* out, 
seconds since 1-Jan-1970 */ - __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ - __u64 num_write_errors; /* out */ - __u64 num_uncorrectable_read_errors; /* out */ -}; - -#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 -#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 -#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 -#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 -#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 -#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 -struct btrfs_ioctl_dev_replace_args { - __u64 cmd; /* in */ - __u64 result; /* out */ - - union { - struct btrfs_ioctl_dev_replace_start_params start; - struct btrfs_ioctl_dev_replace_status_params status; - }; /* in/out */ - - __u64 spare[64]; -}; - -struct btrfs_ioctl_dev_info_args { - __u64 devid; /* in/out */ - __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ - __u64 bytes_used; /* out */ - __u64 total_bytes; /* out */ - __u64 unused[379]; /* pad to 4k */ - __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ -}; - -struct btrfs_ioctl_fs_info_args { - __u64 max_id; /* out */ - __u64 num_devices; /* out */ - __u8 fsid[BTRFS_FSID_SIZE]; /* out */ - __u64 reserved[124]; /* pad to 1k */ -}; - -/* balance control ioctl modes */ -#define BTRFS_BALANCE_CTL_PAUSE 1 -#define BTRFS_BALANCE_CTL_CANCEL 2 - -/* - * this is packed, because it should be exactly the same as its disk - * byte order counterpart (struct btrfs_disk_balance_args) - */ -struct btrfs_balance_args { - __u64 profiles; - __u64 usage; - __u64 devid; - __u64 pstart; - __u64 pend; - __u64 vstart; - __u64 vend; - - __u64 target; - - __u64 flags; - - __u64 unused[8]; -} __attribute__ ((__packed__)); - -/* report balance progress to userspace */ -struct btrfs_balance_progress { - __u64 expected; /* estimated # of chunks that will be - * relocated to fulfill the request */ - __u64 considered; /* # of chunks we have considered so far */ - __u64 completed; /* # of chunks relocated so far */ -}; - -#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0) -#define 
BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1) -#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2) - -struct btrfs_ioctl_balance_args { - __u64 flags; /* in/out */ - __u64 state; /* out */ - - struct btrfs_balance_args data; /* in/out */ - struct btrfs_balance_args meta; /* in/out */ - struct btrfs_balance_args sys; /* in/out */ - - struct btrfs_balance_progress stat; /* out */ - - __u64 unused[72]; /* pad to 1k */ -}; - -#define BTRFS_INO_LOOKUP_PATH_MAX 4080 -struct btrfs_ioctl_ino_lookup_args { - __u64 treeid; - __u64 objectid; - char name[BTRFS_INO_LOOKUP_PATH_MAX]; -}; - -struct btrfs_ioctl_search_key { - /* which root are we searching. 0 is the tree of tree roots */ - __u64 tree_id; - - /* keys returned will be >= min and <= max */ - __u64 min_objectid; - __u64 max_objectid; - - /* keys returned will be >= min and <= max */ - __u64 min_offset; - __u64 max_offset; - - /* max and min transids to search for */ - __u64 min_transid; - __u64 max_transid; - - /* keys returned will be >= min and <= max */ - __u32 min_type; - __u32 max_type; - - /* - * how many items did userland ask for, and how many are we - * returning - */ - __u32 nr_items; - - /* align to 64 bits */ - __u32 unused; - - /* some extra for later */ - __u64 unused1; - __u64 unused2; - __u64 unused3; - __u64 unused4; -}; - -struct btrfs_ioctl_search_header { - __u64 transid; - __u64 objectid; - __u64 offset; - __u32 type; - __u32 len; -}; - -#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key)) -/* - * the buf is an array of search headers where - * each header is followed by the actual item - * the type field is expanded to 32 bits for alignment - */ -struct btrfs_ioctl_search_args { - struct btrfs_ioctl_search_key key; - char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; -}; - -struct btrfs_ioctl_clone_range_args { - __s64 src_fd; - __u64 src_offset, src_length; - __u64 dest_offset; -}; - -/* flags for the defrag range ioctl */ -#define BTRFS_DEFRAG_RANGE_COMPRESS 1 -#define 
BTRFS_DEFRAG_RANGE_START_IO 2 - -struct btrfs_ioctl_space_info { - __u64 flags; - __u64 total_bytes; - __u64 used_bytes; -}; - -struct btrfs_ioctl_space_args { - __u64 space_slots; - __u64 total_spaces; - struct btrfs_ioctl_space_info spaces[0]; -}; - -struct btrfs_data_container { - __u32 bytes_left; /* out -- bytes not needed to deliver output */ - __u32 bytes_missing; /* out -- additional bytes needed for result */ - __u32 elem_cnt; /* out */ - __u32 elem_missed; /* out */ - __u64 val[0]; /* out */ -}; - -struct btrfs_ioctl_ino_path_args { - __u64 inum; /* in */ - __u64 size; /* in */ - __u64 reserved[4]; - /* struct btrfs_data_container *fspath; out */ - __u64 fspath; /* out */ -}; - -struct btrfs_ioctl_logical_ino_args { - __u64 logical; /* in */ - __u64 size; /* in */ - __u64 reserved[4]; - /* struct btrfs_data_container *inodes; out */ - __u64 inodes; -}; - -enum btrfs_dev_stat_values { - /* disk I/O failure stats */ - BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ - BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ - BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ - - /* stats for indirect indications for I/O failures */ - BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or - * contents is illegal: this is an - * indication that the block was damaged - * during read or write, or written to - * wrong location or read from wrong - * location */ - BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not - * been written */ - - BTRFS_DEV_STAT_VALUES_MAX -}; - -/* Reset statistics after reading; needs SYS_ADMIN capability */ -#define BTRFS_DEV_STATS_RESET (1ULL << 0) - -struct btrfs_ioctl_get_dev_stats { - __u64 devid; /* in */ - __u64 nr_items; /* in/out */ - __u64 flags; /* in/out */ - - /* out values: */ - __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; - - __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ -}; - -#define BTRFS_QUOTA_CTL_ENABLE 1 -#define 
BTRFS_QUOTA_CTL_DISABLE 2 -#define BTRFS_QUOTA_CTL_RESCAN 3 -struct btrfs_ioctl_quota_ctl_args { - __u64 cmd; - __u64 status; -}; - -struct btrfs_ioctl_qgroup_assign_args { - __u64 assign; - __u64 src; - __u64 dst; -}; - -struct btrfs_ioctl_qgroup_create_args { - __u64 create; - __u64 qgroupid; -}; -struct btrfs_ioctl_timespec { - __u64 sec; - __u32 nsec; -}; - -struct btrfs_ioctl_received_subvol_args { - char uuid[BTRFS_UUID_SIZE]; /* in */ - __u64 stransid; /* in */ - __u64 rtransid; /* out */ - struct btrfs_ioctl_timespec stime; /* in */ - struct btrfs_ioctl_timespec rtime; /* out */ - __u64 flags; /* in */ - __u64 reserved[16]; /* in */ -}; - -struct btrfs_ioctl_send_args { - __s64 send_fd; /* in */ - __u64 clone_sources_count; /* in */ - __u64 __user *clone_sources; /* in */ - __u64 parent_root; /* in */ - __u64 flags; /* in */ - __u64 reserved[4]; /* in */ -}; - -#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ - struct btrfs_ioctl_vol_args) -/* trans start and trans end are dangerous, and only for - * use by applications that know how to avoid the - * resulting deadlocks - */ -#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) -#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) -#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) - -#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) -#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ - struct btrfs_ioctl_vol_args) - -#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ - struct btrfs_ioctl_clone_range_args) - -#define BTRFS_IOC_SUBVOL_CREATE 
_IOW(BTRFS_IOCTL_MAGIC, 14, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \ - struct btrfs_ioctl_defrag_range_args) -#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ - struct btrfs_ioctl_search_args) -#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ - struct btrfs_ioctl_ino_lookup_args) -#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) -#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ - struct btrfs_ioctl_space_args) -#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) -#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) -#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ - struct btrfs_ioctl_vol_args_v2) -#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ - struct btrfs_ioctl_vol_args_v2) -#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) -#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) -#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ - struct btrfs_ioctl_scrub_args) -#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) -#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \ - struct btrfs_ioctl_scrub_args) -#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \ - struct btrfs_ioctl_dev_info_args) -#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ - struct btrfs_ioctl_fs_info_args) -#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ - struct btrfs_ioctl_balance_args) -#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int) -#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \ - struct btrfs_ioctl_balance_args) -#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ - struct btrfs_ioctl_ino_path_args) -#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ - struct btrfs_ioctl_ino_path_args) -#define 
BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ - struct btrfs_ioctl_received_subvol_args) -#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) -#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ - struct btrfs_ioctl_vol_args) -#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ - struct btrfs_ioctl_quota_ctl_args) -#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ - struct btrfs_ioctl_qgroup_assign_args) -#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ - struct btrfs_ioctl_qgroup_create_args) -#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ - struct btrfs_ioctl_qgroup_limit_args) -#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ - struct btrfs_ioctl_get_dev_stats) -#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ - struct btrfs_ioctl_dev_replace_args) - -#endif diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a5c856234323..a0d6368249fa 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -23,13 +23,13 @@ #include #include #include +#include #include "ctree.h" #include "transaction.h" #include "disk-io.h" #include "locking.h" #include "ulist.h" -#include "ioctl.h" #include "backref.h" /* TODO XXX FIXME diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 67b373bf3ff9..6846ededfe95 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -41,13 +41,13 @@ #include #include #include +#include #include "compat.h" #include "delayed-inode.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ioctl.h" #include "print-tree.h" #include "xattr.h" #include "volumes.h" diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d3c3939ac751..12bb84166a5f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -21,8 +21,8 @@ #include #include +#include #include "async-thread.h" -#include "ioctl.h" #define BTRFS_STRIPE_LEN (64 * 1024) diff --git a/include/linux/btrfs.h 
b/include/linux/btrfs.h new file mode 100644 index 000000000000..22d799147db2 --- /dev/null +++ b/include/linux/btrfs.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_BTRFS_H +#define _LINUX_BTRFS_H + +#include + +#endif /* _LINUX_BTRFS_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 19e765fbfef7..896ee1247294 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -68,6 +68,7 @@ header-y += blkpg.h header-y += blktrace_api.h header-y += bpqether.h header-y += bsg.h +header-y += btrfs.h header-y += can.h header-y += capability.h header-y += capi.h diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h new file mode 100644 index 000000000000..cffbb582dd90 --- /dev/null +++ b/include/uapi/linux/btrfs.h @@ -0,0 +1,503 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#ifndef _UAPI_LINUX_BTRFS_H +#define _UAPI_LINUX_BTRFS_H +#include +#include + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_VOL_NAME_MAX 255 + +/* this should be 4k */ +#define BTRFS_PATH_NAME_MAX 4087 +struct btrfs_ioctl_vol_args { + __s64 fd; + char name[BTRFS_PATH_NAME_MAX + 1]; +}; + +#define BTRFS_DEVICE_PATH_NAME_MAX 1024 + +#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +#define BTRFS_SUBVOL_RDONLY (1ULL << 1) +#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) +#define BTRFS_FSID_SIZE 16 +#define BTRFS_UUID_SIZE 16 + +#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) + +struct btrfs_qgroup_limit { + __u64 flags; + __u64 max_rfer; + __u64 max_excl; + __u64 rsv_rfer; + __u64 rsv_excl; +}; + +struct btrfs_qgroup_inherit { + __u64 flags; + __u64 num_qgroups; + __u64 num_ref_copies; + __u64 num_excl_copies; + struct btrfs_qgroup_limit lim; + __u64 qgroups[0]; +}; + +struct btrfs_ioctl_qgroup_limit_args { + __u64 qgroupid; + struct btrfs_qgroup_limit lim; +}; + +#define BTRFS_SUBVOL_NAME_MAX 4039 +struct btrfs_ioctl_vol_args_v2 { + __s64 fd; + __u64 transid; + __u64 flags; + union { + struct { + __u64 size; + struct btrfs_qgroup_inherit __user *qgroup_inherit; + }; + __u64 unused[4]; + }; + char name[BTRFS_SUBVOL_NAME_MAX + 1]; +}; + +/* + * structure to report errors and progress to userspace, either as a + * result of a finished scrub, a canceled scrub or a progress inquiry + */ +struct btrfs_scrub_progress { + __u64 data_extents_scrubbed; /* # of data extents scrubbed */ + __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */ + __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */ + __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */ + __u64 read_errors; /* # of read errors encountered (EIO) */ + __u64 csum_errors; /* # of failed csum checks */ + __u64 verify_errors; /* # of occurences, where the metadata + * of a tree block did not match the + * expected values, like generation or + * logical */ + __u64 no_csum; /* # of 4k data block 
for which no csum + * is present, probably the result of + * data written with nodatasum */ + __u64 csum_discards; /* # of csum for which no data was found + * in the extent tree. */ + __u64 super_errors; /* # of bad super blocks encountered */ + __u64 malloc_errors; /* # of internal kmalloc errors. These + * will likely cause an incomplete + * scrub */ + __u64 uncorrectable_errors; /* # of errors where either no intact + * copy was found or the writeback + * failed */ + __u64 corrected_errors; /* # of errors corrected */ + __u64 last_physical; /* last physical address scrubbed. In + * case a scrub was aborted, this can + * be used to restart the scrub */ + __u64 unverified_errors; /* # of occurences where a read for a + * full (64k) bio failed, but the re- + * check succeeded for each 4k piece. + * Intermittent error. */ +}; + +#define BTRFS_SCRUB_READONLY 1 +struct btrfs_ioctl_scrub_args { + __u64 devid; /* in */ + __u64 start; /* in */ + __u64 end; /* in */ + __u64 flags; /* in */ + struct btrfs_scrub_progress progress; /* out */ + /* pad to 1k */ + __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 +#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 +struct btrfs_ioctl_dev_replace_start_params { + __u64 srcdevid; /* in, if 0, use srcdev_name instead */ + __u64 cont_reading_from_srcdev_mode; /* in, see #define + * above */ + __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ + __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 +#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 +struct btrfs_ioctl_dev_replace_status_params { + __u64 replace_state; /* out, see #define above */ + __u64 progress_1000; /* out, 0 <= x <= 1000 */ + __u64 
time_started; /* out, seconds since 1-Jan-1970 */ + __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ + __u64 num_write_errors; /* out */ + __u64 num_uncorrectable_read_errors; /* out */ +}; + +#define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 +#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 +#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 +struct btrfs_ioctl_dev_replace_args { + __u64 cmd; /* in */ + __u64 result; /* out */ + + union { + struct btrfs_ioctl_dev_replace_start_params start; + struct btrfs_ioctl_dev_replace_status_params status; + }; /* in/out */ + + __u64 spare[64]; +}; + +struct btrfs_ioctl_dev_info_args { + __u64 devid; /* in/out */ + __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ + __u64 bytes_used; /* out */ + __u64 total_bytes; /* out */ + __u64 unused[379]; /* pad to 4k */ + __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */ +}; + +struct btrfs_ioctl_fs_info_args { + __u64 max_id; /* out */ + __u64 num_devices; /* out */ + __u8 fsid[BTRFS_FSID_SIZE]; /* out */ + __u64 reserved[124]; /* pad to 1k */ +}; + +/* balance control ioctl modes */ +#define BTRFS_BALANCE_CTL_PAUSE 1 +#define BTRFS_BALANCE_CTL_CANCEL 2 + +/* + * this is packed, because it should be exactly the same as its disk + * byte order counterpart (struct btrfs_disk_balance_args) + */ +struct btrfs_balance_args { + __u64 profiles; + __u64 usage; + __u64 devid; + __u64 pstart; + __u64 pend; + __u64 vstart; + __u64 vend; + + __u64 target; + + __u64 flags; + + __u64 unused[8]; +} __attribute__ ((__packed__)); + +/* report balance progress to userspace */ +struct btrfs_balance_progress { + __u64 expected; /* estimated # of chunks that will be + * relocated to fulfill the request */ + __u64 considered; /* # of chunks we have considered so far */ + __u64 completed; /* # of chunks relocated so far */ +}; + +#define BTRFS_BALANCE_STATE_RUNNING 
(1ULL << 0) +#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1) +#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2) + +struct btrfs_ioctl_balance_args { + __u64 flags; /* in/out */ + __u64 state; /* out */ + + struct btrfs_balance_args data; /* in/out */ + struct btrfs_balance_args meta; /* in/out */ + struct btrfs_balance_args sys; /* in/out */ + + struct btrfs_balance_progress stat; /* out */ + + __u64 unused[72]; /* pad to 1k */ +}; + +#define BTRFS_INO_LOOKUP_PATH_MAX 4080 +struct btrfs_ioctl_ino_lookup_args { + __u64 treeid; + __u64 objectid; + char name[BTRFS_INO_LOOKUP_PATH_MAX]; +}; + +struct btrfs_ioctl_search_key { + /* which root are we searching. 0 is the tree of tree roots */ + __u64 tree_id; + + /* keys returned will be >= min and <= max */ + __u64 min_objectid; + __u64 max_objectid; + + /* keys returned will be >= min and <= max */ + __u64 min_offset; + __u64 max_offset; + + /* max and min transids to search for */ + __u64 min_transid; + __u64 max_transid; + + /* keys returned will be >= min and <= max */ + __u32 min_type; + __u32 max_type; + + /* + * how many items did userland ask for, and how many are we + * returning + */ + __u32 nr_items; + + /* align to 64 bits */ + __u32 unused; + + /* some extra for later */ + __u64 unused1; + __u64 unused2; + __u64 unused3; + __u64 unused4; +}; + +struct btrfs_ioctl_search_header { + __u64 transid; + __u64 objectid; + __u64 offset; + __u32 type; + __u32 len; +}; + +#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key)) +/* + * the buf is an array of search headers where + * each header is followed by the actual item + * the type field is expanded to 32 bits for alignment + */ +struct btrfs_ioctl_search_args { + struct btrfs_ioctl_search_key key; + char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; +}; + +struct btrfs_ioctl_clone_range_args { + __s64 src_fd; + __u64 src_offset, src_length; + __u64 dest_offset; +}; + +/* flags for the defrag range ioctl */ +#define BTRFS_DEFRAG_RANGE_COMPRESS 1 
+#define BTRFS_DEFRAG_RANGE_START_IO 2 + +struct btrfs_ioctl_space_info { + __u64 flags; + __u64 total_bytes; + __u64 used_bytes; +}; + +struct btrfs_ioctl_space_args { + __u64 space_slots; + __u64 total_spaces; + struct btrfs_ioctl_space_info spaces[0]; +}; + +struct btrfs_data_container { + __u32 bytes_left; /* out -- bytes not needed to deliver output */ + __u32 bytes_missing; /* out -- additional bytes needed for result */ + __u32 elem_cnt; /* out */ + __u32 elem_missed; /* out */ + __u64 val[0]; /* out */ +}; + +struct btrfs_ioctl_ino_path_args { + __u64 inum; /* in */ + __u64 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *fspath; out */ + __u64 fspath; /* out */ +}; + +struct btrfs_ioctl_logical_ino_args { + __u64 logical; /* in */ + __u64 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *inodes; out */ + __u64 inodes; +}; + +enum btrfs_dev_stat_values { + /* disk I/O failure stats */ + BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ + BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ + + /* stats for indirect indications for I/O failures */ + BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or + * contents is illegal: this is an + * indication that the block was damaged + * during read or write, or written to + * wrong location or read from wrong + * location */ + BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not + * been written */ + + BTRFS_DEV_STAT_VALUES_MAX +}; + +/* Reset statistics after reading; needs SYS_ADMIN capability */ +#define BTRFS_DEV_STATS_RESET (1ULL << 0) + +struct btrfs_ioctl_get_dev_stats { + __u64 devid; /* in */ + __u64 nr_items; /* in/out */ + __u64 flags; /* in/out */ + + /* out values: */ + __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; + + __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ +}; + +#define BTRFS_QUOTA_CTL_ENABLE 1 +#define 
BTRFS_QUOTA_CTL_DISABLE 2 +#define BTRFS_QUOTA_CTL_RESCAN 3 +struct btrfs_ioctl_quota_ctl_args { + __u64 cmd; + __u64 status; +}; + +struct btrfs_ioctl_qgroup_assign_args { + __u64 assign; + __u64 src; + __u64 dst; +}; + +struct btrfs_ioctl_qgroup_create_args { + __u64 create; + __u64 qgroupid; +}; +struct btrfs_ioctl_timespec { + __u64 sec; + __u32 nsec; +}; + +struct btrfs_ioctl_received_subvol_args { + char uuid[BTRFS_UUID_SIZE]; /* in */ + __u64 stransid; /* in */ + __u64 rtransid; /* out */ + struct btrfs_ioctl_timespec stime; /* in */ + struct btrfs_ioctl_timespec rtime; /* out */ + __u64 flags; /* in */ + __u64 reserved[16]; /* in */ +}; + +struct btrfs_ioctl_send_args { + __s64 send_fd; /* in */ + __u64 clone_sources_count; /* in */ + __u64 __user *clone_sources; /* in */ + __u64 parent_root; /* in */ + __u64 flags; /* in */ + __u64 reserved[4]; /* in */ +}; + +#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ + struct btrfs_ioctl_vol_args) +/* trans start and trans end are dangerous, and only for + * use by applications that know how to avoid the + * resulting deadlocks + */ +#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) +#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) +#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) + +#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) +#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ + struct btrfs_ioctl_vol_args) + +#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ + struct btrfs_ioctl_clone_range_args) + +#define BTRFS_IOC_SUBVOL_CREATE 
_IOW(BTRFS_IOCTL_MAGIC, 14, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \ + struct btrfs_ioctl_defrag_range_args) +#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ + struct btrfs_ioctl_search_args) +#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ + struct btrfs_ioctl_ino_lookup_args) +#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) +#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ + struct btrfs_ioctl_space_args) +#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) +#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) +#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) +#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) +#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28) +#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \ + struct btrfs_ioctl_scrub_args) +#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \ + struct btrfs_ioctl_dev_info_args) +#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ + struct btrfs_ioctl_fs_info_args) +#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int) +#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \ + struct btrfs_ioctl_balance_args) +#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ + struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ + struct btrfs_ioctl_ino_path_args) +#define 
BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \ + struct btrfs_ioctl_received_subvol_args) +#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args) +#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \ + struct btrfs_ioctl_quota_ctl_args) +#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \ + struct btrfs_ioctl_qgroup_assign_args) +#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \ + struct btrfs_ioctl_qgroup_create_args) +#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ + struct btrfs_ioctl_qgroup_limit_args) +#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ + struct btrfs_ioctl_get_dev_stats) +#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ + struct btrfs_ioctl_dev_replace_args) + +#endif /* _UAPI_LINUX_BTRFS_H */ -- cgit v1.2.3 From cb95e7bf7ba481c3d35b238b1cd671b63f54238a Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Mon, 4 Feb 2013 20:54:57 +0000 Subject: btrfs: add "no file data" flag to btrfs send ioctl This patch adds the flag, BTRFS_SEND_FLAG_NO_FILE_DATA to the btrfs send ioctl code. When this flag is set, the btrfs send code will never write file data into the stream (thus also avoiding expensive reads of that data in the first place). BTRFS_SEND_C_UPDATE_EXTENT commands will be sent (instead of BTRFS_SEND_C_WRITE) with an offset, length pair indicating the extent in question. This patch does not affect the operation of BTRFS_SEND_C_CLONE commands - they will continue to be sent when a search finds an appropriate extent to clone from. 
Signed-off-by: Mark Fasheh Signed-off-by: Josef Bacik --- fs/btrfs/send.c | 50 ++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/send.h | 1 + include/uapi/linux/btrfs.h | 7 +++++++ 3 files changed, 54 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 614da0d44d56..68da757615ae 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -85,6 +85,7 @@ struct send_ctx { u32 send_max_size; u64 total_send_size; u64 cmd_send_size[BTRFS_SEND_C_MAX + 1]; + u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */ struct vfsmount *mnt; @@ -3709,6 +3710,39 @@ out: return ret; } +/* + * Send an update extent command to user space. + */ +static int send_update_extent(struct send_ctx *sctx, + u64 offset, u32 len) +{ + int ret = 0; + struct fs_path *p; + + p = fs_path_alloc(sctx); + if (!p) + return -ENOMEM; + + ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT); + if (ret < 0) + goto out; + + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); + if (ret < 0) + goto out; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); + TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); + + ret = send_cmd(sctx); + +tlv_put_failure: +out: + fs_path_free(sctx, p); + return ret; +} + static int send_write_or_clone(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_key *key, @@ -3744,7 +3778,11 @@ static int send_write_or_clone(struct send_ctx *sctx, goto out; } - if (!clone_root) { + if (clone_root) { + ret = send_clone(sctx, offset, len, clone_root); + } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) { + ret = send_update_extent(sctx, offset, len); + } else { while (pos < len) { l = len - pos; if (l > BTRFS_SEND_READ_SIZE) @@ -3757,10 +3795,7 @@ static int send_write_or_clone(struct send_ctx *sctx, pos += ret; } ret = 0; - } else { - ret = send_clone(sctx, offset, len, clone_root); } - out: return ret; } @@ -4560,6 +4595,11 @@ long 
btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) goto out; } + if (arg->flags & ~BTRFS_SEND_FLAG_NO_FILE_DATA) { + ret = -EINVAL; + goto out; + } + sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); if (!sctx) { ret = -ENOMEM; @@ -4571,6 +4611,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); INIT_LIST_HEAD(&sctx->name_cache_list); + sctx->flags = arg->flags; + sctx->send_filp = fget(arg->send_fd); if (IS_ERR(sctx->send_filp)) { ret = PTR_ERR(sctx->send_filp); diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 1bf4f32fd4ef..8bb18f7ccaa6 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h @@ -86,6 +86,7 @@ enum btrfs_send_cmd { BTRFS_SEND_C_UTIMES, BTRFS_SEND_C_END, + BTRFS_SEND_C_UPDATE_EXTENT, __BTRFS_SEND_C_MAX, }; #define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1) diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index cffbb582dd90..dd9f1293ab35 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -407,6 +407,13 @@ struct btrfs_ioctl_received_subvol_args { __u64 reserved[16]; /* in */ }; +/* + * Caller doesn't want file data in the send stream, even if the + * search of clone sources doesn't find an extent. UPDATE_EXTENT + * commands will be sent instead of WRITE commands. + */ +#define BTRFS_SEND_FLAG_NO_FILE_DATA 0x1 + struct btrfs_ioctl_send_args { __s64 send_fd; /* in */ __u64 clone_sources_count; /* in */ -- cgit v1.2.3 From 867ab667e74377160c4a683375ee5b8bf8801724 Mon Sep 17 00:00:00 2001 From: "jeff.liu" Date: Sat, 5 Jan 2013 02:48:01 +0000 Subject: Btrfs: Add a new ioctl to get the label of a mounted file system Add a new ioctl(2) BTRFS_IOC_GET_FSLABEL, so that we can get the label upon a mounted filesystem.
Signed-off-by: Jie Liu Signed-off-by: Anand Jain Cc: Miao Xie Cc: Goffredo Baroncelli Cc: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/ioctl.c | 21 +++++++++++++++++++++ include/uapi/linux/btrfs.h | 2 ++ 2 files changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d02ec577f70f..fcc15a6804a9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3917,6 +3917,25 @@ out: return ret; } +static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) +{ + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + const char *label = root->fs_info->super_copy->label; + size_t len = strnlen(label, BTRFS_LABEL_SIZE); + int ret; + + if (len == BTRFS_LABEL_SIZE) { + pr_warn("btrfs: label is too long, return the first %zu bytes\n", + --len); + } + + mutex_lock(&root->fs_info->volume_mutex); + ret = copy_to_user(arg, label, len); + mutex_unlock(&root->fs_info->volume_mutex); + + return ret ? -EFAULT : 0; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -4017,6 +4036,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_qgroup_limit(file, argp); case BTRFS_IOC_DEV_REPLACE: return btrfs_ioctl_dev_replace(root, argp); + case BTRFS_IOC_GET_FSLABEL: + return btrfs_ioctl_get_fslabel(file, argp); } return -ENOTTY; diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index dd9f1293ab35..51c0b335e0c8 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -502,6 +502,8 @@ struct btrfs_ioctl_send_args { struct btrfs_ioctl_qgroup_create_args) #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ struct btrfs_ioctl_qgroup_limit_args) +#define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \ + char[BTRFS_LABEL_SIZE]) #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ struct btrfs_ioctl_get_dev_stats) #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ -- cgit v1.2.3 From 
a8bfd4abea3da0e28f215e2a2b8c2f1ca27ebe80 Mon Sep 17 00:00:00 2001 From: "jeff.liu" Date: Sat, 5 Jan 2013 02:48:08 +0000 Subject: Btrfs: set/change the label of a mounted file system With this new ioctl(2) BTRFS_IOC_SET_FSLABEL, we can set/change the label of a mounted file system. Signed-off-by: Jie Liu Signed-off-by: Anand Jain Reviewed-by: Miao Xie Reviewed-by: Goffredo Baroncelli Reviewed-by: David Sterba Reviewed-by: Goffredo Baroncelli Signed-off-by: Josef Bacik --- fs/btrfs/ioctl.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/btrfs.h | 2 ++ 2 files changed, 44 insertions(+) (limited to 'include/uapi/linux') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fcc15a6804a9..0f68729f261e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3936,6 +3936,46 @@ static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) return ret ? -EFAULT : 0; } +static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) +{ + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; + struct btrfs_super_block *super_block = root->fs_info->super_copy; + struct btrfs_trans_handle *trans; + char label[BTRFS_LABEL_SIZE]; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(label, arg, sizeof(label))) + return -EFAULT; + + if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) { + pr_err("btrfs: unable to set label with more than %d bytes\n", + BTRFS_LABEL_SIZE - 1); + return -EINVAL; + } + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + mutex_lock(&root->fs_info->volume_mutex); + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out_unlock; + } + + strcpy(super_block->label, label); + ret = btrfs_end_transaction(trans, root); + +out_unlock: + mutex_unlock(&root->fs_info->volume_mutex); + mnt_drop_write_file(file); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -4038,6 +4078,8 @@ 
long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_dev_replace(root, argp); case BTRFS_IOC_GET_FSLABEL: return btrfs_ioctl_get_fslabel(file, argp); + case BTRFS_IOC_SET_FSLABEL: + return btrfs_ioctl_set_fslabel(file, argp); } return -ENOTTY; diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 51c0b335e0c8..fa3a5f9338fc 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -504,6 +504,8 @@ struct btrfs_ioctl_send_args { struct btrfs_ioctl_qgroup_limit_args) #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \ char[BTRFS_LABEL_SIZE]) +#define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \ + char[BTRFS_LABEL_SIZE]) #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ struct btrfs_ioctl_get_dev_stats) #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ -- cgit v1.2.3 From b531f81b0d70ffbe8d70500512483227cc532608 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Thu, 21 Feb 2013 01:55:50 +0000 Subject: ALSA: usb: Fix Processing Unit Descriptor parsers Commit 99fc86450c439039d2ef88d06b222fd51a779176 "ALSA: usb-mixer: parse descriptors with structs" introduced a set of useful parsers for descriptors. Unfortunately the parsers for the Processing Unit Descriptor came with a very subtle bug... Functions uac_processing_unit_iProcessing() and uac_processing_unit_specific() were indexing the baSourceID array forgetting the fields before the iProcessing and process-specific descriptors. The problem was observed with Sound Blaster Extigy mixer, where nNrModes in Up/Down-mix Processing Unit Descriptor was accessed at offset 10 of the descriptor (value 0) instead of offset 15 (value 7).
As a result, the resulting control had interesting limit values: Simple mixer control 'Channel Routing Mode Select',0 Capabilities: volume volume-joined penum Playback channels: Mono Capture channels: Mono Limits: 0 - -1 Mono: -1 [100%] Fixed by starting from the bmControls, which was calculated correctly, instead of baSourceID. Now the mentioned control is fine: Simple mixer control 'Channel Routing Mode Select',0 Capabilities: volume volume-joined penum Playback channels: Mono Capture channels: Mono Limits: 0 - 6 Mono: 0 [0%] Signed-off-by: Pawel Moll Cc: Signed-off-by: Takashi Iwai --- include/uapi/linux/usb/audio.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h index ac90037894d9..d2314be4f0c0 100644 --- a/include/uapi/linux/usb/audio.h +++ b/include/uapi/linux/usb/audio.h @@ -384,14 +384,16 @@ static inline __u8 uac_processing_unit_iProcessing(struct uac_processing_unit_de int protocol) { __u8 control_size = uac_processing_unit_bControlSize(desc, protocol); - return desc->baSourceID[desc->bNrInPins + control_size]; + return *(uac_processing_unit_bmControls(desc, protocol) + + control_size); } static inline __u8 *uac_processing_unit_specific(struct uac_processing_unit_descriptor *desc, int protocol) { __u8 control_size = uac_processing_unit_bControlSize(desc, protocol); - return &desc->baSourceID[desc->bNrInPins + control_size + 1]; + return uac_processing_unit_bmControls(desc, protocol) + + control_size + 1; } /* 4.5.2 Class-Specific AS Interface Descriptor */ -- cgit v1.2.3 From ffecfd1a72fccfcee3dabb99b9ecba9735318f90 Mon Sep 17 00:00:00 2001 From: "Darrick J.
Wong" Date: Thu, 21 Feb 2013 16:42:55 -0800 Subject: block: optionally snapshot page contents to provide stable pages during write This provides a band-aid to provide stable page writes on jbd without needing to backport the fixed locking and page writeback bit handling schemes of jbd2. The band-aid works by using bounce buffers to snapshot page contents instead of waiting. For those wondering about the ext3 bandage -- fixing the jbd locking (which was done as part of ext4dev years ago) is a lot of surgery, and setting PG_writeback on data pages when we actually hold the page lock dropped ext3 performance by nearly an order of magnitude. If we're going to migrate iscsi and raid to use stable page writes, the complaints about high latency will likely return. We might as well centralize their page snapshotting thing to one place. Signed-off-by: Darrick J. Wong Tested-by: Andy Lutomirski Cc: Adrian Hunter Cc: Artem Bityutskiy Reviewed-by: Jan Kara Cc: Joel Becker Cc: Mark Fasheh Cc: Steven Whitehouse Cc: Jens Axboe Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/Kconfig | 6 ------ block/blk-core.c | 8 +++++--- fs/ext3/super.c | 1 + include/uapi/linux/fs.h | 3 +++ mm/Kconfig | 13 +++++++++++++ mm/bounce.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---- mm/page-writeback.c | 4 ++++ 7 files changed, 70 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 1bb7ad4aeff4..b1e68f52029c 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -412,12 +412,6 @@ config TILE_USB Provides USB host adapter support for the built-in EHCI and OHCI interfaces on TILE-Gx chips. -# USB OHCI needs the bounce pool since tilegx will often have more -# than 4GB of memory, but we don't currently use the IOTLB to present -# a 32-bit address to OHCI. So we need to use a bounce pool instead. 
-config NEED_BOUNCE_POOL - def_bool USB_OHCI_HCD - source "drivers/pci/hotplug/Kconfig" endmenu diff --git a/block/blk-core.c b/block/blk-core.c index c973249d68cd..277134cb5d32 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1474,6 +1474,11 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) */ blk_queue_bounce(q, &bio); + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { + bio_endio(bio, -EIO); + return; + } + if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { spin_lock_irq(q->queue_lock); where = ELEVATOR_INSERT_FLUSH; @@ -1714,9 +1719,6 @@ generic_make_request_checks(struct bio *bio) */ blk_partition_remap(bio); - if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) - goto end_io; - if (bio_check_eod(bio, nr_sectors)) goto end_io; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6e50223b3299..4ba2683c1d44 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2065,6 +2065,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? 
"ordered": "writeback"); + sb->s_flags |= MS_SNAP_STABLE; return 0; diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 780d4c6093eb..c7fc1e6517c3 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -86,6 +86,9 @@ struct inodes_stat_t { #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define MS_I_VERSION (1<<23) /* Update inode I_version field */ #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ + +/* These sb flags are internal to the kernel */ +#define MS_SNAP_STABLE (1<<27) /* Snapshot pages during writeback, if needed */ #define MS_NOSEC (1<<28) #define MS_BORN (1<<29) #define MS_ACTIVE (1<<30) diff --git a/mm/Kconfig b/mm/Kconfig index 278e3ab1f169..7901d839aab2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -258,6 +258,19 @@ config BOUNCE def_bool y depends on BLOCK && MMU && (ZONE_DMA || HIGHMEM) +# On the 'tile' arch, USB OHCI needs the bounce pool since tilegx will often +# have more than 4GB of memory, but we don't currently use the IOTLB to present +# a 32-bit address to OHCI. So we need to use a bounce pool instead. +# +# We also use the bounce pool to provide stable page writes for jbd. jbd +# initiates buffer writeback without locking the page or setting PG_writeback, +# and fixing that behavior (a second time; jbd2 doesn't have this problem) is +# a major rework effort. Instead, use the bounce buffer to snapshot pages +# (until jbd goes away). The only jbd user is ext3. 
+config NEED_BOUNCE_POOL + bool + default y if (TILE && USB_OHCI_HCD) || (BLK_DEV_INTEGRITY && JBD) + config NR_QUICK int depends on QUICKLIST diff --git a/mm/bounce.c b/mm/bounce.c index 042086775561..5f8901768602 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -178,8 +178,45 @@ static void bounce_end_io_read_isa(struct bio *bio, int err) __bounce_end_io_read(bio, isa_page_pool, err); } +#ifdef CONFIG_NEED_BOUNCE_POOL +static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) +{ + struct page *page; + struct backing_dev_info *bdi; + struct address_space *mapping; + struct bio_vec *from; + int i; + + if (bio_data_dir(bio) != WRITE) + return 0; + + if (!bdi_cap_stable_pages_required(&q->backing_dev_info)) + return 0; + + /* + * Based on the first page that has a valid mapping, decide whether or + * not we have to employ bounce buffering to guarantee stable pages. + */ + bio_for_each_segment(from, bio, i) { + page = from->bv_page; + mapping = page_mapping(page); + if (!mapping) + continue; + bdi = mapping->backing_dev_info; + return mapping->host->i_sb->s_flags & MS_SNAP_STABLE; + } + + return 0; +} +#else +static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) +{ + return 0; +} +#endif /* CONFIG_NEED_BOUNCE_POOL */ + static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, - mempool_t *pool) + mempool_t *pool, int force) { struct page *page; struct bio *bio = NULL; @@ -192,7 +229,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, /* * is destination page below bounce pfn? 
*/ - if (page_to_pfn(page) <= queue_bounce_pfn(q)) + if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) continue; /* @@ -270,6 +307,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) { + int must_bounce; mempool_t *pool; /* @@ -278,13 +316,15 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) if (!bio_has_data(*bio_orig)) return; + must_bounce = must_snapshot_stable_pages(q, *bio_orig); + /* * for non-isa bounce case, just check if the bounce pfn is equal * to or bigger than the highest pfn in the system -- in that case, * don't waste time iterating over bio segments */ if (!(q->bounce_gfp & GFP_DMA)) { - if (queue_bounce_pfn(q) >= blk_max_pfn) + if (queue_bounce_pfn(q) >= blk_max_pfn && !must_bounce) return; pool = page_pool; } else { @@ -295,7 +335,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) /* * slow path */ - __blk_queue_bounce(q, bio_orig, pool); + __blk_queue_bounce(q, bio_orig, pool, must_bounce); } EXPORT_SYMBOL(blk_queue_bounce); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 355d5ee69058..7300c9d5e1d9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2306,6 +2306,10 @@ void wait_for_stable_page(struct page *page) if (!bdi_cap_stable_pages_required(bdi)) return; +#ifdef CONFIG_NEED_BOUNCE_POOL + if (mapping->host->i_sb->s_flags & MS_SNAP_STABLE) + return; +#endif /* CONFIG_NEED_BOUNCE_POOL */ wait_on_page_writeback(page); } -- cgit v1.2.3 From 242260fb858e99674289484bc2bfe3b41f9c4cbb Mon Sep 17 00:00:00 2001 From: Christian Kujau Date: Thu, 21 Feb 2013 16:43:05 -0800 Subject: sun.com documentation fixes After I came across a help text for SUNGEM mentioning a broken sun.com URL, I felt like fixing those up, as they are now pointing to oracle.com URLs. Signed-off-by: Christian Kujau Acked-by: David S. 
Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/devicetree/booting-without-of.txt | 2 +- drivers/net/ethernet/sun/Kconfig | 4 ++-- include/uapi/linux/elf.h | 12 ++++++------ net/ipv6/Kconfig | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt index d4d66757354e..b2fb2f5e1922 100644 --- a/Documentation/devicetree/booting-without-of.txt +++ b/Documentation/devicetree/booting-without-of.txt @@ -1228,7 +1228,7 @@ hierarchy and routing of interrupts in the hardware. The interrupt tree model is fully described in the document "Open Firmware Recommended Practice: Interrupt Mapping Version 0.9". The document is available at: -. + 1) interrupts property ---------------------- diff --git a/drivers/net/ethernet/sun/Kconfig b/drivers/net/ethernet/sun/Kconfig index 57bfd8599679..208c39dedc2e 100644 --- a/drivers/net/ethernet/sun/Kconfig +++ b/drivers/net/ethernet/sun/Kconfig @@ -61,7 +61,7 @@ config SUNGEM select SUNGEM_PHY ---help--- Support for the Sun GEM chip, aka Sun GigabitEthernet/P 2.0. See also - . + . config CASSINI tristate "Sun Cassini support" @@ -69,7 +69,7 @@ config CASSINI select CRC32 ---help--- Support for the Sun Cassini chip, aka Sun GigaSwift Ethernet. See also - + . config SUNVNET tristate "Sun Virtual Network support" diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 126a8175e3e2..900b9484445b 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -49,14 +49,14 @@ typedef __s64 Elf64_Sxword; * * Specifications are available in: * - * - Sun microsystems: Linker and Libraries. - * Part No: 817-1984-17, September 2008. - * URL: http://docs.sun.com/app/docs/doc/817-1984 + * - Oracle: Linker and Libraries. + * Part No: 817–1984–19, August 2011. 
+ * http://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf * * - System V ABI AMD64 Architecture Processor Supplement - * Draft Version 0.99., - * May 11, 2009. - * URL: http://www.x86-64.org/ + * Draft Version 0.99.4, + * January 13, 2010. + * http://www.cs.washington.edu/education/courses/cse351/12wi/supp-docs/abi.pdf */ #define PN_XNUM 0xffff diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 4f7fe7270e37..a2246afc0007 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -11,7 +11,7 @@ menuconfig IPV6 You will still be able to do traditional IPv4 networking as well. For general information about IPv6, see - . + . For Linux IPv6 development information, see . For specific information about IPv6 under Linux, read the HOWTO at . -- cgit v1.2.3 From 5841ca09b35df4ecb0fee4e8fbd21ef177509a71 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Wed, 27 Feb 2013 17:02:59 -0800 Subject: hfsplus: add osx.* prefix for handling namespace of Mac OS X extended attributes hfsplus: reworked support of extended attributes. Current mainline implementation of hfsplus file system driver treats as extended attributes only two fields (fdType and fdCreator) of user_info field in file description record (struct hfsplus_cat_file). It is possible to get or set only these two fields as extended attributes. But HFS+ treats as com.apple.FinderInfo extended attribute an union of user_info and finder_info fields as for file (struct hfsplus_cat_file) as for folder (struct hfsplus_cat_folder). Moreover, current mainline implementation of hfsplus file system driver doesn't support special metadata file - attributes tree. Mac OS X 10.4 and later support extended attributes by making use of the HFS+ filesystem Attributes file B*-tree feature which allows for named forks. Mac OS X supports only inline extended attributes, limiting their size to 3802 bytes. Any regular file may have a list of extended attributes. HFS+ supports an arbitrary number of named forks. 
Each attribute is denoted by a name and the associated data. The name is a null-terminated Unicode string. It is possible to list, to get, to set, and to remove extended attributes from files or directories. There is a peculiarity when getting the extended attributes list by means of the getfattr utility. The getfattr utility expects the prefix "user." before any extended attribute's name, so it ignores any names that don't contain such a prefix. This behavior of the getfattr utility results in unexpectedly empty output of the extended attributes list even when the file (or folder) contains extended attributes. An empty string must be used as the regular expression pattern for name matching (getfattr --match=""). For support of extended attributes in HFS+: 1. The necessary on-disk layout declarations related to the Attributes tree were added to the hfsplus_raw.h file. 2. The attributes.c file was added, implementing manipulation of records in the Attributes tree. 3. The hfsplus_listxattr, hfsplus_getxattr and hfsplus_setxattr functions in ioctl.c were reworked. Moreover, the hfsplus_removexattr method was added. This patch: Add osx.* prefix for handling namespace of Mac OS X extended attributes. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Vyacheslav Dubeyko Reported-by: Hin-Tak Leung Cc: Al Viro Cc: Christoph Hellwig Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/xattr.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index 26607bd965fa..e4629b93bdd6 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -15,19 +15,22 @@ /* Namespaces */ #define XATTR_OS2_PREFIX "os2." -#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1) +#define XATTR_OS2_PREFIX_LEN (sizeof(XATTR_OS2_PREFIX) - 1) + +#define XATTR_MAC_OSX_PREFIX "osx."
+#define XATTR_MAC_OSX_PREFIX_LEN (sizeof(XATTR_MAC_OSX_PREFIX) - 1) #define XATTR_SECURITY_PREFIX "security." -#define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1) +#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1) #define XATTR_SYSTEM_PREFIX "system." -#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1) +#define XATTR_SYSTEM_PREFIX_LEN (sizeof(XATTR_SYSTEM_PREFIX) - 1) #define XATTR_TRUSTED_PREFIX "trusted." -#define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1) +#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1) #define XATTR_USER_PREFIX "user." -#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1) +#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1) /* Security namespace */ #define XATTR_EVM_SUFFIX "evm" -- cgit v1.2.3 From 6b46419b0462ae565880f02e9cd0baf9b25ea71f Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 27 Feb 2013 17:03:07 -0800 Subject: fat: add extended fileds to struct fat_boot_sector Later we will need "state" field to check if volume was cleanly unmounted. 
Signed-off-by: Oleksij Rempel Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 8 ++++---- include/uapi/linux/msdos_fs.h | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/fat/inode.c b/fs/fat/inode.c index f8f491677a4a..4b4d4ef910f3 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1298,17 +1298,17 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->prev_free = FAT_START_ENT; sb->s_maxbytes = 0xffffffff; - if (!sbi->fat_length && b->fat32_length) { + if (!sbi->fat_length && b->fat32.length) { struct fat_boot_fsinfo *fsinfo; struct buffer_head *fsinfo_bh; /* Must be FAT32 */ sbi->fat_bits = 32; - sbi->fat_length = le32_to_cpu(b->fat32_length); - sbi->root_cluster = le32_to_cpu(b->root_cluster); + sbi->fat_length = le32_to_cpu(b->fat32.length); + sbi->root_cluster = le32_to_cpu(b->fat32.root_cluster); /* MC - if info_sector is 0, don't multiply by 0 */ - sbi->fsinfo_sector = le16_to_cpu(b->info_sector); + sbi->fsinfo_sector = le16_to_cpu(b->fat32.info_sector); if (sbi->fsinfo_sector == 0) sbi->fsinfo_sector = 1; diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h index 996719f82e28..b9f12450efe8 100644 --- a/include/uapi/linux/msdos_fs.h +++ b/include/uapi/linux/msdos_fs.h @@ -120,14 +120,34 @@ struct fat_boot_sector { __le32 hidden; /* hidden sectors (unused) */ __le32 total_sect; /* number of sectors (if sectors == 0) */ - /* The following fields are only used by FAT32 */ - __le32 fat32_length; /* sectors/FAT */ - __le16 flags; /* bit 8: fat mirroring, low 4: active fat */ - __u8 version[2]; /* major, minor filesystem version */ - __le32 root_cluster; /* first cluster in root directory */ - __le16 info_sector; /* filesystem info sector */ - __le16 backup_boot; /* backup boot sector */ - __le16 reserved2[6]; /* Unused */ + union { + struct { + /* Extended 
BPB Fields for FAT16 */ + __u8 drive_number; /* Physical drive number */ + __u8 state; /* undocumented, but used + for mount state. */ + /* other fiealds are not added here */ + } fat16; + + struct { + /* only used by FAT32 */ + __le32 length; /* sectors/FAT */ + __le16 flags; /* bit 8: fat mirroring, + low 4: active fat */ + __u8 version[2]; /* major, minor filesystem + version */ + __le32 root_cluster; /* first cluster in + root directory */ + __le16 info_sector; /* filesystem info sector */ + __le16 backup_boot; /* backup boot sector */ + __le16 reserved2[6]; /* Unused */ + /* Extended BPB Fields for FAT32 */ + __u8 drive_number; /* Physical drive number */ + __u8 state; /* undocumented, but used + for mount state. */ + /* other fiealds are not added here */ + } fat32; + }; }; struct fat_boot_fsinfo { -- cgit v1.2.3 From b88a105802e9aeb6e234e8106659f5d1271081bb Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 27 Feb 2013 17:03:09 -0800 Subject: fat: mark fs as dirty on mount and clean on umount There are no documented methods to mark FAT as dirty. Unofficially MS started to use a reserved byte in the boot sector for this purpose, at least since Win 2000. With Win 7 the user is warned if the fs is dirty and asked to clean it. Different versions of Win handle it in different ways, but it always has the same meaning: - Win 2000 and XP, set it on write operations and remove it after the operation has finished - Win 7, set dirty flag on first write and remove it on umount. We will do it as follows: - set dirty flag on mount. If fs was initially dirty, warn user, remember it and do not make any changes to the boot sector. - clean it on umount. If fs was initially dirty, leave it dirty. - do not do anything if fs is mounted read-only. - TODO: leave fs dirty if we found some error after mount.
Signed-off-by: Oleksij Rempel Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/fat.h | 2 ++ fs/fat/inode.c | 66 +++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/msdos_fs.h | 2 ++ 3 files changed, 70 insertions(+) (limited to 'include/uapi/linux') diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 12701a567752..e9cc3f0d58e2 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -95,6 +95,8 @@ struct msdos_sb_info { spinlock_t dir_hash_lock; struct hlist_head dir_hashtable[FAT_HASH_SIZE]; + + unsigned int dirty; /* fs state before mount */ }; #define FAT_CACHE_VALID 0 /* special case for valid cache */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 4b4d4ef910f3..780e20806346 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -488,10 +488,59 @@ static void fat_evict_inode(struct inode *inode) fat_detach(inode); } +static void fat_set_state(struct super_block *sb, + unsigned int set, unsigned int force) +{ + struct buffer_head *bh; + struct fat_boot_sector *b; + struct msdos_sb_info *sbi = sb->s_fs_info; + + /* do not change any thing if mounted read only */ + if ((sb->s_flags & MS_RDONLY) && !force) + return; + + /* do not change state if fs was dirty */ + if (sbi->dirty) { + /* warn only on set (mount). */ + if (set) + fat_msg(sb, KERN_WARNING, "Volume was not properly " + "unmounted. Some data may be corrupt. 
" + "Please run fsck."); + return; + } + + bh = sb_bread(sb, 0); + if (bh == NULL) { + fat_msg(sb, KERN_ERR, "unable to read boot sector " + "to mark fs as dirty"); + return; + } + + b = (struct fat_boot_sector *) bh->b_data; + + if (sbi->fat_bits == 32) { + if (set) + b->fat32.state |= FAT_STATE_DIRTY; + else + b->fat32.state &= ~FAT_STATE_DIRTY; + } else /* fat 16 and 12 */ { + if (set) + b->fat16.state |= FAT_STATE_DIRTY; + else + b->fat16.state &= ~FAT_STATE_DIRTY; + } + + mark_buffer_dirty(bh); + sync_dirty_buffer(bh); + brelse(bh); +} + static void fat_put_super(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); + fat_set_state(sb, 0, 0); + iput(sbi->fsinfo_inode); iput(sbi->fat_inode); @@ -566,8 +615,18 @@ static void __exit fat_destroy_inodecache(void) static int fat_remount(struct super_block *sb, int *flags, char *data) { + int new_rdonly; struct msdos_sb_info *sbi = MSDOS_SB(sb); *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); + + /* make sure we update state on remount. */ + new_rdonly = *flags & MS_RDONLY; + if (new_rdonly != (sb->s_flags & MS_RDONLY)) { + if (new_rdonly) + fat_set_state(sb, 0, 0); + else + fat_set_state(sb, 1, 1); + } return 0; } @@ -1362,6 +1421,12 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, if (sbi->fat_bits != 32) sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12; + /* some OSes set FAT_STATE_DIRTY and clean it on unmount. 
*/ + if (sbi->fat_bits == 32) + sbi->dirty = b->fat32.state & FAT_STATE_DIRTY; + else /* fat 16 or 12 */ + sbi->dirty = b->fat16.state & FAT_STATE_DIRTY; + /* check that FAT table does not overflow */ fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); @@ -1456,6 +1521,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, "the device does not support discard"); } + fat_set_state(sb, 1, 0); return 0; out_invalid: diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h index b9f12450efe8..f055e58b3147 100644 --- a/include/uapi/linux/msdos_fs.h +++ b/include/uapi/linux/msdos_fs.h @@ -87,6 +87,8 @@ #define IS_FSINFO(x) (le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \ && le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2) +#define FAT_STATE_DIRTY 0x01 + struct __fat_dirent { long d_ino; __kernel_off_t d_off; -- cgit v1.2.3 From 59fb1b9f5d9910c2eb97107dd0eb7e3bce8f0dde Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Wed, 27 Feb 2013 17:05:11 -0800 Subject: ipmi: remove superfluous kernel/userspace explanation Given the obvious distinction between kernel and userspace supported by uapi/, it seems unnecessary to comment on that. Signed-off-by: Robert P. J. Day Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipmi.h | 4 ---- include/uapi/linux/ipmi.h | 10 +--------- 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 1487e7906bbd..1f9f56e28851 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -35,10 +35,6 @@ #include - -/* - * The in-kernel interface. 
- */ #include #include diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h index 33fbc99b3812..7b26a62e5707 100644 --- a/include/uapi/linux/ipmi.h +++ b/include/uapi/linux/ipmi.h @@ -59,15 +59,7 @@ * if it becomes full and it is queried once a second to see if * anything is in it. Incoming commands to the driver will get * delivered as commands. - * - * This driver provides two main interfaces: one for in-kernel - * applications and another for userland applications. The - * capabilities are basically the same for both interface, although - * the interfaces are somewhat different. The stuff in the - * #ifdef __KERNEL__ below is the in-kernel interface. The userland - * interface is defined later in the file. */ - - + */ /* * This is an overlay for all the address types, so it's easy to -- cgit v1.2.3 From 75f187aba5e7a3eea259041f85099029774a4c5b Mon Sep 17 00:00:00 2001 From: Alex Bligh Date: Wed, 27 Feb 2013 17:05:23 -0800 Subject: nbd: support FLUSH requests Currently, the NBD device does not accept flush requests from the Linux block layer. If the NBD server opened the target with neither O_SYNC nor O_DSYNC, however, the device will be effectively backed by a writeback cache. Without issuing flushes properly, operation of the NBD device will not be safe against power losses. The NBD protocol has support for both a cache flush command and a FUA command flag; the server will also pass a flag to note its support for these features. This patch adds support for the cache flush command and flag. In the kernel, we receive the flags via the NBD_SET_FLAGS ioctl, and map NBD_FLAG_SEND_FLUSH to the argument of blk_queue_flush. When the flag is active the block layer will send REQ_FLUSH requests, which we translate to NBD_CMD_FLUSH commands. FUA support is not included in this patch because all free software servers implement it with a full fdatasync; thus it has no advantage over supporting flush only. 
Because I [Paolo] cannot really benchmark it in a realistic scenario, I cannot tell if it is a good idea or not. It is also not clear if it is valid for an NBD server to support FUA but not flush. The Linux block layer gives a warning for this combination, the NBD protocol documentation says nothing about it. The patch also fixes a small problem in the handling of flags: nbd->flags must be cleared at the end of NBD_DO_IT, but the driver was not doing that. The bug manifests itself as follows. Suppose you use two different client/server pairs to start the NBD device. Suppose also that the first client supports NBD_SET_FLAGS, and the first server sends NBD_FLAG_SEND_FLUSH; the second pair instead does neither of these two things. Before this patch, the second invocation of NBD_DO_IT will use a stale value of nbd->flags, and the second server will issue an error every time it receives an NBD_CMD_FLUSH command. This bug is pre-existing, but it becomes much more important after this patch; flush failures make the device pretty much unusable, unlike discard failures. Signed-off-by: Paolo Bonzini Signed-off-by: Alex Bligh Acked-by: Paul Clements Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/nbd.c | 22 ++++++++++++++++++++-- include/uapi/linux/nbd.h | 3 ++- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ade146bf65e5..695c68fedd32 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -98,6 +98,7 @@ static const char *nbdcmd_to_ascii(int cmd) case NBD_CMD_READ: return "read"; case NBD_CMD_WRITE: return "write"; case NBD_CMD_DISC: return "disconnect"; + case NBD_CMD_FLUSH: return "flush"; case NBD_CMD_TRIM: return "trim/discard"; } return "invalid"; @@ -244,8 +245,15 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) request.magic = htonl(NBD_REQUEST_MAGIC); request.type = htonl(nbd_cmd(req)); - request.from = cpu_to_be64((u64)blk_rq_pos(req)
<< 9); - request.len = htonl(size); + + if (nbd_cmd(req) == NBD_CMD_FLUSH) { + /* Other values are reserved for FLUSH requests. */ + request.from = 0; + request.len = 0; + } else { + request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); + request.len = htonl(size); + } memcpy(request.handle, &req, sizeof(req)); dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", @@ -482,6 +490,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) } } + if (req->cmd_flags & REQ_FLUSH) { + BUG_ON(unlikely(blk_rq_sectors(req))); + nbd_cmd(req) = NBD_CMD_FLUSH; + } + req->errors = 0; mutex_lock(&nbd->tx_lock); @@ -684,6 +697,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (nbd->flags & NBD_FLAG_SEND_TRIM) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + if (nbd->flags & NBD_FLAG_SEND_FLUSH) + blk_queue_flush(nbd->disk->queue, REQ_FLUSH); + else + blk_queue_flush(nbd->disk->queue, 0); thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); if (IS_ERR(thread)) { @@ -705,6 +722,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); if (file) fput(file); + nbd->flags = 0; nbd->bytesize = 0; bdev->bd_inode->i_size = 0; set_capacity(nbd->disk, 0); diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index dfb514472cbc..4f52549b23ff 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -33,13 +33,14 @@ enum { NBD_CMD_READ = 0, NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, - /* there is a gap here to match userspace */ + NBD_CMD_FLUSH = 3, NBD_CMD_TRIM = 4 }; /* values for flags field */ #define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */ #define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ +#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ /* there is a gap here to match userspace */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ 
-- cgit v1.2.3 From 02cde50b7ea74557d32ff778c73809322445ccd2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Mar 2013 22:45:49 +0000 Subject: dm ioctl: optimize functions without variable params Device-mapper ioctls receive and send data in a buffer supplied by userspace. The buffer has two parts. The first part contains a 'struct dm_ioctl' and has a fixed size. The second part depends on the ioctl and has a variable size. This patch recognises the specific ioctls that do not use the variable part of the buffer and skips allocating memory for it. In particular, when a device is suspended and a resume ioctl is sent, this now avoids memory allocation completely. The variable "struct dm_ioctl tmp" is moved from the function copy_params to its caller ctl_ioctl and renamed to param_kernel. It is used directly when the ioctl function doesn't need any arguments. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 52 ++++++++++++++++++++++++++++--------------- include/uapi/linux/dm-ioctl.h | 6 ++--- 2 files changed, 37 insertions(+), 21 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 9ae11b2994f8..7eb0682d574f 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1560,7 +1560,8 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) return r; } -#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */ +#define DM_PARAMS_KMALLOC 0x0001 /* Params alloced with kmalloc */ +#define DM_PARAMS_VMALLOC 0x0002 /* Params alloced with vmalloc */ #define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */ static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags) @@ -1568,66 +1569,80 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla if (param_flags & DM_WIPE_BUFFER) memset(param, 0, param_size); + if (param_flags & DM_PARAMS_KMALLOC) +
kfree(param); if (param_flags & DM_PARAMS_VMALLOC) vfree(param); - else - kfree(param); } -static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags) +static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel, + int ioctl_flags, + struct dm_ioctl **param, int *param_flags) { - struct dm_ioctl tmp, *dmi; + struct dm_ioctl *dmi; int secure_data; + const size_t minimum_data_size = sizeof(*param_kernel) - sizeof(param_kernel->data); - if (copy_from_user(&tmp, user, sizeof(tmp) - sizeof(tmp.data))) + if (copy_from_user(param_kernel, user, minimum_data_size)) return -EFAULT; - if (tmp.data_size < (sizeof(tmp) - sizeof(tmp.data))) + if (param_kernel->data_size < minimum_data_size) return -EINVAL; - secure_data = tmp.flags & DM_SECURE_DATA_FLAG; + secure_data = param_kernel->flags & DM_SECURE_DATA_FLAG; *param_flags = secure_data ? DM_WIPE_BUFFER : 0; + if (ioctl_flags & IOCTL_FLAGS_NO_PARAMS) { + dmi = param_kernel; + dmi->data_size = minimum_data_size; + goto data_copied; + } + /* * Try to avoid low memory issues when a device is suspended. * Use kmalloc() rather than vmalloc() when we can. 
*/ dmi = NULL; - if (tmp.data_size <= KMALLOC_MAX_SIZE) - dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (param_kernel->data_size <= KMALLOC_MAX_SIZE) { + dmi = kmalloc(param_kernel->data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (dmi) + *param_flags |= DM_PARAMS_KMALLOC; + } if (!dmi) { - dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); - *param_flags |= DM_PARAMS_VMALLOC; + dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + if (dmi) + *param_flags |= DM_PARAMS_VMALLOC; } if (!dmi) { - if (secure_data && clear_user(user, tmp.data_size)) + if (secure_data && clear_user(user, param_kernel->data_size)) return -EFAULT; return -ENOMEM; } - if (copy_from_user(dmi, user, tmp.data_size)) + if (copy_from_user(dmi, user, param_kernel->data_size)) goto bad; +data_copied: /* * Abort if something changed the ioctl data while it was being copied. */ - if (dmi->data_size != tmp.data_size) { + if (dmi->data_size != param_kernel->data_size) { DMERR("rejecting ioctl: data size modified while processing parameters"); goto bad; } /* Wipe the user buffer so we do not return it to userspace */ - if (secure_data && clear_user(user, tmp.data_size)) + if (secure_data && clear_user(user, param_kernel->data_size)) goto bad; *param = dmi; return 0; bad: - free_params(dmi, tmp.data_size, *param_flags); + free_params(dmi, param_kernel->data_size, *param_flags); return -EFAULT; } @@ -1671,6 +1686,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) struct dm_ioctl *uninitialized_var(param); ioctl_fn fn = NULL; size_t input_param_size; + struct dm_ioctl param_kernel; /* only root can play with this */ if (!capable(CAP_SYS_ADMIN)) @@ -1704,7 +1720,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) /* * Copy the parameters into kernel space. 
*/ - r = copy_params(user, &param, &param_flags); + r = copy_params(user, &param_kernel, ioctl_flags, &param, &param_flags); if (r) return r; diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 539b179b349c..b8a6bddf0727 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 23 -#define DM_VERSION_PATCHLEVEL 1 -#define DM_VERSION_EXTRA "-ioctl (2012-12-18)" +#define DM_VERSION_MINOR 24 +#define DM_VERSION_PATCHLEVEL 0 +#define DM_VERSION_EXTRA "-ioctl (2013-01-15)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From a26062416ef8add48f16fbadded2b5f6fb84d024 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Mar 2013 22:45:49 +0000 Subject: dm ioctl: allow message to return data This patch introduces enhanced message support that allows the device-mapper core to recognise messages that are common to all devices, and for messages to return data to userspace. Core messages are processed by the function "message_for_md". If the device mapper doesn't support the message, it is passed to the target driver. If the message returns data, the kernel sets the flag DM_DATA_OUT_FLAG.
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 36 +++++++++++++++++++++++++++++++++++- include/uapi/linux/dm-ioctl.h | 5 +++++ 2 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 7eb0682d574f..aa04f0224642 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1414,6 +1414,22 @@ static int table_status(struct dm_ioctl *param, size_t param_size) return 0; } +static bool buffer_test_overflow(char *result, unsigned maxlen) +{ + return !maxlen || strlen(result) + 1 >= maxlen; +} + +/* + * Process device-mapper dependent messages. + * Returns a number <= 1 if message was processed by device mapper. + * Returns 2 if message should be delivered to the target. + */ +static int message_for_md(struct mapped_device *md, unsigned argc, char **argv, + char *result, unsigned maxlen) +{ + return 2; +} + /* * Pass a message to the target that's at the supplied device offset. 
*/ @@ -1425,6 +1441,8 @@ static int target_message(struct dm_ioctl *param, size_t param_size) struct dm_table *table; struct dm_target *ti; struct dm_target_msg *tmsg = (void *) param + param->data_start; + size_t maxlen; + char *result = get_result_buffer(param, param_size, &maxlen); md = find_device(param); if (!md) @@ -1448,6 +1466,10 @@ static int target_message(struct dm_ioctl *param, size_t param_size) goto out_argv; } + r = message_for_md(md, argc, argv, result, maxlen); + if (r <= 1) + goto out_argv; + table = dm_get_live_table(md); if (!table) goto out_argv; @@ -1473,7 +1495,18 @@ static int target_message(struct dm_ioctl *param, size_t param_size) out_argv: kfree(argv); out: - param->data_size = 0; + if (r >= 0) + __dev_status(md, param); + + if (r == 1) { + param->flags |= DM_DATA_OUT_FLAG; + if (buffer_test_overflow(result, maxlen)) + param->flags |= DM_BUFFER_FULL_FLAG; + else + param->data_size = param->data_start + strlen(result) + 1; + r = 0; + } + dm_put(md); return r; } @@ -1653,6 +1686,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) param->flags &= ~DM_BUFFER_FULL_FLAG; param->flags &= ~DM_UEVENT_GENERATED_FLAG; param->flags &= ~DM_SECURE_DATA_FLAG; + param->flags &= ~DM_DATA_OUT_FLAG; /* Ignores parameters */ if (cmd == DM_REMOVE_ALL_CMD || diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index b8a6bddf0727..7e75b6fd8d45 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -336,4 +336,9 @@ enum { */ #define DM_SECURE_DATA_FLAG (1 << 15) /* In */ +/* + * If set, a message generated output data. + */ +#define DM_DATA_OUT_FLAG (1 << 16) /* Out */ + #endif /* _LINUX_DM_IOCTL_H */ -- cgit v1.2.3 From bc3966bf1583a6c22b76397535174445c43952de Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 9 Oct 2012 10:54:36 +0100 Subject: metag: ptrace The ptrace interface for metag provides access to some core register sets using the PTRACE_GETREGSET and PTRACE_SETREGSET operations. 
The details of the internal context structures are abstracted into user API structures to both ease use and allow flexibility to change the internal context layouts. Copyin and copyout functions for these register sets are exposed to allow signal handling code to use them to copy to and from the signal context. struct user_gp_regs (NT_PRSTATUS) provides access to the core general purpose register context. struct user_cb_regs (NT_METAG_CBUF) provides access to the TXCATCH* registers which contain information about a memory fault, unaligned access error or watchpoint. This can be modified to alter the way the fault is replayed on resume ("catch replay"), or to prevent the replay taking place. struct user_rp_state (NT_METAG_RPIPE) provides access to the state of the Meta read pipeline which can be used to hide memory latencies in hand optimised data loops. Extended DSP register state, DSP RAM, and hardware breakpoint registers aren't yet exposed through ptrace. Signed-off-by: James Hogan Cc: Andrew Morton Cc: Denys Vlasenko Cc: Arnd Bergmann Cc: Tony Lindgren Cc: "Paul E. McKenney" --- arch/metag/include/asm/ptrace.h | 60 ++++++ arch/metag/include/uapi/asm/ptrace.h | 113 +++++++++++ arch/metag/kernel/ptrace.c | 380 +++++++++++++++++++++++++++++++++++ include/uapi/linux/elf.h | 2 + 4 files changed, 555 insertions(+) create mode 100644 arch/metag/include/asm/ptrace.h create mode 100644 arch/metag/include/uapi/asm/ptrace.h create mode 100644 arch/metag/kernel/ptrace.c (limited to 'include/uapi/linux') diff --git a/arch/metag/include/asm/ptrace.h b/arch/metag/include/asm/ptrace.h new file mode 100644 index 000000000000..fcabc18daf25 --- /dev/null +++ b/arch/metag/include/asm/ptrace.h @@ -0,0 +1,60 @@ +#ifndef _METAG_PTRACE_H +#define _METAG_PTRACE_H + +#include +#include +#include + +#ifndef __ASSEMBLY__ + +/* this struct defines the way the registers are stored on the + stack during a system call.
*/ + +struct pt_regs { + TBICTX ctx; + TBICTXEXTCB0 extcb0[5]; +}; + +#define user_mode(regs) (((regs)->ctx.SaveMask & TBICTX_PRIV_BIT) > 0) + +#define instruction_pointer(regs) ((unsigned long)(regs)->ctx.CurrPC) +#define profile_pc(regs) instruction_pointer(regs) + +#define task_pt_regs(task) \ + ((struct pt_regs *)(task_stack_page(task) + \ + sizeof(struct thread_info))) + +#define current_pt_regs() \ + ((struct pt_regs *)((char *)current_thread_info() + \ + sizeof(struct thread_info))) + +int syscall_trace_enter(struct pt_regs *regs); +void syscall_trace_leave(struct pt_regs *regs); + +/* copy a struct user_gp_regs out to user */ +int metag_gp_regs_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); +/* copy a struct user_gp_regs in from user */ +int metag_gp_regs_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +/* copy a struct user_cb_regs out to user */ +int metag_cb_regs_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); +/* copy a struct user_cb_regs in from user */ +int metag_cb_regs_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); +/* copy a struct user_rp_state out to user */ +int metag_rp_state_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf); +/* copy a struct user_rp_state in from user */ +int metag_rp_state_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf); + +#endif /* __ASSEMBLY__ */ +#endif /* _METAG_PTRACE_H */ diff --git a/arch/metag/include/uapi/asm/ptrace.h b/arch/metag/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..45d97809d33e --- /dev/null +++ b/arch/metag/include/uapi/asm/ptrace.h @@ -0,0 +1,113 @@ +#ifndef _UAPI_METAG_PTRACE_H +#define 
_UAPI_METAG_PTRACE_H + +#ifndef __ASSEMBLY__ + +/* + * These are the layouts of the regsets returned by the GETREGSET ptrace call + */ + +/* user_gp_regs::status */ + +/* CBMarker bit (indicates catch state / catch replay) */ +#define USER_GP_REGS_STATUS_CATCH_BIT (1 << 22) +#define USER_GP_REGS_STATUS_CATCH_S 22 +/* LSM_STEP field (load/store multiple step) */ +#define USER_GP_REGS_STATUS_LSM_STEP_BITS (0x7 << 8) +#define USER_GP_REGS_STATUS_LSM_STEP_S 8 +/* SCC bit (indicates split 16x16 condition flags) */ +#define USER_GP_REGS_STATUS_SCC_BIT (1 << 4) +#define USER_GP_REGS_STATUS_SCC_S 4 + +/* normal condition flags */ +/* CF_Z bit (Zero flag) */ +#define USER_GP_REGS_STATUS_CF_Z_BIT (1 << 3) +#define USER_GP_REGS_STATUS_CF_Z_S 3 +/* CF_N bit (Negative flag) */ +#define USER_GP_REGS_STATUS_CF_N_BIT (1 << 2) +#define USER_GP_REGS_STATUS_CF_N_S 2 +/* CF_V bit (oVerflow flag) */ +#define USER_GP_REGS_STATUS_CF_V_BIT (1 << 1) +#define USER_GP_REGS_STATUS_CF_V_S 1 +/* CF_C bit (Carry flag) */ +#define USER_GP_REGS_STATUS_CF_C_BIT (1 << 0) +#define USER_GP_REGS_STATUS_CF_C_S 0 + +/* split 16x16 condition flags */ +/* SCF_LZ bit (Low Zero flag) */ +#define USER_GP_REGS_STATUS_SCF_LZ_BIT (1 << 3) +#define USER_GP_REGS_STATUS_SCF_LZ_S 3 +/* SCF_HZ bit (High Zero flag) */ +#define USER_GP_REGS_STATUS_SCF_HZ_BIT (1 << 2) +#define USER_GP_REGS_STATUS_SCF_HZ_S 2 +/* SCF_HC bit (High Carry flag) */ +#define USER_GP_REGS_STATUS_SCF_HC_BIT (1 << 1) +#define USER_GP_REGS_STATUS_SCF_HC_S 1 +/* SCF_LC bit (Low Carry flag) */ +#define USER_GP_REGS_STATUS_SCF_LC_BIT (1 << 0) +#define USER_GP_REGS_STATUS_SCF_LC_S 0 + +/** + * struct user_gp_regs - User general purpose registers + * @dx: GP data unit regs (dx[reg][unit] = D{unit:0-1}.{reg:0-7}) + * @ax: GP address unit regs (ax[reg][unit] = A{unit:0-1}.{reg:0-3}) + * @pc: PC register + * @status: TXSTATUS register (condition flags, LSM_STEP etc) + * @rpt: TXRPT registers (branch repeat counter) + * @bpobits: TXBPOBITS register 
("branch prediction other" bits) + * @mode: TXMODE register + * @_pad1: Reserved padding to make sizeof obviously 64bit aligned + * + * This is the user-visible general purpose register state structure. + * + * It can be accessed through PTRACE_GETREGSET with NT_PRSTATUS. + * + * It is also used in the signal context. + */ +struct user_gp_regs { + unsigned long dx[8][2]; + unsigned long ax[4][2]; + unsigned long pc; + unsigned long status; + unsigned long rpt; + unsigned long bpobits; + unsigned long mode; + unsigned long _pad1; +}; + +/** + * struct user_cb_regs - User catch buffer registers + * @flags: TXCATCH0 register (fault flags) + * @addr: TXCATCH1 register (fault address) + * @data: TXCATCH2 and TXCATCH3 registers (low and high data word) + * + * This is the user-visible catch buffer register state structure containing + * information about a failed memory access, and allowing the access to be + * modified and replayed. + * + * It can be accessed through PTRACE_GETREGSET with NT_METAG_CBUF. + */ +struct user_cb_regs { + unsigned long flags; + unsigned long addr; + unsigned long long data; +}; + +/** + * struct user_rp_state - User read pipeline state + * @entries: Read pipeline entries + * @mask: Mask of valid pipeline entries (RPMask from TXDIVTIME register) + * + * This is the user-visible read pipeline state structure containing the entries + * currently in the read pipeline and the mask of valid entries. + * + * It can be accessed through PTRACE_GETREGSET with NT_METAG_RPIPE. + */ +struct user_rp_state { + unsigned long long entries[6]; + unsigned long mask; +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* _UAPI_METAG_PTRACE_H */ diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c new file mode 100644 index 000000000000..47a8828615a5 --- /dev/null +++ b/arch/metag/kernel/ptrace.c @@ -0,0 +1,380 @@ +/* + * Copyright (C) 2005-2012 Imagination Technologies Ltd. 
+ * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of + * this archive for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +/* + * user_regset definitions. + */ + +int metag_gp_regs_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const void *ptr; + unsigned long data; + int ret; + + /* D{0-1}.{0-7} */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs->ctx.DX, 0, 4*16); + if (ret) + goto out; + /* A{0-1}.{0-1} */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs->ctx.AX, 4*16, 4*20); + if (ret) + goto out; + /* A{0-1}.2 */ + if (regs->ctx.SaveMask & TBICTX_XEXT_BIT) + ptr = regs->ctx.Ext.Ctx.pExt; + else + ptr = &regs->ctx.Ext.AX2; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ptr, 4*20, 4*22); + if (ret) + goto out; + /* A{0-1}.3 */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &regs->ctx.AX3, 4*22, 4*24); + if (ret) + goto out; + /* PC */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &regs->ctx.CurrPC, 4*24, 4*25); + if (ret) + goto out; + /* TXSTATUS */ + data = (unsigned long)regs->ctx.Flags; + if (regs->ctx.SaveMask & TBICTX_CBUF_BIT) + data |= USER_GP_REGS_STATUS_CATCH_BIT; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &data, 4*25, 4*26); + if (ret) + goto out; + /* TXRPT, TXBPOBITS, TXMODE */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &regs->ctx.CurrRPT, 4*26, 4*29); + if (ret) + goto out; + /* Padding */ + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 4*29, 4*30); +out: + return ret; +} + +int metag_gp_regs_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + void *ptr; + unsigned long data; + int ret; + + /* D{0-1}.{0-7} */ + ret =
user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->ctx.DX, 0, 4*16); + if (ret) + goto out; + /* A{0-1}.{0-1} */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->ctx.AX, 4*16, 4*20); + if (ret) + goto out; + /* A{0-1}.2 */ + if (regs->ctx.SaveMask & TBICTX_XEXT_BIT) + ptr = regs->ctx.Ext.Ctx.pExt; + else + ptr = &regs->ctx.Ext.AX2; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ptr, 4*20, 4*22); + if (ret) + goto out; + /* A{0-1}.3 */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &regs->ctx.AX3, 4*22, 4*24); + if (ret) + goto out; + /* PC */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &regs->ctx.CurrPC, 4*24, 4*25); + if (ret) + goto out; + /* TXSTATUS */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &data, 4*25, 4*26); + if (ret) + goto out; + regs->ctx.Flags = data & 0xffff; + if (data & USER_GP_REGS_STATUS_CATCH_BIT) + regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBUF_BIT; + else + regs->ctx.SaveMask &= ~TBICTX_CBUF_BIT; + /* TXRPT, TXBPOBITS, TXMODE */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &regs->ctx.CurrRPT, 4*26, 4*29); +out: + return ret; +} + +static int metag_gp_regs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct pt_regs *regs = task_pt_regs(target); + return metag_gp_regs_copyout(regs, pos, count, kbuf, ubuf); +} + +static int metag_gp_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + return metag_gp_regs_copyin(regs, pos, count, kbuf, ubuf); +} + +int metag_cb_regs_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + + /* TXCATCH{0-3} */ + if (regs->ctx.SaveMask & TBICTX_XCBF_BIT) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, +
regs->extcb0, 0, 4*4); + else + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 0, 4*4); + return ret; +} + +int metag_cb_regs_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + /* TXCATCH{0-3} */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->extcb0, 0, 4*4); + return ret; +} + +static int metag_cb_regs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct pt_regs *regs = task_pt_regs(target); + return metag_cb_regs_copyout(regs, pos, count, kbuf, ubuf); +} + +static int metag_cb_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + return metag_cb_regs_copyin(regs, pos, count, kbuf, ubuf); +} + +int metag_rp_state_copyout(const struct pt_regs *regs, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned long mask; + u64 *ptr; + int ret, i; + + /* Empty read pipeline */ + if (!(regs->ctx.SaveMask & TBICTX_CBRP_BIT)) { + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + 0, 4*13); + goto out; + } + + mask = (regs->ctx.CurrDIVTIME & TXDIVTIME_RPMASK_BITS) >> + TXDIVTIME_RPMASK_S; + + /* Read pipeline entries */ + ptr = (void *)&regs->extcb0[1]; + for (i = 0; i < 6; ++i, ++ptr) { + if (mask & (1 << i)) + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ptr, 8*i, 8*(i + 1)); + else + ret = user_regset_copyout_zero(&pos, &count, &kbuf, + &ubuf, 8*i, 8*(i + 1)); + if (ret) + goto out; + } + /* Mask of entries */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &mask, 4*12, 4*13); +out: + return ret; +} + +int metag_rp_state_copyin(struct pt_regs *regs, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct
user_rp_state rp; + unsigned long long *ptr; + int ret, i; + + /* Read the entire pipeline before making any changes */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &rp, 0, 4*13); + if (ret) + goto out; + + /* Write pipeline entries */ + ptr = (void *)&regs->extcb0[1]; + for (i = 0; i < 6; ++i, ++ptr) + if (rp.mask & (1 << i)) + *ptr = rp.entries[i]; + + /* Update RPMask in TXDIVTIME */ + regs->ctx.CurrDIVTIME &= ~TXDIVTIME_RPMASK_BITS; + regs->ctx.CurrDIVTIME |= (rp.mask << TXDIVTIME_RPMASK_S) + & TXDIVTIME_RPMASK_BITS; + + /* Set/clear flags to indicate catch/read pipeline state */ + if (rp.mask) + regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBRP_BIT; + else + regs->ctx.SaveMask &= ~TBICTX_CBRP_BIT; +out: + return ret; +} + +static int metag_rp_state_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct pt_regs *regs = task_pt_regs(target); + return metag_rp_state_copyout(regs, pos, count, kbuf, ubuf); +} + +static int metag_rp_state_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + return metag_rp_state_copyin(regs, pos, count, kbuf, ubuf); +} + +enum metag_regset { + REGSET_GENERAL, + REGSET_CBUF, + REGSET_READPIPE, +}; + +static const struct user_regset metag_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(long), + .align = sizeof(long long), + .get = metag_gp_regs_get, + .set = metag_gp_regs_set, + }, + [REGSET_CBUF] = { + .core_note_type = NT_METAG_CBUF, + .n = sizeof(struct user_cb_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long long), + .get = metag_cb_regs_get, + .set = metag_cb_regs_set, + }, + [REGSET_READPIPE] = { + .core_note_type = NT_METAG_RPIPE, + .n = sizeof(struct user_rp_state) / sizeof(long), + .size
= sizeof(long), + .align = sizeof(long long), + .get = metag_rp_state_get, + .set = metag_rp_state_set, + }, +}; + +static const struct user_regset_view user_metag_view = { + .name = "metag", + .e_machine = EM_METAG, + .regsets = metag_regsets, + .n = ARRAY_SIZE(metag_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_metag_view; +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure single step bits etc are not set. + */ +void ptrace_disable(struct task_struct *child) +{ + /* nothing to do.. */ +} + +long arch_ptrace(struct task_struct *child, long request, unsigned long addr, + unsigned long data) +{ + int ret; + + switch (request) { + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + +int syscall_trace_enter(struct pt_regs *regs) +{ + int ret = 0; + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + ret = tracehook_report_syscall_entry(regs); + + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_enter(regs, regs->ctx.DX[0].U1); + + return ret ? 
-1 : regs->ctx.DX[0].U1; +} + +void syscall_trace_leave(struct pt_regs *regs) +{ + if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) + trace_sys_exit(regs, regs->ctx.DX[0].U1); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, 0); +} diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 126a8175e3e2..eb164a298b98 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -395,6 +395,8 @@ typedef struct elf64_shdr { #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ +#define NT_METAG_CBUF 0x500 /* Metag catch buffer registers */ +#define NT_METAG_RPIPE 0x501 /* Metag read pipeline state */ /* Note header in a PT_NOTE section */ -- cgit v1.2.3 From e06c93cacb82dd147266fd1bdb2d0a0bd45ff2c1 Mon Sep 17 00:00:00 2001 From: Ley Foon Tan Date: Thu, 7 Mar 2013 10:28:37 +0800 Subject: tty/serial: Add support for Altera serial port Add support for Altera 8250/16550 compatible serial port. Signed-off-by: Ley Foon Tan Cc: stable Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/tty/serial/of-serial.txt | 3 +++ drivers/tty/serial/8250/8250.c | 23 +++++++++++++++++++++- drivers/tty/serial/of_serial.c | 6 ++++++ include/uapi/linux/serial_core.h | 5 ++++- 4 files changed, 35 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/devicetree/bindings/tty/serial/of-serial.txt b/Documentation/devicetree/bindings/tty/serial/of-serial.txt index 1e1145ca4f3c..8f01cb190f25 100644 --- a/Documentation/devicetree/bindings/tty/serial/of-serial.txt +++ b/Documentation/devicetree/bindings/tty/serial/of-serial.txt @@ -11,6 +11,9 @@ Required properties: - "nvidia,tegra20-uart" - "nxp,lpc3220-uart" - "ibm,qpace-nwp-serial" + - "altr,16550-FIFO32" + - "altr,16550-FIFO64" + - "altr,16550-FIFO128" - "serial" if the port type is unknown. 
- reg : offset and length of the register set for the device. - interrupts : should contain uart interrupt. diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c index 0efc815a4968..661096d25620 100644 --- a/drivers/tty/serial/8250/8250.c +++ b/drivers/tty/serial/8250/8250.c @@ -301,7 +301,28 @@ static const struct serial8250_config uart_config[] = { }, [PORT_8250_CIR] = { .name = "CIR port" - } + }, + [PORT_ALTR_16550_F32] = { + .name = "Altera 16550 FIFO32", + .fifo_size = 32, + .tx_loadsz = 32, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, + [PORT_ALTR_16550_F64] = { + .name = "Altera 16550 FIFO64", + .fifo_size = 64, + .tx_loadsz = 64, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, + [PORT_ALTR_16550_F128] = { + .name = "Altera 16550 FIFO128", + .fifo_size = 128, + .tx_loadsz = 128, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, }; /* Uart divisor latch read */ diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c index d5874605682b..b025d5438275 100644 --- a/drivers/tty/serial/of_serial.c +++ b/drivers/tty/serial/of_serial.c @@ -241,6 +241,12 @@ static struct of_device_id of_platform_serial_table[] = { { .compatible = "ns16850", .data = (void *)PORT_16850, }, { .compatible = "nvidia,tegra20-uart", .data = (void *)PORT_TEGRA, }, { .compatible = "nxp,lpc3220-uart", .data = (void *)PORT_LPC3220, }, + { .compatible = "altr,16550-FIFO32", + .data = (void *)PORT_ALTR_16550_F32, }, + { .compatible = "altr,16550-FIFO64", + .data = (void *)PORT_ALTR_16550_F64, }, + { .compatible = "altr,16550-FIFO128", + .data = (void *)PORT_ALTR_16550_F128, }, #ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL { .compatible = "ibm,qpace-nwp-serial", .data = (void *)PORT_NWPSERIAL, }, diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index b6a23a483d74..74c2bf7211f8 
100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -51,7 +51,10 @@ #define PORT_8250_CIR 23 /* CIR infrared port, has its own driver */ #define PORT_XR17V35X 24 /* Exar XR17V35x UARTs */ #define PORT_BRCM_TRUMANAGE 25 -#define PORT_MAX_8250 25 /* max port ID */ +#define PORT_ALTR_16550_F32 26 /* Altera 16550 UART with 32 FIFOs */ +#define PORT_ALTR_16550_F64 27 /* Altera 16550 UART with 64 FIFOs */ +#define PORT_ALTR_16550_F128 28 /* Altera 16550 UART with 128 FIFOs */ +#define PORT_MAX_8250 28 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed -- cgit v1.2.3 From 51b154ed5289682364b830858a4a1ca47fcd04e7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 13 Mar 2013 14:59:45 -0700 Subject: UAPI: fix endianness conditionals in linux/aio_abi.h In the UAPI header files, __BIG_ENDIAN and __LITTLE_ENDIAN must be compared against __BYTE_ORDER in preprocessor conditionals where these are exposed to userspace (that is they're not inside __KERNEL__ conditionals). However, in the main kernel the norm is to check for "defined(__XXX_ENDIAN)" rather than comparing against __BYTE_ORDER and this has incorrectly leaked into the userspace headers. The definition of PADDED() in linux/aio_abi.h is wrong in this way. Note that userspace will likely interpret this and thus the order of fields in struct iocb incorrectly as the little-endian variant on big-endian machines - depending on header inclusion order. [!!!] NOTE [!!!] This patch may adversely change the userspace API. It might be better to fix the ordering of aio_key and aio_reserved1 in struct iocb. 
Signed-off-by: David Howells Acked-by: Benjamin LaHaise Acked-by: Jeff Moyer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/aio_abi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index 86fa7a71336a..bb2554f7fbd1 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -62,9 +62,9 @@ struct io_event { __s64 res2; /* secondary result */ }; -#if defined(__LITTLE_ENDIAN) +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) #define PADDED(x,y) x, y -#elif defined(__BIG_ENDIAN) +#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) #define PADDED(x,y) y, x #else #error edit for your odd byteorder. -- cgit v1.2.3 From 29ba06b9ed51d49dea6c79c3c16b961d661262bd Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 13 Mar 2013 14:59:46 -0700 Subject: UAPI: fix endianness conditionals in linux/acct.h In the UAPI header files, __BIG_ENDIAN and __LITTLE_ENDIAN must be compared against __BYTE_ORDER in preprocessor conditionals where these are exposed to userspace (that is they're not inside __KERNEL__ conditionals). However, in the main kernel the norm is to check for "defined(__XXX_ENDIAN)" rather than comparing against __BYTE_ORDER and this has incorrectly leaked into the userspace headers. The definition of ACCT_BYTEORDER in linux/acct.h is wrong in this way. Note that userspace will likely interpret this incorrectly as the big-endian variant on little-endian machines - depending on header inclusion order. [!!!] NOTE [!!!] This patch may adversely change the userspace API. It might be better to fix the value of ACCT_BYTEORDER. 
Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/acct.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/acct.h b/include/uapi/linux/acct.h index 11b6ca3e0873..df2f9a0bba6a 100644 --- a/include/uapi/linux/acct.h +++ b/include/uapi/linux/acct.h @@ -107,10 +107,12 @@ struct acct_v3 #define ACORE 0x08 /* ... dumped core */ #define AXSIG 0x10 /* ... was killed by a signal */ -#ifdef __BIG_ENDIAN +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) #define ACCT_BYTEORDER 0x80 /* accounting file is big endian */ -#else +#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) #define ACCT_BYTEORDER 0x00 /* accounting file is little endian */ +#else +#error unspecified endianness #endif #ifndef __KERNEL__ -- cgit v1.2.3 From ca044f9a9ed492f0f7e52df999c10ca6f7cfc5c0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 13 Mar 2013 14:59:47 -0700 Subject: UAPI: fix endianness conditionals in linux/raid/md_p.h In the UAPI header files, __BIG_ENDIAN and __LITTLE_ENDIAN must be compared against __BYTE_ORDER in preprocessor conditionals where these are exposed to userspace (that is they're not inside __KERNEL__ conditionals). However, in the main kernel the norm is to check for "defined(__XXX_ENDIAN)" rather than comparing against __BYTE_ORDER and this has incorrectly leaked into the userspace headers. The definition of struct mdp_superblock_s in linux/raid/md_p.h is wrong in this way. Note that userspace will likely interpret the ordering of the fields incorrectly as the big-endian variant on little-endian machines - depending on header inclusion order. [!!!] NOTE [!!!] This patch may adversely change the userspace API. It might be better to fix the ordering of events_hi, events_lo, cp_events_hi and cp_events_lo in struct mdp_superblock_s / typedef mdp_super_t. 
Signed-off-by: David Howells Acked-by: NeilBrown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/raid/md_p.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index ee753536ab70..fe1a5406d4d9 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -145,16 +145,18 @@ typedef struct mdp_superblock_s { __u32 failed_disks; /* 4 Number of failed disks */ __u32 spare_disks; /* 5 Number of spare disks */ __u32 sb_csum; /* 6 checksum of the whole superblock */ -#ifdef __BIG_ENDIAN +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) __u32 events_hi; /* 7 high-order of superblock update count */ __u32 events_lo; /* 8 low-order of superblock update count */ __u32 cp_events_hi; /* 9 high-order of checkpoint update count */ __u32 cp_events_lo; /* 10 low-order of checkpoint update count */ -#else +#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) __u32 events_lo; /* 7 low-order of superblock update count */ __u32 events_hi; /* 8 high-order of superblock update count */ __u32 cp_events_lo; /* 9 low-order of checkpoint update count */ __u32 cp_events_hi; /* 10 high-order of checkpoint update count */ +#else +#error unspecified endianness #endif __u32 recovery_cp; /* 11 recovery checkpoint sector count */ /* There are only valid for minor_version > 90 */ -- cgit v1.2.3