From f2af74123f8c5a735248547f4286a3adc28633c1 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Tue, 20 May 2014 09:38:03 -1000 Subject: tools: ffs-test: convert to new descriptor format fixing compilation error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit [ac8dde11: “usb: gadget: f_fs: Add flags to descriptors block”] which introduced a new descriptor format for FunctionFS removed the usb_functionfs_descs_head structure, which is still used by ffs-test. tool. Convert ffs-test by converting it to use the new header format. For testing kernels prior to 3.14 (when the new format was introduced) and parsing of the legacy headers in the new kernels, provide a compilation flag to make the tool use the old format. Finally, include information as to when the legacy FunctionFS headers format has been deprecated (which is also when the new one has been introduced). Reported-by: Lad, Prabhakar Signed-off-by: Michal Nazarewicz Signed-off-by: Felipe Balbi --- include/uapi/linux/usb/functionfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index 2a4b4a72a4f9..ecb3a31f7ca6 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -53,7 +53,7 @@ struct usb_endpoint_descriptor_no_audio { * structure. Any flags that are not recognised cause the whole block to be * rejected with -ENOSYS. * - * Legacy descriptors format: + * Legacy descriptors format (deprecated as of 3.14): * * | off | name | type | description | * |-----+-----------+--------------+--------------------------------------| -- cgit v1.2.3 From 31fa97c5defca3895dc6c823096d7ba59df76125 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 11 Jun 2014 17:18:21 +0300 Subject: cfg80211: pass TDLS initiator in tdls_mgmt operations The TDLS initiator is set once during link setup. If determines the address ordering in the link identifier IE. Fix dependent drivers - mwifiex and mac80211. Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- drivers/net/wireless/mwifiex/cfg80211.c | 3 ++- include/net/cfg80211.h | 2 +- include/uapi/linux/nl80211.h | 5 +++++ net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/tdls.c | 3 ++- net/wireless/nl80211.c | 4 ++++ net/wireless/rdev-ops.h | 6 +++--- net/wireless/trace.h | 10 +++++++--- 8 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index e95dec91a561..149d2e693bdd 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -2631,7 +2631,8 @@ static int mwifiex_cfg80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, - const u8 *extra_ies, size_t extra_ies_len) + bool initiator, const u8 *extra_ies, + size_t extra_ies_len) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); int ret; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 29cb4b2bee5a..b9eeae3990cf 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2500,7 +2500,7 @@ struct cfg80211_ops { int (*tdls_mgmt)(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, - const u8 *buf, size_t len); + bool initiator, const u8 *buf, size_t len); int (*tdls_oper)(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index be9519b52bb1..f1db15b9c041 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1591,6 +1591,9 @@ enum nl80211_commands { * creation then the new interface will be owned by the netlink socket * that created it and will be destroyed when the socket is closed * + * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is + * the TDLS link initiator. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1931,6 +1934,8 @@ enum nl80211_attrs { NL80211_ATTR_CSA_C_OFFSETS_TX, NL80211_ATTR_MAX_CSA_COUNTERS, + NL80211_ATTR_TDLS_INITIATOR, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index fc687d2a7518..75f79c168e90 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1867,7 +1867,8 @@ int ieee80211_max_num_channels(struct ieee80211_local *local); int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, - const u8 *extra_ies, size_t extra_ies_len); + bool initiator, const u8 *extra_ies, + size_t extra_ies_len); int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index cafcbde70018..0b3ca2ce7ea4 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -299,7 +299,8 @@ fail: int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, - const u8 *extra_ies, size_t extra_ies_len) + bool initiator, const u8 *extra_ies, + size_t extra_ies_len) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ba4f1723c83a..8f46b8ffbcf6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -337,6 +337,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_TDLS_OPERATION] = { .type = NLA_U8 }, [NL80211_ATTR_TDLS_SUPPORT] = { .type = NLA_FLAG }, [NL80211_ATTR_TDLS_EXTERNAL_SETUP] = { .type = NLA_FLAG }, + [NL80211_ATTR_TDLS_INITIATOR] = { .type = NLA_FLAG }, [NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG }, [NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, @@ -7365,6 +7366,7 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info) u32 peer_capability = 0; u16 status_code; u8 *peer; + bool initiator; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) || !rdev->ops->tdls_mgmt) @@ -7381,12 +7383,14 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info) action_code = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_ACTION]); status_code = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); dialog_token = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN]); + initiator = nla_get_flag(info->attrs[NL80211_ATTR_TDLS_INITIATOR]); if (info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]) peer_capability = nla_get_u32(info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]); return rdev_tdls_mgmt(rdev, dev, peer, action_code, dialog_token, status_code, peer_capability, + initiator, nla_data(info->attrs[NL80211_ATTR_IE]), nla_len(info->attrs[NL80211_ATTR_IE])); } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index f552b0abbd70..56c2240c30ce 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -751,15 +751,15 @@ static inline int rdev_tdls_mgmt(struct cfg80211_registered_device *rdev, struct net_device *dev, u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, - const u8 *buf, size_t len) + bool initiator, const u8 *buf, size_t len) { int ret; trace_rdev_tdls_mgmt(&rdev->wiphy, dev, peer, action_code, dialog_token, status_code, peer_capability, - buf, len); + initiator, buf, len); ret = rdev->ops->tdls_mgmt(&rdev->wiphy, dev, peer, action_code, dialog_token, status_code, peer_capability, - buf, len); + initiator, buf, len); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 174559aade57..85474ee501eb 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1454,9 +1454,9 @@ TRACE_EVENT(rdev_tdls_mgmt, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *peer, u8 action_code, u8 dialog_token, u16 status_code, u32 peer_capability, - const u8 *buf, size_t len), + bool initiator, const u8 *buf, size_t len), TP_ARGS(wiphy, netdev, peer, action_code, dialog_token, status_code, - peer_capability, buf, len), + peer_capability, initiator, buf, len), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY @@ -1465,6 +1465,7 @@ TRACE_EVENT(rdev_tdls_mgmt, __field(u8, dialog_token) __field(u16, status_code) __field(u32, peer_capability) + __field(bool, initiator) __dynamic_array(u8, buf, len) ), TP_fast_assign( @@ -1475,13 +1476,16 @@ TRACE_EVENT(rdev_tdls_mgmt, __entry->dialog_token = dialog_token; __entry->status_code = status_code; __entry->peer_capability = peer_capability; + __entry->initiator = initiator; memcpy(__get_dynamic_array(buf), buf, len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", action_code: %u, " - "dialog_token: %u, status_code: %u, peer_capability: %u buf: %#.2x ", + "dialog_token: %u, status_code: %u, peer_capability: %u " + "initiator: %s buf: %#.2x ", WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->action_code, __entry->dialog_token, __entry->status_code, __entry->peer_capability, + BOOL_TO_STR(__entry->initiator), ((u8 *)__get_dynamic_array(buf))[0]) ); -- cgit v1.2.3 From 7200135bc1e61f1437dc326ae2ef2f310c50b4eb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 16 Jun 2014 13:01:52 +0200 Subject: netfilter: kill ulog targets This has been marked as deprecated for quite some time and the NFLOG target replacement has been also available since 2006. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_bridge/Kbuild | 1 - include/uapi/linux/netfilter_bridge/ebt_ulog.h | 38 -- include/uapi/linux/netfilter_ipv4/Kbuild | 1 - include/uapi/linux/netfilter_ipv4/ipt_ULOG.h | 49 --- net/bridge/netfilter/Kconfig | 16 - net/bridge/netfilter/ebt_ulog.c | 393 ------------------- net/ipv4/netfilter/Kconfig | 19 - net/ipv4/netfilter/ipt_ULOG.c | 498 ------------------------- 8 files changed, 1015 deletions(-) delete mode 100644 include/uapi/linux/netfilter_bridge/ebt_ulog.h delete mode 100644 include/uapi/linux/netfilter_ipv4/ipt_ULOG.h delete mode 100644 net/bridge/netfilter/ebt_ulog.c delete mode 100644 net/ipv4/netfilter/ipt_ULOG.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter_bridge/Kbuild b/include/uapi/linux/netfilter_bridge/Kbuild index 348717c3a22f..0fbad8ef96de 100644 --- a/include/uapi/linux/netfilter_bridge/Kbuild +++ b/include/uapi/linux/netfilter_bridge/Kbuild @@ -14,6 +14,5 @@ header-y += ebt_nflog.h header-y += ebt_pkttype.h header-y += ebt_redirect.h header-y += ebt_stp.h -header-y += ebt_ulog.h header-y += ebt_vlan.h header-y += ebtables.h diff --git a/include/uapi/linux/netfilter_bridge/ebt_ulog.h b/include/uapi/linux/netfilter_bridge/ebt_ulog.h deleted file mode 100644 index 89a6becb5269..000000000000 --- a/include/uapi/linux/netfilter_bridge/ebt_ulog.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef _EBT_ULOG_H -#define _EBT_ULOG_H - -#include - -#define EBT_ULOG_DEFAULT_NLGROUP 0 -#define EBT_ULOG_DEFAULT_QTHRESHOLD 1 -#define EBT_ULOG_MAXNLGROUPS 32 /* hardcoded netlink max */ -#define EBT_ULOG_PREFIX_LEN 32 -#define EBT_ULOG_MAX_QLEN 50 -#define EBT_ULOG_WATCHER "ulog" -#define EBT_ULOG_VERSION 1 - -struct ebt_ulog_info { - __u32 nlgroup; - unsigned int cprange; - unsigned int qthreshold; - char prefix[EBT_ULOG_PREFIX_LEN]; -}; - -typedef struct ebt_ulog_packet_msg { - int version; - char indev[IFNAMSIZ]; - char outdev[IFNAMSIZ]; - char physindev[IFNAMSIZ]; - char physoutdev[IFNAMSIZ]; - char prefix[EBT_ULOG_PREFIX_LEN]; - struct timeval stamp; - unsigned long mark; - unsigned int hook; - size_t data_len; - /* The complete packet, including Ethernet header and perhaps - * the VLAN header is appended */ - unsigned char data[0] __attribute__ - ((aligned (__alignof__(struct ebt_ulog_info)))); -} ebt_ulog_packet_msg_t; - -#endif /* _EBT_ULOG_H */ diff --git a/include/uapi/linux/netfilter_ipv4/Kbuild b/include/uapi/linux/netfilter_ipv4/Kbuild index fb008437dde1..ecb291df390e 100644 --- a/include/uapi/linux/netfilter_ipv4/Kbuild +++ b/include/uapi/linux/netfilter_ipv4/Kbuild @@ -5,7 +5,6 @@ header-y += ipt_ECN.h header-y += ipt_LOG.h header-y += ipt_REJECT.h header-y += ipt_TTL.h -header-y += ipt_ULOG.h header-y += ipt_ah.h header-y += ipt_ecn.h header-y += ipt_ttl.h diff --git a/include/uapi/linux/netfilter_ipv4/ipt_ULOG.h b/include/uapi/linux/netfilter_ipv4/ipt_ULOG.h deleted file mode 100644 index 417aad280bcc..000000000000 --- a/include/uapi/linux/netfilter_ipv4/ipt_ULOG.h +++ /dev/null @@ -1,49 +0,0 @@ -/* Header file for IP tables userspace logging, Version 1.8 - * - * (C) 2000-2002 by Harald Welte - * - * Distributed under the terms of GNU GPL */ - -#ifndef _IPT_ULOG_H -#define _IPT_ULOG_H - -#ifndef NETLINK_NFLOG -#define NETLINK_NFLOG 5 -#endif - -#define ULOG_DEFAULT_NLGROUP 1 -#define ULOG_DEFAULT_QTHRESHOLD 1 - -#define ULOG_MAC_LEN 80 -#define ULOG_PREFIX_LEN 32 - -#define ULOG_MAX_QLEN 50 -/* Why 50? Well... there is a limit imposed by the slab cache 131000 - * bytes. So the multipart netlink-message has to be < 131000 bytes. - * Assuming a standard ethernet-mtu of 1500, we could define this up - * to 80... but even 50 seems to be big enough. */ - -/* private data structure for each rule with a ULOG target */ -struct ipt_ulog_info { - unsigned int nl_group; - size_t copy_range; - size_t qthreshold; - char prefix[ULOG_PREFIX_LEN]; -}; - -/* Format of the ULOG packets passed through netlink */ -typedef struct ulog_packet_msg { - unsigned long mark; - long timestamp_sec; - long timestamp_usec; - unsigned int hook; - char indev_name[IFNAMSIZ]; - char outdev_name[IFNAMSIZ]; - size_t data_len; - char prefix[ULOG_PREFIX_LEN]; - unsigned char mac_len; - unsigned char mac[ULOG_MAC_LEN]; - unsigned char payload[0]; -} ulog_packet_msg_t; - -#endif /*_IPT_ULOG_H*/ diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 629dc77874a9..3a76ac7b7141 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -202,22 +202,6 @@ config BRIDGE_EBT_LOG To compile it as a module, choose M here. If unsure, say N. -config BRIDGE_EBT_ULOG - tristate "ebt: ulog support (OBSOLETE)" - help - This option enables the old bridge-specific "ebt_ulog" implementation - which has been obsoleted by the new "nfnetlink_log" code (see - CONFIG_NETFILTER_NETLINK_LOG). - - This option adds the ulog watcher, that you can use in any rule - in any ebtables table. The packet is passed to a userspace - logging daemon using netlink multicast sockets. This differs - from the log watcher in the sense that the complete packet is - sent to userspace instead of a descriptive text and that - netlink multicast sockets are used instead of the syslog. - - To compile it as a module, choose M here. If unsure, say N. - config BRIDGE_EBT_NFLOG tristate "ebt: nflog support" help diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c deleted file mode 100644 index 7c470c371e14..000000000000 --- a/net/bridge/netfilter/ebt_ulog.c +++ /dev/null @@ -1,393 +0,0 @@ -/* - * netfilter module for userspace bridged Ethernet frames logging daemons - * - * Authors: - * Bart De Schuymer - * Harald Welte - * - * November, 2004 - * - * Based on ipt_ULOG.c, which is - * (C) 2000-2002 by Harald Welte - * - * This module accepts two parameters: - * - * nlbufsiz: - * The parameter specifies how big the buffer for each netlink multicast - * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will - * get accumulated in the kernel until they are sent to userspace. It is - * NOT possible to allocate more than 128kB, and it is strongly discouraged, - * because atomically allocating 128kB inside the network rx softirq is not - * reliable. Please also keep in mind that this buffer size is allocated for - * each nlgroup you are using, so the total kernel memory usage increases - * by that factor. - * - * flushtimeout: - * Specify, after how many hundredths of a second the queue should be - * flushed even if it is not full yet. - * - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "../br_private.h" - -static unsigned int nlbufsiz = NLMSG_GOODSIZE; -module_param(nlbufsiz, uint, 0600); -MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) " - "(defaults to 4096)"); - -static unsigned int flushtimeout = 10; -module_param(flushtimeout, uint, 0600); -MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths ofa second) " - "(defaults to 10)"); - -typedef struct { - unsigned int qlen; /* number of nlmsgs' in the skb */ - struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */ - struct sk_buff *skb; /* the pre-allocated skb */ - struct timer_list timer; /* the timer function */ - spinlock_t lock; /* the per-queue lock */ -} ebt_ulog_buff_t; - -static int ebt_ulog_net_id __read_mostly; -struct ebt_ulog_net { - unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS]; - ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; - struct sock *ebtulognl; -}; - -static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net) -{ - return net_generic(net, ebt_ulog_net_id); -} - -/* send one ulog_buff_t to userspace */ -static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup) -{ - ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup]; - - del_timer(&ub->timer); - - if (!ub->skb) - return; - - /* last nlmsg needs NLMSG_DONE */ - if (ub->qlen > 1) - ub->lastnlh->nlmsg_type = NLMSG_DONE; - - NETLINK_CB(ub->skb).dst_group = nlgroup + 1; - netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); - - ub->qlen = 0; - ub->skb = NULL; -} - -/* timer function to flush queue in flushtimeout time */ -static void ulog_timer(unsigned long data) -{ - struct ebt_ulog_net *ebt = container_of((void *)data, - struct ebt_ulog_net, - nlgroup[*(unsigned int *)data]); - - ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data]; - spin_lock_bh(&ub->lock); - if (ub->skb) - ulog_send(ebt, *(unsigned int *)data); - spin_unlock_bh(&ub->lock); -} - -static struct sk_buff *ulog_alloc_skb(unsigned int size) -{ - struct sk_buff *skb; - unsigned int n; - - n = max(size, nlbufsiz); - skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); - if (!skb) { - if (n > size) { - /* try to allocate only as much as we need for - * current packet */ - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) - pr_debug("cannot even allocate buffer of size %ub\n", - size); - } - } - - return skb; -} - -static void ebt_ulog_packet(struct net *net, unsigned int hooknr, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct ebt_ulog_info *uloginfo, - const char *prefix) -{ - ebt_ulog_packet_msg_t *pm; - size_t size, copy_len; - struct nlmsghdr *nlh; - struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); - unsigned int group = uloginfo->nlgroup; - ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group]; - spinlock_t *lock = &ub->lock; - ktime_t kt; - - if ((uloginfo->cprange == 0) || - (uloginfo->cprange > skb->len + ETH_HLEN)) - copy_len = skb->len + ETH_HLEN; - else - copy_len = uloginfo->cprange; - - size = nlmsg_total_size(sizeof(*pm) + copy_len); - if (size > nlbufsiz) { - pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz); - return; - } - - spin_lock_bh(lock); - - if (!ub->skb) { - if (!(ub->skb = ulog_alloc_skb(size))) - goto unlock; - } else if (size > skb_tailroom(ub->skb)) { - ulog_send(ebt, group); - - if (!(ub->skb = ulog_alloc_skb(size))) - goto unlock; - } - - nlh = nlmsg_put(ub->skb, 0, ub->qlen, 0, - size - NLMSG_ALIGN(sizeof(*nlh)), 0); - if (!nlh) { - kfree_skb(ub->skb); - ub->skb = NULL; - goto unlock; - } - ub->qlen++; - - pm = nlmsg_data(nlh); - memset(pm, 0, sizeof(*pm)); - - /* Fill in the ulog data */ - pm->version = EBT_ULOG_VERSION; - kt = ktime_get_real(); - pm->stamp = ktime_to_timeval(kt); - if (ub->qlen == 1) - ub->skb->tstamp = kt; - pm->data_len = copy_len; - pm->mark = skb->mark; - pm->hook = hooknr; - if (uloginfo->prefix != NULL) - strcpy(pm->prefix, uloginfo->prefix); - - if (in) { - strcpy(pm->physindev, in->name); - /* If in isn't a bridge, then physindev==indev */ - if (br_port_exists(in)) - /* rcu_read_lock()ed by nf_hook_slow */ - strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name); - else - strcpy(pm->indev, in->name); - } - - if (out) { - /* If out exists, then out is a bridge port */ - strcpy(pm->physoutdev, out->name); - /* rcu_read_lock()ed by nf_hook_slow */ - strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name); - } - - if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0) - BUG(); - - if (ub->qlen > 1) - ub->lastnlh->nlmsg_flags |= NLM_F_MULTI; - - ub->lastnlh = nlh; - - if (ub->qlen >= uloginfo->qthreshold) - ulog_send(ebt, group); - else if (!timer_pending(&ub->timer)) { - ub->timer.expires = jiffies + flushtimeout * HZ / 100; - add_timer(&ub->timer); - } - -unlock: - spin_unlock_bh(lock); -} - -/* this function is registered with the netfilter core */ -static void ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, - const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct nf_loginfo *li, - const char *prefix) -{ - struct ebt_ulog_info loginfo; - - if (!li || li->type != NF_LOG_TYPE_ULOG) { - loginfo.nlgroup = EBT_ULOG_DEFAULT_NLGROUP; - loginfo.cprange = 0; - loginfo.qthreshold = EBT_ULOG_DEFAULT_QTHRESHOLD; - loginfo.prefix[0] = '\0'; - } else { - loginfo.nlgroup = li->u.ulog.group; - loginfo.cprange = li->u.ulog.copy_len; - loginfo.qthreshold = li->u.ulog.qthreshold; - strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); - } - - ebt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix); -} - -static unsigned int -ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct net *net = dev_net(par->in ? par->in : par->out); - - ebt_ulog_packet(net, par->hooknum, skb, par->in, par->out, - par->targinfo, NULL); - return EBT_CONTINUE; -} - -static int ebt_ulog_tg_check(const struct xt_tgchk_param *par) -{ - struct ebt_ulog_info *uloginfo = par->targinfo; - - if (!par->net->xt.ebt_ulog_warn_deprecated) { - pr_info("ebt_ulog is deprecated and it will be removed soon, " - "use ebt_nflog instead\n"); - par->net->xt.ebt_ulog_warn_deprecated = true; - } - - if (uloginfo->nlgroup > 31) - return -EINVAL; - - uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0'; - - if (uloginfo->qthreshold > EBT_ULOG_MAX_QLEN) - uloginfo->qthreshold = EBT_ULOG_MAX_QLEN; - - return 0; -} - -static struct xt_target ebt_ulog_tg_reg __read_mostly = { - .name = "ulog", - .revision = 0, - .family = NFPROTO_BRIDGE, - .target = ebt_ulog_tg, - .checkentry = ebt_ulog_tg_check, - .targetsize = sizeof(struct ebt_ulog_info), - .me = THIS_MODULE, -}; - -static struct nf_logger ebt_ulog_logger __read_mostly = { - .name = "ebt_ulog", - .logfn = &ebt_log_packet, - .me = THIS_MODULE, -}; - -static int __net_init ebt_ulog_net_init(struct net *net) -{ - int i; - struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); - - struct netlink_kernel_cfg cfg = { - .groups = EBT_ULOG_MAXNLGROUPS, - }; - - /* initialize ulog_buffers */ - for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { - ebt->nlgroup[i] = i; - setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer, - (unsigned long)&ebt->nlgroup[i]); - spin_lock_init(&ebt->ulog_buffers[i].lock); - } - - ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); - if (!ebt->ebtulognl) - return -ENOMEM; - - nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger); - return 0; -} - -static void __net_exit ebt_ulog_net_fini(struct net *net) -{ - int i; - struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); - - nf_log_unset(net, &ebt_ulog_logger); - for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { - ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i]; - del_timer(&ub->timer); - - if (ub->skb) { - kfree_skb(ub->skb); - ub->skb = NULL; - } - } - netlink_kernel_release(ebt->ebtulognl); -} - -static struct pernet_operations ebt_ulog_net_ops = { - .init = ebt_ulog_net_init, - .exit = ebt_ulog_net_fini, - .id = &ebt_ulog_net_id, - .size = sizeof(struct ebt_ulog_net), -}; - -static int __init ebt_ulog_init(void) -{ - int ret; - - if (nlbufsiz >= 128*1024) { - pr_warn("Netlink buffer has to be <= 128kB," - "please try a smaller nlbufsiz parameter.\n"); - return -EINVAL; - } - - ret = register_pernet_subsys(&ebt_ulog_net_ops); - if (ret) - goto out_pernet; - - ret = xt_register_target(&ebt_ulog_tg_reg); - if (ret) - goto out_target; - - nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); - - return 0; - -out_target: - unregister_pernet_subsys(&ebt_ulog_net_ops); -out_pernet: - return ret; -} - -static void __exit ebt_ulog_fini(void) -{ - nf_log_unregister(&ebt_ulog_logger); - xt_unregister_target(&ebt_ulog_tg_reg); - unregister_pernet_subsys(&ebt_ulog_net_ops); -} - -module_init(ebt_ulog_init); -module_exit(ebt_ulog_fini); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Bart De Schuymer "); -MODULE_DESCRIPTION("Ebtables: Packet logging to netlink using ULOG"); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index a26ce035e3fa..730faacbc5f8 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -159,25 +159,6 @@ config IP_NF_TARGET_SYNPROXY To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_ULOG - tristate "ULOG target support (obsolete)" - default m if NETFILTER_ADVANCED=n - ---help--- - - This option enables the old IPv4-only "ipt_ULOG" implementation - which has been obsoleted by the new "nfnetlink_log" code (see - CONFIG_NETFILTER_NETLINK_LOG). - - This option adds a `ULOG' target, which allows you to create rules in - any iptables table. The packet is passed to a userspace logging - daemon using netlink multicast sockets; unlike the LOG target - which can only be viewed through syslog. - - The appropriate userspace logging daemon (ulogd) may be obtained from - - - To compile it as a module, choose M here. If unsure, say N. - # NAT + specific targets: nf_conntrack config NF_NAT_IPV4 tristate "IPv4 NAT" diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c deleted file mode 100644 index 9cb993cd224b..000000000000 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ /dev/null @@ -1,498 +0,0 @@ -/* - * netfilter module for userspace packet logging daemons - * - * (C) 2000-2004 by Harald Welte - * (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team - * (C) 2005-2007 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This module accepts two parameters: - * - * nlbufsiz: - * The parameter specifies how big the buffer for each netlink multicast - * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will - * get accumulated in the kernel until they are sent to userspace. It is - * NOT possible to allocate more than 128kB, and it is strongly discouraged, - * because atomically allocating 128kB inside the network rx softirq is not - * reliable. Please also keep in mind that this buffer size is allocated for - * each nlgroup you are using, so the total kernel memory usage increases - * by that factor. - * - * Actually you should use nlbufsiz a bit smaller than PAGE_SIZE, since - * nlbufsiz is used with alloc_skb, which adds another - * sizeof(struct skb_shared_info). Use NLMSG_GOODSIZE instead. - * - * flushtimeout: - * Specify, after how many hundredths of a second the queue should be - * flushed even if it is not full yet. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Xtables: packet logging to netlink using ULOG"); -MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); - -#define ULOG_NL_EVENT 111 /* Harald's favorite number */ -#define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ - -static unsigned int nlbufsiz = NLMSG_GOODSIZE; -module_param(nlbufsiz, uint, 0400); -MODULE_PARM_DESC(nlbufsiz, "netlink buffer size"); - -static unsigned int flushtimeout = 10; -module_param(flushtimeout, uint, 0600); -MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); - -static bool nflog = true; -module_param(nflog, bool, 0400); -MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); - -/* global data structures */ - -typedef struct { - unsigned int qlen; /* number of nlmsgs' in the skb */ - struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */ - struct sk_buff *skb; /* the pre-allocated skb */ - struct timer_list timer; /* the timer function */ -} ulog_buff_t; - -static int ulog_net_id __read_mostly; -struct ulog_net { - unsigned int nlgroup[ULOG_MAXNLGROUPS]; - ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; - struct sock *nflognl; - spinlock_t lock; -}; - -static struct ulog_net *ulog_pernet(struct net *net) -{ - return net_generic(net, ulog_net_id); -} - -/* send one ulog_buff_t to userspace */ -static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum) -{ - ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum]; - - pr_debug("ulog_send: timer is deleting\n"); - del_timer(&ub->timer); - - if (!ub->skb) { - pr_debug("ulog_send: nothing to send\n"); - return; - } - - /* last nlmsg needs NLMSG_DONE */ - if (ub->qlen > 1) - ub->lastnlh->nlmsg_type = NLMSG_DONE; - - NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; - pr_debug("throwing %d packets to netlink group %u\n", - ub->qlen, nlgroupnum + 1); - netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1, - GFP_ATOMIC); - - ub->qlen = 0; - ub->skb = NULL; - ub->lastnlh = NULL; -} - - -/* timer function to flush queue in flushtimeout time */ -static void ulog_timer(unsigned long data) -{ - unsigned int groupnum = *((unsigned int *)data); - struct ulog_net *ulog = container_of((void *)data, - struct ulog_net, - nlgroup[groupnum]); - pr_debug("timer function called, calling ulog_send\n"); - - /* lock to protect against somebody modifying our structure - * from ipt_ulog_target at the same time */ - spin_lock_bh(&ulog->lock); - ulog_send(ulog, groupnum); - spin_unlock_bh(&ulog->lock); -} - -static struct sk_buff *ulog_alloc_skb(unsigned int size) -{ - struct sk_buff *skb; - unsigned int n; - - /* alloc skb which should be big enough for a whole - * multipart message. WARNING: has to be <= 131000 - * due to slab allocator restrictions */ - - n = max(size, nlbufsiz); - skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); - if (!skb) { - if (n > size) { - /* try to allocate only as much as we need for - * current packet */ - - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) - pr_debug("cannot even allocate %ub\n", size); - } - } - - return skb; -} - -static void ipt_ulog_packet(struct net *net, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct ipt_ulog_info *loginfo, - const char *prefix) -{ - ulog_buff_t *ub; - ulog_packet_msg_t *pm; - size_t size, copy_len; - struct nlmsghdr *nlh; - struct timeval tv; - struct ulog_net *ulog = ulog_pernet(net); - - /* ffs == find first bit set, necessary because userspace - * is already shifting groupnumber, but we need unshifted. - * ffs() returns [1..32], we need [0..31] */ - unsigned int groupnum = ffs(loginfo->nl_group) - 1; - - /* calculate the size of the skb needed */ - if (loginfo->copy_range == 0 || loginfo->copy_range > skb->len) - copy_len = skb->len; - else - copy_len = loginfo->copy_range; - - size = nlmsg_total_size(sizeof(*pm) + copy_len); - - ub = &ulog->ulog_buffers[groupnum]; - - spin_lock_bh(&ulog->lock); - - if (!ub->skb) { - if (!(ub->skb = ulog_alloc_skb(size))) - goto alloc_failure; - } else if (ub->qlen >= loginfo->qthreshold || - size > skb_tailroom(ub->skb)) { - /* either the queue len is too high or we don't have - * enough room in nlskb left. send it to userspace. */ - - ulog_send(ulog, groupnum); - - if (!(ub->skb = ulog_alloc_skb(size))) - goto alloc_failure; - } - - pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold); - - nlh = nlmsg_put(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, - sizeof(*pm)+copy_len, 0); - if (!nlh) { - pr_debug("error during nlmsg_put\n"); - goto out_unlock; - } - ub->qlen++; - - pm = nlmsg_data(nlh); - memset(pm, 0, sizeof(*pm)); - - /* We might not have a timestamp, get one */ - if (skb->tstamp.tv64 == 0) - __net_timestamp((struct sk_buff *)skb); - - /* copy hook, prefix, timestamp, payload, etc. */ - pm->data_len = copy_len; - tv = ktime_to_timeval(skb->tstamp); - put_unaligned(tv.tv_sec, &pm->timestamp_sec); - put_unaligned(tv.tv_usec, &pm->timestamp_usec); - put_unaligned(skb->mark, &pm->mark); - pm->hook = hooknum; - if (prefix != NULL) { - strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1); - pm->prefix[sizeof(pm->prefix) - 1] = '\0'; - } - else if (loginfo->prefix[0] != '\0') - strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); - - if (in && in->hard_header_len > 0 && - skb->mac_header != skb->network_header && - in->hard_header_len <= ULOG_MAC_LEN) { - memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len); - pm->mac_len = in->hard_header_len; - } else - pm->mac_len = 0; - - if (in) - strncpy(pm->indev_name, in->name, sizeof(pm->indev_name)); - - if (out) - strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); - - /* copy_len <= skb->len, so can't fail. */ - if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0) - BUG(); - - /* check if we are building multi-part messages */ - if (ub->qlen > 1) - ub->lastnlh->nlmsg_flags |= NLM_F_MULTI; - - ub->lastnlh = nlh; - - /* if timer isn't already running, start it */ - if (!timer_pending(&ub->timer)) { - ub->timer.expires = jiffies + flushtimeout * HZ / 100; - add_timer(&ub->timer); - } - - /* if threshold is reached, send message to userspace */ - if (ub->qlen >= loginfo->qthreshold) { - if (loginfo->qthreshold > 1) - nlh->nlmsg_type = NLMSG_DONE; - ulog_send(ulog, groupnum); - } -out_unlock: - spin_unlock_bh(&ulog->lock); - - return; - -alloc_failure: - pr_debug("Error building netlink message\n"); - spin_unlock_bh(&ulog->lock); -} - -static unsigned int -ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct net *net = dev_net(par->in ? par->in : par->out); - - ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out, - par->targinfo, NULL); - return XT_CONTINUE; -} - -static void ipt_logfn(struct net *net, - u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *li, - const char *prefix) -{ - struct ipt_ulog_info loginfo; - - if (!li || li->type != NF_LOG_TYPE_ULOG) { - loginfo.nl_group = ULOG_DEFAULT_NLGROUP; - loginfo.copy_range = 0; - loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD; - loginfo.prefix[0] = '\0'; - } else { - loginfo.nl_group = li->u.ulog.group; - loginfo.copy_range = li->u.ulog.copy_len; - loginfo.qthreshold = li->u.ulog.qthreshold; - strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); - } - - ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix); -} - -static int ulog_tg_check(const struct xt_tgchk_param *par) -{ - const struct ipt_ulog_info *loginfo = par->targinfo; - - if (!par->net->xt.ulog_warn_deprecated) { - pr_info("ULOG is deprecated and it will be removed soon, " - "use NFLOG instead\n"); - par->net->xt.ulog_warn_deprecated = true; - } - - if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { - pr_debug("prefix not null-terminated\n"); - return -EINVAL; - } - if (loginfo->qthreshold > ULOG_MAX_QLEN) { - pr_debug("queue threshold %Zu > MAX_QLEN\n", - loginfo->qthreshold); - return -EINVAL; - } - return 0; -} - -#ifdef CONFIG_COMPAT -struct compat_ipt_ulog_info { - compat_uint_t nl_group; - compat_size_t copy_range; - compat_size_t qthreshold; - char prefix[ULOG_PREFIX_LEN]; -}; - -static void ulog_tg_compat_from_user(void *dst, const void *src) -{ - const struct compat_ipt_ulog_info *cl = src; - struct ipt_ulog_info l = { - .nl_group = cl->nl_group, - .copy_range = cl->copy_range, - .qthreshold = cl->qthreshold, - }; - - memcpy(l.prefix, cl->prefix, sizeof(l.prefix)); - memcpy(dst, &l, sizeof(l)); -} - -static int ulog_tg_compat_to_user(void __user *dst, const void *src) -{ - const struct ipt_ulog_info *l = src; - struct compat_ipt_ulog_info cl = { - .nl_group = l->nl_group, - .copy_range = l->copy_range, - .qthreshold = l->qthreshold, - }; - - memcpy(cl.prefix, l->prefix, sizeof(cl.prefix)); - return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_target ulog_tg_reg __read_mostly = { - .name = "ULOG", - .family = NFPROTO_IPV4, - .target = ulog_tg, - .targetsize = sizeof(struct ipt_ulog_info), - .checkentry = ulog_tg_check, -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_ipt_ulog_info), - .compat_from_user = ulog_tg_compat_from_user, - .compat_to_user = ulog_tg_compat_to_user, -#endif - .me = THIS_MODULE, -}; - -static struct nf_logger ipt_ulog_logger __read_mostly = { - .name = "ipt_ULOG", - .logfn = ipt_logfn, - .me = THIS_MODULE, -}; - -static int __net_init ulog_tg_net_init(struct net *net) -{ - int i; - struct ulog_net *ulog = ulog_pernet(net); - struct netlink_kernel_cfg cfg = { - .groups = ULOG_MAXNLGROUPS, - }; - - spin_lock_init(&ulog->lock); - /* initialize ulog_buffers */ - for (i = 0; i < ULOG_MAXNLGROUPS; i++) { - ulog->nlgroup[i] = i; - setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, - (unsigned long)&ulog->nlgroup[i]); - } - - ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); - if (!ulog->nflognl) - return -ENOMEM; - - if (nflog) - nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger); - - return 0; -} - -static void __net_exit ulog_tg_net_exit(struct net *net) -{ - ulog_buff_t *ub; - int i; - struct ulog_net *ulog = ulog_pernet(net); - - if (nflog) - nf_log_unset(net, &ipt_ulog_logger); - - netlink_kernel_release(ulog->nflognl); - - /* remove pending timers and free allocated skb's */ - for (i = 0; i < ULOG_MAXNLGROUPS; i++) { - ub = &ulog->ulog_buffers[i]; - pr_debug("timer is deleting\n"); - del_timer(&ub->timer); - - if (ub->skb) { - kfree_skb(ub->skb); - ub->skb = NULL; - } - } -} - -static struct pernet_operations ulog_tg_net_ops = { - .init = ulog_tg_net_init, - .exit = ulog_tg_net_exit, - .id = &ulog_net_id, - .size = sizeof(struct ulog_net), -}; - -static int __init ulog_tg_init(void) -{ - int ret; - pr_debug("init module\n"); - - if (nlbufsiz > 128*1024) { - pr_warn("Netlink buffer has to be <= 128kB\n"); - return -EINVAL; - } - - ret = register_pernet_subsys(&ulog_tg_net_ops); - if (ret) - goto out_pernet; - - ret = xt_register_target(&ulog_tg_reg); - if (ret < 0) - goto out_target; - - if (nflog) - nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); - - return 0; - -out_target: - unregister_pernet_subsys(&ulog_tg_net_ops); -out_pernet: - return ret; -} - -static void __exit ulog_tg_exit(void) -{ - pr_debug("cleanup_module\n"); - if (nflog) - nf_log_unregister(&ipt_ulog_logger); - xt_unregister_target(&ulog_tg_reg); - unregister_pernet_subsys(&ulog_tg_net_ops); -} - -module_init(ulog_tg_init); -module_exit(ulog_tg_exit); -- cgit v1.2.3 From a6eacef7fba7834da4d22762ea0d8524df3993a8 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Wed, 25 Jun 2014 10:07:05 +0200 Subject: tipc: bump max configurable window size The maximum window size is limited by the sequence gap field, which was expanded with bd7845337b105e090dd18912d511139945fa7586 ("tipc: Expand link sequence gap field to 13 bits") We remove the artificial limit that prevents the link window to be set larger than 150. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 41a76acbb305..876d0a14863c 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -182,7 +182,7 @@ #define TIPC_MIN_LINK_WIN 16 #define TIPC_DEF_LINK_WIN 50 -#define TIPC_MAX_LINK_WIN 150 +#define TIPC_MAX_LINK_WIN 8191 struct tipc_node_info { -- cgit v1.2.3 From 09d27b88f15f08fcfbaf57d9b0b4489816264815 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 25 Jun 2014 13:37:13 +0200 Subject: netfilter: nft_log: complete logging support Use the unified nf_log_packet() interface that allows us explicit logger selection through the nf_loginfo structure. If you specify the group attribute, this means you want to receive logging messages through nfnetlink_log. In that case, the snaplen and qthreshold attributes allows you to tune internal aspects of the netlink logging infrastructure. On the other hand, if the level is specified, then the plain text format through the kernel logging ring is used instead, which is also used by default if neither group nor level are indicated. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 ++ net/netfilter/nft_log.c | 76 +++++++++++++++++++++++++------- 2 files changed, 63 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 2a88f645a5d8..801bdd1e56e3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -697,6 +697,8 @@ enum nft_counter_attributes { * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING) * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32) * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32) + * @NFTA_LOG_LEVEL: log level (NLA_U32) + * @NFTA_LOG_FLAGS: logging flags (NLA_U32) */ enum nft_log_attributes { NFTA_LOG_UNSPEC, @@ -704,6 +706,8 @@ enum nft_log_attributes { NFTA_LOG_PREFIX, NFTA_LOG_SNAPLEN, NFTA_LOG_QTHRESHOLD, + NFTA_LOG_LEVEL, + NFTA_LOG_FLAGS, __NFTA_LOG_MAX }; #define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 55d4297d8417..5b1a4f5c3dcc 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy + * Copyright (c) 2012-2014 Pablo Neira Ayuso * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -41,6 +42,8 @@ static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { [NFTA_LOG_PREFIX] = { .type = NLA_STRING }, [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, + [NFTA_LOG_LEVEL] = { .type = NLA_U32 }, + [NFTA_LOG_FLAGS] = { .type = NLA_U32 }, }; static int nft_log_init(const struct nft_ctx *ctx, @@ -58,18 +61,41 @@ static int nft_log_init(const struct nft_ctx *ctx, if (priv->prefix == NULL) return -ENOMEM; nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1); - } else + } else { priv->prefix = (char *)nft_log_null_prefix; + } - li->type = NF_LOG_TYPE_ULOG; + li->type = NF_LOG_TYPE_LOG; + if (tb[NFTA_LOG_LEVEL] != NULL && + tb[NFTA_LOG_GROUP] != NULL) + return -EINVAL; if (tb[NFTA_LOG_GROUP] != NULL) + li->type = NF_LOG_TYPE_ULOG; + + switch (li->type) { + case NF_LOG_TYPE_LOG: + if (tb[NFTA_LOG_LEVEL] != NULL) { + li->u.log.level = + ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL]));; + } else { + li->u.log.level = 4; + } + if (tb[NFTA_LOG_FLAGS] != NULL) { + li->u.log.logflags = + ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS])); + } + break; + case NF_LOG_TYPE_ULOG: li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP])); - - if (tb[NFTA_LOG_SNAPLEN] != NULL) - li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); - if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { - li->u.ulog.qthreshold = - ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); + if (tb[NFTA_LOG_SNAPLEN] != NULL) { + li->u.ulog.copy_len = + ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); + } + if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { + li->u.ulog.qthreshold = + ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); + } + break; } if (ctx->afi->family == NFPROTO_INET) { @@ -113,17 +139,33 @@ static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) if (priv->prefix != nft_log_null_prefix) if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix)) goto nla_put_failure; - if (li->u.ulog.group) - if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) - goto nla_put_failure; - if (li->u.ulog.copy_len) - if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, - htonl(li->u.ulog.copy_len))) + switch (li->type) { + case NF_LOG_TYPE_LOG: + if (nla_put_be32(skb, NFTA_LOG_LEVEL, htonl(li->u.log.level))) goto nla_put_failure; - if (li->u.ulog.qthreshold) - if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, - htons(li->u.ulog.qthreshold))) + + if (li->u.log.logflags) { + if (nla_put_be32(skb, NFTA_LOG_FLAGS, + htonl(li->u.log.logflags))) + goto nla_put_failure; + } + break; + case NF_LOG_TYPE_ULOG: + if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) goto nla_put_failure; + + if (li->u.ulog.copy_len) { + if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, + htonl(li->u.ulog.copy_len))) + goto nla_put_failure; + } + if (li->u.ulog.qthreshold) { + if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, + htons(li->u.ulog.qthreshold))) + goto nla_put_failure; + } + break; + } return 0; nla_put_failure: -- cgit v1.2.3 From 9ad7860450ea65c6bbcbd52a9a25b54b07e35941 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 27 Jun 2014 10:41:00 -0500 Subject: Revert "tools: ffs-test: convert to new descriptor format fixing compilation error" This reverts commit f2af74123f8c5a735248547f4286a3adc28633c1. There is a better fix for this build error coming in a following patch. Signed-of-by: Felipe Balbi --- include/uapi/linux/usb/functionfs.h | 2 +- tools/usb/Makefile | 6 +----- tools/usb/ffs-test.c | 20 ++------------------ 3 files changed, 4 insertions(+), 24 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index ecb3a31f7ca6..2a4b4a72a4f9 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -53,7 +53,7 @@ struct usb_endpoint_descriptor_no_audio { * structure. Any flags that are not recognised cause the whole block to be * rejected with -ENOSYS. * - * Legacy descriptors format (deprecated as of 3.14): + * Legacy descriptors format: * * | off | name | type | description | * |-----+-----------+--------------+--------------------------------------| diff --git a/tools/usb/Makefile b/tools/usb/Makefile index d576b3bac3cf..acf2165c04e6 100644 --- a/tools/usb/Makefile +++ b/tools/usb/Makefile @@ -6,11 +6,7 @@ WARNINGS = -Wall -Wextra CFLAGS = $(WARNINGS) -g -I../include LDFLAGS = $(PTHREAD_LIBS) -all: testusb ffs-test ffs-test-legacy - -ffs-test-legacy: ffs-test.c - $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) -DUSE_LEGACY_DESC_HEAD - +all: testusb ffs-test %: %.c $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c index 74b353d9eb50..fe1e66b6ef40 100644 --- a/tools/usb/ffs-test.c +++ b/tools/usb/ffs-test.c @@ -1,5 +1,5 @@ /* - * ffs-test.c -- user mode filesystem api for usb composite function + * ffs-test.c.c -- user mode filesystem api for usb composite function * * Copyright (C) 2010 Samsung Electronics * Author: Michal Nazarewicz @@ -21,8 +21,6 @@ /* $(CROSS_COMPILE)cc -Wall -Wextra -g -o ffs-test ffs-test.c -lpthread */ -/* Uncomment to make the tool use legacy FFS descriptor headers. */ -/* #define USE_LEGACY_DESC_HEAD */ #define _BSD_SOURCE /* for endian.h */ @@ -108,15 +106,7 @@ static void _msg(unsigned level, const char *fmt, ...) /******************** Descriptors and Strings *******************************/ static const struct { - struct { - __le32 magic; - __le32 length; -#ifndef USE_LEGACY_DESC_HEAD - __le32 flags; -#endif - __le32 fs_count; - __le32 hs_count; - } __attribute__((packed)) header; + struct usb_functionfs_descs_head header; struct { struct usb_interface_descriptor intf; struct usb_endpoint_descriptor_no_audio sink; @@ -124,13 +114,7 @@ static const struct { } __attribute__((packed)) fs_descs, hs_descs; } __attribute__((packed)) descriptors = { .header = { -#ifdef USE_LEGACY_DESC_HEAD .magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC), -#else - .magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2), - .flags = cpu_to_le32(FUNCTIONFS_HAS_FS_DESC | - FUNCTIONFS_HAS_HS_DESC), -#endif .length = cpu_to_le32(sizeof descriptors), .fs_count = 3, .hs_count = 3, -- cgit v1.2.3 From 09122141785348bf9539762a5f5dbbae3761c783 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Fri, 13 Jun 2014 15:38:04 +0200 Subject: usb: gadget: f_fs: resurect usb_functionfs_descs_head structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even though usb_functionfs_descs_head structure is now deprecated, it has been used by some user space tools. Its removel in commit [ac8dde1: “Add flags to descriptors block”] was an oversight leading to build breakage for such tools. Bring it back so that old user space tools can still be build without problems on newer kernel versions. Cc: # 3.14 Reported-by: Lad, Prabhakar Reported-by: Krzysztof Opasiak Signed-off-by: Michal Nazarewicz Signed-off-by: Felipe Balbi --- include/uapi/linux/usb/functionfs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index 2a4b4a72a4f9..24b68c59dcf8 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -33,6 +33,13 @@ struct usb_endpoint_descriptor_no_audio { __u8 bInterval; } __attribute__((packed)); +/* Legacy format, deprecated as of 3.14. */ +struct usb_functionfs_descs_head { + __le32 magic; + __le32 length; + __le32 fs_count; + __le32 hs_count; +} __attribute__((packed, deprecated)); /* * Descriptors format: -- cgit v1.2.3 From b2373f255cacdc1ea4da25e75a5a78949ffd9d66 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 3 Jun 2014 11:36:03 +0800 Subject: btrfs: create sprout should rename fsid on the sysfs as well Creating sprout will change the fsid of the mounted root. do the same on the sysfs as well. reproducer: mount /dev/sdb /btrfs (seed disk) btrfs dev add /dev/sdc /btrfs mount -o rw,remount /btrfs btrfs dev del /dev/sdb /btrfs mount /dev/sdb /btrfs Error: kobject_add_internal failed for fe350492-dc28-4051-a601-e017b17e6145 with -EEXIST, don't try to register things with the same name in the same directory. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 9 +++++++++ include/uapi/linux/btrfs.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 19b67e969b52..6ca0d9cc46f9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2154,6 +2154,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (seeding_dev) { + char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; ret = init_first_rw_device(trans, root, device); if (ret) { btrfs_abort_transaction(trans, root, ret); @@ -2164,6 +2165,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) btrfs_abort_transaction(trans, root, ret); goto error_trans; } + + /* Sprouting would change fsid of the mounted root, + * so rename the fsid on the sysfs + */ + snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU", + root->fs_info->fsid); + if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) + goto error_trans; } else { ret = btrfs_add_device(trans, root, device); if (ret) { diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 6f9c38ce45c7..2f47824e7a36 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -38,6 +38,7 @@ struct btrfs_ioctl_vol_args { #define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) #define BTRFS_FSID_SIZE 16 #define BTRFS_UUID_SIZE 16 +#define BTRFS_UUID_UNPARSED_SIZE 37 #define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) -- cgit v1.2.3 From d15156138dad40205c9fdd9abe85c9e1479ae272 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Tue, 1 Jul 2014 10:48:05 -0600 Subject: block SG_IO: add SG_FLAG_Q_AT_HEAD flag After the SG_IO ioctl was copied into the block layer and later into the bsg driver, subtle differences emerged. One difference is the way injected commands are queued through the block layer (i.e. this is not SCSI device queueing nor SATA NCQ). Summarizing: - SG_IO on block layer device: blk_exec*(at_head=false) - sg device SG_IO: at_head=true - bsg device SG_IO: at_head=true Some time ago Boaz Harrosh introduced a sg v4 flag called BSG_FLAG_Q_AT_TAIL to override the bsg driver default. A recent patch titled: "sg: add SG_FLAG_Q_AT_TAIL flag" allowed the sg driver default to be overridden. This patch allows a SG_IO ioctl sent to a block layer device to have its default overridden. ChangeLog: - introduce SG_FLAG_Q_AT_HEAD flag in sg.h to cause commands that are injected via a block layer device SG_IO ioctl to set at_head=true - make comments clearer about queueing in sg.h since the header is used both by the sg device and block layer device implementations of the SG_IO ioctl. - introduce BSG_FLAG_Q_AT_HEAD in bsg.h for compatibility (it does nothing) and update comments. Signed-off-by: Douglas Gilbert Reviewed-by: Christoph Hellwig Reviewed-by: Mike Christie Signed-off-by: Jens Axboe --- block/scsi_ioctl.c | 5 ++++- include/scsi/sg.h | 3 +++ include/uapi/linux/bsg.h | 11 ++++++----- 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index bda1497add4c..51bf5155ee75 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -290,6 +290,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, unsigned long start_time; ssize_t ret = 0; int writing = 0; + int at_head = 0; struct request *rq; char sense[SCSI_SENSE_BUFFERSIZE]; struct bio *bio; @@ -313,6 +314,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, case SG_DXFER_FROM_DEV: break; } + if (hdr->flags & SG_FLAG_Q_AT_HEAD) + at_head = 1; rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); if (!rq) @@ -369,7 +372,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, * (if he doesn't check that is his problem). * N.B. a non-zero SCSI status is _not_ necessarily an error. */ - blk_execute_rq(q, bd_disk, rq, 0); + blk_execute_rq(q, bd_disk, rq, at_head); hdr->duration = jiffies_to_msecs(jiffies - start_time); diff --git a/include/scsi/sg.h b/include/scsi/sg.h index a9f3c6fc3f57..4734c15ab5d6 100644 --- a/include/scsi/sg.h +++ b/include/scsi/sg.h @@ -129,6 +129,9 @@ typedef struct sg_io_hdr #define SG_FLAG_MMAP_IO 4 /* request memory mapped IO */ #define SG_FLAG_NO_DXFER 0x10000 /* no transfer of kernel buffers to/from */ /* user space (debug indirect IO) */ +/* defaults:: for sg driver: Q_AT_HEAD; for block layer: Q_AT_TAIL */ +#define SG_FLAG_Q_AT_TAIL 0x10 +#define SG_FLAG_Q_AT_HEAD 0x20 /* following 'info' values are "or"-ed together */ #define SG_INFO_OK_MASK 0x1 diff --git a/include/uapi/linux/bsg.h b/include/uapi/linux/bsg.h index 7a12e1c0f371..02986cf8b6f1 100644 --- a/include/uapi/linux/bsg.h +++ b/include/uapi/linux/bsg.h @@ -10,12 +10,13 @@ #define BSG_SUB_PROTOCOL_SCSI_TRANSPORT 2 /* - * For flags member below - * sg.h sg_io_hdr also has bits defined for it's flags member. However - * none of these bits are implemented/used by bsg. The bits below are - * allocated to not conflict with sg.h ones anyway. + * For flag constants below: + * sg.h sg_io_hdr also has bits defined for it's flags member. These + * two flag values (0x10 and 0x20) have the same meaning in sg.h . For + * bsg the BSG_FLAG_Q_AT_HEAD flag is ignored since it is the deafult. */ -#define BSG_FLAG_Q_AT_TAIL 0x10 /* default, == 0 at this bit, is Q_AT_HEAD */ +#define BSG_FLAG_Q_AT_TAIL 0x10 /* default is Q_AT_HEAD */ +#define BSG_FLAG_Q_AT_HEAD 0x20 struct sg_io_v4 { __s32 guard; /* [i] 'Q' to differentiate from v3 */ -- cgit v1.2.3 From cb553215d5d277d4838d7d6b7722e964bcf5ca1f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 26 Jun 2014 17:41:47 +0800 Subject: include/uapi/linux/virtio_blk.h: introduce feature of VIRTIO_BLK_F_MQ Current virtio-blk spec only supports one virtual queue for transfering data between VM and host, and inside VM all kinds of operations on the virtual queue needs to hold one lock, so cause below problems: - bad scalability - bad throughput This patch requests to introduce feature of VIRTIO_BLK_F_MQ so that more than one virtual queues can be used to virtio-blk device, then above problems can be solved or eased. Signed-off-by: Ming Lei Acked-by: Michael S. Tsirkin Signed-off-by: Jens Axboe --- include/uapi/linux/virtio_blk.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 6d8e61c48563..9ad67b267584 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -40,6 +40,7 @@ #define VIRTIO_BLK_F_WCE 9 /* Writeback mode enabled after reset */ #define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ #define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */ +#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ #ifndef __KERNEL__ /* Old (deprecated) name for VIRTIO_BLK_F_WCE. */ @@ -77,6 +78,10 @@ struct virtio_blk_config { /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */ __u8 wce; + __u8 unused; + + /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */ + __u16 num_queues; } __attribute__((packed)); /* -- cgit v1.2.3 From d93331965729850303f6111381c1a4a9e9b8ae5a Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 25 Jun 2014 14:44:53 -0700 Subject: ipv6: Allow accepting RA from local IP addresses. This can be used in virtual networking applications, and may have other uses as well. The option is disabled by default. A specific use case is setting up virtual routers, bridges, and hosts on a single OS without the use of network namespaces or virtual machines. With proper use of ip rules, routing tables, veth interface pairs and/or other virtual interfaces, and applications that can bind to interfaces and/or IP addresses, it is possibly to create one or more virtual routers with multiple hosts attached. The host interfaces can act as IPv6 systems, with radvd running on the ports in the virtual routers. With the option provided in this patch enabled, those hosts can now properly obtain IPv6 addresses from the radvd. Signed-off-by: Ben Greear Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 12 ++++++++++++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + include/uapi/linux/sysctl.h | 1 + kernel/sysctl_binary.c | 1 + net/ipv6/addrconf.c | 10 ++++++++++ net/ipv6/ndisc.c | 21 +++++++++++++-------- 7 files changed, 39 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ab42c95f9985..10e216c6e05e 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1210,6 +1210,18 @@ accept_ra_defrtr - BOOLEAN Functional default: enabled if accept_ra is enabled. disabled if accept_ra is disabled. +accept_ra_from_local - BOOLEAN + Accept RA with source-address that is found on local machine + if the RA is otherwise proper and able to be accepted. + Default is to NOT accept these as it may be an un-intended + network loop. + + Functional default: + enabled if accept_ra_from_local is enabled + on a specific interface. + disabled if accept_ra_from_local is disabled + on a specific interface. + accept_ra_pinfo - BOOLEAN Learn Prefix Information in Router Advertisement. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c811300b0b0c..b0f2452f1d58 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -39,6 +39,7 @@ struct ipv6_devconf { #endif __s32 proxy_ndp; __s32 accept_source_route; + __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD __s32 optimistic_dad; #endif diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 593b0e32d956..efa2666f4b8a 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -163,6 +163,7 @@ enum { DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL, DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL, DEVCONF_SUPPRESS_FRAG_NDISC, + DEVCONF_ACCEPT_RA_FROM_LOCAL, DEVCONF_MAX }; diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 6d6721341f49..43aaba1cc037 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -568,6 +568,7 @@ enum { NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22, NET_IPV6_PROXY_NDP=23, NET_IPV6_ACCEPT_SOURCE_ROUTE=25, + NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, __NET_IPV6_MAX }; diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 653cbbd9e7ad..e4ba9a5a5ccb 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -522,6 +522,7 @@ static const struct bin_table bin_net_ipv6_conf_var_table[] = { { CTL_INT, NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, "accept_ra_rt_info_max_plen" }, { CTL_INT, NET_IPV6_PROXY_NDP, "proxy_ndp" }, { CTL_INT, NET_IPV6_ACCEPT_SOURCE_ROUTE, "accept_source_route" }, + { CTL_INT, NET_IPV6_ACCEPT_RA_FROM_LOCAL, "accept_ra_from_local" }, {} }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5667b3003af9..358edd2272ac 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -186,6 +186,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .accept_ra_from_local = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -222,6 +223,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .accept_ra_from_local = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -4321,6 +4323,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; + array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; } static inline size_t inet6_ifla6_size(void) @@ -5167,6 +5170,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "accept_ra_from_local", + .data = &ipv6_devconf.accept_ra_from_local, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 736c11c6d266..a845e3d2057e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1148,11 +1148,15 @@ static void ndisc_router_discovery(struct sk_buff *skb) goto skip_defrtr; } - if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, - NULL, 0)) { + /* Do not accept RA with source-addr found on local machine unless + * accept_ra_from_local is set to true. + */ + if (!(in6_dev->cnf.accept_ra_from_local || + ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, + NULL, 0))) { ND_PRINTK(2, info, - "RA: %s, chk_addr failed for dev: %s\n", - __func__, skb->dev->name); + "RA from local address detected on dev: %s: default router ignored\n", + skb->dev->name); goto skip_defrtr; } @@ -1290,11 +1294,12 @@ skip_linkparms: } #ifdef CONFIG_IPV6_ROUTE_INFO - if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, - NULL, 0)) { + if (!(in6_dev->cnf.accept_ra_from_local || + ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, + NULL, 0))) { ND_PRINTK(2, info, - "RA: %s, chk-addr (route info) is false for dev: %s\n", - __func__, skb->dev->name); + "RA from local address detected on dev: %s: router info ignored.\n", + skb->dev->name); goto skip_routeinfo; } -- cgit v1.2.3 From cb1ce2ef387b01686469487edd45994872d52d73 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 1 Jul 2014 21:33:10 -0700 Subject: ipv6: Implement automatic flow label generation on transmit Automatically generate flow labels for IPv6 packets on transmit. The flow label is computed based on skb_get_hash. The flow label will only automatically be set when it is zero otherwise (i.e. flow label manager hasn't set one). This supports the transmit side functionality of RFC 6438. Added an IPv6 sysctl auto_flowlabels to enable/disable this behavior system wide, and added IPV6_AUTOFLOWLABEL socket option to enable this functionality per socket. By default, auto flowlabels are disabled to avoid possible conflicts with flow label manager, however if this feature proves useful we may want to enable it by default. It should also be noted that FreeBSD has already implemented automatic flow labels (including the sysctl and socket option). In FreeBSD, automatic flow labels default to enabled. Performance impact: Running super_netperf with 200 flows for TCP_RR and UDP_RR for IPv6. Note that in UDP case, __skb_get_hash will be called for every packet with explains slight regression. In the TCP case the hash is saved in the socket so there is no regression. Automatic flow labels disabled: TCP_RR: 86.53% CPU utilization 127/195/322 90/95/99% latencies 1.40498e+06 tps UDP_RR: 90.70% CPU utilization 118/168/243 90/95/99% latencies 1.50309e+06 tps Automatic flow labels enabled: TCP_RR: 85.90% CPU utilization 128/199/337 90/95/99% latencies 1.40051e+06 UDP_RR 92.61% CPU utilization 115/164/236 90/95/99% latencies 1.4687e+06 Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 9 +++++++++ include/linux/ipv6.h | 3 ++- include/net/ipv6.h | 20 ++++++++++++++++++++ include/net/netns/ipv6.h | 1 + include/uapi/linux/in6.h | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/ip6_gre.c | 7 +++++-- net/ipv6/ip6_output.c | 7 +++++-- net/ipv6/ip6_tunnel.c | 3 ++- net/ipv6/ipv6_sockglue.c | 8 ++++++++ net/ipv6/sysctl_net_ipv6.c | 8 ++++++++ 11 files changed, 62 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 10e216c6e05e..f35bfe43bf7a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1132,6 +1132,15 @@ flowlabel_consistency - BOOLEAN FALSE: disabled Default: TRUE +auto_flowlabels - BOOLEAN + Automatically generate flow labels based based on a flow hash + of the packet. This allows intermediate devices, such as routers, + to idenfify packet flows for mechanisms like Equal Cost Multipath + Routing (see RFC 6438). + TRUE: enabled + FALSE: disabled + Default: false + anycast_src_echo_reply - BOOLEAN Controls the use of anycast addresses as source addresses for ICMPv6 echo reply diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5dc68c3ebcbd..ff560537dd61 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -199,7 +199,8 @@ struct ipv6_pinfo { * 010: prefer public address * 100: prefer care-of address */ - dontfrag:1; + dontfrag:1, + autoflowlabel:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2aa86e1135a1..4308f2ada8b3 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -699,6 +699,26 @@ static inline void ip6_set_txhash(struct sock *sk) sk->sk_txhash = flow_hash_from_keys(&keys); } +static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, + __be32 flowlabel, bool autolabel) +{ + if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) { + __be32 hash; + + hash = skb_get_hash(skb); + + /* Since this is being sent on the wire obfuscate hash a bit + * to minimize possbility that any useful information to an + * attacker is leaked. Only lower 20 bits are relevant. + */ + hash ^= hash >> 12; + + flowlabel = hash & IPV6_FLOWLABEL_MASK; + } + + return flowlabel; +} + /* * Header manipulation */ diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 19d3446e59d2..eade27adecf3 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_mtu_expires; int ip6_rt_min_advmss; int flowlabel_consistency; + int auto_flowlabels; int icmpv6_time; int anycast_src_echo_reply; int fwmark_reflect; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 0d8e0f0342dc..22b7a69619d8 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -233,6 +233,7 @@ struct in6_flowlabel_req { #if 0 /* not yet */ #define IPV6_USE_MIN_MTU 63 #endif +#define IPV6_AUTOFLOWLABEL 64 /* * Netfilter (1) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a426cd7099bb..2daa3a133e49 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -765,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; + net->ipv6.sysctl.auto_flowlabels = 0; atomic_set(&net->ipv6.rt_genid, 0); err = ipv6_init_mibs(net); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3873181ed856..365b2b6f3942 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -723,7 +723,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, * Push down and install the IP header. */ ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1174,7 +1175,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); __be16 *p = (__be16 *)(ipv6h+1); - ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); + ip6_flow_hdr(ipv6h, 0, + ip6_make_flowlabel(dev_net(dev), skb, + t->fl.u.ip6.flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cb9df0eb4023..fa83bdd4c3dd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - ip6_flow_hdr(hdr, tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1569,7 +1570,9 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, np->cork.tclass, + ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index afa082458360..51a1eb185ea7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1046,7 +1046,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index cc34f65179e4..b50b9e54cf53 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -834,6 +834,10 @@ pref_skip_coa: np->dontfrag = valbool; retv = 0; break; + case IPV6_AUTOFLOWLABEL: + np->autoflowlabel = valbool; + retv = 0; + break; } release_sock(sk); @@ -1273,6 +1277,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->dontfrag; break; + case IPV6_AUTOFLOWLABEL: + val = np->autoflowlabel; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 058f3eca2e53..5bf7b61f8ae8 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "auto_flowlabels", + .data = &init_net.ipv6.sysctl.auto_flowlabels, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "fwmark_reflect", .data = &init_net.ipv6.sysctl.fwmark_reflect, @@ -74,6 +81,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; + ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) -- cgit v1.2.3 From c1f732ad767e37bd1d41043cbdefc0874b4d05e5 Mon Sep 17 00:00:00 2001 From: Kleber Sacilotto de Souza Date: Wed, 4 Jun 2014 10:57:50 -0300 Subject: GenWQE: Add sysfs interface for bitstream reload This patch adds an interface on sysfs for userspace to request a card bitstream reload. It sets the appropriate register and try to perform a fundamental reset on the PCIe slot for the card to reload the bitstream from the chosen partition. Signed-off-by: Kleber Sacilotto de Souza Acked-by: Frank Haverkamp Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-driver-genwqe | 9 +++ drivers/misc/genwqe/card_base.c | 90 +++++++++++++++++++++++++++ drivers/misc/genwqe/card_sysfs.c | 25 ++++++++ include/uapi/linux/genwqe/genwqe_card.h | 1 + 4 files changed, 125 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/ABI/testing/sysfs-driver-genwqe b/Documentation/ABI/testing/sysfs-driver-genwqe index 1870737a1f5e..64ac6d567c4b 100644 --- a/Documentation/ABI/testing/sysfs-driver-genwqe +++ b/Documentation/ABI/testing/sysfs-driver-genwqe @@ -25,6 +25,15 @@ Date: Oct 2013 Contact: haver@linux.vnet.ibm.com Description: Interface to set the next bitstream to be used. +What: /sys/class/genwqe/genwqe_card/reload_bitstream +Date: May 2014 +Contact: klebers@linux.vnet.ibm.com +Description: Interface to trigger a PCIe card reset to reload the bitstream. + sudo sh -c 'echo 1 > \ + /sys/class/genwqe/genwqe0_card/reload_bitstream' + If successfully, the card will come back with the bitstream set + on 'next_bitstream'. + What: /sys/class/genwqe/genwqe_card/tempsens Date: Oct 2013 Contact: haver@linux.vnet.ibm.com diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c index 74d51c9bb858..e6cc3e1e7326 100644 --- a/drivers/misc/genwqe/card_base.c +++ b/drivers/misc/genwqe/card_base.c @@ -760,6 +760,89 @@ static u64 genwqe_fir_checking(struct genwqe_dev *cd) return IO_ILLEGAL_VALUE; } +/** + * genwqe_pci_fundamental_reset() - trigger a PCIe fundamental reset on the slot + * + * Note: pci_set_pcie_reset_state() is not implemented on all archs, so this + * reset method will not work in all cases. + * + * Return: 0 on success or error code from pci_set_pcie_reset_state() + */ +static int genwqe_pci_fundamental_reset(struct pci_dev *pci_dev) +{ + int rc; + + /* + * lock pci config space access from userspace, + * save state and issue PCIe fundamental reset + */ + pci_cfg_access_lock(pci_dev); + pci_save_state(pci_dev); + rc = pci_set_pcie_reset_state(pci_dev, pcie_warm_reset); + if (!rc) { + /* keep PCIe reset asserted for 250ms */ + msleep(250); + pci_set_pcie_reset_state(pci_dev, pcie_deassert_reset); + /* Wait for 2s to reload flash and train the link */ + msleep(2000); + } + pci_restore_state(pci_dev); + pci_cfg_access_unlock(pci_dev); + return rc; +} + +/* + * genwqe_reload_bistream() - reload card bitstream + * + * Set the appropriate register and call fundamental reset to reaload the card + * bitstream. + * + * Return: 0 on success, error code otherwise + */ +static int genwqe_reload_bistream(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + int rc; + + dev_info(&pci_dev->dev, + "[%s] resetting card for bitstream reload\n", + __func__); + + genwqe_stop(cd); + + /* + * Cause a CPLD reprogram with the 'next_bitstream' + * partition on PCIe hot or fundamental reset + */ + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, + (cd->softreset & 0xcull) | 0x70ull); + + rc = genwqe_pci_fundamental_reset(pci_dev); + if (rc) { + /* + * A fundamental reset failure can be caused + * by lack of support on the arch, so we just + * log the error and try to start the card + * again. + */ + dev_err(&pci_dev->dev, + "[%s] err: failed to reset card for bitstream reload\n", + __func__); + } + + rc = genwqe_start(cd); + if (rc) { + dev_err(&pci_dev->dev, + "[%s] err: cannot start card services! (err=%d)\n", + __func__, rc); + return rc; + } + dev_info(&pci_dev->dev, + "[%s] card reloaded\n", __func__); + return 0; +} + + /** * genwqe_health_thread() - Health checking thread * @@ -846,6 +929,13 @@ static int genwqe_health_thread(void *data) } } + if (cd->card_state == GENWQE_CARD_RELOAD_BITSTREAM) { + /* Userspace requested card bitstream reload */ + rc = genwqe_reload_bistream(cd); + if (rc) + goto fatal_error; + } + cd->last_gfir = gfir; cond_resched(); } diff --git a/drivers/misc/genwqe/card_sysfs.c b/drivers/misc/genwqe/card_sysfs.c index a72a99266c3c..7232e40a3ad9 100644 --- a/drivers/misc/genwqe/card_sysfs.c +++ b/drivers/misc/genwqe/card_sysfs.c @@ -223,6 +223,30 @@ static ssize_t next_bitstream_store(struct device *dev, } static DEVICE_ATTR_RW(next_bitstream); +static ssize_t reload_bitstream_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int reload; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + if (kstrtoint(buf, 0, &reload) < 0) + return -EINVAL; + + if (reload == 0x1) { + if (cd->card_state == GENWQE_CARD_UNUSED || + cd->card_state == GENWQE_CARD_USED) + cd->card_state = GENWQE_CARD_RELOAD_BITSTREAM; + else + return -EIO; + } else { + return -EINVAL; + } + + return count; +} +static DEVICE_ATTR_WO(reload_bitstream); + /* * Create device_attribute structures / params: name, mode, show, store * additional flag if valid in VF @@ -239,6 +263,7 @@ static struct attribute *genwqe_attributes[] = { &dev_attr_status.attr, &dev_attr_freerunning_timer.attr, &dev_attr_queue_working_time.attr, + &dev_attr_reload_bitstream.attr, NULL, }; diff --git a/include/uapi/linux/genwqe/genwqe_card.h b/include/uapi/linux/genwqe/genwqe_card.h index 795e957bb840..4fc065f29255 100644 --- a/include/uapi/linux/genwqe/genwqe_card.h +++ b/include/uapi/linux/genwqe/genwqe_card.h @@ -328,6 +328,7 @@ enum genwqe_card_state { GENWQE_CARD_UNUSED = 0, GENWQE_CARD_USED = 1, GENWQE_CARD_FATAL_ERROR = 2, + GENWQE_CARD_RELOAD_BITSTREAM = 3, GENWQE_CARD_STATE_MAX, }; -- cgit v1.2.3 From d5d222605503dd39513b3baeb9475ddf316511d7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 21 Jun 2014 08:08:11 -0700 Subject: i8k: uapi: Introduce define for new highest fan speed Some Dell laptops support fan speeds of {0, 1, 2, 3} instead of {0, 1, 2}. Add a define for it. Signed-off-by: Guenter Roeck Cc: Andreas Mohr Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/i8k.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/i8k.h b/include/uapi/linux/i8k.h index 1c45ba505115..133d02f03c25 100644 --- a/include/uapi/linux/i8k.h +++ b/include/uapi/linux/i8k.h @@ -34,7 +34,8 @@ #define I8K_FAN_OFF 0 #define I8K_FAN_LOW 1 #define I8K_FAN_HIGH 2 -#define I8K_FAN_MAX I8K_FAN_HIGH +#define I8K_FAN_TURBO 3 +#define I8K_FAN_MAX I8K_FAN_TURBO #define I8K_VOL_UP 1 #define I8K_VOL_DOWN 2 -- cgit v1.2.3 From 875cbf3e4614cfdcc7f65033e25292aec80f09c0 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 4 Jul 2014 08:28:30 +0100 Subject: arm64: Add audit support On AArch64, audit is supported through generic lib/audit.c and compat_audit.c, and so this patch adds arch specific definitions required. Acked-by Will Deacon Acked-by: Richard Guy Briggs Signed-off-by: AKASHI Takahiro Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 ++ arch/arm64/include/asm/syscall.h | 14 ++++++++++++++ include/uapi/linux/audit.h | 1 + 3 files changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1cb806529393..ce6e733e0c05 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -10,6 +10,7 @@ config ARM64 select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC + select AUDIT_ARCH_COMPAT_GENERIC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK @@ -28,6 +29,7 @@ config ARM64 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select HARDIRQS_SW_RESEND + select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 383771eb0b87..709a574468f0 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -16,6 +16,8 @@ #ifndef __ASM_SYSCALL_H #define __ASM_SYSCALL_H +#include +#include #include extern const void *sys_call_table[]; @@ -105,4 +107,16 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->regs[i], args, n * sizeof(args[0])); } +/* + * We don't care about endianness (__AUDIT_ARCH_LE bit) here because + * AArch64 has the same system calls both on little- and big- endian. + */ +static inline int syscall_get_arch(void) +{ + if (is_compat_task()) + return AUDIT_ARCH_ARM; + + return AUDIT_ARCH_AARCH64; +} + #endif /* __ASM_SYSCALL_H */ diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index cf6714752b69..3b9ff33e1768 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -342,6 +342,7 @@ enum { #define __AUDIT_ARCH_64BIT 0x80000000 #define __AUDIT_ARCH_LE 0x40000000 +#define AUDIT_ARCH_AARCH64 (EM_AARCH64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ALPHA (EM_ALPHA|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ARM (EM_ARM|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ARMEB (EM_ARM) -- cgit v1.2.3 From 0b4820d6d8b6448bc9f7fac1bb1a801a53b425e1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 May 2014 16:05:13 +0200 Subject: KVM: prepare for KVM_(S|G)ET_MP_STATE on other architectures Highlight the aspects of the ioctls that are actually specific to x86 and ia64. As defined restrictions (irqchip) and mp states may not apply to other architectures, these parts are flagged to belong to x86 and ia64. In preparation for the use of KVM_(S|G)ET_MP_STATE by s390. Fix a spelling error (KVM_SET_MP_STATE vs. KVM_SET_MPSTATE) on the way. Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/api.txt | 21 ++++++++++++--------- include/uapi/linux/kvm.h | 3 ++- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0fe36497642c..904c61cdd311 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -988,18 +988,20 @@ uniprocessor guests). Possible values are: - - KVM_MP_STATE_RUNNABLE: the vcpu is currently running + - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86, ia64] - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) - which has not yet received an INIT signal + which has not yet received an INIT signal [x86, + ia64] - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is - now ready for a SIPI + now ready for a SIPI [x86, ia64] - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and - is waiting for an interrupt + is waiting for an interrupt [x86, ia64] - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector - accessible via KVM_GET_VCPU_EVENTS) + accessible via KVM_GET_VCPU_EVENTS) [x86, ia64] -This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel -irqchip, the multiprocessing state must be maintained by userspace. +On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an +in-kernel irqchip, the multiprocessing state must be maintained by userspace on +these architectures. 4.39 KVM_SET_MP_STATE @@ -1013,8 +1015,9 @@ Returns: 0 on success; -1 on error Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for arguments. -This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel -irqchip, the multiprocessing state must be maintained by userspace. +On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an +in-kernel irqchip, the multiprocessing state must be maintained by userspace on +these architectures. 4.40 KVM_SET_IDENTITY_MAP_ADDR diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e11d8f170a62..37d4ec6f14d8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -399,8 +399,9 @@ struct kvm_vapic_addr { __u64 vapic_addr; }; -/* for KVM_SET_MPSTATE */ +/* for KVM_SET_MP_STATE */ +/* not all states are valid on all architectures */ #define KVM_MP_STATE_RUNNABLE 0 #define KVM_MP_STATE_UNINITIALIZED 1 #define KVM_MP_STATE_INIT_RECEIVED 2 -- cgit v1.2.3 From 6352e4d2dd9a349024a41356148eced553e1dce4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 10 Apr 2014 17:35:00 +0200 Subject: KVM: s390: implement KVM_(S|G)ET_MP_STATE for user space state control This patch - adds s390 specific MP states to linux headers and documents them - implements the KVM_{SET,GET}_MP_STATE ioctls - enables KVM_CAP_MP_STATE - allows user space to control the VCPU state on s390. If user space sets the VCPU state using the ioctl KVM_SET_MP_STATE, we can disable manual changing of the VCPU state and trust user space to do the right thing. Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/api.txt | 10 ++++++++-- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/diag.c | 3 ++- arch/s390/kvm/intercept.c | 3 ++- arch/s390/kvm/kvm-s390.c | 37 +++++++++++++++++++++++++++++++++---- arch/s390/kvm/kvm-s390.h | 6 ++++++ include/uapi/linux/kvm.h | 4 ++++ 7 files changed, 56 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 904c61cdd311..a41465bd6a5c 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -974,7 +974,7 @@ for vm-wide capabilities. 4.38 KVM_GET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, ia64 +Architectures: x86, ia64, s390 Type: vcpu ioctl Parameters: struct kvm_mp_state (out) Returns: 0 on success; -1 on error @@ -998,6 +998,12 @@ Possible values are: is waiting for an interrupt [x86, ia64] - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector accessible via KVM_GET_VCPU_EVENTS) [x86, ia64] + - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390] + - KVM_MP_STATE_CHECK_STOP: the vcpu is in a special error state [s390] + - KVM_MP_STATE_OPERATING: the vcpu is operating (running or halted) + [s390] + - KVM_MP_STATE_LOAD: the vcpu is in a special load/startup state + [s390] On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on @@ -1007,7 +1013,7 @@ these architectures. 4.39 KVM_SET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, ia64 +Architectures: x86, ia64, s390 Type: vcpu ioctl Parameters: struct kvm_mp_state (in) Returns: 0 on success; -1 on error diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 4181d7baabba..c2ba0208a0e1 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -418,6 +418,7 @@ struct kvm_arch{ int css_support; int use_irqchip; int use_cmma; + int user_cpu_state_ctrl; struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; wait_queue_head_t ipte_wq; spinlock_t start_stop_lock; diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 0161675878a2..59bd8f991b98 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -176,7 +176,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } - kvm_s390_vcpu_stop(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index ac6b32585a36..eaf46291d361 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -73,7 +73,8 @@ static int handle_stop(struct kvm_vcpu *vcpu) return rc; } - kvm_s390_vcpu_stop(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); return -EOPNOTSUPP; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 342895350825..fdf88f7a539c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -167,6 +167,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_DEVICE_CTRL: case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_VM_ATTRIBUTES: + case KVM_CAP_MP_STATE: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -595,7 +596,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->pp = 0; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); - kvm_s390_vcpu_stop(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); kvm_s390_clear_local_irqs(vcpu); } @@ -980,13 +982,34 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; /* not implemented yet */ + /* CHECK_STOP and LOAD are not supported yet */ + return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED : + KVM_MP_STATE_OPERATING; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; /* not implemented yet */ + int rc = 0; + + /* user space knows about this interface - let it control the state */ + vcpu->kvm->arch.user_cpu_state_ctrl = 1; + + switch (mp_state->mp_state) { + case KVM_MP_STATE_STOPPED: + kvm_s390_vcpu_stop(vcpu); + break; + case KVM_MP_STATE_OPERATING: + kvm_s390_vcpu_start(vcpu); + break; + case KVM_MP_STATE_LOAD: + case KVM_MP_STATE_CHECK_STOP: + /* fall through - CHECK_STOP and LOAD are not supported yet */ + default: + rc = -ENXIO; + } + + return rc; } bool kvm_s390_cmma_enabled(struct kvm *kvm) @@ -1284,7 +1307,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - kvm_s390_vcpu_start(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { + kvm_s390_vcpu_start(vcpu); + } else if (is_vcpu_stopped(vcpu)) { + pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n", + vcpu->vcpu_id); + return -EINVAL; + } switch (kvm_run->exit_reason) { case KVM_EXIT_S390_SIEIC: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 77ed846342d4..33a0e4bed2a5 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -129,6 +129,12 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) vcpu->arch.sie_block->gpsw.mask |= cc << 44; } +/* are cpu states controlled by user space */ +static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) +{ + return kvm->arch.user_cpu_state_ctrl != 0; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 37d4ec6f14d8..9b744af871d7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -407,6 +407,10 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_INIT_RECEIVED 2 #define KVM_MP_STATE_HALTED 3 #define KVM_MP_STATE_SIPI_RECEIVED 4 +#define KVM_MP_STATE_STOPPED 5 +#define KVM_MP_STATE_CHECK_STOP 6 +#define KVM_MP_STATE_OPERATING 7 +#define KVM_MP_STATE_LOAD 8 struct kvm_mp_state { __u32 mp_state; -- cgit v1.2.3 From f0175ab51993d2dc2728e7b22a16ffb0c8f4cfa0 Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Wed, 9 Jul 2014 12:20:08 +0200 Subject: usb: gadget: f_fs: OS descriptors support Add support for OS descriptors. The new format of descriptors is used, because the "flags" field is required for extensions. os_count gives the number of OSDesc[] elements. The format of descriptors is given in include/uapi/linux/usb/functionfs.h. For extended properties descriptor the usb_ext_prop_desc structure covers only a part of a descriptor, because the wPropertyNameLength is unknown up front. Signed-off-by: Andrzej Pietrasiewicz Acked-by: Michal Nazarewicz Signed-off-by: Felipe Balbi --- drivers/usb/gadget/f_fs.c | 341 +++++++++++++++++++++++++++++++++++- drivers/usb/gadget/u_fs.h | 7 + include/uapi/linux/usb/functionfs.h | 81 ++++++++- 3 files changed, 419 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index e1b2ddd7964a..fe45060e0a7a 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -34,6 +34,7 @@ #include "u_fs.h" #include "u_f.h" +#include "u_os_desc.h" #include "configfs.h" #define FUNCTIONFS_MAGIC 0xa647361 /* Chosen by a honest dice roll ;) */ @@ -1644,11 +1645,19 @@ enum ffs_entity_type { FFS_DESCRIPTOR, FFS_INTERFACE, FFS_STRING, FFS_ENDPOINT }; +enum ffs_os_desc_type { + FFS_OS_DESC, FFS_OS_DESC_EXT_COMPAT, FFS_OS_DESC_EXT_PROP +}; + typedef int (*ffs_entity_callback)(enum ffs_entity_type entity, u8 *valuep, struct usb_descriptor_header *desc, void *priv); +typedef int (*ffs_os_desc_callback)(enum ffs_os_desc_type entity, + struct usb_os_desc_header *h, void *data, + unsigned len, void *priv); + static int __must_check ffs_do_single_desc(char *data, unsigned len, ffs_entity_callback entity, void *priv) @@ -1856,11 +1865,191 @@ static int __ffs_data_do_entity(enum ffs_entity_type type, return 0; } +static int __ffs_do_os_desc_header(enum ffs_os_desc_type *next_type, + struct usb_os_desc_header *desc) +{ + u16 bcd_version = le16_to_cpu(desc->bcdVersion); + u16 w_index = le16_to_cpu(desc->wIndex); + + if (bcd_version != 1) { + pr_vdebug("unsupported os descriptors version: %d", + bcd_version); + return -EINVAL; + } + switch (w_index) { + case 0x4: + *next_type = FFS_OS_DESC_EXT_COMPAT; + break; + case 0x5: + *next_type = FFS_OS_DESC_EXT_PROP; + break; + default: + pr_vdebug("unsupported os descriptor type: %d", w_index); + return -EINVAL; + } + + return sizeof(*desc); +} + +/* + * Process all extended compatibility/extended property descriptors + * of a feature descriptor + */ +static int __must_check ffs_do_single_os_desc(char *data, unsigned len, + enum ffs_os_desc_type type, + u16 feature_count, + ffs_os_desc_callback entity, + void *priv, + struct usb_os_desc_header *h) +{ + int ret; + const unsigned _len = len; + + ENTER(); + + /* loop over all ext compat/ext prop descriptors */ + while (feature_count--) { + ret = entity(type, h, data, len, priv); + if (unlikely(ret < 0)) { + pr_debug("bad OS descriptor, type: %d\n", type); + return ret; + } + data += ret; + len -= ret; + } + return _len - len; +} + +/* Process a number of complete Feature Descriptors (Ext Compat or Ext Prop) */ +static int __must_check ffs_do_os_descs(unsigned count, + char *data, unsigned len, + ffs_os_desc_callback entity, void *priv) +{ + const unsigned _len = len; + unsigned long num = 0; + + ENTER(); + + for (num = 0; num < count; ++num) { + int ret; + enum ffs_os_desc_type type; + u16 feature_count; + struct usb_os_desc_header *desc = (void *)data; + + if (len < sizeof(*desc)) + return -EINVAL; + + /* + * Record "descriptor" entity. + * Process dwLength, bcdVersion, wIndex, get b/wCount. + * Move the data pointer to the beginning of extended + * compatibilities proper or extended properties proper + * portions of the data + */ + if (le32_to_cpu(desc->dwLength) > len) + return -EINVAL; + + ret = __ffs_do_os_desc_header(&type, desc); + if (unlikely(ret < 0)) { + pr_debug("entity OS_DESCRIPTOR(%02lx); ret = %d\n", + num, ret); + return ret; + } + /* + * 16-bit hex "?? 00" Little Endian looks like 8-bit hex "??" + */ + feature_count = le16_to_cpu(desc->wCount); + if (type == FFS_OS_DESC_EXT_COMPAT && + (feature_count > 255 || desc->Reserved)) + return -EINVAL; + len -= ret; + data += ret; + + /* + * Process all function/property descriptors + * of this Feature Descriptor + */ + ret = ffs_do_single_os_desc(data, len, type, + feature_count, entity, priv, desc); + if (unlikely(ret < 0)) { + pr_debug("%s returns %d\n", __func__, ret); + return ret; + } + + len -= ret; + data += ret; + } + return _len - len; +} + +/** + * Validate contents of the buffer from userspace related to OS descriptors. + */ +static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, + struct usb_os_desc_header *h, void *data, + unsigned len, void *priv) +{ + struct ffs_data *ffs = priv; + u8 length; + + ENTER(); + + switch (type) { + case FFS_OS_DESC_EXT_COMPAT: { + struct usb_ext_compat_desc *d = data; + int i; + + if (len < sizeof(*d) || + d->bFirstInterfaceNumber >= ffs->interfaces_count || + d->Reserved1) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i) + if (d->Reserved2[i]) + return -EINVAL; + + length = sizeof(struct usb_ext_compat_desc); + } + break; + case FFS_OS_DESC_EXT_PROP: { + struct usb_ext_prop_desc *d = data; + u32 type, pdl; + u16 pnl; + + if (len < sizeof(*d) || h->interface >= ffs->interfaces_count) + return -EINVAL; + length = le32_to_cpu(d->dwSize); + type = le32_to_cpu(d->dwPropertyDataType); + if (type < USB_EXT_PROP_UNICODE || + type > USB_EXT_PROP_UNICODE_MULTI) { + pr_vdebug("unsupported os descriptor property type: %d", + type); + return -EINVAL; + } + pnl = le16_to_cpu(d->wPropertyNameLength); + pdl = le32_to_cpu(*(u32 *)((u8 *)data + 10 + pnl)); + if (length != 14 + pnl + pdl) { + pr_vdebug("invalid os descriptor length: %d pnl:%d pdl:%d (descriptor %d)\n", + length, pnl, pdl, type); + return -EINVAL; + } + ++ffs->ms_os_descs_ext_prop_count; + /* property name reported to the host as "WCHAR"s */ + ffs->ms_os_descs_ext_prop_name_len += pnl * 2; + ffs->ms_os_descs_ext_prop_data_len += pdl; + } + break; + default: + pr_vdebug("unknown descriptor: %d\n", type); + return -EINVAL; + } + return length; +} + static int __ffs_data_got_descs(struct ffs_data *ffs, char *const _data, size_t len) { char *data = _data, *raw_descs; - unsigned counts[3], flags; + unsigned os_descs_count = 0, counts[3], flags; int ret = -EINVAL, i; ENTER(); @@ -1878,7 +2067,8 @@ static int __ffs_data_got_descs(struct ffs_data *ffs, flags = get_unaligned_le32(data + 8); if (flags & ~(FUNCTIONFS_HAS_FS_DESC | FUNCTIONFS_HAS_HS_DESC | - FUNCTIONFS_HAS_SS_DESC)) { + FUNCTIONFS_HAS_SS_DESC | + FUNCTIONFS_HAS_MS_OS_DESC)) { ret = -ENOSYS; goto error; } @@ -1901,6 +2091,11 @@ static int __ffs_data_got_descs(struct ffs_data *ffs, len -= 4; } } + if (flags & (1 << i)) { + os_descs_count = get_unaligned_le32(data); + data += 4; + len -= 4; + }; /* Read descriptors */ raw_descs = data; @@ -1914,6 +2109,14 @@ static int __ffs_data_got_descs(struct ffs_data *ffs, data += ret; len -= ret; } + if (os_descs_count) { + ret = ffs_do_os_descs(os_descs_count, data, len, + __ffs_data_do_os_desc, ffs); + if (ret < 0) + goto error; + data += ret; + len -= ret; + } if (raw_descs == data || len) { ret = -EINVAL; @@ -1926,6 +2129,7 @@ static int __ffs_data_got_descs(struct ffs_data *ffs, ffs->fs_descs_count = counts[0]; ffs->hs_descs_count = counts[1]; ffs->ss_descs_count = counts[2]; + ffs->ms_os_descs_count = os_descs_count; return 0; @@ -2267,6 +2471,85 @@ static int __ffs_func_bind_do_nums(enum ffs_entity_type type, u8 *valuep, return 0; } +static int __ffs_func_bind_do_os_desc(enum ffs_os_desc_type type, + struct usb_os_desc_header *h, void *data, + unsigned len, void *priv) +{ + struct ffs_function *func = priv; + u8 length = 0; + + switch (type) { + case FFS_OS_DESC_EXT_COMPAT: { + struct usb_ext_compat_desc *desc = data; + struct usb_os_desc_table *t; + + t = &func->function.os_desc_table[desc->bFirstInterfaceNumber]; + t->if_id = func->interfaces_nums[desc->bFirstInterfaceNumber]; + memcpy(t->os_desc->ext_compat_id, &desc->CompatibleID, + ARRAY_SIZE(desc->CompatibleID) + + ARRAY_SIZE(desc->SubCompatibleID)); + length = sizeof(*desc); + } + break; + case FFS_OS_DESC_EXT_PROP: { + struct usb_ext_prop_desc *desc = data; + struct usb_os_desc_table *t; + struct usb_os_desc_ext_prop *ext_prop; + char *ext_prop_name; + char *ext_prop_data; + + t = &func->function.os_desc_table[h->interface]; + t->if_id = func->interfaces_nums[h->interface]; + + ext_prop = func->ffs->ms_os_descs_ext_prop_avail; + func->ffs->ms_os_descs_ext_prop_avail += sizeof(*ext_prop); + + ext_prop->type = le32_to_cpu(desc->dwPropertyDataType); + ext_prop->name_len = le16_to_cpu(desc->wPropertyNameLength); + ext_prop->data_len = le32_to_cpu(*(u32 *) + usb_ext_prop_data_len_ptr(data, ext_prop->name_len)); + length = ext_prop->name_len + ext_prop->data_len + 14; + + ext_prop_name = func->ffs->ms_os_descs_ext_prop_name_avail; + func->ffs->ms_os_descs_ext_prop_name_avail += + ext_prop->name_len; + + ext_prop_data = func->ffs->ms_os_descs_ext_prop_data_avail; + func->ffs->ms_os_descs_ext_prop_data_avail += + ext_prop->data_len; + memcpy(ext_prop_data, + usb_ext_prop_data_ptr(data, ext_prop->name_len), + ext_prop->data_len); + /* unicode data reported to the host as "WCHAR"s */ + switch (ext_prop->type) { + case USB_EXT_PROP_UNICODE: + case USB_EXT_PROP_UNICODE_ENV: + case USB_EXT_PROP_UNICODE_LINK: + case USB_EXT_PROP_UNICODE_MULTI: + ext_prop->data_len *= 2; + break; + } + ext_prop->data = ext_prop_data; + + memcpy(ext_prop_name, usb_ext_prop_name_ptr(data), + ext_prop->name_len); + /* property name reported to the host as "WCHAR"s */ + ext_prop->name_len *= 2; + ext_prop->name = ext_prop_name; + + t->os_desc->ext_prop_len += + ext_prop->name_len + ext_prop->data_len + 14; + ++t->os_desc->ext_prop_count; + list_add_tail(&ext_prop->entry, &t->os_desc->ext_prop); + } + break; + default: + pr_vdebug("unknown descriptor: %d\n", type); + } + + return length; +} + static inline struct f_fs_opts *ffs_do_functionfs_bind(struct usb_function *f, struct usb_configuration *c) { @@ -2328,7 +2611,7 @@ static int _ffs_func_bind(struct usb_configuration *c, const int super = gadget_is_superspeed(func->gadget) && func->ffs->ss_descs_count; - int fs_len, hs_len, ret; + int fs_len, hs_len, ss_len, ret, i; /* Make it a single chunk, less management later on */ vla_group(d); @@ -2340,6 +2623,18 @@ static int _ffs_func_bind(struct usb_configuration *c, vla_item_with_sz(d, struct usb_descriptor_header *, ss_descs, super ? ffs->ss_descs_count + 1 : 0); vla_item_with_sz(d, short, inums, ffs->interfaces_count); + vla_item_with_sz(d, struct usb_os_desc_table, os_desc_table, + c->cdev->use_os_string ? ffs->interfaces_count : 0); + vla_item_with_sz(d, char[16], ext_compat, + c->cdev->use_os_string ? ffs->interfaces_count : 0); + vla_item_with_sz(d, struct usb_os_desc, os_desc, + c->cdev->use_os_string ? ffs->interfaces_count : 0); + vla_item_with_sz(d, struct usb_os_desc_ext_prop, ext_prop, + ffs->ms_os_descs_ext_prop_count); + vla_item_with_sz(d, char, ext_prop_name, + ffs->ms_os_descs_ext_prop_name_len); + vla_item_with_sz(d, char, ext_prop_data, + ffs->ms_os_descs_ext_prop_data_len); vla_item_with_sz(d, char, raw_descs, ffs->raw_descs_length); char *vlabuf; @@ -2350,12 +2645,16 @@ static int _ffs_func_bind(struct usb_configuration *c, return -ENOTSUPP; /* Allocate a single chunk, less management later on */ - vlabuf = kmalloc(vla_group_size(d), GFP_KERNEL); + vlabuf = kzalloc(vla_group_size(d), GFP_KERNEL); if (unlikely(!vlabuf)) return -ENOMEM; - /* Zero */ - memset(vla_ptr(vlabuf, d, eps), 0, d_eps__sz); + ffs->ms_os_descs_ext_prop_avail = vla_ptr(vlabuf, d, ext_prop); + ffs->ms_os_descs_ext_prop_name_avail = + vla_ptr(vlabuf, d, ext_prop_name); + ffs->ms_os_descs_ext_prop_data_avail = + vla_ptr(vlabuf, d, ext_prop_data); + /* Copy descriptors */ memcpy(vla_ptr(vlabuf, d, raw_descs), ffs->raw_descs, ffs->raw_descs_length); @@ -2409,12 +2708,16 @@ static int _ffs_func_bind(struct usb_configuration *c, if (likely(super)) { func->function.ss_descriptors = vla_ptr(vlabuf, d, ss_descs); - ret = ffs_do_descs(ffs->ss_descs_count, + ss_len = ffs_do_descs(ffs->ss_descs_count, vla_ptr(vlabuf, d, raw_descs) + fs_len + hs_len, d_raw_descs__sz - fs_len - hs_len, __ffs_func_bind_do_descs, func); - if (unlikely(ret < 0)) + if (unlikely(ss_len < 0)) { + ret = ss_len; goto error; + } + } else { + ss_len = 0; } /* @@ -2430,6 +2733,28 @@ static int _ffs_func_bind(struct usb_configuration *c, if (unlikely(ret < 0)) goto error; + func->function.os_desc_table = vla_ptr(vlabuf, d, os_desc_table); + if (c->cdev->use_os_string) + for (i = 0; i < ffs->interfaces_count; ++i) { + struct usb_os_desc *desc; + + desc = func->function.os_desc_table[i].os_desc = + vla_ptr(vlabuf, d, os_desc) + + i * sizeof(struct usb_os_desc); + desc->ext_compat_id = + vla_ptr(vlabuf, d, ext_compat) + i * 16; + INIT_LIST_HEAD(&desc->ext_prop); + } + ret = ffs_do_os_descs(ffs->ms_os_descs_count, + vla_ptr(vlabuf, d, raw_descs) + + fs_len + hs_len + ss_len, + d_raw_descs__sz - fs_len - hs_len - ss_len, + __ffs_func_bind_do_os_desc, func); + if (unlikely(ret < 0)) + goto error; + func->function.os_desc_n = + c->cdev->use_os_string ? ffs->interfaces_count : 0; + /* And we're done */ ffs_event_add(ffs, FUNCTIONFS_BIND); return 0; diff --git a/drivers/usb/gadget/u_fs.h b/drivers/usb/gadget/u_fs.h index bf0ba375d459..63d6e71569c1 100644 --- a/drivers/usb/gadget/u_fs.h +++ b/drivers/usb/gadget/u_fs.h @@ -216,6 +216,13 @@ struct ffs_data { unsigned fs_descs_count; unsigned hs_descs_count; unsigned ss_descs_count; + unsigned ms_os_descs_count; + unsigned ms_os_descs_ext_prop_count; + unsigned ms_os_descs_ext_prop_name_len; + unsigned ms_os_descs_ext_prop_data_len; + void *ms_os_descs_ext_prop_avail; + void *ms_os_descs_ext_prop_name_avail; + void *ms_os_descs_ext_prop_data_avail; unsigned short strings_count; unsigned short interfaces_count; diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index 2a4b4a72a4f9..b66fae77c08c 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -18,10 +18,9 @@ enum functionfs_flags { FUNCTIONFS_HAS_FS_DESC = 1, FUNCTIONFS_HAS_HS_DESC = 2, FUNCTIONFS_HAS_SS_DESC = 4, + FUNCTIONFS_HAS_MS_OS_DESC = 8, }; -#ifndef __KERNEL__ - /* Descriptor of an non-audio endpoint */ struct usb_endpoint_descriptor_no_audio { __u8 bLength; @@ -33,6 +32,36 @@ struct usb_endpoint_descriptor_no_audio { __u8 bInterval; } __attribute__((packed)); +/* MS OS Descriptor header */ +struct usb_os_desc_header { + __u8 interface; + __le32 dwLength; + __le16 bcdVersion; + __le16 wIndex; + union { + struct { + __u8 bCount; + __u8 Reserved; + }; + __le16 wCount; + }; +} __attribute__((packed)); + +struct usb_ext_compat_desc { + __u8 bFirstInterfaceNumber; + __u8 Reserved1; + __u8 CompatibleID[8]; + __u8 SubCompatibleID[8]; + __u8 Reserved2[6]; +}; + +struct usb_ext_prop_desc { + __le32 dwSize; + __le32 dwPropertyDataType; + __le16 wPropertyNameLength; +} __attribute__((packed)); + +#ifndef __KERNEL__ /* * Descriptors format: @@ -45,9 +74,11 @@ struct usb_endpoint_descriptor_no_audio { * | | fs_count | LE32 | number of full-speed descriptors | * | | hs_count | LE32 | number of high-speed descriptors | * | | ss_count | LE32 | number of super-speed descriptors | + * | | os_count | LE32 | number of MS OS descriptors | * | | fs_descrs | Descriptor[] | list of full-speed descriptors | * | | hs_descrs | Descriptor[] | list of high-speed descriptors | * | | ss_descrs | Descriptor[] | list of super-speed descriptors | + * | | os_descrs | OSDesc[] | list of MS OS descriptors | * * Depending on which flags are set, various fields may be missing in the * structure. Any flags that are not recognised cause the whole block to be @@ -74,6 +105,52 @@ struct usb_endpoint_descriptor_no_audio { * | 0 | bLength | U8 | length of the descriptor | * | 1 | bDescriptorType | U8 | descriptor type | * | 2 | payload | | descriptor's payload | + * + * OSDesc[] is an array of valid MS OS Feature Descriptors which have one of + * the following formats: + * + * | off | name | type | description | + * |-----+-----------------+------+--------------------------| + * | 0 | inteface | U8 | related interface number | + * | 1 | dwLength | U32 | length of the descriptor | + * | 5 | bcdVersion | U16 | currently supported: 1 | + * | 7 | wIndex | U16 | currently supported: 4 | + * | 9 | bCount | U8 | number of ext. compat. | + * | 10 | Reserved | U8 | 0 | + * | 11 | ExtCompat[] | | list of ext. compat. d. | + * + * | off | name | type | description | + * |-----+-----------------+------+--------------------------| + * | 0 | inteface | U8 | related interface number | + * | 1 | dwLength | U32 | length of the descriptor | + * | 5 | bcdVersion | U16 | currently supported: 1 | + * | 7 | wIndex | U16 | currently supported: 5 | + * | 9 | wCount | U16 | number of ext. compat. | + * | 11 | ExtProp[] | | list of ext. prop. d. | + * + * ExtCompat[] is an array of valid Extended Compatiblity descriptors + * which have the following format: + * + * | off | name | type | description | + * |-----+-----------------------+------+-------------------------------------| + * | 0 | bFirstInterfaceNumber | U8 | index of the interface or of the 1st| + * | | | | interface in an IAD group | + * | 1 | Reserved | U8 | 0 | + * | 2 | CompatibleID | U8[8]| compatible ID string | + * | 10 | SubCompatibleID | U8[8]| subcompatible ID string | + * | 18 | Reserved | U8[6]| 0 | + * + * ExtProp[] is an array of valid Extended Properties descriptors + * which have the following format: + * + * | off | name | type | description | + * |-----+-----------------------+------+-------------------------------------| + * | 0 | dwSize | U32 | length of the descriptor | + * | 4 | dwPropertyDataType | U32 | 1..7 | + * | 8 | wPropertyNameLength | U16 | bPropertyName length (NL) | + * | 10 | bPropertyName |U8[NL]| name of this property | + * |10+NL| dwPropertyDataLength | U32 | bPropertyData length (DL) | + * |14+NL| bProperty |U8[DL]| payload of this property | */ struct usb_functionfs_strings_head { -- cgit v1.2.3 From bc91b0f07ada5535427373a4e2050877bcc12218 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 11 Jul 2014 21:10:18 +0200 Subject: ipv6: addrconf: implement address generation modes This patch introduces a possibility for userspace to set various (so far two) modes of generating addresses. This is useful for example for NetworkManager because it can set the mode to NONE and take care of link local addresses itself. That allow it to have the interface up, monitoring carrier but still don't have any addresses on it. One more use-case by Dan Williams: WWAN devices often have their LL address provided by the firmware of the device, which sometimes refuses to respond to incorrect LL addresses when doing DHCPv6 or IPv6 ND. The kernel cannot generate the correct LL address for two reasons: 1) WWAN pseudo-ethernet interfaces often construct a fake MAC address, or read a meaningless MAC address from the firmware. Thus the EUI64 and the IPv6LL address the kernel assigns will be wrong. The real LL address is often retrieved from the firmware with AT or proprietary commands. 2) WWAN PPP interfaces receive their LL address from IPV6CP, not from kernel assignments. Only after IPV6CP has completed do we know the LL address of the PPP interface and its peer. But the kernel has already assigned an incorrect LL address to the interface. So being able to suppress the kernel LL address generation and assign the one retrieved from the firmware is less complicated and more robust. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/if_inet6.h | 1 + include/uapi/linux/if_link.h | 6 +++++ net/ipv6/addrconf.c | 56 ++++++++++++++++++++++++++++++-------------- 3 files changed, 45 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index b4956a5fcc3f..d07b1a64b4e7 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -205,6 +205,7 @@ struct inet6_dev { struct timer_list rs_timer; __u8 rs_probes; + __u8 addr_gen_mode; unsigned long tstamp; /* ipv6InterfaceTable update timestamp */ struct rcu_head rcu; }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b38534895db5..ff957604a721 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -204,11 +204,17 @@ enum { IFLA_INET6_CACHEINFO, /* time values and max reasm size */ IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */ IFLA_INET6_TOKEN, /* device token */ + IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */ __IFLA_INET6_MAX }; #define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) +enum in6_addr_gen_mode { + IN6_ADDR_GEN_MODE_EUI64, + IN6_ADDR_GEN_MODE_NONE, +}; + enum { BRIDGE_MODE_UNSPEC, BRIDGE_MODE_HAIRPIN, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 358edd2272ac..4c03c2843094 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2730,9 +2730,25 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr } } +static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) +{ + if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { + struct in6_addr addr; + + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + /* addrconf_add_linklocal also adds a prefix_route and we + * only need to care about prefix routes if ipv6_generate_eui64 + * couldn't generate one. + */ + if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0) + addrconf_add_linklocal(idev, &addr); + else if (prefix_route) + addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + } +} + static void addrconf_dev_config(struct net_device *dev) { - struct in6_addr addr; struct inet6_dev *idev; ASSERT_RTNL(); @@ -2753,11 +2769,7 @@ static void addrconf_dev_config(struct net_device *dev) if (IS_ERR(idev)) return; - memset(&addr, 0, sizeof(struct in6_addr)); - addr.s6_addr32[0] = htonl(0xFE800000); - - if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0) - addrconf_add_linklocal(idev, &addr); + addrconf_addr_gen(idev, false); } #if IS_ENABLED(CONFIG_IPV6_SIT) @@ -2779,11 +2791,7 @@ static void addrconf_sit_config(struct net_device *dev) } if (dev->priv_flags & IFF_ISATAP) { - struct in6_addr addr; - - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) - addrconf_add_linklocal(idev, &addr); + addrconf_addr_gen(idev, false); return; } @@ -2798,7 +2806,6 @@ static void addrconf_sit_config(struct net_device *dev) static void addrconf_gre_config(struct net_device *dev) { struct inet6_dev *idev; - struct in6_addr addr; ASSERT_RTNL(); @@ -2807,11 +2814,7 @@ static void addrconf_gre_config(struct net_device *dev) return; } - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) - addrconf_add_linklocal(idev, &addr); - else - addrconf_prefix_route(&addr, 64, dev, 0, 0); + addrconf_addr_gen(idev, true); } #endif @@ -4423,6 +4426,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); if (nla == NULL) goto nla_put_failure; + + if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->addr_gen_mode)) + goto nla_put_failure; + read_lock_bh(&idev->lock); memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); read_unlock_bh(&idev->lock); @@ -4527,8 +4534,21 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0) BUG(); - if (tb[IFLA_INET6_TOKEN]) + if (tb[IFLA_INET6_TOKEN]) { err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); + if (err) + return err; + } + + if (tb[IFLA_INET6_ADDR_GEN_MODE]) { + u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); + + if (mode != IN6_ADDR_GEN_MODE_EUI64 && + mode != IN6_ADDR_GEN_MODE_NONE) + return -EINVAL; + idev->addr_gen_mode = mode; + err = 0; + } return err; } -- cgit v1.2.3 From f736d9985e69e72d9a2ebfd131a72ee8f870c6f3 Mon Sep 17 00:00:00 2001 From: Nikita Edward Baruzdin Date: Fri, 11 Jul 2014 16:13:21 +0400 Subject: can: netlink: Remove space before tab Fixes the corresponing checkpatch.pl warning. Signed-off-by: Nikita Edward Baruzdin Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 813d11f54977..3bbf5c7e1500 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -92,7 +92,7 @@ struct can_ctrlmode { }; #define CAN_CTRLMODE_LOOPBACK 0x01 /* Loopback mode */ -#define CAN_CTRLMODE_LISTENONLY 0x02 /* Listen-only mode */ +#define CAN_CTRLMODE_LISTENONLY 0x02 /* Listen-only mode */ #define CAN_CTRLMODE_3_SAMPLES 0x04 /* Triple sampling mode */ #define CAN_CTRLMODE_ONE_SHOT 0x08 /* One-Shot mode */ #define CAN_CTRLMODE_BERR_REPORTING 0x10 /* Bus-error reporting */ -- cgit v1.2.3 From 4b9e1bab12c9b6de965268c2fbe6ebbb35dddd89 Mon Sep 17 00:00:00 2001 From: Nikita Edward Baruzdin Date: Fri, 11 Jul 2014 16:13:22 +0400 Subject: can: netlink: Add CAN_CTRLMODE_PRESUME_ACK flag Most CAN controllers have a support for ignoring ACK absence. Some of them refer to this feature as a self test mode (e. g. SJA1000) and some include it as a part of a loopback mode (e. g. MCP2510). Setting the introduced flag via netlink should make CAN controller perform a successful transmission, even if there is no acknowledgement (dominant ACK bit) received. Signed-off-by: Nikita Edward Baruzdin Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h index 3bbf5c7e1500..3e4323a3918d 100644 --- a/include/uapi/linux/can/netlink.h +++ b/include/uapi/linux/can/netlink.h @@ -97,6 +97,7 @@ struct can_ctrlmode { #define CAN_CTRLMODE_ONE_SHOT 0x08 /* One-Shot mode */ #define CAN_CTRLMODE_BERR_REPORTING 0x10 /* Bus-error reporting */ #define CAN_CTRLMODE_FD 0x20 /* CAN FD mode */ +#define CAN_CTRLMODE_PRESUME_ACK 0x40 /* Ignore missing CAN ACKs */ /* * CAN device statistics -- cgit v1.2.3 From 685343fc3ba61a1f6eef361b786601123db16c28 Mon Sep 17 00:00:00 2001 From: Tom Gundersen Date: Mon, 14 Jul 2014 16:37:22 +0200 Subject: net: add name_assign_type netdev attribute Based on a patch by David Herrmann. The name_assign_type attribute gives hints where the interface name of a given net-device comes from. These values are currently defined: NET_NAME_ENUM: The ifname is provided by the kernel with an enumerated suffix, typically based on order of discovery. Names may be reused and unpredictable. NET_NAME_PREDICTABLE: The ifname has been assigned by the kernel in a predictable way that is guaranteed to avoid reuse and always be the same for a given device. Examples include statically created devices like the loopback device and names deduced from hardware properties (including being given explicitly by the firmware). Names depending on the order of discovery, or in any other way on the existence of other devices, must not be marked as PREDICTABLE. NET_NAME_USER: The ifname was provided by user-space during net-device setup. NET_NAME_RENAMED: The net-device has been renamed from userspace. Once this type is set, it cannot change again. NET_NAME_UNKNOWN: This is an internal placeholder to indicate that we yet haven't yet categorized the name. It will not be exposed to userspace, rather -EINVAL is returned. The aim of these patches is to improve user-space renaming of interfaces. As a general rule, userspace must rename interfaces to guarantee that names stay the same every time a given piece of hardware appears (at boot, or when attaching it). However, there are several situations where userspace should not perform the renaming, and that depends on both the policy of the local admin, but crucially also on the nature of the current interface name. If an interface was created in repsonse to a userspace request, and userspace already provided a name, we most probably want to leave that name alone. The main instance of this is wifi-P2P devices created over nl80211, which currently have a long-standing bug where they are getting renamed by udev. We label such names NET_NAME_USER. If an interface, unbeknown to us, has already been renamed from userspace, we most probably want to leave also that alone. This will typically happen when third-party plugins (for instance to udev, but the interface is generic so could be from anywhere) renames the interface without informing udev about it. A typical situation is when you switch root from an installer or an initrd to the real system and the new instance of udev does not know what happened before the switch. These types of problems have caused repeated issues in the past. To solve this, once an interface has been renamed, its name is labelled NET_NAME_RENAMED. In many cases, the kernel is actually able to name interfaces in such a way that there is no need for userspace to rename them. This is the case when the enumeration order of devices, or in fact any other (non-parent) device on the system, can not influence the name of the interface. Examples include statically created devices, or any naming schemes based on hardware properties of the interface. In this case the admin may prefer to use the kernel-provided names, and to make that possible we label such names NET_NAME_PREDICTABLE. We want the kernel to have tho possibilty of performing predictable interface naming itself (and exposing to userspace that it has), as the information necessary for a proper naming scheme for a certain class of devices may not be exposed to userspace. The case where renaming is almost certainly desired, is when the kernel has given the interface a name using global device enumeration based on order of discovery (ethX, wlanY, etc). These naming schemes are labelled NET_NAME_ENUM. Lastly, a fallback is left as NET_NAME_UNKNOWN, to indicate that a driver has not yet been ported. This is mostly useful as a transitionary measure, allowing us to label the various naming schemes bit by bit. v8: minor documentation fixes v9: move comment to the right commit Signed-off-by: Tom Gundersen Reviewed-by: David Herrmann Reviewed-by: Kay Sievers Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-class-net | 11 +++++++++++ include/linux/netdevice.h | 2 ++ include/uapi/linux/netdevice.h | 6 ++++++ net/core/net-sysfs.c | 20 ++++++++++++++++++++ 4 files changed, 39 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net index 416c5d59f52e..d322b0581194 100644 --- a/Documentation/ABI/testing/sysfs-class-net +++ b/Documentation/ABI/testing/sysfs-class-net @@ -1,3 +1,14 @@ +What: /sys/class/net//name_assign_type +Date: July 2014 +KernelVersion: 3.17 +Contact: netdev@vger.kernel.org +Description: + Indicates the name assignment type. Possible values are: + 1: enumerated by the kernel, possibly in an unpredictable way + 2: predictably named by the kernel + 3: named by userspace + 4: renamed + What: /sys/class/net//addr_assign_type Date: July 2010 KernelVersion: 3.2 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a320db96180..9be34732142f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1381,6 +1381,8 @@ struct net_device { struct kset *queues_kset; #endif + unsigned char name_assign_type; + bool uc_promisc; unsigned int promiscuity; unsigned int allmulti; diff --git a/include/uapi/linux/netdevice.h b/include/uapi/linux/netdevice.h index fdfbd1c17065..55818543342d 100644 --- a/include/uapi/linux/netdevice.h +++ b/include/uapi/linux/netdevice.h @@ -37,6 +37,12 @@ #define INIT_NETDEV_GROUP 0 +/* interface name assignment types (sysfs name_assign_type attribute) */ +#define NET_NAME_UNKNOWN 0 /* unknown origin (not exposed to userspace) */ +#define NET_NAME_ENUM 1 /* enumerated by kernel */ +#define NET_NAME_PREDICTABLE 2 /* predictably named by the kernel */ +#define NET_NAME_USER 3 /* provided by user-space */ +#define NET_NAME_RENAMED 4 /* renamed by user-space */ /* Media selection options. */ enum { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1cac29ebb05b..7752f2ad49a5 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -112,6 +112,25 @@ NETDEVICE_SHOW_RO(ifindex, fmt_dec); NETDEVICE_SHOW_RO(type, fmt_dec); NETDEVICE_SHOW_RO(link_mode, fmt_dec); +static ssize_t format_name_assign_type(const struct net_device *net, char *buf) +{ + return sprintf(buf, fmt_dec, net->name_assign_type); +} + +static ssize_t name_assign_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct net_device *net = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (net->name_assign_type != NET_NAME_UNKNOWN) + ret = netdev_show(dev, attr, buf, format_name_assign_type); + + return ret; +} +static DEVICE_ATTR_RO(name_assign_type); + /* use same locking rules as GIFHWADDR ioctl's */ static ssize_t address_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -387,6 +406,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_dev_port.attr, &dev_attr_iflink.attr, &dev_attr_ifindex.attr, + &dev_attr_name_assign_type.attr, &dev_attr_addr_assign_type.attr, &dev_attr_addr_len.attr, &dev_attr_link_mode.attr, -- cgit v1.2.3 From 63b949382c5f263746b1c177f6ff84de2201ae9d Mon Sep 17 00:00:00 2001 From: Geir Ola Vaagland Date: Sat, 12 Jul 2014 20:30:36 +0200 Subject: net: sctp: implement rfc6458, 5.3.4. SCTP_SNDINFO cmsg support This patch implements section 5.3.4. of RFC6458, that is, support for 'SCTP Send Information Structure' (SCTP_SNDINFO) which can be placed into ancillary data cmsghdr structure for sendmsg() calls. The sctp_sndinfo structure is defined as per RFC as below ... struct sctp_sndinfo { uint16_t snd_sid; uint16_t snd_flags; uint32_t snd_ppid; uint32_t snd_context; sctp_assoc_t snd_assoc_id; }; ... and supplied under cmsg_level IPPROTO_SCTP, cmsg_type SCTP_SNDINFO, while cmsg_data[] contains struct sctp_sndinfo. An sctp_sndinfo item always corresponds to the data in msg_iov. Joint work with Daniel Borkmann. Signed-off-by: Geir Ola Vaagland Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 +- include/uapi/linux/sctp.h | 22 +++++++++++-- net/sctp/socket.c | 77 ++++++++++++++++++++++++++++++++++------------ 3 files changed, 79 insertions(+), 23 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f38588bf3462..7af9a0f5d8ce 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1919,7 +1919,8 @@ struct sctp_chunk *sctp_get_ecne_prepend(struct sctp_association *asoc); /* A convenience structure to parse out SCTP specific CMSGs. */ typedef struct sctp_cmsgs { struct sctp_initmsg *init; - struct sctp_sndrcvinfo *info; + struct sctp_sndrcvinfo *srinfo; + struct sctp_sndinfo *sinfo; } sctp_cmsgs_t; /* Structure for tracking memory objects */ diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 266022a2be4a..a387761f7e02 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -154,6 +154,22 @@ struct sctp_sndrcvinfo { sctp_assoc_t sinfo_assoc_id; }; +/* 5.3.4 SCTP Send Information Structure (SCTP_SNDINFO) + * + * This cmsghdr structure specifies SCTP options for sendmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ ------------------- + * IPPROTO_SCTP SCTP_SNDINFO struct sctp_sndinfo + */ +struct sctp_sndinfo { + __u16 snd_sid; + __u16 snd_flags; + __u32 snd_ppid; + __u32 snd_context; + sctp_assoc_t snd_assoc_id; +}; + /* * sinfo_flags: 16 bits (unsigned integer) * @@ -177,10 +193,12 @@ typedef union { /* These are cmsg_types. */ typedef enum sctp_cmsg_type { - SCTP_INIT, /* 5.2.1 SCTP Initiation Structure */ + SCTP_INIT, /* 5.2.1 SCTP Initiation Structure */ #define SCTP_INIT SCTP_INIT - SCTP_SNDRCV, /* 5.2.2 SCTP Header Information Structure */ + SCTP_SNDRCV, /* 5.2.2 SCTP Header Information Structure */ #define SCTP_SNDRCV SCTP_SNDRCV + SCTP_SNDINFO, /* 5.3.4 SCTP Send Information Structure */ +#define SCTP_SNDINFO SCTP_SNDINFO } sctp_cmsg_t; /* diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 429899689408..d61729e99856 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1602,12 +1602,13 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct sctp_initmsg *sinit; sctp_assoc_t associd = 0; sctp_cmsgs_t cmsgs = { NULL }; - int err; sctp_scope_t scope; - long timeo; - __u16 sinfo_flags = 0; + bool fill_sinfo_ttl = false; struct sctp_datamsg *datamsg; int msg_flags = msg->msg_flags; + __u16 sinfo_flags = 0; + long timeo; + int err; err = 0; sp = sctp_sk(sk); @@ -1648,10 +1649,21 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, msg_name = msg->msg_name; } - sinfo = cmsgs.info; sinit = cmsgs.init; + if (cmsgs.sinfo != NULL) { + memset(&default_sinfo, 0, sizeof(default_sinfo)); + default_sinfo.sinfo_stream = cmsgs.sinfo->snd_sid; + default_sinfo.sinfo_flags = cmsgs.sinfo->snd_flags; + default_sinfo.sinfo_ppid = cmsgs.sinfo->snd_ppid; + default_sinfo.sinfo_context = cmsgs.sinfo->snd_context; + default_sinfo.sinfo_assoc_id = cmsgs.sinfo->snd_assoc_id; - /* Did the user specify SNDRCVINFO? */ + sinfo = &default_sinfo; + fill_sinfo_ttl = true; + } else { + sinfo = cmsgs.srinfo; + } + /* Did the user specify SNDINFO/SNDRCVINFO? */ if (sinfo) { sinfo_flags = sinfo->sinfo_flags; associd = sinfo->sinfo_assoc_id; @@ -1858,8 +1870,8 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, pr_debug("%s: we have a valid association\n", __func__); if (!sinfo) { - /* If the user didn't specify SNDRCVINFO, make up one with - * some defaults. + /* If the user didn't specify SNDINFO/SNDRCVINFO, make up + * one with some defaults. */ memset(&default_sinfo, 0, sizeof(default_sinfo)); default_sinfo.sinfo_stream = asoc->default_stream; @@ -1868,7 +1880,13 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, default_sinfo.sinfo_context = asoc->default_context; default_sinfo.sinfo_timetolive = asoc->default_timetolive; default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc); + sinfo = &default_sinfo; + } else if (fill_sinfo_ttl) { + /* In case SNDINFO was specified, we still need to fill + * it with a default ttl from the assoc here. + */ + sinfo->sinfo_timetolive = asoc->default_timetolive; } /* API 7.1.7, the sndbuf size per association bounds the @@ -6390,8 +6408,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) struct cmsghdr *cmsg; struct msghdr *my_msg = (struct msghdr *)msg; - for (cmsg = CMSG_FIRSTHDR(msg); - cmsg != NULL; + for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; cmsg = CMSG_NXTHDR(my_msg, cmsg)) { if (!CMSG_OK(my_msg, cmsg)) return -EINVAL; @@ -6404,7 +6421,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) switch (cmsg->cmsg_type) { case SCTP_INIT: /* SCTP Socket API Extension - * 5.2.1 SCTP Initiation Structure (SCTP_INIT) + * 5.3.1 SCTP Initiation Structure (SCTP_INIT) * * This cmsghdr structure provides information for * initializing new SCTP associations with sendmsg(). @@ -6416,15 +6433,15 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) * ------------ ------------ ---------------------- * IPPROTO_SCTP SCTP_INIT struct sctp_initmsg */ - if (cmsg->cmsg_len != - CMSG_LEN(sizeof(struct sctp_initmsg))) + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_initmsg))) return -EINVAL; - cmsgs->init = (struct sctp_initmsg *)CMSG_DATA(cmsg); + + cmsgs->init = CMSG_DATA(cmsg); break; case SCTP_SNDRCV: /* SCTP Socket API Extension - * 5.2.2 SCTP Header Information Structure(SCTP_SNDRCV) + * 5.3.2 SCTP Header Information Structure(SCTP_SNDRCV) * * This cmsghdr structure specifies SCTP options for * sendmsg() and describes SCTP header information @@ -6434,24 +6451,44 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) * ------------ ------------ ---------------------- * IPPROTO_SCTP SCTP_SNDRCV struct sctp_sndrcvinfo */ - if (cmsg->cmsg_len != - CMSG_LEN(sizeof(struct sctp_sndrcvinfo))) + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_sndrcvinfo))) return -EINVAL; - cmsgs->info = - (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); + cmsgs->srinfo = CMSG_DATA(cmsg); - /* Minimally, validate the sinfo_flags. */ - if (cmsgs->info->sinfo_flags & + if (cmsgs->srinfo->sinfo_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; + case SCTP_SNDINFO: + /* SCTP Socket API Extension + * 5.3.4 SCTP Send Information Structure (SCTP_SNDINFO) + * + * This cmsghdr structure specifies SCTP options for + * sendmsg(). This structure and SCTP_RCVINFO replaces + * SCTP_SNDRCV which has been deprecated. + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ --------------------- + * IPPROTO_SCTP SCTP_SNDINFO struct sctp_sndinfo + */ + if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_sndinfo))) + return -EINVAL; + + cmsgs->sinfo = CMSG_DATA(cmsg); + + if (cmsgs->sinfo->snd_flags & + ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_ABORT | SCTP_EOF)) + return -EINVAL; + break; default: return -EINVAL; } } + return 0; } -- cgit v1.2.3 From 0d3a421d284812d07970b4ccee74d4fa38737e4d Mon Sep 17 00:00:00 2001 From: Geir Ola Vaagland Date: Sat, 12 Jul 2014 20:30:37 +0200 Subject: net: sctp: implement rfc6458, 5.3.5. SCTP_RCVINFO cmsg support This patch implements section 5.3.5. of RFC6458, that is, support for 'SCTP Receive Information Structure' (SCTP_RCVINFO) which is placed into ancillary data cmsghdr structure for each recvmsg() call. This option can be enabled/disabled via setsockopt(2) on SOL_SCTP level by setting an int value with 1/0 for SCTP_RECVRCVINFO in user space applications as per RFC6458, section 8.1.29. The sctp_rcvinfo structure is defined as per RFC as below ... struct sctp_rcvinfo { uint16_t rcv_sid; uint16_t rcv_ssn; uint16_t rcv_flags; <-- 2 bytes hole --> uint32_t rcv_ppid; uint32_t rcv_tsn; uint32_t rcv_cumtsn; uint32_t rcv_context; sctp_assoc_t rcv_assoc_id; }; ... and provided under cmsg_level IPPROTO_SCTP, cmsg_type SCTP_RCVINFO, while cmsg_data[] contains struct sctp_rcvinfo. An sctp_rcvinfo item always corresponds to the data in msg_iov. Joint work with Daniel Borkmann. Signed-off-by: Geir Ola Vaagland Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 +++ include/net/sctp/ulpevent.h | 5 ++++- include/uapi/linux/sctp.h | 32 ++++++++++++++++++++++------- net/sctp/socket.c | 49 ++++++++++++++++++++++++++++++++++++++++++++- net/sctp/ulpevent.c | 25 +++++++++++++++++++++++ 5 files changed, 105 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 7af9a0f5d8ce..11d5df015370 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -207,7 +207,9 @@ struct sctp_sock { struct sctp_paddrparams paddrparam; struct sctp_event_subscribe subscribe; struct sctp_assocparams assocparams; + int user_frag; + __u32 autoclose; __u8 nodelay; __u8 disable_fragments; @@ -215,6 +217,7 @@ struct sctp_sock { __u8 frag_interleave; __u32 adaptation_ind; __u32 pd_point; + __u8 recvrcvinfo; atomic_t pd_mode; /* Receive to here while partial delivery is in effect. */ diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index daacb32b55b5..e8095f973e94 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -129,7 +129,10 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event( const struct sctp_association *asoc, gfp_t gfp); void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, - struct msghdr *); + struct msghdr *); +void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event, + struct msghdr *); + __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event); /* Is this event type enabled? */ diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index a387761f7e02..29b81bbfc53d 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -95,6 +95,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_GET_ASSOC_ID_LIST 29 /* Read only */ #define SCTP_AUTO_ASCONF 30 #define SCTP_PEER_ADDR_THLDS 31 +#define SCTP_RECVRCVINFO 32 /* Internal Socket Options. Some of the sctp library functions are * implemented using these socket options. @@ -110,8 +111,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ #define SCTP_GET_ASSOC_STATS 112 /* Read only */ -/* - * 5.2.1 SCTP Initiation Structure (SCTP_INIT) +/* 5.3.1 SCTP Initiation Structure (SCTP_INIT) * * This cmsghdr structure provides information for initializing new * SCTP associations with sendmsg(). The SCTP_INITMSG socket option @@ -121,7 +121,6 @@ typedef __s32 sctp_assoc_t; * cmsg_level cmsg_type cmsg_data[] * ------------ ------------ ---------------------- * IPPROTO_SCTP SCTP_INIT struct sctp_initmsg - * */ struct sctp_initmsg { __u16 sinit_num_ostreams; @@ -130,8 +129,7 @@ struct sctp_initmsg { __u16 sinit_max_init_timeo; }; -/* - * 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) +/* 5.3.2 SCTP Header Information Structure (SCTP_SNDRCV) * * This cmsghdr structure specifies SCTP options for sendmsg() and * describes SCTP header information about a received message through @@ -140,7 +138,6 @@ struct sctp_initmsg { * cmsg_level cmsg_type cmsg_data[] * ------------ ------------ ---------------------- * IPPROTO_SCTP SCTP_SNDRCV struct sctp_sndrcvinfo - * */ struct sctp_sndrcvinfo { __u16 sinfo_stream; @@ -170,13 +167,32 @@ struct sctp_sndinfo { sctp_assoc_t snd_assoc_id; }; +/* 5.3.5 SCTP Receive Information Structure (SCTP_RCVINFO) + * + * This cmsghdr structure describes SCTP receive information + * about a received message through recvmsg(). + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ ------------------- + * IPPROTO_SCTP SCTP_RCVINFO struct sctp_rcvinfo + */ +struct sctp_rcvinfo { + __u16 rcv_sid; + __u16 rcv_ssn; + __u16 rcv_flags; + __u32 rcv_ppid; + __u32 rcv_tsn; + __u32 rcv_cumtsn; + __u32 rcv_context; + sctp_assoc_t rcv_assoc_id; +}; + /* * sinfo_flags: 16 bits (unsigned integer) * * This field may contain any of the following flags and is composed of * a bitwise OR of these values. */ - enum sctp_sinfo_flags { SCTP_UNORDERED = 1, /* Send/receive message unordered. */ SCTP_ADDR_OVER = 2, /* Override the primary destination. */ @@ -199,6 +215,8 @@ typedef enum sctp_cmsg_type { #define SCTP_SNDRCV SCTP_SNDRCV SCTP_SNDINFO, /* 5.3.4 SCTP Send Information Structure */ #define SCTP_SNDINFO SCTP_SNDINFO + SCTP_RCVINFO, /* 5.3.5 SCTP Receive Information Structure */ +#define SCTP_RCVINFO SCTP_RCVINFO } sctp_cmsg_t; /* diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d61729e99856..9c193887c5cd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2112,9 +2112,13 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, sp->pf->skb_msgname(skb, msg->msg_name, addr_len); } + /* Check if we allow SCTP_RCVINFO. */ + if (sp->recvrcvinfo) + sctp_ulpevent_read_rcvinfo(event, msg); /* Check if we allow SCTP_SNDRCVINFO. */ if (sp->subscribe.sctp_data_io_event) sctp_ulpevent_read_sndrcvinfo(event, msg); + #if 0 /* FIXME: we should be calling IP/IPv6 layers. */ if (sk->sk_protinfo.af_inet.cmsg_flags) @@ -3541,7 +3545,6 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, return 0; } - /* * SCTP_PEER_ADDR_THLDS * @@ -3592,6 +3595,22 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, return 0; } +static int sctp_setsockopt_recvrcvinfo(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + int val; + + if (optlen < sizeof(int)) + return -EINVAL; + if (get_user(val, (int __user *) optval)) + return -EFAULT; + + sctp_sk(sk)->recvrcvinfo = (val == 0) ? 0 : 1; + + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3743,6 +3762,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_PEER_ADDR_THLDS: retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen); break; + case SCTP_RECVRCVINFO: + retval = sctp_setsockopt_recvrcvinfo(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -3989,6 +4011,8 @@ static int sctp_init_sock(struct sock *sk) /* Enable Nagle algorithm by default. */ sp->nodelay = 0; + sp->recvrcvinfo = 0; + /* Enable by default. */ sp->v4mapped = 1; @@ -5770,6 +5794,26 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, return 0; } +static int sctp_getsockopt_recvrcvinfo(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + int val = 0; + + if (len < sizeof(int)) + return -EINVAL; + + len = sizeof(int); + if (sctp_sk(sk)->recvrcvinfo) + val = 1; + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -5913,6 +5957,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_GET_ASSOC_STATS: retval = sctp_getsockopt_assoc_stats(sk, len, optval, optlen); break; + case SCTP_RECVRCVINFO: + retval = sctp_getsockopt_recvrcvinfo(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index b6842fdb53d4..b31f365f18ab 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -886,6 +886,31 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, sizeof(sinfo), &sinfo); } +/* RFC6458, Section 5.3.5 SCTP Receive Information Structure + * (SCTP_SNDRCV) + */ +void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event, + struct msghdr *msghdr) +{ + struct sctp_rcvinfo rinfo; + + if (sctp_ulpevent_is_notification(event)) + return; + + memset(&rinfo, 0, sizeof(struct sctp_rcvinfo)); + rinfo.rcv_sid = event->stream; + rinfo.rcv_ssn = event->ssn; + rinfo.rcv_ppid = event->ppid; + rinfo.rcv_flags = event->flags; + rinfo.rcv_tsn = event->tsn; + rinfo.rcv_cumtsn = event->cumtsn; + rinfo.rcv_assoc_id = sctp_assoc2id(event->asoc); + rinfo.rcv_context = event->asoc->default_rcv_context; + + put_cmsg(msghdr, IPPROTO_SCTP, SCTP_RCVINFO, + sizeof(rinfo), &rinfo); +} + /* Do accounting for bytes received and hold a reference to the association * for each skb. */ -- cgit v1.2.3 From 2347c80ff127b94ddaa675e2b78ab4cef46dc905 Mon Sep 17 00:00:00 2001 From: Geir Ola Vaagland Date: Sat, 12 Jul 2014 20:30:38 +0200 Subject: net: sctp: implement rfc6458, 5.3.6. SCTP_NXTINFO cmsg support This patch implements section 5.3.6. of RFC6458, that is, support for 'SCTP Next Receive Information Structure' (SCTP_NXTINFO) which is placed into ancillary data cmsghdr structure for each recvmsg() call, if this information is already available when delivering the current message. This option can be enabled/disabled via setsockopt(2) on SOL_SCTP level by setting an int value with 1/0 for SCTP_RECVNXTINFO in user space applications as per RFC6458, section 8.1.30. The sctp_nxtinfo structure is defined as per RFC as below ... struct sctp_nxtinfo { uint16_t nxt_sid; uint16_t nxt_flags; uint32_t nxt_ppid; uint32_t nxt_length; sctp_assoc_t nxt_assoc_id; }; ... and provided under cmsg_level IPPROTO_SCTP, cmsg_type SCTP_NXTINFO, while cmsg_data[] contains struct sctp_nxtinfo. Joint work with Daniel Borkmann. Signed-off-by: Geir Ola Vaagland Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 1 + include/net/sctp/structs.h | 1 + include/net/sctp/ulpevent.h | 9 ++------ include/uapi/linux/sctp.h | 47 +++++++++++++++++++++++++++++----------- net/sctp/socket.c | 52 +++++++++++++++++++++++++++++++++++++++++---- net/sctp/ulpevent.c | 38 +++++++++++++++++++++++++++++++++ 6 files changed, 125 insertions(+), 23 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index c2035c96a2ee..90c1cccd164d 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -109,6 +109,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc); extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); +struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); /* * sctp/primitive.c diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 11d5df015370..7741d1b66967 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -218,6 +218,7 @@ struct sctp_sock { __u32 adaptation_ind; __u32 pd_point; __u8 recvrcvinfo; + __u8 recvnxtinfo; atomic_t pd_mode; /* Receive to here while partial delivery is in effect. */ diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index e8095f973e94..cccdcfd14973 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -132,6 +132,8 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *); void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event, struct msghdr *); +void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event, + struct msghdr *, struct sock *sk); __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event); @@ -158,10 +160,3 @@ static inline int sctp_ulpevent_is_enabled(const struct sctp_ulpevent *event, } #endif /* __sctp_ulpevent_h__ */ - - - - - - - diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 29b81bbfc53d..222f82ffeca4 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -96,6 +96,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_AUTO_ASCONF 30 #define SCTP_PEER_ADDR_THLDS 31 #define SCTP_RECVRCVINFO 32 +#define SCTP_RECVNXTINFO 33 /* Internal Socket Options. Some of the sctp library functions are * implemented using these socket options. @@ -111,6 +112,13 @@ typedef __s32 sctp_assoc_t; #define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */ #define SCTP_GET_ASSOC_STATS 112 /* Read only */ +/* These are bit fields for msghdr->msg_flags. See section 5.1. */ +/* On user space Linux, these live in as an enum. */ +enum sctp_msg_flags { + MSG_NOTIFICATION = 0x8000, +#define MSG_NOTIFICATION MSG_NOTIFICATION +}; + /* 5.3.1 SCTP Initiation Structure (SCTP_INIT) * * This cmsghdr structure provides information for initializing new @@ -187,6 +195,25 @@ struct sctp_rcvinfo { sctp_assoc_t rcv_assoc_id; }; +/* 5.3.6 SCTP Next Receive Information Structure (SCTP_NXTINFO) + * + * This cmsghdr structure describes SCTP receive information + * of the next message that will be delivered through recvmsg() + * if this information is already available when delivering + * the current message. + * + * cmsg_level cmsg_type cmsg_data[] + * ------------ ------------ ------------------- + * IPPROTO_SCTP SCTP_NXTINFO struct sctp_nxtinfo + */ +struct sctp_nxtinfo { + __u16 nxt_sid; + __u16 nxt_flags; + __u32 nxt_ppid; + __u32 nxt_length; + sctp_assoc_t nxt_assoc_id; +}; + /* * sinfo_flags: 16 bits (unsigned integer) * @@ -194,11 +221,12 @@ struct sctp_rcvinfo { * a bitwise OR of these values. */ enum sctp_sinfo_flags { - SCTP_UNORDERED = 1, /* Send/receive message unordered. */ - SCTP_ADDR_OVER = 2, /* Override the primary destination. */ - SCTP_ABORT=4, /* Send an ABORT message to the peer. */ - SCTP_SACK_IMMEDIATELY = 8, /* SACK should be sent without delay */ - SCTP_EOF=MSG_FIN, /* Initiate graceful shutdown process. */ + SCTP_UNORDERED = (1 << 0), /* Send/receive message unordered. */ + SCTP_ADDR_OVER = (1 << 1), /* Override the primary destination. */ + SCTP_ABORT = (1 << 2), /* Send an ABORT message to the peer. */ + SCTP_SACK_IMMEDIATELY = (1 << 3), /* SACK should be sent without delay. */ + SCTP_NOTIFICATION = MSG_NOTIFICATION, /* Next message is not user msg but notification. */ + SCTP_EOF = MSG_FIN, /* Initiate graceful shutdown process. */ }; typedef union { @@ -217,6 +245,8 @@ typedef enum sctp_cmsg_type { #define SCTP_SNDINFO SCTP_SNDINFO SCTP_RCVINFO, /* 5.3.5 SCTP Receive Information Structure */ #define SCTP_RCVINFO SCTP_RCVINFO + SCTP_NXTINFO, /* 5.3.6 SCTP Next Receive Information Structure */ +#define SCTP_NXTINFO SCTP_NXTINFO } sctp_cmsg_t; /* @@ -844,13 +874,6 @@ struct sctp_assoc_stats { __u64 sas_ictrlchunks; /* Control chunks received */ }; -/* These are bit fields for msghdr->msg_flags. See section 5.1. */ -/* On user space Linux, these live in as an enum. */ -enum sctp_msg_flags { - MSG_NOTIFICATION = 0x8000, -#define MSG_NOTIFICATION MSG_NOTIFICATION -}; - /* * 8.1 sctp_bindx() * diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9c193887c5cd..9bca87ee5152 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2060,8 +2060,6 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) * flags - flags sent or received with the user message, see Section * 5 for complete description of the flags. */ -static struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); - static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) @@ -2112,6 +2110,9 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, sp->pf->skb_msgname(skb, msg->msg_name, addr_len); } + /* Check if we allow SCTP_NXTINFO. */ + if (sp->recvnxtinfo) + sctp_ulpevent_read_nxtinfo(event, msg, sk); /* Check if we allow SCTP_RCVINFO. */ if (sp->recvrcvinfo) sctp_ulpevent_read_rcvinfo(event, msg); @@ -3611,6 +3612,22 @@ static int sctp_setsockopt_recvrcvinfo(struct sock *sk, return 0; } +static int sctp_setsockopt_recvnxtinfo(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + int val; + + if (optlen < sizeof(int)) + return -EINVAL; + if (get_user(val, (int __user *) optval)) + return -EFAULT; + + sctp_sk(sk)->recvnxtinfo = (val == 0) ? 0 : 1; + + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3765,6 +3782,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_RECVRCVINFO: retval = sctp_setsockopt_recvrcvinfo(sk, optval, optlen); break; + case SCTP_RECVNXTINFO: + retval = sctp_setsockopt_recvnxtinfo(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -4012,6 +4032,7 @@ static int sctp_init_sock(struct sock *sk) sp->nodelay = 0; sp->recvrcvinfo = 0; + sp->recvnxtinfo = 0; /* Enable by default. */ sp->v4mapped = 1; @@ -5814,6 +5835,26 @@ static int sctp_getsockopt_recvrcvinfo(struct sock *sk, int len, return 0; } +static int sctp_getsockopt_recvnxtinfo(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + int val = 0; + + if (len < sizeof(int)) + return -EINVAL; + + len = sizeof(int); + if (sctp_sk(sk)->recvnxtinfo) + val = 1; + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -5960,6 +6001,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_RECVRCVINFO: retval = sctp_getsockopt_recvrcvinfo(sk, len, optval, optlen); break; + case SCTP_RECVNXTINFO: + retval = sctp_getsockopt_recvnxtinfo(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6602,8 +6646,8 @@ out: * Note: This is pretty much the same routine as in core/datagram.c * with a few changes to make lksctp work. */ -static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, - int noblock, int *err) +struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, + int noblock, int *err) { int error; struct sk_buff *skb; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index b31f365f18ab..e049298ecfa0 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -911,6 +911,44 @@ void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event, sizeof(rinfo), &rinfo); } +/* RFC6458, Section 5.3.6. SCTP Next Receive Information Structure + * (SCTP_NXTINFO) + */ +static void __sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event, + struct msghdr *msghdr, + const struct sk_buff *skb) +{ + struct sctp_nxtinfo nxtinfo; + + memset(&nxtinfo, 0, sizeof(nxtinfo)); + nxtinfo.nxt_sid = event->stream; + nxtinfo.nxt_ppid = event->ppid; + nxtinfo.nxt_flags = event->flags; + if (sctp_ulpevent_is_notification(event)) + nxtinfo.nxt_flags |= SCTP_NOTIFICATION; + nxtinfo.nxt_length = skb->len; + nxtinfo.nxt_assoc_id = sctp_assoc2id(event->asoc); + + put_cmsg(msghdr, IPPROTO_SCTP, SCTP_NXTINFO, + sizeof(nxtinfo), &nxtinfo); +} + +void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event, + struct msghdr *msghdr, + struct sock *sk) +{ + struct sk_buff *skb; + int err; + + skb = sctp_skb_recv_datagram(sk, MSG_PEEK, 1, &err); + if (skb != NULL) { + __sctp_ulpevent_read_nxtinfo(sctp_skb2event(skb), + msghdr, skb); + /* Just release refcount here. */ + kfree_skb(skb); + } +} + /* Do accounting for bytes received and hold a reference to the association * for each skb. */ -- cgit v1.2.3 From 6b3fd5f3a2bbc8464a8e0bf134a183b8fa026439 Mon Sep 17 00:00:00 2001 From: Geir Ola Vaagland Date: Sat, 12 Jul 2014 20:30:39 +0200 Subject: net: sctp: implement rfc6458, 8.1.31. SCTP_DEFAULT_SNDINFO support This patch implements section 8.1.31. of RFC6458, which adds support for setting/retrieving SCTP_DEFAULT_SNDINFO: Applications that wish to use the sendto() system call may wish to specify a default set of parameters that would normally be supplied through the inclusion of ancillary data. This socket option allows such an application to set the default sctp_sndinfo structure. The application that wishes to use this socket option simply passes the sctp_sndinfo structure (defined in Section 5.3.4) to this call. The input parameters accepted by this call include snd_sid, snd_flags, snd_ppid, and snd_context. The snd_flags parameter is composed of a bitwise OR of SCTP_UNORDERED, SCTP_EOF, and SCTP_SENDALL. The snd_assoc_id field specifies the association to which to apply the parameters. For a one-to-many style socket, any of the predefined constants are also allowed in this field. The field is ignored for one-to-one style sockets. Joint work with Daniel Borkmann. Signed-off-by: Geir Ola Vaagland Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 1 + net/sctp/socket.c | 107 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 99 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 222f82ffeca4..ce70fe6b45df 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -97,6 +97,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_PEER_ADDR_THLDS 31 #define SCTP_RECVRCVINFO 32 #define SCTP_RECVNXTINFO 33 +#define SCTP_DEFAULT_SNDINFO 34 /* Internal Socket Options. Some of the sctp library functions are * implemented using these socket options. diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9bca87ee5152..d95a50c013c9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2770,19 +2770,22 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, char __user *optval, unsigned int optlen) { - struct sctp_sndrcvinfo info; - struct sctp_association *asoc; struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_sndrcvinfo info; - if (optlen != sizeof(struct sctp_sndrcvinfo)) + if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; + if (info.sinfo_flags & + ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_ABORT | SCTP_EOF)) + return -EINVAL; asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - if (asoc) { asoc->default_stream = info.sinfo_stream; asoc->default_flags = info.sinfo_flags; @@ -2800,6 +2803,44 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, return 0; } +/* RFC6458, Section 8.1.31. Set/get Default Send Parameters + * (SCTP_DEFAULT_SNDINFO) + */ +static int sctp_setsockopt_default_sndinfo(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_sndinfo info; + + if (optlen != sizeof(info)) + return -EINVAL; + if (copy_from_user(&info, optval, optlen)) + return -EFAULT; + if (info.snd_flags & + ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_ABORT | SCTP_EOF)) + return -EINVAL; + + asoc = sctp_id2assoc(sk, info.snd_assoc_id); + if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + if (asoc) { + asoc->default_stream = info.snd_sid; + asoc->default_flags = info.snd_flags; + asoc->default_ppid = info.snd_ppid; + asoc->default_context = info.snd_context; + } else { + sp->default_stream = info.snd_sid; + sp->default_flags = info.snd_flags; + sp->default_ppid = info.snd_ppid; + sp->default_context = info.snd_context; + } + + return 0; +} + /* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR) * * Requests that the local SCTP stack use the enclosed peer address as @@ -3725,6 +3766,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_default_send_param(sk, optval, optlen); break; + case SCTP_DEFAULT_SNDINFO: + retval = sctp_setsockopt_default_sndinfo(sk, optval, optlen); + break; case SCTP_PRIMARY_ADDR: retval = sctp_setsockopt_primary_addr(sk, optval, optlen); break; @@ -5027,14 +5071,14 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct sctp_sndrcvinfo info; - struct sctp_association *asoc; struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_sndrcvinfo info; - if (len < sizeof(struct sctp_sndrcvinfo)) + if (len < sizeof(info)) return -EINVAL; - len = sizeof(struct sctp_sndrcvinfo); + len = sizeof(info); if (copy_from_user(&info, optval, len)) return -EFAULT; @@ -5042,7 +5086,6 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - if (asoc) { info.sinfo_stream = asoc->default_stream; info.sinfo_flags = asoc->default_flags; @@ -5065,6 +5108,48 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, return 0; } +/* RFC6458, Section 8.1.31. Set/get Default Send Parameters + * (SCTP_DEFAULT_SNDINFO) + */ +static int sctp_getsockopt_default_sndinfo(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_sndinfo info; + + if (len < sizeof(info)) + return -EINVAL; + + len = sizeof(info); + + if (copy_from_user(&info, optval, len)) + return -EFAULT; + + asoc = sctp_id2assoc(sk, info.snd_assoc_id); + if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP)) + return -EINVAL; + if (asoc) { + info.snd_sid = asoc->default_stream; + info.snd_flags = asoc->default_flags; + info.snd_ppid = asoc->default_ppid; + info.snd_context = asoc->default_context; + } else { + info.snd_sid = sp->default_stream; + info.snd_flags = sp->default_flags; + info.snd_ppid = sp->default_ppid; + info.snd_context = sp->default_context; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &info, len)) + return -EFAULT; + + return 0; +} + /* * * 7.1.5 SCTP_NODELAY @@ -5924,6 +6009,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_default_send_param(sk, len, optval, optlen); break; + case SCTP_DEFAULT_SNDINFO: + retval = sctp_getsockopt_default_sndinfo(sk, len, + optval, optlen); + break; case SCTP_PRIMARY_ADDR: retval = sctp_getsockopt_primary_addr(sk, len, optval, optlen); break; -- cgit v1.2.3 From 7e6a68210784dcea8e39fd9d4c9966f9c733ba09 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 27 Mar 2014 11:25:45 -0300 Subject: [media] videodev2.h: add initial support for compound controls Compound controls are controls that can be used for compound and array types. This allows for more compound data structures to be used with the control framework. The existing V4L2_CTRL_FLAG_NEXT_CTRL flag will only enumerate non-compound controls, so a new V4L2_CTRL_FLAG_NEXT_COMPOUND flag is added to enumerate compound controls. Set both flags to enumerate any control (compound or not). Compound control types will start at V4L2_CTRL_COMPOUND_TYPES. In addition, any control that uses the new 'ptr' field or the existing 'string' field will have flag V4L2_CTRL_FLAG_HAS_PAYLOAD set. While not strictly necessary, adding that flag makes life for applications a lot simpler. If the flag is not set, then the control value is set through the value or value64 fields of struct v4l2_ext_control, otherwise a pointer points to the value. Signed-off-by: Hans Verkuil Reviewed-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 168ff507bf75..438c4a65c5b2 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1254,6 +1254,7 @@ struct v4l2_ext_control { __s32 value; __s64 value64; char *string; + void *ptr; }; } __attribute__ ((packed)); @@ -1278,7 +1279,10 @@ enum v4l2_ctrl_type { V4L2_CTRL_TYPE_CTRL_CLASS = 6, V4L2_CTRL_TYPE_STRING = 7, V4L2_CTRL_TYPE_BITMASK = 8, - V4L2_CTRL_TYPE_INTEGER_MENU = 9, + V4L2_CTRL_TYPE_INTEGER_MENU = 9, + + /* Compound types are >= 0x0100 */ + V4L2_CTRL_COMPOUND_TYPES = 0x0100, }; /* Used in the VIDIOC_QUERYCTRL ioctl for querying controls */ @@ -1314,9 +1318,11 @@ struct v4l2_querymenu { #define V4L2_CTRL_FLAG_SLIDER 0x0020 #define V4L2_CTRL_FLAG_WRITE_ONLY 0x0040 #define V4L2_CTRL_FLAG_VOLATILE 0x0080 +#define V4L2_CTRL_FLAG_HAS_PAYLOAD 0x0100 -/* Query flag, to be ORed with the control ID */ +/* Query flags, to be ORed with the control ID */ #define V4L2_CTRL_FLAG_NEXT_CTRL 0x80000000 +#define V4L2_CTRL_FLAG_NEXT_COMPOUND 0x40000000 /* User-class control IDs defined by V4L2 */ #define V4L2_CID_MAX_CTRLS 1024 -- cgit v1.2.3 From 5082c2417841e64df975789011e182ce99a9dacd Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 10 Jun 2014 04:14:50 -0300 Subject: [media] videodev2.h: add struct v4l2_query_ext_ctrl and VIDIOC_QUERY_EXT_CTRL Add a new struct and ioctl to extend the amount of information you can get for a control. The range is now a s64 type, and array dimensions and element size can be reported through nr_of_dims/dims/elems/elem_size. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 438c4a65c5b2..0297980fe491 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1269,6 +1269,7 @@ struct v4l2_ext_controls { #define V4L2_CTRL_ID_MASK (0x0fffffff) #define V4L2_CTRL_ID2CLASS(id) ((id) & 0x0fff0000UL) #define V4L2_CTRL_DRIVER_PRIV(id) (((id) & 0xffff) >= 0x1000) +#define V4L2_CTRL_MAX_DIMS (4) enum v4l2_ctrl_type { V4L2_CTRL_TYPE_INTEGER = 1, @@ -1298,6 +1299,23 @@ struct v4l2_queryctrl { __u32 reserved[2]; }; +/* Used in the VIDIOC_QUERY_EXT_CTRL ioctl for querying extended controls */ +struct v4l2_query_ext_ctrl { + __u32 id; + __u32 type; + char name[32]; + __s64 minimum; + __s64 maximum; + __u64 step; + __s64 default_value; + __u32 flags; + __u32 elem_size; + __u32 elems; + __u32 nr_of_dims; + __u32 dims[V4L2_CTRL_MAX_DIMS]; + __u32 reserved[32]; +}; + /* Used in the VIDIOC_QUERYMENU ioctl for querying menu items */ struct v4l2_querymenu { __u32 id; @@ -2011,6 +2029,8 @@ struct v4l2_create_buffers { Never use these in applications! */ #define VIDIOC_DBG_G_CHIP_INFO _IOWR('V', 102, struct v4l2_dbg_chip_info) +#define VIDIOC_QUERY_EXT_CTRL _IOWR('V', 103, struct v4l2_query_ext_ctrl) + /* Reminder: when adding new ioctls please add support for them to drivers/media/video/v4l2-compat-ioctl32.c as well! */ -- cgit v1.2.3 From dda4a4d5ea245591b788b70116fb52b0d145fb33 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 10 Jun 2014 07:30:04 -0300 Subject: [media] v4l2-ctrls/videodev2.h: add u8 and u16 types These are needed by the upcoming patches for the motion detection matrices. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-ctrls.c | 45 ++++++++++++++++++++++++++++++++---- include/media/v4l2-ctrls.h | 4 ++++ include/uapi/linux/videodev2.h | 4 ++++ 3 files changed, 49 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index 1086ae3f9152..adf5485e56a8 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -1174,6 +1174,10 @@ static bool std_equal(const struct v4l2_ctrl *ctrl, u32 idx, return !strcmp(ptr1.p_char + idx, ptr2.p_char + idx); case V4L2_CTRL_TYPE_INTEGER64: return ptr1.p_s64[idx] == ptr2.p_s64[idx]; + case V4L2_CTRL_TYPE_U8: + return ptr1.p_u8[idx] == ptr2.p_u8[idx]; + case V4L2_CTRL_TYPE_U16: + return ptr1.p_u16[idx] == ptr2.p_u16[idx]; default: if (ctrl->is_int) return ptr1.p_s32[idx] == ptr2.p_s32[idx]; @@ -1201,6 +1205,12 @@ static void std_init(const struct v4l2_ctrl *ctrl, u32 idx, case V4L2_CTRL_TYPE_BOOLEAN: ptr.p_s32[idx] = ctrl->default_value; break; + case V4L2_CTRL_TYPE_U8: + ptr.p_u8[idx] = ctrl->default_value; + break; + case V4L2_CTRL_TYPE_U16: + ptr.p_u16[idx] = ctrl->default_value; + break; default: idx *= ctrl->elem_size; memset(ptr.p + idx, 0, ctrl->elem_size); @@ -1242,6 +1252,12 @@ static void std_log(const struct v4l2_ctrl *ctrl) case V4L2_CTRL_TYPE_STRING: pr_cont("%s", ptr.p_char); break; + case V4L2_CTRL_TYPE_U8: + pr_cont("%u", (unsigned)*ptr.p_u8); + break; + case V4L2_CTRL_TYPE_U16: + pr_cont("%u", (unsigned)*ptr.p_u16); + break; default: pr_cont("unknown type %d", ctrl->type); break; @@ -1272,6 +1288,10 @@ static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx, return ROUND_TO_RANGE(ptr.p_s32[idx], u32, ctrl); case V4L2_CTRL_TYPE_INTEGER64: return ROUND_TO_RANGE(ptr.p_s64[idx], u64, ctrl); + case V4L2_CTRL_TYPE_U8: + return ROUND_TO_RANGE(ptr.p_u8[idx], u8, ctrl); + case V4L2_CTRL_TYPE_U16: + return ROUND_TO_RANGE(ptr.p_u16[idx], u16, ctrl); case V4L2_CTRL_TYPE_BOOLEAN: ptr.p_s32[idx] = !!ptr.p_s32[idx]; @@ -1502,6 +1522,8 @@ static int check_range(enum v4l2_ctrl_type type, if (step != 1 || max > 1 || min < 0) return -ERANGE; /* fall through */ + case V4L2_CTRL_TYPE_U8: + case V4L2_CTRL_TYPE_U16: case V4L2_CTRL_TYPE_INTEGER: case V4L2_CTRL_TYPE_INTEGER64: if (step == 0 || min > max || def < min || def > max) @@ -1803,12 +1825,25 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl, } is_array = nr_of_dims > 0; - if (type == V4L2_CTRL_TYPE_INTEGER64) + /* Prefill elem_size for all types handled by std_type_ops */ + switch (type) { + case V4L2_CTRL_TYPE_INTEGER64: elem_size = sizeof(s64); - else if (type == V4L2_CTRL_TYPE_STRING) + break; + case V4L2_CTRL_TYPE_STRING: elem_size = max + 1; - else if (type < V4L2_CTRL_COMPOUND_TYPES) - elem_size = sizeof(s32); + break; + case V4L2_CTRL_TYPE_U8: + elem_size = sizeof(u8); + break; + case V4L2_CTRL_TYPE_U16: + elem_size = sizeof(u16); + break; + default: + if (type < V4L2_CTRL_COMPOUND_TYPES) + elem_size = sizeof(s32); + break; + } tot_ctrl_size = elem_size * elems; /* Sanity checks */ @@ -3148,6 +3183,8 @@ int v4l2_ctrl_modify_range(struct v4l2_ctrl *ctrl, case V4L2_CTRL_TYPE_MENU: case V4L2_CTRL_TYPE_INTEGER_MENU: case V4L2_CTRL_TYPE_BITMASK: + case V4L2_CTRL_TYPE_U8: + case V4L2_CTRL_TYPE_U16: if (ctrl->is_array) return -EINVAL; ret = check_range(ctrl->type, min, max, step, def); diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index 7915b1125bb5..c630345e9fd2 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -39,12 +39,16 @@ struct poll_table_struct; /** union v4l2_ctrl_ptr - A pointer to a control value. * @p_s32: Pointer to a 32-bit signed value. * @p_s64: Pointer to a 64-bit signed value. + * @p_u8: Pointer to a 8-bit unsigned value. + * @p_u16: Pointer to a 16-bit unsigned value. * @p_char: Pointer to a string. * @p: Pointer to a compound value. */ union v4l2_ctrl_ptr { s32 *p_s32; s64 *p_s64; + u8 *p_u8; + u16 *p_u16; char *p_char; void *p; }; diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 0297980fe491..632de96b42d5 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1254,6 +1254,8 @@ struct v4l2_ext_control { __s32 value; __s64 value64; char *string; + __u8 *p_u8; + __u16 *p_u16; void *ptr; }; } __attribute__ ((packed)); @@ -1284,6 +1286,8 @@ enum v4l2_ctrl_type { /* Compound types are >= 0x0100 */ V4L2_CTRL_COMPOUND_TYPES = 0x0100, + V4L2_CTRL_TYPE_U8 = 0x0100, + V4L2_CTRL_TYPE_U16 = 0x0101, }; /* Used in the VIDIOC_QUERYCTRL ioctl for querying controls */ -- cgit v1.2.3 From a77b4fc0bc328b0989f83220aba1c268e087107f Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 28 Mar 2014 13:00:08 -0300 Subject: [media] v4l2-ctrls/v4l2-controls.h: add MD controls Add the 'Detect' control class and the new motion detection controls. Those controls will be used by the solo6x10 and go7007 drivers. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-ctrls.c | 27 +++++++++++++++++++++++++++ include/uapi/linux/v4l2-controls.h | 17 +++++++++++++++++ 2 files changed, 44 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index 5aaf15e047c8..5c3b8de82e35 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -462,6 +462,13 @@ const char * const *v4l2_ctrl_get_menu(u32 id) "RGB full range (0-255)", NULL, }; + static const char * const detect_md_mode[] = { + "Disabled", + "Global", + "Threshold Grid", + "Region Grid", + NULL, + }; switch (id) { @@ -553,6 +560,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id) case V4L2_CID_DV_TX_RGB_RANGE: case V4L2_CID_DV_RX_RGB_RANGE: return dv_rgb_range; + case V4L2_CID_DETECT_MD_MODE: + return detect_md_mode; default: return NULL; @@ -874,6 +883,15 @@ const char *v4l2_ctrl_get_name(u32 id) case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO: return "Bandwidth, Auto"; case V4L2_CID_RF_TUNER_BANDWIDTH: return "Bandwidth"; case V4L2_CID_RF_TUNER_PLL_LOCK: return "PLL Lock"; + + /* Detection controls */ + /* Keep the order of the 'case's the same as in v4l2-controls.h! */ + case V4L2_CID_DETECT_CLASS: return "Detection Controls"; + case V4L2_CID_DETECT_MD_MODE: return "Motion Detection Mode"; + case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD: return "MD Global Threshold"; + case V4L2_CID_DETECT_MD_THRESHOLD_GRID: return "MD Threshold Grid"; + case V4L2_CID_DETECT_MD_REGION_GRID: return "MD Region Grid"; + default: return NULL; } @@ -992,6 +1010,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, case V4L2_CID_TEST_PATTERN: case V4L2_CID_TUNE_DEEMPHASIS: case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL: + case V4L2_CID_DETECT_MD_MODE: *type = V4L2_CTRL_TYPE_MENU; break; case V4L2_CID_LINK_FREQ: @@ -1018,6 +1037,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, case V4L2_CID_DV_CLASS: case V4L2_CID_FM_RX_CLASS: case V4L2_CID_RF_TUNER_CLASS: + case V4L2_CID_DETECT_CLASS: *type = V4L2_CTRL_TYPE_CTRL_CLASS; /* You can neither read not write these */ *flags |= V4L2_CTRL_FLAG_READ_ONLY | V4L2_CTRL_FLAG_WRITE_ONLY; @@ -1063,6 +1083,12 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, *type = V4L2_CTRL_TYPE_INTEGER64; *flags |= V4L2_CTRL_FLAG_READ_ONLY; break; + case V4L2_CID_DETECT_MD_REGION_GRID: + *type = V4L2_CTRL_TYPE_U8; + break; + case V4L2_CID_DETECT_MD_THRESHOLD_GRID: + *type = V4L2_CTRL_TYPE_U16; + break; default: *type = V4L2_CTRL_TYPE_INTEGER; break; @@ -1103,6 +1129,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, case V4L2_CID_RF_TUNER_MIXER_GAIN: case V4L2_CID_RF_TUNER_IF_GAIN: case V4L2_CID_RF_TUNER_BANDWIDTH: + case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD: *flags |= V4L2_CTRL_FLAG_SLIDER; break; case V4L2_CID_PAN_RELATIVE: diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 2ac5597f3ee1..db526d1c8309 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -61,6 +61,7 @@ #define V4L2_CTRL_CLASS_DV 0x00a00000 /* Digital Video controls */ #define V4L2_CTRL_CLASS_FM_RX 0x00a10000 /* FM Receiver controls */ #define V4L2_CTRL_CLASS_RF_TUNER 0x00a20000 /* RF tuner controls */ +#define V4L2_CTRL_CLASS_DETECT 0x00a30000 /* Detection controls */ /* User-class control IDs */ @@ -914,4 +915,20 @@ enum v4l2_deemphasis { #define V4L2_CID_RF_TUNER_IF_GAIN (V4L2_CID_RF_TUNER_CLASS_BASE + 62) #define V4L2_CID_RF_TUNER_PLL_LOCK (V4L2_CID_RF_TUNER_CLASS_BASE + 91) + +/* Detection-class control IDs defined by V4L2 */ +#define V4L2_CID_DETECT_CLASS_BASE (V4L2_CTRL_CLASS_DETECT | 0x900) +#define V4L2_CID_DETECT_CLASS (V4L2_CTRL_CLASS_DETECT | 1) + +#define V4L2_CID_DETECT_MD_MODE (V4L2_CID_DETECT_CLASS_BASE + 1) +enum v4l2_detect_md_mode { + V4L2_DETECT_MD_MODE_DISABLED = 0, + V4L2_DETECT_MD_MODE_GLOBAL = 1, + V4L2_DETECT_MD_MODE_THRESHOLD_GRID = 2, + V4L2_DETECT_MD_MODE_REGION_GRID = 3, +}; +#define V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD (V4L2_CID_DETECT_CLASS_BASE + 2) +#define V4L2_CID_DETECT_MD_THRESHOLD_GRID (V4L2_CID_DETECT_CLASS_BASE + 3) +#define V4L2_CID_DETECT_MD_REGION_GRID (V4L2_CID_DETECT_CLASS_BASE + 4) + #endif -- cgit v1.2.3 From 78ea611385f08da6e52ff2ea15f0a26e89acb3ab Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 27 Jan 2014 09:26:23 -0300 Subject: [media] v4l2: add a motion detection event Add a new MOTION_DET event to signal when motion is detected. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 632de96b42d5..1477abebd35b 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1799,6 +1799,7 @@ struct v4l2_streamparm { #define V4L2_EVENT_CTRL 3 #define V4L2_EVENT_FRAME_SYNC 4 #define V4L2_EVENT_SOURCE_CHANGE 5 +#define V4L2_EVENT_MOTION_DET 6 #define V4L2_EVENT_PRIVATE_START 0x08000000 /* Payload for V4L2_EVENT_VSYNC */ @@ -1836,6 +1837,21 @@ struct v4l2_event_src_change { __u32 changes; }; +#define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ (1 << 0) + +/** + * struct v4l2_event_motion_det - motion detection event + * @flags: if V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ is set, then the + * frame_sequence field is valid. + * @frame_sequence: the frame sequence number associated with this event. + * @region_mask: which regions detected motion. + */ +struct v4l2_event_motion_det { + __u32 flags; + __u32 frame_sequence; + __u32 region_mask; +}; + struct v4l2_event { __u32 type; union { @@ -1843,6 +1859,7 @@ struct v4l2_event { struct v4l2_event_ctrl ctrl; struct v4l2_event_frame_sync frame_sync; struct v4l2_event_src_change src_change; + struct v4l2_event_motion_det motion_det; __u8 data[64]; } u; __u32 pending; -- cgit v1.2.3 From 977ff0e4fb3460df9f3dd27de92d60786be28645 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 21 May 2014 22:07:55 -0300 Subject: [media] v4l: Add ARGB and XRGB pixel formats The existing RGB pixel formats are ill-defined in respect to their alpha bits and their meaning is driver dependent. Create new standard ARGB and XRGB variants with clearly defined meanings and make the existing variants deprecated. The new pixel formats 4CC values have been selected to match the DRM 4CCs for the same in-memory formats. Signed-off-by: Laurent Pinchart Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../DocBook/media/v4l/pixfmt-packed-rgb.xml | 415 ++++++++++++++++++++- include/uapi/linux/videodev2.h | 8 + 2 files changed, 403 insertions(+), 20 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml b/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml index e1c4f8b4c0b3..5f1602fe5494 100644 --- a/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml +++ b/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml @@ -130,9 +130,9 @@ colorspace V4L2_COLORSPACE_SRGB. b1 b0 - - V4L2_PIX_FMT_RGB444 - 'R444' + + V4L2_PIX_FMT_ARGB444 + 'AR12' g3 g2 @@ -152,9 +152,31 @@ colorspace V4L2_COLORSPACE_SRGB. r1 r0 - - V4L2_PIX_FMT_RGB555 - 'RGBO' + + V4L2_PIX_FMT_XRGB444 + 'XR12' + + g3 + g2 + g1 + g0 + b3 + b2 + b1 + b0 + + - + - + - + - + r3 + r2 + r1 + r0 + + + V4L2_PIX_FMT_ARGB555 + 'AR15' g2 g1 @@ -174,6 +196,28 @@ colorspace V4L2_COLORSPACE_SRGB. g4 g3 + + V4L2_PIX_FMT_XRGB555 + 'XR15' + + g2 + g1 + g0 + b4 + b3 + b2 + b1 + b0 + + - + r4 + r3 + r2 + r1 + r0 + g4 + g3 + V4L2_PIX_FMT_RGB565 'RGBP' @@ -341,9 +385,9 @@ colorspace V4L2_COLORSPACE_SRGB. b1 b0 - - V4L2_PIX_FMT_BGR32 - 'BGR4' + + V4L2_PIX_FMT_ABGR32 + 'AR24' b7 b6 @@ -381,9 +425,49 @@ colorspace V4L2_COLORSPACE_SRGB. a1 a0 - - V4L2_PIX_FMT_RGB32 - 'RGB4' + + V4L2_PIX_FMT_XBGR32 + 'XR24' + + b7 + b6 + b5 + b4 + b3 + b2 + b1 + b0 + + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + + - + - + - + - + - + - + - + - + + + V4L2_PIX_FMT_ARGB32 + 'AX24' a7 a6 @@ -421,18 +505,76 @@ colorspace V4L2_COLORSPACE_SRGB. b1 b0 + + V4L2_PIX_FMT_XRGB32 + 'BX24' + + - + - + - + - + - + - + - + - + + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + + b7 + b6 + b5 + b4 + b3 + b2 + b1 + b0 + - Bit 7 is the most significant bit. The value of the a = alpha -bits is undefined when reading from the driver, ignored when writing -to the driver, except when alpha blending has been negotiated for a -Video Overlay or -Video Output Overlay or when the alpha component has been configured -for a Video Capture by means of V4L2_CID_ALPHA_COMPONENT - control. + Bit 7 is the most significant bit. + + The usage and value of the alpha bits (a) in the ARGB and ABGR formats + (collectively referred to as alpha formats) depend on the device type and + hardware operation. Capture devices + (including capture queues of mem-to-mem devices) fill the alpha component in + memory. When the device outputs an alpha channel the alpha component will + have a meaningful value. Otherwise, when the device doesn't output an alpha + channel but can set the alpha bit to a user-configurable value, the V4L2_CID_ALPHA_COMPONENT + control is used to specify that alpha value, and the alpha component + of all pixels will be set to the value specified by that control. Otherwise + a corresponding format without an alpha component (XRGB or XBGR) must be + used instead of an alpha format. + + Output devices (including output queues + of mem-to-mem devices and video output overlay + devices) read the alpha component from memory. When the device processes the + alpha channel the alpha component must be filled with meaningful values by + applications. Otherwise a corresponding format without an alpha component + (XRGB or XBGR) must be used instead of an alpha format. + + The XRGB and XBGR formats contain undefined bits (-). Applications, + devices and drivers must ignore those bits, for both capture and output + devices. <constant>V4L2_PIX_FMT_BGR24</constant> 4 × 4 pixel @@ -512,6 +654,239 @@ image + Formats defined in are + deprecated and must not be used by new drivers. They are documented here for + reference. The meaning of their alpha bits (a) is ill-defined and + interpreted as in either the corresponding ARGB or XRGB format, depending on + the driver. + + + Deprecated Packed RGB Image Formats + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Identifier + Code +   + Byte 0 in memory + Byte 1 + Byte 2 + Byte 3 + + +   +   + Bit + 7 + 6 + 5 + 4 + 3 + 2 + 1 + 0 +   + 7 + 6 + 5 + 4 + 3 + 2 + 1 + 0 +   + 7 + 6 + 5 + 4 + 3 + 2 + 1 + 0 +   + 7 + 6 + 5 + 4 + 3 + 2 + 1 + 0 + + + + + V4L2_PIX_FMT_RGB444 + 'R444' + + g3 + g2 + g1 + g0 + b3 + b2 + b1 + b0 + + a3 + a2 + a1 + a0 + r3 + r2 + r1 + r0 + + + V4L2_PIX_FMT_RGB555 + 'RGBO' + + g2 + g1 + g0 + b4 + b3 + b2 + b1 + b0 + + a + r4 + r3 + r2 + r1 + r0 + g4 + g3 + + + V4L2_PIX_FMT_BGR32 + 'BGR4' + + b7 + b6 + b5 + b4 + b3 + b2 + b1 + b0 + + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + + a7 + a6 + a5 + a4 + a3 + a2 + a1 + a0 + + + V4L2_PIX_FMT_RGB32 + 'RGB4' + + a7 + a6 + a5 + a4 + a3 + a2 + a1 + a0 + + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + + b7 + b6 + b5 + b4 + b3 + b2 + b1 + b0 + + + +
+ A test utility to determine which RGB formats a driver actually supports is available from the LinuxTV v4l-dvb repository. See &v4l-dvb; for access instructions. diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 1477abebd35b..a498d8b58679 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -294,7 +294,11 @@ struct v4l2_pix_format { /* RGB formats */ #define V4L2_PIX_FMT_RGB332 v4l2_fourcc('R', 'G', 'B', '1') /* 8 RGB-3-3-2 */ #define V4L2_PIX_FMT_RGB444 v4l2_fourcc('R', '4', '4', '4') /* 16 xxxxrrrr ggggbbbb */ +#define V4L2_PIX_FMT_ARGB444 v4l2_fourcc('A', 'R', '1', '2') /* 16 aaaarrrr ggggbbbb */ +#define V4L2_PIX_FMT_XRGB444 v4l2_fourcc('X', 'R', '1', '2') /* 16 xxxxrrrr ggggbbbb */ #define V4L2_PIX_FMT_RGB555 v4l2_fourcc('R', 'G', 'B', 'O') /* 16 RGB-5-5-5 */ +#define V4L2_PIX_FMT_ARGB555 v4l2_fourcc('A', 'R', '1', '5') /* 16 ARGB-1-5-5-5 */ +#define V4L2_PIX_FMT_XRGB555 v4l2_fourcc('X', 'R', '1', '5') /* 16 XRGB-1-5-5-5 */ #define V4L2_PIX_FMT_RGB565 v4l2_fourcc('R', 'G', 'B', 'P') /* 16 RGB-5-6-5 */ #define V4L2_PIX_FMT_RGB555X v4l2_fourcc('R', 'G', 'B', 'Q') /* 16 RGB-5-5-5 BE */ #define V4L2_PIX_FMT_RGB565X v4l2_fourcc('R', 'G', 'B', 'R') /* 16 RGB-5-6-5 BE */ @@ -302,7 +306,11 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_BGR24 v4l2_fourcc('B', 'G', 'R', '3') /* 24 BGR-8-8-8 */ #define V4L2_PIX_FMT_RGB24 v4l2_fourcc('R', 'G', 'B', '3') /* 24 RGB-8-8-8 */ #define V4L2_PIX_FMT_BGR32 v4l2_fourcc('B', 'G', 'R', '4') /* 32 BGR-8-8-8-8 */ +#define V4L2_PIX_FMT_ABGR32 v4l2_fourcc('A', 'R', '2', '4') /* 32 BGRA-8-8-8-8 */ +#define V4L2_PIX_FMT_XBGR32 v4l2_fourcc('X', 'R', '2', '4') /* 32 BGRX-8-8-8-8 */ #define V4L2_PIX_FMT_RGB32 v4l2_fourcc('R', 'G', 'B', '4') /* 32 RGB-8-8-8-8 */ +#define V4L2_PIX_FMT_ARGB32 v4l2_fourcc('B', 'A', '2', '4') /* 32 ARGB-8-8-8-8 */ +#define V4L2_PIX_FMT_XRGB32 v4l2_fourcc('B', 'X', '2', '4') /* 32 XRGB-8-8-8-8 */ /* Grey formats */ #define V4L2_PIX_FMT_GREY v4l2_fourcc('G', 'R', 'E', 'Y') /* 8 Greyscale */ -- cgit v1.2.3 From d52e23813672c3c72f92e7b39c7408d4b9a40a96 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 27 May 2014 09:41:05 -0300 Subject: [media] v4l: Support extending the v4l2_pix_format structure The v4l2_pix_format structure has no reserved field. It is embedded in the v4l2_framebuffer structure which has no reserved fields either, and in the v4l2_format structure which has reserved fields that were not previously required to be zeroed out by applications. To allow extending v4l2_pix_format, inline it in the v4l2_framebuffer structure, and use the priv field as a magic value to indicate that the application has set all v4l2_pix_format extended fields and zeroed all reserved fields following the v4l2_pix_format field in the v4l2_format structure. The availability of this API extension is reported to userspace through the new V4L2_CAP_EXT_PIX_FORMAT capability flag. Just checking that the priv field is still set to the magic value at [GS]_FMT return wouldn't be enough, as older kernels don't zero the priv field on return. To simplify the internal API towards drivers zero the extended fields and set the priv field to the magic value for applications not aware of the extensions. Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/Makefile | 2 +- Documentation/DocBook/media/v4l/pixfmt.xml | 25 ++++++++- Documentation/DocBook/media/v4l/v4l2.xml | 8 +++ Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml | 12 ++-- .../DocBook/media/v4l/vidioc-querycap.xml | 6 ++ drivers/media/parport/bw-qcam.c | 2 - drivers/media/pci/cx18/cx18-ioctl.c | 1 - drivers/media/pci/cx25821/cx25821-video.c | 3 - drivers/media/pci/ivtv/ivtv-ioctl.c | 3 - drivers/media/pci/meye/meye.c | 2 - drivers/media/pci/saa7134/saa7134-empress.c | 3 - drivers/media/pci/saa7134/saa7134-video.c | 2 - drivers/media/pci/sta2x11/sta2x11_vip.c | 1 - drivers/media/platform/coda.c | 2 - drivers/media/platform/davinci/vpif_display.c | 1 - drivers/media/platform/mem2mem_testdev.c | 1 - drivers/media/platform/omap/omap_vout.c | 2 - drivers/media/platform/sh_veu.c | 2 - drivers/media/platform/vino.c | 5 -- drivers/media/platform/vivi.c | 1 - drivers/media/usb/cx231xx/cx231xx-417.c | 2 - drivers/media/usb/cx231xx/cx231xx-video.c | 2 - drivers/media/usb/gspca/gspca.c | 8 +-- drivers/media/usb/hdpvr/hdpvr-video.c | 1 - drivers/media/usb/stkwebcam/stk-webcam.c | 2 - drivers/media/usb/tlg2300/pd-video.c | 1 - drivers/media/usb/tm6000/tm6000-video.c | 2 - drivers/media/usb/zr364xx/zr364xx.c | 3 - drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 19 +++++-- drivers/media/v4l2-core/v4l2-ioctl.c | 65 ++++++++++++++++++++-- include/uapi/linux/videodev2.h | 15 ++++- 31 files changed, 134 insertions(+), 70 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile index 639e74857968..df2962d9e11e 100644 --- a/Documentation/DocBook/media/Makefile +++ b/Documentation/DocBook/media/Makefile @@ -174,7 +174,7 @@ FILENAME = \ DOCUMENTED = \ -e "s/\(enum *\)v4l2_mpeg_cx2341x_video_\([a-z]*_spatial_filter_type\)/\1v4l2_mpeg_cx2341x_video_\2<\/link>/g" \ -e "s/\(\(enum\|struct\) *\)\(v4l2_[a-zA-Z0-9_]*\)/\1\3<\/link>/g" \ - -e "s/\(V4L2_PIX_FMT_[A-Z0-9_]\+\) /\1<\/link> /g" \ + -e "s/\(V4L2_PIX_FMT_[A-Z0-9_]\+\)\(\s\+v4l2_fourcc\)/\1<\/link>\2/g" \ -e ":a;s/\(linkend=\".*\)_\(.*\">\)/\1-\2/;ta" \ -e "s/v4l2\-mpeg\-vbi\-ITV0/v4l2-mpeg-vbi-itv0-1/g" diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml index 91dcbc84f3f8..bb36b3829cf9 100644 --- a/Documentation/DocBook/media/v4l/pixfmt.xml +++ b/Documentation/DocBook/media/v4l/pixfmt.xml @@ -112,9 +112,28 @@ see . __u32 priv - Reserved for custom (driver defined) additional -information about formats. When not used drivers and applications must -set this field to zero. + This field indicates whether the remaining fields of the +v4l2_pix_format structure, also called the extended +fields, are valid. When set to V4L2_PIX_FMT_PRIV_MAGIC, it +indicates that the extended fields have been correctly initialized. When set to +any other value it indicates that the extended fields contain undefined values. + +Applications that wish to use the pixel format extended fields must first +ensure that the feature is supported by querying the device for the +V4L2_CAP_EXT_PIX_FORMAT +capability. If the capability isn't set the pixel format extended fields are not +supported and using the extended fields will lead to undefined results. +To use the extended fields, applications must set the +priv field to +V4L2_PIX_FMT_PRIV_MAGIC, initialize all the extended fields +and zero the unused bytes of the v4l2_format +raw_data field. +When the priv field isn't set to +V4L2_PIX_FMT_PRIV_MAGIC drivers must act as if all the +extended fields were set to zero. On return drivers must set the +priv field to +V4L2_PIX_FMT_PRIV_MAGIC and all the extended fields to +applicable values. diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml index b445161b912c..d0a48bebfa52 100644 --- a/Documentation/DocBook/media/v4l/v4l2.xml +++ b/Documentation/DocBook/media/v4l/v4l2.xml @@ -151,6 +151,14 @@ structs, ioctls) must be noted in more detail in the history chapter (compat.xml), along with the possible impact on existing drivers and applications. --> + + 3.16 + 2014-05-27 + lp + Extended &v4l2-pix-format;. + + + 3.15 2014-02-03 diff --git a/Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml b/Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml index 7c63815e7afd..20460730b02c 100644 --- a/Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml +++ b/Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml @@ -152,13 +152,10 @@ a valid base address, so applications can find the corresponding Linux framebuffer device (see ).
- &v4l2-pix-format; + struct fmt - Layout of the frame buffer. The -v4l2_pix_format structure is defined in , for clarification the fields and acceptable values - are listed below: + Layout of the frame buffer. @@ -276,9 +273,8 @@ see . __u32 priv - Reserved for additional information about custom -(driver defined) formats. When not used drivers and applications must -set this field to zero. + Reserved. Drivers and applications must set this field to +zero. diff --git a/Documentation/DocBook/media/v4l/vidioc-querycap.xml b/Documentation/DocBook/media/v4l/vidioc-querycap.xml index 370d49d6fb64..d0c5e604f014 100644 --- a/Documentation/DocBook/media/v4l/vidioc-querycap.xml +++ b/Documentation/DocBook/media/v4l/vidioc-querycap.xml @@ -300,6 +300,12 @@ modulator programming see 0x00100000 The device supports the SDR Capture interface. +
+ + V4L2_CAP_EXT_PIX_FORMAT + 0x00200000 + The device supports the &v4l2-pix-format; extended +fields. V4L2_CAP_READWRITE diff --git a/drivers/media/parport/bw-qcam.c b/drivers/media/parport/bw-qcam.c index 3c5dac4c1eab..67b9da1dc43f 100644 --- a/drivers/media/parport/bw-qcam.c +++ b/drivers/media/parport/bw-qcam.c @@ -759,7 +759,6 @@ static int qcam_g_fmt_vid_cap(struct file *file, void *fh, struct v4l2_format *f pix->sizeimage = pix->width * pix->height; /* Just a guess */ pix->colorspace = V4L2_COLORSPACE_SRGB; - pix->priv = 0; return 0; } @@ -785,7 +784,6 @@ static int qcam_try_fmt_vid_cap(struct file *file, void *fh, struct v4l2_format pix->sizeimage = pix->width * pix->height; /* Just a guess */ pix->colorspace = V4L2_COLORSPACE_SRGB; - pix->priv = 0; return 0; } diff --git a/drivers/media/pci/cx18/cx18-ioctl.c b/drivers/media/pci/cx18/cx18-ioctl.c index fefb2cd35838..6f2b59042b73 100644 --- a/drivers/media/pci/cx18/cx18-ioctl.c +++ b/drivers/media/pci/cx18/cx18-ioctl.c @@ -156,7 +156,6 @@ static int cx18_g_fmt_vid_cap(struct file *file, void *fh, pixfmt->height = cx->cxhdl.height; pixfmt->colorspace = V4L2_COLORSPACE_SMPTE170M; pixfmt->field = V4L2_FIELD_INTERLACED; - pixfmt->priv = 0; if (id->type == CX18_ENC_STREAM_TYPE_YUV) { pixfmt->pixelformat = s->pixelformat; pixfmt->sizeimage = s->vb_bytes_per_frame; diff --git a/drivers/media/pci/cx25821/cx25821-video.c b/drivers/media/pci/cx25821/cx25821-video.c index 8d2f1abeef77..3a419f134584 100644 --- a/drivers/media/pci/cx25821/cx25821-video.c +++ b/drivers/media/pci/cx25821/cx25821-video.c @@ -576,7 +576,6 @@ static int cx25821_vidioc_g_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.bytesperline = (chan->width * chan->fmt->depth) >> 3; f->fmt.pix.sizeimage = chan->height * f->fmt.pix.bytesperline; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; - f->fmt.pix.priv = 0; return 0; } @@ -615,7 +614,6 @@ static int cx25821_vidioc_try_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.bytesperline = (f->fmt.pix.width * fmt->depth) >> 3; f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; - f->fmt.pix.priv = 0; return 0; } @@ -867,7 +865,6 @@ static int cx25821_vidioc_try_fmt_vid_out(struct file *file, void *priv, f->fmt.pix.bytesperline = (f->fmt.pix.width * fmt->depth) >> 3; f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; - f->fmt.pix.priv = 0; return 0; } diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c index b3667a00db3a..3e0cb77d5930 100644 --- a/drivers/media/pci/ivtv/ivtv-ioctl.c +++ b/drivers/media/pci/ivtv/ivtv-ioctl.c @@ -351,7 +351,6 @@ static int ivtv_g_fmt_vid_cap(struct file *file, void *fh, struct v4l2_format *f pixfmt->height = itv->cxhdl.height; pixfmt->colorspace = V4L2_COLORSPACE_SMPTE170M; pixfmt->field = V4L2_FIELD_INTERLACED; - pixfmt->priv = 0; if (id->type == IVTV_ENC_STREAM_TYPE_YUV) { pixfmt->pixelformat = V4L2_PIX_FMT_HM12; /* YUV size is (Y=(h*720) + UV=(h*(720/2))) */ @@ -418,7 +417,6 @@ static int ivtv_g_fmt_vid_out(struct file *file, void *fh, struct v4l2_format *f pixfmt->height = itv->main_rect.height; pixfmt->colorspace = V4L2_COLORSPACE_SMPTE170M; pixfmt->field = V4L2_FIELD_INTERLACED; - pixfmt->priv = 0; if (id->type == IVTV_DEC_STREAM_TYPE_YUV) { switch (itv->yuv_info.lace_mode & IVTV_YUV_MODE_MASK) { case IVTV_YUV_MODE_INTERLACED: @@ -1384,7 +1382,6 @@ static int ivtv_g_fbuf(struct file *file, void *fh, struct v4l2_framebuffer *fb) fb->fmt.bytesperline = fb->fmt.width; fb->fmt.colorspace = V4L2_COLORSPACE_SMPTE170M; fb->fmt.field = V4L2_FIELD_INTERLACED; - fb->fmt.priv = 0; if (fb->fmt.pixelformat != V4L2_PIX_FMT_PAL8) fb->fmt.bytesperline *= 2; if (fb->fmt.pixelformat == V4L2_PIX_FMT_RGB32 || diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c index 1a77f8dfafa5..aeae54708811 100644 --- a/drivers/media/pci/meye/meye.c +++ b/drivers/media/pci/meye/meye.c @@ -1166,7 +1166,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *fh, f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; f->fmt.pix.colorspace = 0; - f->fmt.pix.priv = 0; return 0; } @@ -1232,7 +1231,6 @@ static int vidioc_s_fmt_vid_cap(struct file *file, void *fh, f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; f->fmt.pix.colorspace = 0; - f->fmt.pix.priv = 0; return 0; } diff --git a/drivers/media/pci/saa7134/saa7134-empress.c b/drivers/media/pci/saa7134/saa7134-empress.c index 5526ed5444fb..ef39261c1f20 100644 --- a/drivers/media/pci/saa7134/saa7134-empress.c +++ b/drivers/media/pci/saa7134/saa7134-empress.c @@ -130,7 +130,6 @@ static int empress_g_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.pixelformat = V4L2_PIX_FMT_MPEG; f->fmt.pix.sizeimage = TS_PACKET_SIZE * dev->ts.nr_packets; f->fmt.pix.bytesperline = 0; - f->fmt.pix.priv = 0; return 0; } @@ -148,7 +147,6 @@ static int empress_s_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.pixelformat = V4L2_PIX_FMT_MPEG; f->fmt.pix.sizeimage = TS_PACKET_SIZE * dev->ts.nr_packets; f->fmt.pix.bytesperline = 0; - f->fmt.pix.priv = 0; return 0; } @@ -166,7 +164,6 @@ static int empress_try_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.pixelformat = V4L2_PIX_FMT_MPEG; f->fmt.pix.sizeimage = TS_PACKET_SIZE * dev->ts.nr_packets; f->fmt.pix.bytesperline = 0; - f->fmt.pix.priv = 0; return 0; } diff --git a/drivers/media/pci/saa7134/saa7134-video.c b/drivers/media/pci/saa7134/saa7134-video.c index d37599980768..0cfa2ca6a32a 100644 --- a/drivers/media/pci/saa7134/saa7134-video.c +++ b/drivers/media/pci/saa7134/saa7134-video.c @@ -1235,7 +1235,6 @@ static int saa7134_g_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; - f->fmt.pix.priv = 0; return 0; } @@ -1315,7 +1314,6 @@ static int saa7134_try_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; - f->fmt.pix.priv = 0; return 0; } diff --git a/drivers/media/pci/sta2x11/sta2x11_vip.c b/drivers/media/pci/sta2x11/sta2x11_vip.c index f2d8c70d35f5..365bd21301ba 100644 --- a/drivers/media/pci/sta2x11/sta2x11_vip.c +++ b/drivers/media/pci/sta2x11/sta2x11_vip.c @@ -640,7 +640,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.bytesperline = f->fmt.pix.width * 2; f->fmt.pix.sizeimage = f->fmt.pix.width * 2 * f->fmt.pix.height; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; - f->fmt.pix.priv = 0; return 0; } diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c index b1783791d426..54886606ed68 100644 --- a/drivers/media/platform/coda.c +++ b/drivers/media/platform/coda.c @@ -613,8 +613,6 @@ static int coda_try_fmt(struct coda_ctx *ctx, struct coda_codec *codec, BUG(); } - f->fmt.pix.priv = 0; - return 0; } diff --git a/drivers/media/platform/davinci/vpif_display.c b/drivers/media/platform/davinci/vpif_display.c index 877b46e8b2e4..0bd6dcb13cbc 100644 --- a/drivers/media/platform/davinci/vpif_display.c +++ b/drivers/media/platform/davinci/vpif_display.c @@ -648,7 +648,6 @@ static int vpif_try_fmt_vid_out(struct file *file, void *priv, pixfmt->width = common->fmt.fmt.pix.width; pixfmt->height = common->fmt.fmt.pix.height; pixfmt->sizeimage = pixfmt->bytesperline * pixfmt->height * 2; - pixfmt->priv = 0; return 0; } diff --git a/drivers/media/platform/mem2mem_testdev.c b/drivers/media/platform/mem2mem_testdev.c index 0714070ed7fa..c1b03cfd6ded 100644 --- a/drivers/media/platform/mem2mem_testdev.c +++ b/drivers/media/platform/mem2mem_testdev.c @@ -532,7 +532,6 @@ static int vidioc_try_fmt(struct v4l2_format *f, struct m2mtest_fmt *fmt) f->fmt.pix.bytesperline = (f->fmt.pix.width * fmt->depth) >> 3; f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; f->fmt.pix.field = V4L2_FIELD_NONE; - f->fmt.pix.priv = 0; return 0; } diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index 9a726eacb29b..2d177fa58471 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c @@ -165,7 +165,6 @@ static int omap_vout_try_format(struct v4l2_pix_format *pix) pix->pixelformat = omap_formats[ifmt].pixelformat; pix->field = V4L2_FIELD_ANY; - pix->priv = 0; switch (pix->pixelformat) { case V4L2_PIX_FMT_YUYV: @@ -1896,7 +1895,6 @@ static int __init omap_vout_setup_video_data(struct omap_vout_device *vout) pix->field = V4L2_FIELD_ANY; pix->bytesperline = pix->width * 2; pix->sizeimage = pix->bytesperline * pix->height; - pix->priv = 0; pix->colorspace = V4L2_COLORSPACE_JPEG; vout->bpp = RGB565_BPP; diff --git a/drivers/media/platform/sh_veu.c b/drivers/media/platform/sh_veu.c index 744e43b480bc..8dc279d4d561 100644 --- a/drivers/media/platform/sh_veu.c +++ b/drivers/media/platform/sh_veu.c @@ -425,7 +425,6 @@ static int sh_veu_g_fmt(struct sh_veu_file *veu_file, struct v4l2_format *f) pix->bytesperline = vfmt->bytesperline; pix->sizeimage = vfmt->bytesperline * pix->height * vfmt->fmt->depth / vfmt->fmt->ydepth; - pix->priv = 0; dev_dbg(veu->dev, "%s(): type: %d, size %u @ %ux%u, fmt %x\n", __func__, f->type, pix->sizeimage, pix->width, pix->height, pix->pixelformat); @@ -473,7 +472,6 @@ static int sh_veu_try_fmt(struct v4l2_format *f, const struct sh_veu_format *fmt pix->pixelformat = fmt->fourcc; pix->colorspace = sh_veu_4cc2cspace(pix->pixelformat); - pix->priv = 0; pr_debug("%s(): type: %d, size %u\n", __func__, f->type, pix->sizeimage); diff --git a/drivers/media/platform/vino.c b/drivers/media/platform/vino.c index 470d35336119..91d44ea16f27 100644 --- a/drivers/media/platform/vino.c +++ b/drivers/media/platform/vino.c @@ -3147,7 +3147,6 @@ static int vino_try_fmt_vid_cap(struct file *file, void *__fh, pf->colorspace = vino_data_formats[tempvcs.data_format].colorspace; - pf->priv = 0; return 0; } @@ -3175,8 +3174,6 @@ static int vino_g_fmt_vid_cap(struct file *file, void *__fh, pf->colorspace = vino_data_formats[vcs->data_format].colorspace; - pf->priv = 0; - spin_unlock_irqrestore(&vino_drvdata->input_lock, flags); return 0; } @@ -3219,8 +3216,6 @@ static int vino_s_fmt_vid_cap(struct file *file, void *__fh, pf->colorspace = vino_data_formats[vcs->data_format].colorspace; - pf->priv = 0; - spin_unlock_irqrestore(&vino_drvdata->input_lock, flags); return 0; } diff --git a/drivers/media/platform/vivi.c b/drivers/media/platform/vivi.c index 7542b5dd9910..80333714ffa7 100644 --- a/drivers/media/platform/vivi.c +++ b/drivers/media/platform/vivi.c @@ -1014,7 +1014,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; else f->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; - f->fmt.pix.priv = 0; return 0; } diff --git a/drivers/media/usb/cx231xx/cx231xx-417.c b/drivers/media/usb/cx231xx/cx231xx-417.c index f0400e260eb7..459bb0e98971 100644 --- a/drivers/media/usb/cx231xx/cx231xx-417.c +++ b/drivers/media/usb/cx231xx/cx231xx-417.c @@ -1563,7 +1563,6 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.width = dev->ts1.width; f->fmt.pix.height = dev->ts1.height; f->fmt.pix.field = V4L2_FIELD_INTERLACED; - f->fmt.pix.priv = 0; dprintk(1, "VIDIOC_G_FMT: w: %d, h: %d\n", dev->ts1.width, dev->ts1.height); dprintk(3, "exit vidioc_g_fmt_vid_cap()\n"); @@ -1582,7 +1581,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.sizeimage = mpeglines * mpeglinesize; f->fmt.pix.field = V4L2_FIELD_INTERLACED; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; - f->fmt.pix.priv = 0; dprintk(1, "VIDIOC_TRY_FMT: w: %d, h: %d\n", dev->ts1.width, dev->ts1.height); dprintk(3, "exit vidioc_try_fmt_vid_cap()\n"); diff --git a/drivers/media/usb/cx231xx/cx231xx-video.c b/drivers/media/usb/cx231xx/cx231xx-video.c index ae31ca2fc9a1..3b3ada6562ca 100644 --- a/drivers/media/usb/cx231xx/cx231xx-video.c +++ b/drivers/media/usb/cx231xx/cx231xx-video.c @@ -885,7 +885,6 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; f->fmt.pix.field = V4L2_FIELD_INTERLACED; - f->fmt.pix.priv = 0; return 0; } @@ -930,7 +929,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.sizeimage = f->fmt.pix.bytesperline * height; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; f->fmt.pix.field = V4L2_FIELD_INTERLACED; - f->fmt.pix.priv = 0; return 0; } diff --git a/drivers/media/usb/gspca/gspca.c b/drivers/media/usb/gspca/gspca.c index 42d223239206..e8cf23c91cef 100644 --- a/drivers/media/usb/gspca/gspca.c +++ b/drivers/media/usb/gspca/gspca.c @@ -1109,8 +1109,8 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *priv, struct gspca_dev *gspca_dev = video_drvdata(file); fmt->fmt.pix = gspca_dev->pixfmt; - /* some drivers use priv internally, zero it before giving it to - userspace */ + /* some drivers use priv internally, zero it before giving it back to + the core */ fmt->fmt.pix.priv = 0; return 0; } @@ -1146,8 +1146,8 @@ static int try_fmt_vid_cap(struct gspca_dev *gspca_dev, fmt->fmt.pix.height = h; gspca_dev->sd_desc->try_fmt(gspca_dev, fmt); } - /* some drivers use priv internally, zero it before giving it to - userspace */ + /* some drivers use priv internally, zero it before giving it back to + the core */ fmt->fmt.pix.priv = 0; return mode; /* used when s_fmt */ } diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c index dca4b65053aa..4b7653fd4b0b 100644 --- a/drivers/media/usb/hdpvr/hdpvr-video.c +++ b/drivers/media/usb/hdpvr/hdpvr-video.c @@ -1022,7 +1022,6 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *_fh, f->fmt.pix.pixelformat = V4L2_PIX_FMT_MPEG; f->fmt.pix.sizeimage = dev->bulk_in_size; f->fmt.pix.bytesperline = 0; - f->fmt.pix.priv = 0; if (f->fmt.pix.width == 720) { /* SDTV formats */ f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; diff --git a/drivers/media/usb/stkwebcam/stk-webcam.c b/drivers/media/usb/stkwebcam/stk-webcam.c index d76860b6a0b8..3588dc38db87 100644 --- a/drivers/media/usb/stkwebcam/stk-webcam.c +++ b/drivers/media/usb/stkwebcam/stk-webcam.c @@ -923,7 +923,6 @@ static int stk_vidioc_g_fmt_vid_cap(struct file *filp, pix_format->bytesperline = 2 * pix_format->width; pix_format->sizeimage = pix_format->bytesperline * pix_format->height; - pix_format->priv = 0; return 0; } @@ -967,7 +966,6 @@ static int stk_try_fmt_vid_cap(struct file *filp, fmtd->fmt.pix.bytesperline = 2 * fmtd->fmt.pix.width; fmtd->fmt.pix.sizeimage = fmtd->fmt.pix.bytesperline * fmtd->fmt.pix.height; - fmtd->fmt.pix.priv = 0; return 0; } diff --git a/drivers/media/usb/tlg2300/pd-video.c b/drivers/media/usb/tlg2300/pd-video.c index 8df668d06552..8cd7f02fcf9f 100644 --- a/drivers/media/usb/tlg2300/pd-video.c +++ b/drivers/media/usb/tlg2300/pd-video.c @@ -1321,7 +1321,6 @@ static void init_video_context(struct running_context *context) .bytesperline = 720 * 2, .sizeimage = 720 * 576 * 2, .colorspace = V4L2_COLORSPACE_SMPTE170M, - .priv = 0 }; } diff --git a/drivers/media/usb/tm6000/tm6000-video.c b/drivers/media/usb/tm6000/tm6000-video.c index 9bde0642ffd1..793577fc4633 100644 --- a/drivers/media/usb/tm6000/tm6000-video.c +++ b/drivers/media/usb/tm6000/tm6000-video.c @@ -918,7 +918,6 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *priv, (f->fmt.pix.width * fh->fmt->depth) >> 3; f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; - f->fmt.pix.priv = 0; return 0; } @@ -959,7 +958,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.width &= ~0x01; f->fmt.pix.field = field; - f->fmt.pix.priv = 0; f->fmt.pix.bytesperline = (f->fmt.pix.width * fmt->depth) >> 3; diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c index 3b80579a82c5..5c006277b8b1 100644 --- a/drivers/media/usb/zr364xx/zr364xx.c +++ b/drivers/media/usb/zr364xx/zr364xx.c @@ -806,7 +806,6 @@ static int zr364xx_vidioc_try_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.bytesperline = f->fmt.pix.width * 2; f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; f->fmt.pix.colorspace = V4L2_COLORSPACE_JPEG; - f->fmt.pix.priv = 0; DBG("%s: V4L2_PIX_FMT_%s (%d) ok!\n", __func__, decode_fourcc(f->fmt.pix.pixelformat, pixelformat_name), f->fmt.pix.field); @@ -829,7 +828,6 @@ static int zr364xx_vidioc_g_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.bytesperline = f->fmt.pix.width * 2; f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; f->fmt.pix.colorspace = V4L2_COLORSPACE_JPEG; - f->fmt.pix.priv = 0; return 0; } @@ -866,7 +864,6 @@ static int zr364xx_vidioc_s_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.bytesperline = f->fmt.pix.width * 2; f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline; f->fmt.pix.colorspace = V4L2_COLORSPACE_JPEG; - f->fmt.pix.priv = 0; cam->vb_vidq.field = f->fmt.pix.field; if (f->fmt.pix.width == 160 && f->fmt.pix.height == 120) diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 7e2411c36419..cca6c2f76b3a 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -540,7 +540,16 @@ struct v4l2_framebuffer32 { __u32 capability; __u32 flags; compat_caddr_t base; - struct v4l2_pix_format fmt; + struct { + __u32 width; + __u32 height; + __u32 pixelformat; + __u32 field; + __u32 bytesperline; + __u32 sizeimage; + __u32 colorspace; + __u32 priv; + } fmt; }; static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_framebuffer32 __user *up) @@ -550,10 +559,10 @@ static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_framebuffer32)) || get_user(tmp, &up->base) || get_user(kp->capability, &up->capability) || - get_user(kp->flags, &up->flags)) + get_user(kp->flags, &up->flags) || + copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt))) return -EFAULT; kp->base = compat_ptr(tmp); - get_v4l2_pix_format(&kp->fmt, &up->fmt); return 0; } @@ -564,9 +573,9 @@ static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_framebuffer32)) || put_user(tmp, &up->base) || put_user(kp->capability, &up->capability) || - put_user(kp->flags, &up->flags)) + put_user(kp->flags, &up->flags) || + copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt))) return -EFAULT; - put_v4l2_pix_format(&kp->fmt, &up->fmt); return 0; } diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 96bc117f66b2..2e630005676f 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -973,13 +973,48 @@ static int check_fmt(struct file *file, enum v4l2_buf_type type) return -EINVAL; } +static void v4l_sanitize_format(struct v4l2_format *fmt) +{ + unsigned int offset; + + /* + * The v4l2_pix_format structure has been extended with fields that were + * not previously required to be set to zero by applications. The priv + * field, when set to a magic value, indicates the the extended fields + * are valid. Otherwise they will contain undefined values. To simplify + * the API towards drivers zero the extended fields and set the priv + * field to the magic value when the extended pixel format structure + * isn't used by applications. + */ + + if (fmt->type != V4L2_BUF_TYPE_VIDEO_CAPTURE && + fmt->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) + return; + + if (fmt->fmt.pix.priv == V4L2_PIX_FMT_PRIV_MAGIC) + return; + + fmt->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; + + offset = offsetof(struct v4l2_pix_format, priv) + + sizeof(fmt->fmt.pix.priv); + memset(((void *)&fmt->fmt.pix) + offset, 0, + sizeof(fmt->fmt.pix) - offset); +} + static int v4l_querycap(const struct v4l2_ioctl_ops *ops, struct file *file, void *fh, void *arg) { struct v4l2_capability *cap = (struct v4l2_capability *)arg; + int ret; cap->version = LINUX_VERSION_CODE; - return ops->vidioc_querycap(file, fh, cap); + + ret = ops->vidioc_querycap(file, fh, cap); + + cap->capabilities |= V4L2_CAP_EXT_PIX_FORMAT; + + return ret; } static int v4l_s_input(const struct v4l2_ioctl_ops *ops, @@ -1103,12 +1138,17 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, bool is_sdr = vfd->vfl_type == VFL_TYPE_SDR; bool is_rx = vfd->vfl_dir != VFL_DIR_TX; bool is_tx = vfd->vfl_dir != VFL_DIR_RX; + int ret; + + p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; switch (p->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: if (unlikely(!is_rx || !is_vid || !ops->vidioc_g_fmt_vid_cap)) break; - return ops->vidioc_g_fmt_vid_cap(file, fh, arg); + ret = ops->vidioc_g_fmt_vid_cap(file, fh, arg); + p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; + return ret; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: if (unlikely(!is_rx || !is_vid || !ops->vidioc_g_fmt_vid_cap_mplane)) break; @@ -1128,7 +1168,9 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VIDEO_OUTPUT: if (unlikely(!is_tx || !is_vid || !ops->vidioc_g_fmt_vid_out)) break; - return ops->vidioc_g_fmt_vid_out(file, fh, arg); + ret = ops->vidioc_g_fmt_vid_out(file, fh, arg); + p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; + return ret; case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: if (unlikely(!is_tx || !is_vid || !ops->vidioc_g_fmt_vid_out_mplane)) break; @@ -1163,6 +1205,8 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, bool is_rx = vfd->vfl_dir != VFL_DIR_TX; bool is_tx = vfd->vfl_dir != VFL_DIR_RX; + v4l_sanitize_format(p); + switch (p->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: if (unlikely(!is_rx || !is_vid || !ops->vidioc_s_fmt_vid_cap)) @@ -1233,6 +1277,8 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, bool is_rx = vfd->vfl_dir != VFL_DIR_TX; bool is_tx = vfd->vfl_dir != VFL_DIR_RX; + v4l_sanitize_format(p); + switch (p->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: if (unlikely(!is_rx || !is_vid || !ops->vidioc_try_fmt_vid_cap)) @@ -1516,7 +1562,18 @@ static int v4l_create_bufs(const struct v4l2_ioctl_ops *ops, struct v4l2_create_buffers *create = arg; int ret = check_fmt(file, create->format.type); - return ret ? ret : ops->vidioc_create_bufs(file, fh, create); + if (ret) + return ret; + + v4l_sanitize_format(&create->format); + + ret = ops->vidioc_create_bufs(file, fh, create); + + if (create->format.type == V4L2_BUF_TYPE_VIDEO_CAPTURE || + create->format.type == V4L2_BUF_TYPE_VIDEO_OUTPUT) + create->format.fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; + + return ret; } static int v4l_prepare_buf(const struct v4l2_ioctl_ops *ops, diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index a498d8b58679..eb3bdd33816b 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -268,6 +268,7 @@ struct v4l2_capability { #define V4L2_CAP_MODULATOR 0x00080000 /* has a modulator */ #define V4L2_CAP_SDR_CAPTURE 0x00100000 /* Is a SDR capture device */ +#define V4L2_CAP_EXT_PIX_FORMAT 0x00200000 /* Supports the extended pixel format */ #define V4L2_CAP_READWRITE 0x01000000 /* read/write systemcalls */ #define V4L2_CAP_ASYNCIO 0x02000000 /* async I/O */ @@ -448,6 +449,9 @@ struct v4l2_pix_format { #define V4L2_SDR_FMT_CU8 v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */ #define V4L2_SDR_FMT_CU16LE v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */ +/* priv field value to indicates that subsequent fields are valid. */ +#define V4L2_PIX_FMT_PRIV_MAGIC 0xfeedcafe + /* * F O R M A T E N U M E R A T I O N */ @@ -752,7 +756,16 @@ struct v4l2_framebuffer { /* FIXME: in theory we should pass something like PCI device + memory * region + offset instead of some physical address */ void *base; - struct v4l2_pix_format fmt; + struct { + __u32 width; + __u32 height; + __u32 pixelformat; + __u32 field; /* enum v4l2_field */ + __u32 bytesperline; /* for padding, zero if unused */ + __u32 sizeimage; + __u32 colorspace; /* enum v4l2_colorspace */ + __u32 priv; /* reserved field, set to 0 */ + } fmt; }; /* Flags for the 'capability' field. Read only */ #define V4L2_FBUF_CAP_EXTERNOVERLAY 0x0001 -- cgit v1.2.3 From c96fd46afb34a554406bce9784126b96ad09091e Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 27 May 2014 10:12:43 -0300 Subject: [media] v4l: Add premultiplied alpha flag for pixel formats When set, the new V4L2_PIX_FMT_FLAG_PREMUL_ALPHA flag indicates that the pixel values are premultiplied by the alpha channel value. Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/v4l/pixfmt.xml | 31 +++++++++++++++++++++++++++++- Documentation/DocBook/media/v4l/v4l2.xml | 2 +- drivers/media/v4l2-core/v4l2-ioctl.c | 5 +++-- include/uapi/linux/videodev2.h | 8 +++++++- 4 files changed, 41 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml index bb36b3829cf9..87ce7f3e7178 100644 --- a/Documentation/DocBook/media/v4l/pixfmt.xml +++ b/Documentation/DocBook/media/v4l/pixfmt.xml @@ -135,6 +135,12 @@ extended fields were set to zero. On return drivers must set the V4L2_PIX_FMT_PRIV_MAGIC and all the extended fields to applicable values. + + __u32 + flags + Flags set by the application or driver, see . + @@ -220,9 +226,15 @@ codes can be used. and the number of valid entries in the plane_fmt array.
+ + __u8 + flags + Flags set by the application or driver, see . + __u8 - reserved[11] + reserved[10] Reserved for future extensions. Should be zeroed by the application. @@ -1079,4 +1091,21 @@ concatenated to form the JPEG stream. + + + Format Flags + + &cs-def; + + + V4L2_PIX_FMT_FLAG_PREMUL_ALPHA + 0x00000001 + The color values are premultiplied by the alpha channel +value. For example, if a light blue pixel with 50% transparency was described by +RGBA values (128, 192, 255, 128), the same pixel described with premultiplied +colors would be described by RGBA values (64, 96, 128, 128) + + + +
diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml index d0a48bebfa52..f2f81f06a17b 100644 --- a/Documentation/DocBook/media/v4l/v4l2.xml +++ b/Documentation/DocBook/media/v4l/v4l2.xml @@ -155,7 +155,7 @@ applications. --> 3.16 2014-05-27 lp - Extended &v4l2-pix-format;. + Extended &v4l2-pix-format;. Added format flags. diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 2e630005676f..e0bafda89e13 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -256,7 +256,8 @@ static void v4l_print_format(const void *arg, bool write_only) pix = &p->fmt.pix; pr_cont(", width=%u, height=%u, " "pixelformat=%c%c%c%c, field=%s, " - "bytesperline=%u, sizeimage=%u, colorspace=%d\n", + "bytesperline=%u, sizeimage=%u, colorspace=%d, " + "flags %u\n", pix->width, pix->height, (pix->pixelformat & 0xff), (pix->pixelformat >> 8) & 0xff, @@ -264,7 +265,7 @@ static void v4l_print_format(const void *arg, bool write_only) (pix->pixelformat >> 24) & 0xff, prt_names(pix->field, v4l2_field_names), pix->bytesperline, pix->sizeimage, - pix->colorspace); + pix->colorspace, pix->flags); break; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index eb3bdd33816b..b73e8cda7192 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -288,6 +288,7 @@ struct v4l2_pix_format { __u32 sizeimage; __u32 colorspace; /* enum v4l2_colorspace */ __u32 priv; /* private data, depends on pixelformat */ + __u32 flags; /* format flags (V4L2_PIX_FMT_FLAG_*) */ }; /* Pixel format FOURCC depth Description */ @@ -452,6 +453,9 @@ struct v4l2_pix_format { /* priv field value to indicates that subsequent fields are valid. */ #define V4L2_PIX_FMT_PRIV_MAGIC 0xfeedcafe +/* Flags */ +#define V4L2_PIX_FMT_FLAG_PREMUL_ALPHA 0x00000001 + /* * F O R M A T E N U M E R A T I O N */ @@ -1754,6 +1758,7 @@ struct v4l2_plane_pix_format { * @colorspace: enum v4l2_colorspace; supplemental to pixelformat * @plane_fmt: per-plane information * @num_planes: number of planes for this format + * @flags: format flags (V4L2_PIX_FMT_FLAG_*) */ struct v4l2_pix_format_mplane { __u32 width; @@ -1764,7 +1769,8 @@ struct v4l2_pix_format_mplane { struct v4l2_plane_pix_format plane_fmt[VIDEO_MAX_PLANES]; __u8 num_planes; - __u8 reserved[11]; + __u8 flags; + __u8 reserved[10]; } __attribute__ ((packed)); /** -- cgit v1.2.3 From e34c4db8fe5c8d47753057b2409f9b8388f1896e Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 26 Feb 2014 18:22:05 -0300 Subject: [media] videodev2.h: add V4L2_FIELD_HAS_T_OR_B macro Add a macro to test if the field consists of a single top or bottom field. Anyone who needs to work with fields as opposed to frame will need this. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index b73e8cda7192..1f1a65c1fe8e 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -124,6 +124,10 @@ enum v4l2_field { (field) == V4L2_FIELD_INTERLACED_BT ||\ (field) == V4L2_FIELD_SEQ_TB ||\ (field) == V4L2_FIELD_SEQ_BT) +#define V4L2_FIELD_HAS_T_OR_B(field) \ + ((field) == V4L2_FIELD_BOTTOM ||\ + (field) == V4L2_FIELD_TOP ||\ + (field) == V4L2_FIELD_ALTERNATE) enum v4l2_buf_type { V4L2_BUF_TYPE_VIDEO_CAPTURE = 1, -- cgit v1.2.3 From aef9a7bd9b676f797dd5cefd43deb30d36b976a9 Mon Sep 17 00:00:00 2001 From: Yoshihiro YUNOMAE Date: Wed, 16 Jul 2014 01:19:36 +0000 Subject: serial/uart/8250: Add tunable RX interrupt trigger I/F of FIFO buffers Add tunable RX interrupt trigger I/F of FIFO buffers. Serial devices are used as not only message communication devices but control or sending communication devices. For the latter uses, normally small data will be exchanged, so user applications want to receive data unit as soon as possible for real-time tendency. If we have a sensor which sends a 1 byte data each time and must control a device based on the sensor feedback, the RX interrupt should be triggered for each data. According to HW specification of serial UART devices, RX interrupt trigger can be changed, but the trigger is hard-coded. For example, RX interrupt trigger in 16550A can be set to 1, 4, 8, or 14 bytes for HW, but current driver sets the trigger to only 8bytes. This patch makes some devices change RX interrupt trigger from userland. - Read current setting # cat /sys/class/tty/ttyS0/rx_trig_bytes 8 - Write user setting # echo 1 > /sys/class/tty/ttyS0/rx_trig_bytes # cat /sys/class/tty/ttyS0/rx_trig_bytes 1 - 16550A and Tegra (1, 4, 8, or 14 bytes) - 16650V2 (8, 16, 24, or 28 bytes) - 16654 (8, 16, 56, or 60 bytes) - 16750 (1, 16, 32, or 56 bytes) Changes in V9: - Use attr_group instead of dev_spec_attr_group of uart_port structure Changes in V8: - Divide this patch from V7's patch based on Greg's comment Changes in V7: - Add Documentation - Change I/F name from rx_int_trig to rx_trig_bytes because the name rx_int_trig is hard to understand how users specify the value Changes in V6: - Move FCR_RX_TRIG_* definition in 8250.h to include/uapi/linux/serial_reg.h, rename those to UART_FCR_R_TRIG_*, and use UART_FCR_TRIGGER_MASK to UART_FCR_R_TRIG_BITS() - Change following function names: convert_fcr2val() => fcr_get_rxtrig_bytes() convert_val2rxtrig() => bytes_to_fcr_rxtrig() - Fix typo in serial8250_do_set_termios() - Delete the verbose error message pr_info() in bytes_to_fcr_rxtrig() - Rename *rx_int_trig/rx_trig* to *rxtrig* for several functions or variables (but UI remains rx_int_trig) - Change the meaningless variable name 'val' to 'bytes' following functions: fcr_get_rxtrig_bytes(), bytes_to_fcr_rxtrig(), do_set_rxtrig(), do_serial8250_set_rxtrig(), and serial8250_set_attr_rxtrig() - Use up->fcr in order to get rxtrig_bytes instead of rx_trig_raw in fcr_get_rxtrig_bytes() - Use conf_type->rxtrig_bytes[0] instead of switch statement for support check in register_dev_spec_attr_grp() - Delete the checking whether a user changed FCR or not when minimum buffer is needed in serial8250_do_set_termios() Changes in V5.1: - Fix FCR_RX_TRIG_MAX_STATE definition Changes in V5: - Support Tegra, 16650V2, 16654, and 16750 - Store default FCR value to up->fcr when the port is first created - Add rx_trig_byte[] in uart_config[] for each device and use rx_trig_byte[] in convert_fcr2val() and convert_val2rxtrig() Changes in V4: - Introduce fifo_bug flag in uart_8250_port structure This is enabled only when parity is enabled and UART_BUG_PARITY is enabled for up->bugs. If this flag is enabled, user cannot set RX trigger. - Return -EOPNOTSUPP when it does not support device at convert_fcr2val() and at convert_val2rxtrig() - Set the nearest lower RX trigger when users input a meaningless value at convert_val2rxtrig() - Check whether p->fcr is existing at serial8250_clear_and_reinit_fifos() - Set fcr = up->fcr in the begging of serial8250_do_set_termios() Changes in V3: - Change I/F from ioctl(2) to sysfs(rx_int_trig) Changed in V2: - Use _IOW for TIOCSFIFORTRIG definition - Pass the interrupt trigger value itself Signed-off-by: Yoshihiro YUNOMAE Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-tty | 16 ++++ drivers/tty/serial/8250/8250.h | 2 + drivers/tty/serial/8250/8250_core.c | 173 ++++++++++++++++++++++++++++++++---- include/linux/serial_8250.h | 2 + include/uapi/linux/serial_reg.h | 5 ++ 5 files changed, 183 insertions(+), 15 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty index ad22fb0ee765..9eb3c2b6b040 100644 --- a/Documentation/ABI/testing/sysfs-tty +++ b/Documentation/ABI/testing/sysfs-tty @@ -138,3 +138,19 @@ Description: These sysfs values expose the TIOCGSERIAL interface via sysfs rather than via ioctls. + +What: /sys/class/tty/ttyS0/rx_trig_bytes +Date: May 2014 +Contact: Yoshihiro YUNOMAE +Description: + Shows current RX interrupt trigger bytes or sets the + user specified value to change it for the FIFO buffer. + Users can show or set this value regardless of opening the + serial device file or not. + + The RX trigger can be set one of four kinds of values for UART + serials. When users input a meaning less value to this I/F, + the RX trigger is changed to the nearest lower value for the + device specification. For example, when user sets 7bytes on + 16550A, which has 1/4/8/14 bytes trigger, the RX trigger is + automatically changed to 4 bytes. diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 1ebf8538b4fa..1b08c918cd51 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -12,6 +12,7 @@ */ #include +#include #include struct uart_8250_dma { @@ -60,6 +61,7 @@ struct serial8250_config { unsigned short fifo_size; unsigned short tx_loadsz; unsigned char fcr; + unsigned char rxtrig_bytes[UART_FCR_R_TRIG_MAX_STATE]; unsigned int flags; }; diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 0da01458816e..1d42dba6121d 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -161,6 +160,7 @@ static const struct serial8250_config uart_config[] = { .fifo_size = 16, .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO, }, [PORT_CIRRUS] = { @@ -180,6 +180,7 @@ static const struct serial8250_config uart_config[] = { .tx_loadsz = 16, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_T_TRIG_00, + .rxtrig_bytes = {8, 16, 24, 28}, .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_16750] = { @@ -188,6 +189,7 @@ static const struct serial8250_config uart_config[] = { .tx_loadsz = 64, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | UART_FCR7_64BYTE, + .rxtrig_bytes = {1, 16, 32, 56}, .flags = UART_CAP_FIFO | UART_CAP_SLEEP | UART_CAP_AFE, }, [PORT_STARTECH] = { @@ -209,6 +211,7 @@ static const struct serial8250_config uart_config[] = { .tx_loadsz = 32, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_T_TRIG_10, + .rxtrig_bytes = {8, 16, 56, 60}, .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, }, [PORT_16850] = { @@ -266,6 +269,7 @@ static const struct serial8250_config uart_config[] = { .tx_loadsz = 8, .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | UART_FCR_T_TRIG_01, + .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO | UART_CAP_RTOIE, }, [PORT_XR17D15X] = { @@ -530,11 +534,8 @@ static void serial8250_clear_fifos(struct uart_8250_port *p) void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p) { - unsigned char fcr; - serial8250_clear_fifos(p); - fcr = uart_config[p->port.type].fcr; - serial_out(p, UART_FCR, fcr); + serial_out(p, UART_FCR, p->fcr); } EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos); @@ -2256,10 +2257,9 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old) { struct uart_8250_port *up = up_to_u8250p(port); - unsigned char cval, fcr = 0; + unsigned char cval; unsigned long flags; unsigned int baud, quot; - int fifo_bug = 0; switch (termios->c_cflag & CSIZE) { case CS5: @@ -2282,7 +2282,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, if (termios->c_cflag & PARENB) { cval |= UART_LCR_PARITY; if (up->bugs & UART_BUG_PARITY) - fifo_bug = 1; + up->fifo_bug = true; } if (!(termios->c_cflag & PARODD)) cval |= UART_LCR_EPAR; @@ -2306,10 +2306,10 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, quot++; if (up->capabilities & UART_CAP_FIFO && port->fifosize > 1) { - fcr = uart_config[port->type].fcr; - if ((baud < 2400 && !up->dma) || fifo_bug) { - fcr &= ~UART_FCR_TRIGGER_MASK; - fcr |= UART_FCR_TRIGGER_1; + /* NOTE: If fifo_bug is not set, a user can set RX_trigger. */ + if ((baud < 2400 && !up->dma) || up->fifo_bug) { + up->fcr &= ~UART_FCR_TRIGGER_MASK; + up->fcr |= UART_FCR_TRIGGER_1; } } @@ -2442,15 +2442,15 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, * is written without DLAB set, this mode will be disabled. */ if (port->type == PORT_16750) - serial_port_out(port, UART_FCR, fcr); + serial_port_out(port, UART_FCR, up->fcr); serial_port_out(port, UART_LCR, cval); /* reset DLAB */ up->lcr = cval; /* Save LCR */ if (port->type != PORT_16750) { /* emulated UARTs (Lucent Venus 167x) need two steps */ - if (fcr & UART_FCR_ENABLE_FIFO) + if (up->fcr & UART_FCR_ENABLE_FIFO) serial_port_out(port, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_port_out(port, UART_FCR, fcr); /* set fcr */ + serial_port_out(port, UART_FCR, up->fcr); /* set fcr */ } serial8250_set_mctrl(port, port->mctrl); spin_unlock_irqrestore(&port->lock, flags); @@ -2640,6 +2640,146 @@ static int serial8250_request_port(struct uart_port *port) return ret; } +static int fcr_get_rxtrig_bytes(struct uart_8250_port *up) +{ + const struct serial8250_config *conf_type = &uart_config[up->port.type]; + unsigned char bytes; + + bytes = conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(up->fcr)]; + + return bytes ? bytes : -EOPNOTSUPP; +} + +static int bytes_to_fcr_rxtrig(struct uart_8250_port *up, unsigned char bytes) +{ + const struct serial8250_config *conf_type = &uart_config[up->port.type]; + int i; + + if (!conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(UART_FCR_R_TRIG_00)]) + return -EOPNOTSUPP; + + for (i = 1; i < UART_FCR_R_TRIG_MAX_STATE; i++) { + if (bytes < conf_type->rxtrig_bytes[i]) + /* Use the nearest lower value */ + return (--i) << UART_FCR_R_TRIG_SHIFT; + } + + return UART_FCR_R_TRIG_11; +} + +static int do_get_rxtrig(struct tty_port *port) +{ + struct uart_state *state = container_of(port, struct uart_state, port); + struct uart_port *uport = state->uart_port; + struct uart_8250_port *up = + container_of(uport, struct uart_8250_port, port); + + if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1) + return -EINVAL; + + return fcr_get_rxtrig_bytes(up); +} + +static int do_serial8250_get_rxtrig(struct tty_port *port) +{ + int rxtrig_bytes; + + mutex_lock(&port->mutex); + rxtrig_bytes = do_get_rxtrig(port); + mutex_unlock(&port->mutex); + + return rxtrig_bytes; +} + +static ssize_t serial8250_get_attr_rx_trig_bytes(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tty_port *port = dev_get_drvdata(dev); + int rxtrig_bytes; + + rxtrig_bytes = do_serial8250_get_rxtrig(port); + if (rxtrig_bytes < 0) + return rxtrig_bytes; + + return snprintf(buf, PAGE_SIZE, "%d\n", rxtrig_bytes); +} + +static int do_set_rxtrig(struct tty_port *port, unsigned char bytes) +{ + struct uart_state *state = container_of(port, struct uart_state, port); + struct uart_port *uport = state->uart_port; + struct uart_8250_port *up = + container_of(uport, struct uart_8250_port, port); + int rxtrig; + + if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1 || + up->fifo_bug) + return -EINVAL; + + rxtrig = bytes_to_fcr_rxtrig(up, bytes); + if (rxtrig < 0) + return rxtrig; + + serial8250_clear_fifos(up); + up->fcr &= ~UART_FCR_TRIGGER_MASK; + up->fcr |= (unsigned char)rxtrig; + serial_out(up, UART_FCR, up->fcr); + return 0; +} + +static int do_serial8250_set_rxtrig(struct tty_port *port, unsigned char bytes) +{ + int ret; + + mutex_lock(&port->mutex); + ret = do_set_rxtrig(port, bytes); + mutex_unlock(&port->mutex); + + return ret; +} + +static ssize_t serial8250_set_attr_rx_trig_bytes(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct tty_port *port = dev_get_drvdata(dev); + unsigned char bytes; + int ret; + + if (!count) + return -EINVAL; + + ret = kstrtou8(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = do_serial8250_set_rxtrig(port, bytes); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(rx_trig_bytes, S_IRUSR | S_IWUSR | S_IRGRP, + serial8250_get_attr_rx_trig_bytes, + serial8250_set_attr_rx_trig_bytes); + +static struct attribute *serial8250_dev_attrs[] = { + &dev_attr_rx_trig_bytes.attr, + NULL, + }; + +static struct attribute_group serial8250_dev_attr_group = { + .attrs = serial8250_dev_attrs, + }; + +static void register_dev_spec_attr_grp(struct uart_8250_port *up) +{ + const struct serial8250_config *conf_type = &uart_config[up->port.type]; + + if (conf_type->rxtrig_bytes[0]) + up->port.attr_group = &serial8250_dev_attr_group; +} + static void serial8250_config_port(struct uart_port *port, int flags) { struct uart_8250_port *up = up_to_u8250p(port); @@ -2687,6 +2827,9 @@ static void serial8250_config_port(struct uart_port *port, int flags) if ((port->type == PORT_XR17V35X) || (port->type == PORT_XR17D15X)) port->handle_irq = exar_handle_irq; + + register_dev_spec_attr_grp(up); + up->fcr = uart_config[up->port.type].fcr; } static int diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 730ab4b3d686..f93649e22c43 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -74,8 +74,10 @@ struct uart_8250_port { struct list_head list; /* ports on this IRQ */ unsigned short capabilities; /* port capabilities */ unsigned short bugs; /* port bugs */ + bool fifo_bug; /* min RX trigger if enabled */ unsigned int tx_loadsz; /* transmit fifo load size */ unsigned char acr; + unsigned char fcr; unsigned char ier; unsigned char lcr; unsigned char mcr; diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index 99b47058816a..df6c9ab6b0cd 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -88,6 +88,11 @@ #define UART_FCR6_T_TRIGGER_30 0x30 /* Mask for transmit trigger set at 30 */ #define UART_FCR7_64BYTE 0x20 /* Go into 64 byte mode (TI16C750) */ +#define UART_FCR_R_TRIG_SHIFT 6 +#define UART_FCR_R_TRIG_BITS(x) \ + (((x) & UART_FCR_TRIGGER_MASK) >> UART_FCR_R_TRIG_SHIFT) +#define UART_FCR_R_TRIG_MAX_STATE 4 + #define UART_LCR 3 /* Out: Line Control Register */ /* * Note: if the word length is 5 bits (UART_LCR_WLEN5), then setting -- cgit v1.2.3 From 48dc92b9fc3926844257316e75ba11eb5c742b2c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jun 2014 16:08:24 -0700 Subject: seccomp: add "seccomp" syscall This adds the new "seccomp" syscall with both an "operation" and "flags" parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). In addition to the TSYNC flag later in this patch series, there is a non-zero chance that this syscall could be used for configuring a fixed argument area for seccomp-tracer-aware processes to pass syscall arguments in the future. Hence, the use of "seccomp" not simply "seccomp_add_filter" for this syscall. Additionally, this syscall uses operation, flags, and user pointer for arguments because strictly passing arguments via a user pointer would mean seccomp itself would be unable to trivially filter the seccomp syscall itself. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- arch/Kconfig | 1 + arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 2 ++ include/uapi/asm-generic/unistd.h | 4 ++- include/uapi/linux/seccomp.h | 4 +++ kernel/seccomp.c | 55 +++++++++++++++++++++++++++++++++++---- kernel/sys_ni.c | 3 +++ 8 files changed, 65 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 97ff872c7acc..0eae9df35b88 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER - secure_computing is called from a ptrace_event()-safe context - secure_computing return value is checked and a return value of -1 results in the system call being skipped immediately. + - seccomp syscall wired up config SECCOMP_FILTER def_bool y diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351 i386 sched_setattr sys_sched_setattr 352 i386 sched_getattr sys_sched_getattr 353 i386 renameat2 sys_renameat2 +354 i386 seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314 common sched_setattr sys_sched_setattr 315 common sched_getattr sys_sched_getattr 316 common renameat2 sys_renameat2 +317 common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 333640608087..65acbf0e2867 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 276 __SYSCALL(__NR_renameat2, sys_renameat2) +#define __NR_seccomp 277 +__SYSCALL(__NR_seccomp, sys_seccomp) #undef __NR_syscalls -#define __NR_syscalls 277 +#define __NR_syscalls 278 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index ac2dc9f72973..b258878ba754 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -10,6 +10,10 @@ #define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 05cac2c2eca1..f0652578af75 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,6 +18,7 @@ #include #include #include +#include /* #define SECCOMP_DEBUG 1 */ @@ -314,7 +315,7 @@ free_prog: * * Returns 0 on success and non-zero otherwise. */ -static long seccomp_attach_user_filter(char __user *user_filter) +static long seccomp_attach_user_filter(const char __user *user_filter) { struct sock_fprog fprog; long ret = -EFAULT; @@ -517,6 +518,7 @@ out: #ifdef CONFIG_SECCOMP_FILTER /** * seccomp_set_mode_filter: internal function for setting seccomp filter + * @flags: flags to change filter behavior * @filter: struct sock_fprog containing filter * * This function may be called repeatedly to install additional filters. @@ -527,11 +529,16 @@ out: * * Returns 0 on success or -EINVAL on failure. */ -static long seccomp_set_mode_filter(char __user *filter) +static long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) { const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; long ret = -EINVAL; + /* Validate flags. */ + if (flags != 0) + goto out; + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; @@ -544,12 +551,35 @@ out: return ret; } #else -static inline long seccomp_set_mode_filter(char __user *filter) +static inline long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) { return -EINVAL; } #endif +/* Common entry point for both prctl and syscall. */ +static long do_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs) +{ + switch (op) { + case SECCOMP_SET_MODE_STRICT: + if (flags != 0 || uargs != NULL) + return -EINVAL; + return seccomp_set_mode_strict(); + case SECCOMP_SET_MODE_FILTER: + return seccomp_set_mode_filter(flags, uargs); + default: + return -EINVAL; + } +} + +SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags, + const char __user *, uargs) +{ + return do_seccomp(op, flags, uargs); +} + /** * prctl_set_seccomp: configures current->seccomp.mode * @seccomp_mode: requested mode to use @@ -559,12 +589,27 @@ static inline long seccomp_set_mode_filter(char __user *filter) */ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) { + unsigned int op; + char __user *uargs; + switch (seccomp_mode) { case SECCOMP_MODE_STRICT: - return seccomp_set_mode_strict(); + op = SECCOMP_SET_MODE_STRICT; + /* + * Setting strict mode through prctl always ignored filter, + * so make sure it is always NULL here to pass the internal + * check in do_seccomp(). + */ + uargs = NULL; + break; case SECCOMP_MODE_FILTER: - return seccomp_set_mode_filter(filter); + op = SECCOMP_SET_MODE_FILTER; + uargs = filter; + break; default: return -EINVAL; } + + /* prctl interface doesn't have flags, so they are always zero. */ + return do_seccomp(op, 0, uargs); } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 36441b51b5df..2904a2105914 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -213,3 +213,6 @@ cond_syscall(compat_sys_open_by_handle_at); /* compare kernel pointers */ cond_syscall(sys_kcmp); + +/* operate on Secure Computing state */ +cond_syscall(sys_seccomp); -- cgit v1.2.3 From c2e1f2e30daa551db3c670c0ccfeab20a540b9e1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 5 Jun 2014 00:23:17 -0700 Subject: seccomp: implement SECCOMP_FILTER_FLAG_TSYNC Applying restrictive seccomp filter programs to large or diverse codebases often requires handling threads which may be started early in the process lifetime (e.g., by code that is linked in). While it is possible to apply permissive programs prior to process start up, it is difficult to further restrict the kernel ABI to those threads after that point. This change adds a new seccomp syscall flag to SECCOMP_SET_MODE_FILTER for synchronizing thread group seccomp filters at filter installation time. When calling seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, filter) an attempt will be made to synchronize all threads in current's threadgroup to its new seccomp filter program. This is possible iff all threads are using a filter that is an ancestor to the filter current is attempting to synchronize to. NULL filters (where the task is running as SECCOMP_MODE_NONE) are also treated as ancestors allowing threads to be transitioned into SECCOMP_MODE_FILTER. If prctrl(PR_SET_NO_NEW_PRIVS, ...) has been set on the calling thread, no_new_privs will be set for all synchronized threads too. On success, 0 is returned. On failure, the pid of one of the failing threads will be returned and no filters will have been applied. The race conditions against another thread are: - requesting TSYNC (already handled by sighand lock) - performing a clone (already handled by sighand lock) - changing its filter (already handled by sighand lock) - calling exec (handled by cred_guard_mutex) The clone case is assisted by the fact that new threads will have their seccomp state duplicated from their parent before appearing on the tasklist. Holding cred_guard_mutex means that seccomp filters cannot be assigned while in the middle of another thread's exec (potentially bypassing no_new_privs or similar). The call to de_thread() may kill threads waiting for the mutex. Changes across threads to the filter pointer includes a barrier. Based on patches by Will Drewry. Suggested-by: Julien Tinnes Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- fs/exec.c | 2 +- include/linux/seccomp.h | 2 + include/uapi/linux/seccomp.h | 3 + kernel/seccomp.c | 135 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 140 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/exec.c b/fs/exec.c index 0f5c272410f6..ab1f1200ce5d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program * - the caller must hold ->cred_guard_mutex to protect against - * PTRACE_ATTACH + * PTRACE_ATTACH or seccomp thread-sync */ static void check_unsafe_exec(struct linux_binprm *bprm) { diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 9ff98b4bfe2e..5d586a45a319 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,6 +3,8 @@ #include +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) + #ifdef CONFIG_SECCOMP #include diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index b258878ba754..0f238a43ff1e 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -14,6 +14,9 @@ #define SECCOMP_SET_MODE_STRICT 0 #define SECCOMP_SET_MODE_FILTER 1 +/* Valid flags for SECCOMP_SET_MODE_FILTER */ +#define SECCOMP_FILTER_FLAG_TSYNC 1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 9065d2c79c56..74f460179171 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -26,6 +26,7 @@ #ifdef CONFIG_SECCOMP_FILTER #include #include +#include #include #include #include @@ -225,6 +226,114 @@ static inline void seccomp_assign_mode(struct task_struct *task, } #ifdef CONFIG_SECCOMP_FILTER +/* Returns 1 if the parent is an ancestor of the child. */ +static int is_ancestor(struct seccomp_filter *parent, + struct seccomp_filter *child) +{ + /* NULL is the root ancestor. */ + if (parent == NULL) + return 1; + for (; child; child = child->prev) + if (child == parent) + return 1; + return 0; +} + +/** + * seccomp_can_sync_threads: checks if all threads can be synchronized + * + * Expects sighand and cred_guard_mutex locks to be held. + * + * Returns 0 on success, -ve on error, or the pid of a thread which was + * either not in the correct seccomp mode or it did not have an ancestral + * seccomp filter. + */ +static inline pid_t seccomp_can_sync_threads(void) +{ + struct task_struct *thread, *caller; + + BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Validate all threads being eligible for synchronization. */ + caller = current; + for_each_thread(caller, thread) { + pid_t failed; + + /* Skip current, since it is initiating the sync. */ + if (thread == caller) + continue; + + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED || + (thread->seccomp.mode == SECCOMP_MODE_FILTER && + is_ancestor(thread->seccomp.filter, + caller->seccomp.filter))) + continue; + + /* Return the first thread that cannot be synchronized. */ + failed = task_pid_vnr(thread); + /* If the pid cannot be resolved, then return -ESRCH */ + if (unlikely(WARN_ON(failed == 0))) + failed = -ESRCH; + return failed; + } + + return 0; +} + +/** + * seccomp_sync_threads: sets all threads to use current's filter + * + * Expects sighand and cred_guard_mutex locks to be held, and for + * seccomp_can_sync_threads() to have returned success already + * without dropping the locks. + * + */ +static inline void seccomp_sync_threads(void) +{ + struct task_struct *thread, *caller; + + BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Synchronize all threads. */ + caller = current; + for_each_thread(caller, thread) { + /* Skip current, since it needs no changes. */ + if (thread == caller) + continue; + + /* Get a task reference for the new leaf node. */ + get_seccomp_filter(caller); + /* + * Drop the task reference to the shared ancestor since + * current's path will hold a reference. (This also + * allows a put before the assignment.) + */ + put_seccomp_filter(thread); + smp_store_release(&thread->seccomp.filter, + caller->seccomp.filter); + /* + * Opt the other thread into seccomp if needed. + * As threads are considered to be trust-realm + * equivalent (see ptrace_may_access), it is safe to + * allow one thread to transition the other. + */ + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) { + /* + * Don't let an unprivileged task work around + * the no_new_privs restriction by creating + * a thread that sets it up, enters seccomp, + * then dies. + */ + if (task_no_new_privs(caller)) + task_set_no_new_privs(thread); + + seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); + } + } +} + /** * seccomp_prepare_filter: Prepares a seccomp filter for use. * @fprog: BPF program to install @@ -364,6 +473,15 @@ static long seccomp_attach_filter(unsigned int flags, if (total_insns > MAX_INSNS_PER_PATH) return -ENOMEM; + /* If thread sync has been requested, check that it is possible. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) { + int ret; + + ret = seccomp_can_sync_threads(); + if (ret) + return ret; + } + /* * If there is an existing filter, make it the prev and don't drop its * task reference. @@ -371,6 +489,10 @@ static long seccomp_attach_filter(unsigned int flags, filter->prev = current->seccomp.filter; current->seccomp.filter = filter; + /* Now that the new filter is in place, synchronize to all threads. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) + seccomp_sync_threads(); + return 0; } @@ -590,7 +712,7 @@ static long seccomp_set_mode_filter(unsigned int flags, long ret = -EINVAL; /* Validate flags. */ - if (flags != 0) + if (flags & ~SECCOMP_FILTER_FLAG_MASK) return -EINVAL; /* Prepare the new filter before holding any locks. */ @@ -598,6 +720,14 @@ static long seccomp_set_mode_filter(unsigned int flags, if (IS_ERR(prepared)) return PTR_ERR(prepared); + /* + * Make sure we cannot change seccomp or nnp state via TSYNC + * while another thread is in the middle of calling exec. + */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC && + mutex_lock_killable(¤t->signal->cred_guard_mutex)) + goto out_free; + spin_lock_irq(¤t->sighand->siglock); if (!seccomp_may_assign_mode(seccomp_mode)) @@ -612,6 +742,9 @@ static long seccomp_set_mode_filter(unsigned int flags, seccomp_assign_mode(current, seccomp_mode); out: spin_unlock_irq(¤t->sighand->siglock); + if (flags & SECCOMP_FILTER_FLAG_TSYNC) + mutex_unlock(¤t->signal->cred_guard_mutex); +out_free: seccomp_filter_free(prepared); return ret; } -- cgit v1.2.3 From b4e05923f9c5bb65ac82988d7b53cfd7425e6f36 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 20 Jul 2014 13:35:44 -0700 Subject: Input: add support for Wacom protocol 4 serial tablets Recent version of xf86-input-wacom no longer support directly accessing serial tablets. Instead xf86-input-wacom now expects all wacom tablets to be driven by the kernel and to show up as evdev devices. This has caused old serial Wacom tablets to stop working for people who still have such tablets. Julian Squires has written a serio input driver to fix this: https://github.com/tokenrove/wacom-serial-iv This is a cleaned up version of this driver with improved Graphire support (I own an old Graphire myself). Signed-off-by: Julian Squires Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- MAINTAINERS | 7 + drivers/input/tablet/Kconfig | 10 + drivers/input/tablet/Makefile | 1 + drivers/input/tablet/wacom_serial4.c | 616 +++++++++++++++++++++++++++++++++++ include/uapi/linux/serio.h | 1 + 5 files changed, 635 insertions(+) create mode 100644 drivers/input/tablet/wacom_serial4.c (limited to 'include/uapi/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 51ebb779c5f3..c68bd8b6f8e9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9708,6 +9708,13 @@ M: Pierre Ossman S: Maintained F: drivers/mmc/host/wbsd.* +WACOM PROTOCOL 4 SERIAL TABLETS +M: Julian Squires +M: Hans de Goede +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/input/tablet/wacom_serial4.c + WATCHDOG DEVICE DRIVERS M: Wim Van Sebroeck L: linux-watchdog@vger.kernel.org diff --git a/drivers/input/tablet/Kconfig b/drivers/input/tablet/Kconfig index bed7cbf84cfd..a5121b09c63f 100644 --- a/drivers/input/tablet/Kconfig +++ b/drivers/input/tablet/Kconfig @@ -89,4 +89,14 @@ config TABLET_USB_WACOM To compile this driver as a module, choose M here: the module will be called wacom. +config TABLET_SERIAL_WACOM4 + tristate "Wacom protocol 4 serial tablet support" + select SERIO + help + Say Y here if you want to use Wacom protocol 4 serial tablets. + E.g. serial versions of the Cintiq, Graphire or Penpartner. + + To compile this driver as a module, choose M here: the + module will be called wacom_serial4. + endif diff --git a/drivers/input/tablet/Makefile b/drivers/input/tablet/Makefile index 3f6c25220638..4d9339fb3b63 100644 --- a/drivers/input/tablet/Makefile +++ b/drivers/input/tablet/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_TABLET_USB_GTCO) += gtco.o obj-$(CONFIG_TABLET_USB_HANWANG) += hanwang.o obj-$(CONFIG_TABLET_USB_KBTAB) += kbtab.o obj-$(CONFIG_TABLET_USB_WACOM) += wacom.o +obj-$(CONFIG_TABLET_SERIAL_WACOM4) += wacom_serial4.o diff --git a/drivers/input/tablet/wacom_serial4.c b/drivers/input/tablet/wacom_serial4.c new file mode 100644 index 000000000000..d3d251e27307 --- /dev/null +++ b/drivers/input/tablet/wacom_serial4.c @@ -0,0 +1,616 @@ +/* + * Wacom protocol 4 serial tablet driver + * + * Copyright 2014 Hans de Goede + * Copyright 2011-2012 Julian Squires + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version of 2 of the License, or (at your + * option) any later version. See the file COPYING in the main directory of + * this archive for more details. + * + * Many thanks to Bill Seremetis, without whom PenPartner support + * would not have been possible. Thanks to Patrick Mahoney. + * + * This driver was developed with reference to much code written by others, + * particularly: + * - elo, gunze drivers by Vojtech Pavlik ; + * - wacom_w8001 driver by Jaya Kumar ; + * - the USB wacom input driver, credited to many people + * (see drivers/input/tablet/wacom.h); + * - new and old versions of linuxwacom / xf86-input-wacom credited to + * Frederic Lepied, France. and + * Ping Cheng, Wacom. ; + * - and xf86wacom.c (a presumably ancient version of the linuxwacom code), + * by Frederic Lepied and Raph Levien . + * + * To do: + * - support pad buttons; (requires access to a model with pad buttons) + * - support (protocol 4-style) tilt (requires access to a > 1.4 rom model) + */ + +/* + * Wacom serial protocol 4 documentation taken from linuxwacom-0.9.9 code, + * protocol 4 uses 7 or 9 byte of data in the following format: + * + * Byte 1 + * bit 7 Sync bit always 1 + * bit 6 Pointing device detected + * bit 5 Cursor = 0 / Stylus = 1 + * bit 4 Reserved + * bit 3 1 if a button on the pointing device has been pressed + * bit 2 P0 (optional) + * bit 1 X15 + * bit 0 X14 + * + * Byte 2 + * bit 7 Always 0 + * bits 6-0 = X13 - X7 + * + * Byte 3 + * bit 7 Always 0 + * bits 6-0 = X6 - X0 + * + * Byte 4 + * bit 7 Always 0 + * bit 6 B3 + * bit 5 B2 + * bit 4 B1 + * bit 3 B0 + * bit 2 P1 (optional) + * bit 1 Y15 + * bit 0 Y14 + * + * Byte 5 + * bit 7 Always 0 + * bits 6-0 = Y13 - Y7 + * + * Byte 6 + * bit 7 Always 0 + * bits 6-0 = Y6 - Y0 + * + * Byte 7 + * bit 7 Always 0 + * bit 6 Sign of pressure data; or wheel-rel for cursor tool + * bit 5 P7; or REL1 for cursor tool + * bit 4 P6; or REL0 for cursor tool + * bit 3 P5 + * bit 2 P4 + * bit 1 P3 + * bit 0 P2 + * + * byte 8 and 9 are optional and present only + * in tilt mode. + * + * Byte 8 + * bit 7 Always 0 + * bit 6 Sign of tilt X + * bit 5 Xt6 + * bit 4 Xt5 + * bit 3 Xt4 + * bit 2 Xt3 + * bit 1 Xt2 + * bit 0 Xt1 + * + * Byte 9 + * bit 7 Always 0 + * bit 6 Sign of tilt Y + * bit 5 Yt6 + * bit 4 Yt5 + * bit 3 Yt4 + * bit 2 Yt3 + * bit 1 Yt2 + * bit 0 Yt1 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "wacom_wac.h" + +MODULE_AUTHOR("Julian Squires , Hans de Goede "); +MODULE_DESCRIPTION("Wacom protocol 4 serial tablet driver"); +MODULE_LICENSE("GPL"); + +#define REQUEST_MODEL_AND_ROM_VERSION "~#" +#define REQUEST_MAX_COORDINATES "~C\r" +#define REQUEST_CONFIGURATION_STRING "~R\r" +#define REQUEST_RESET_TO_PROTOCOL_IV "\r#" +/* + * Note: sending "\r$\r" causes at least the Digitizer II to send + * packets in ASCII instead of binary. "\r#" seems to undo that. + */ + +#define COMMAND_START_SENDING_PACKETS "ST\r" +#define COMMAND_STOP_SENDING_PACKETS "SP\r" +#define COMMAND_MULTI_MODE_INPUT "MU1\r" +#define COMMAND_ORIGIN_IN_UPPER_LEFT "OC1\r" +#define COMMAND_ENABLE_ALL_MACRO_BUTTONS "~M0\r" +#define COMMAND_DISABLE_GROUP_1_MACRO_BUTTONS "~M1\r" +#define COMMAND_TRANSMIT_AT_MAX_RATE "IT0\r" +#define COMMAND_DISABLE_INCREMENTAL_MODE "IN0\r" +#define COMMAND_ENABLE_CONTINUOUS_MODE "SR\r" +#define COMMAND_ENABLE_PRESSURE_MODE "PH1\r" +#define COMMAND_Z_FILTER "ZF1\r" + +/* Note that this is a protocol 4 packet without tilt information. */ +#define PACKET_LENGTH 7 +#define DATA_SIZE 32 + +/* flags */ +#define F_COVERS_SCREEN 0x01 +#define F_HAS_STYLUS2 0x02 +#define F_HAS_SCROLLWHEEL 0x04 + +enum { STYLUS = 1, ERASER, CURSOR }; + +static const struct { + int device_id; + int input_id; +} tools[] = { + { 0, 0 }, + { STYLUS_DEVICE_ID, BTN_TOOL_PEN }, + { ERASER_DEVICE_ID, BTN_TOOL_RUBBER }, + { CURSOR_DEVICE_ID, BTN_TOOL_MOUSE }, +}; + +struct wacom { + struct input_dev *dev; + struct completion cmd_done; + int result; + u8 expect; + u8 eraser_mask; + unsigned int extra_z_bits; + unsigned int flags; + unsigned int res_x, res_y; + unsigned int max_x, max_y; + unsigned int tool; + unsigned int idx; + u8 data[DATA_SIZE]; + char phys[32]; +}; + +enum { + MODEL_CINTIQ = 0x504C, /* PL */ + MODEL_CINTIQ2 = 0x4454, /* DT */ + MODEL_DIGITIZER_II = 0x5544, /* UD */ + MODEL_GRAPHIRE = 0x4554, /* ET */ + MODEL_PENPARTNER = 0x4354, /* CT */ +}; + +static void wacom_handle_model_response(struct wacom *wacom) +{ + int major_v, minor_v, r = 0; + char *p; + + p = strrchr(wacom->data, 'V'); + if (p) + r = sscanf(p + 1, "%u.%u", &major_v, &minor_v); + if (r != 2) + major_v = minor_v = 0; + + switch (wacom->data[2] << 8 | wacom->data[3]) { + case MODEL_CINTIQ: /* UNTESTED */ + case MODEL_CINTIQ2: + if ((wacom->data[2] << 8 | wacom->data[3]) == MODEL_CINTIQ) { + wacom->dev->name = "Wacom Cintiq"; + wacom->dev->id.version = MODEL_CINTIQ; + } else { + wacom->dev->name = "Wacom Cintiq II"; + wacom->dev->id.version = MODEL_CINTIQ2; + } + wacom->res_x = 508; + wacom->res_y = 508; + + switch (wacom->data[5] << 8 | wacom->data[6]) { + case 0x3731: /* PL-710 */ + wacom->res_x = 2540; + wacom->res_y = 2540; + /* fall through */ + case 0x3535: /* PL-550 */ + case 0x3830: /* PL-800 */ + wacom->extra_z_bits = 2; + } + + wacom->flags = F_COVERS_SCREEN; + break; + + case MODEL_PENPARTNER: + wacom->dev->name = "Wacom Penpartner"; + wacom->dev->id.version = MODEL_PENPARTNER; + wacom->res_x = 1000; + wacom->res_y = 1000; + break; + + case MODEL_GRAPHIRE: + wacom->dev->name = "Wacom Graphire"; + wacom->dev->id.version = MODEL_GRAPHIRE; + wacom->res_x = 1016; + wacom->res_y = 1016; + wacom->max_x = 5103; + wacom->max_y = 3711; + wacom->extra_z_bits = 2; + wacom->eraser_mask = 0x08; + wacom->flags = F_HAS_STYLUS2 | F_HAS_SCROLLWHEEL; + break; + + case MODEL_DIGITIZER_II: + wacom->dev->name = "Wacom Digitizer II"; + wacom->dev->id.version = MODEL_DIGITIZER_II; + if (major_v == 1 && minor_v <= 2) + wacom->extra_z_bits = 0; /* UNTESTED */ + break; + + default: + dev_err(&wacom->dev->dev, "Unsupported Wacom model %s\n", + wacom->data); + wacom->result = -ENODEV; + return; + } + + dev_info(&wacom->dev->dev, "%s tablet, version %u.%u\n", + wacom->dev->name, major_v, minor_v); +} + +static void wacom_handle_configuration_response(struct wacom *wacom) +{ + int r, skip; + + dev_dbg(&wacom->dev->dev, "Configuration string: %s\n", wacom->data); + r = sscanf(wacom->data, "~R%x,%u,%u,%u,%u", &skip, &skip, &skip, + &wacom->res_x, &wacom->res_y); + if (r != 5) + dev_warn(&wacom->dev->dev, "could not get resolution\n"); +} + +static void wacom_handle_coordinates_response(struct wacom *wacom) +{ + int r; + + dev_dbg(&wacom->dev->dev, "Coordinates string: %s\n", wacom->data); + r = sscanf(wacom->data, "~C%u,%u", &wacom->max_x, &wacom->max_y); + if (r != 2) + dev_warn(&wacom->dev->dev, "could not get max coordinates\n"); +} + +static void wacom_handle_response(struct wacom *wacom) +{ + if (wacom->data[0] != '~' || wacom->data[1] != wacom->expect) { + dev_err(&wacom->dev->dev, + "Wacom got an unexpected response: %s\n", wacom->data); + wacom->result = -EIO; + } else { + wacom->result = 0; + + switch (wacom->data[1]) { + case '#': + wacom_handle_model_response(wacom); + break; + case 'R': + wacom_handle_configuration_response(wacom); + break; + case 'C': + wacom_handle_coordinates_response(wacom); + break; + } + } + + complete(&wacom->cmd_done); +} + +static void wacom_handle_packet(struct wacom *wacom) +{ + u8 in_proximity_p, stylus_p, button; + unsigned int tool; + int x, y, z; + + in_proximity_p = wacom->data[0] & 0x40; + stylus_p = wacom->data[0] & 0x20; + button = (wacom->data[3] & 0x78) >> 3; + x = (wacom->data[0] & 3) << 14 | wacom->data[1]<<7 | wacom->data[2]; + y = (wacom->data[3] & 3) << 14 | wacom->data[4]<<7 | wacom->data[5]; + + if (in_proximity_p && stylus_p) { + z = wacom->data[6] & 0x7f; + if (wacom->extra_z_bits >= 1) + z = z << 1 | (wacom->data[3] & 0x4) >> 2; + if (wacom->extra_z_bits > 1) + z = z << 1 | (wacom->data[0] & 0x4) >> 2; + z = z ^ (0x40 << wacom->extra_z_bits); + } else { + z = -1; + } + + if (stylus_p) + tool = (button & wacom->eraser_mask) ? ERASER : STYLUS; + else + tool = CURSOR; + + if (tool != wacom->tool && wacom->tool != 0) { + input_report_key(wacom->dev, tools[wacom->tool].input_id, 0); + input_sync(wacom->dev); + } + wacom->tool = tool; + + input_report_key(wacom->dev, tools[tool].input_id, in_proximity_p); + input_report_abs(wacom->dev, ABS_MISC, + in_proximity_p ? tools[tool].device_id : 0); + input_report_abs(wacom->dev, ABS_X, x); + input_report_abs(wacom->dev, ABS_Y, y); + input_report_abs(wacom->dev, ABS_PRESSURE, z); + if (stylus_p) { + input_report_key(wacom->dev, BTN_TOUCH, button & 1); + input_report_key(wacom->dev, BTN_STYLUS, button & 2); + input_report_key(wacom->dev, BTN_STYLUS2, button & 4); + } else { + input_report_key(wacom->dev, BTN_LEFT, button & 1); + input_report_key(wacom->dev, BTN_RIGHT, button & 2); + input_report_key(wacom->dev, BTN_MIDDLE, button & 4); + /* handle relative wheel for non-stylus device */ + z = (wacom->data[6] & 0x30) >> 4; + if (wacom->data[6] & 0x40) + z = -z; + input_report_rel(wacom->dev, REL_WHEEL, z); + } + input_sync(wacom->dev); +} + +static void wacom_clear_data_buf(struct wacom *wacom) +{ + memset(wacom->data, 0, DATA_SIZE); + wacom->idx = 0; +} + +static irqreturn_t wacom_interrupt(struct serio *serio, unsigned char data, + unsigned int flags) +{ + struct wacom *wacom = serio_get_drvdata(serio); + + if (data & 0x80) + wacom->idx = 0; + + /* + * We're either expecting a carriage return-terminated ASCII + * response string, or a seven-byte packet with the MSB set on + * the first byte. + * + * Note however that some tablets (the PenPartner, for + * example) don't send a carriage return at the end of a + * command. We handle these by waiting for timeout. + */ + if (data == '\r' && !(wacom->data[0] & 0x80)) { + wacom_handle_response(wacom); + wacom_clear_data_buf(wacom); + return IRQ_HANDLED; + } + + /* Leave place for 0 termination */ + if (wacom->idx > (DATA_SIZE - 2)) { + dev_dbg(&wacom->dev->dev, + "throwing away %d bytes of garbage\n", wacom->idx); + wacom_clear_data_buf(wacom); + } + wacom->data[wacom->idx++] = data; + + if (wacom->idx == PACKET_LENGTH && (wacom->data[0] & 0x80)) { + wacom_handle_packet(wacom); + wacom_clear_data_buf(wacom); + } + + return IRQ_HANDLED; +} + +static void wacom_disconnect(struct serio *serio) +{ + struct wacom *wacom = serio_get_drvdata(serio); + + serio_close(serio); + serio_set_drvdata(serio, NULL); + input_unregister_device(wacom->dev); + kfree(wacom); +} + +static int wacom_send(struct serio *serio, const u8 *command) +{ + int err = 0; + + for (; !err && *command; command++) + err = serio_write(serio, *command); + + return err; +} + +static int wacom_send_setup_string(struct wacom *wacom, struct serio *serio) +{ + const u8 *cmd; + + switch (wacom->dev->id.version) { + case MODEL_CINTIQ: /* UNTESTED */ + cmd = COMMAND_ORIGIN_IN_UPPER_LEFT + COMMAND_TRANSMIT_AT_MAX_RATE + COMMAND_ENABLE_CONTINUOUS_MODE + COMMAND_START_SENDING_PACKETS; + break; + + case MODEL_PENPARTNER: + cmd = COMMAND_ENABLE_PRESSURE_MODE + COMMAND_START_SENDING_PACKETS; + break; + + default: + cmd = COMMAND_MULTI_MODE_INPUT + COMMAND_ORIGIN_IN_UPPER_LEFT + COMMAND_ENABLE_ALL_MACRO_BUTTONS + COMMAND_DISABLE_GROUP_1_MACRO_BUTTONS + COMMAND_TRANSMIT_AT_MAX_RATE + COMMAND_DISABLE_INCREMENTAL_MODE + COMMAND_ENABLE_CONTINUOUS_MODE + COMMAND_Z_FILTER + COMMAND_START_SENDING_PACKETS; + break; + } + + return wacom_send(serio, cmd); +} + +static int wacom_send_and_wait(struct wacom *wacom, struct serio *serio, + const u8 *cmd, const char *desc) +{ + int err; + unsigned long u; + + wacom->expect = cmd[1]; + init_completion(&wacom->cmd_done); + + err = wacom_send(serio, cmd); + if (err) + return err; + + u = wait_for_completion_timeout(&wacom->cmd_done, HZ); + if (u == 0) { + /* Timeout, process what we've received. */ + wacom_handle_response(wacom); + } + + wacom->expect = 0; + return wacom->result; +} + +static int wacom_setup(struct wacom *wacom, struct serio *serio) +{ + int err; + + /* Note that setting the link speed is the job of inputattach. + * We assume that reset negotiation has already happened, + * here. */ + err = wacom_send_and_wait(wacom, serio, REQUEST_MODEL_AND_ROM_VERSION, + "model and version"); + if (err) + return err; + + if (!(wacom->res_x && wacom->res_y)) { + err = wacom_send_and_wait(wacom, serio, + REQUEST_CONFIGURATION_STRING, + "configuration string"); + if (err) + return err; + } + + if (!(wacom->max_x && wacom->max_y)) { + err = wacom_send_and_wait(wacom, serio, + REQUEST_MAX_COORDINATES, + "coordinates string"); + if (err) + return err; + } + + return wacom_send_setup_string(wacom, serio); +} + +static int wacom_connect(struct serio *serio, struct serio_driver *drv) +{ + struct wacom *wacom; + struct input_dev *input_dev; + int err = -ENOMEM; + + wacom = kzalloc(sizeof(struct wacom), GFP_KERNEL); + input_dev = input_allocate_device(); + if (!wacom || !input_dev) + goto free_device; + + wacom->dev = input_dev; + wacom->extra_z_bits = 1; + wacom->eraser_mask = 0x04; + wacom->tool = wacom->idx = 0; + snprintf(wacom->phys, sizeof(wacom->phys), "%s/input0", serio->phys); + input_dev->phys = wacom->phys; + input_dev->id.bustype = BUS_RS232; + input_dev->id.vendor = SERIO_WACOM_IV; + input_dev->id.product = serio->id.extra; + input_dev->dev.parent = &serio->dev; + + input_dev->evbit[0] = + BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) | BIT_MASK(EV_REL); + set_bit(ABS_MISC, input_dev->absbit); + set_bit(BTN_TOOL_PEN, input_dev->keybit); + set_bit(BTN_TOOL_RUBBER, input_dev->keybit); + set_bit(BTN_TOOL_MOUSE, input_dev->keybit); + set_bit(BTN_TOUCH, input_dev->keybit); + set_bit(BTN_STYLUS, input_dev->keybit); + set_bit(BTN_LEFT, input_dev->keybit); + set_bit(BTN_RIGHT, input_dev->keybit); + set_bit(BTN_MIDDLE, input_dev->keybit); + + serio_set_drvdata(serio, wacom); + + err = serio_open(serio, drv); + if (err) + goto free_device; + + err = wacom_setup(wacom, serio); + if (err) + goto close_serio; + + set_bit(INPUT_PROP_DIRECT, input_dev->propbit); + if (!(wacom->flags & F_COVERS_SCREEN)) + __set_bit(INPUT_PROP_POINTER, input_dev->propbit); + + if (wacom->flags & F_HAS_STYLUS2) + __set_bit(BTN_STYLUS2, input_dev->keybit); + + if (wacom->flags & F_HAS_SCROLLWHEEL) + __set_bit(REL_WHEEL, input_dev->relbit); + + input_abs_set_res(wacom->dev, ABS_X, wacom->res_x); + input_abs_set_res(wacom->dev, ABS_Y, wacom->res_y); + input_set_abs_params(wacom->dev, ABS_X, 0, wacom->max_x, 0, 0); + input_set_abs_params(wacom->dev, ABS_Y, 0, wacom->max_y, 0, 0); + input_set_abs_params(wacom->dev, ABS_PRESSURE, -1, + (1 << (7 + wacom->extra_z_bits)) - 1, 0, 0); + + err = input_register_device(wacom->dev); + if (err) + goto close_serio; + + return 0; + +close_serio: + serio_close(serio); +free_device: + serio_set_drvdata(serio, NULL); + input_free_device(input_dev); + kfree(wacom); + return err; +} + +static struct serio_device_id wacom_serio_ids[] = { + { + .type = SERIO_RS232, + .proto = SERIO_WACOM_IV, + .id = SERIO_ANY, + .extra = SERIO_ANY, + }, + { 0 } +}; + +MODULE_DEVICE_TABLE(serio, wacom_serio_ids); + +static struct serio_driver wacom_drv = { + .driver = { + .name = "wacom_serial4", + }, + .description = "Wacom protocol 4 serial tablet driver", + .id_table = wacom_serio_ids, + .interrupt = wacom_interrupt, + .connect = wacom_connect, + .disconnect = wacom_disconnect, +}; + +module_serio_driver(wacom_drv); diff --git a/include/uapi/linux/serio.h b/include/uapi/linux/serio.h index 9f53fa7fc132..becdd78295cc 100644 --- a/include/uapi/linux/serio.h +++ b/include/uapi/linux/serio.h @@ -76,5 +76,6 @@ #define SERIO_HAMPSHIRE 0x3b #define SERIO_PS2MULT 0x3c #define SERIO_TSC40 0x3d +#define SERIO_WACOM_IV 0x3e #endif /* _UAPI_SERIO_H */ -- cgit v1.2.3 From ba4e9a61ad54c438d4c7b655e94e31f23a6fe13f Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sun, 20 Jul 2014 17:27:09 -0700 Subject: Input: uinput - add UI_GET_VERSION ioctl This ioctl is the counterpart to EVIOCGVERSION and returns the uinput-version the kernel was compiled with. Reviewed-by: Peter Hutterer Signed-off-by: David Herrmann Signed-off-by: Dmitry Torokhov --- drivers/input/misc/uinput.c | 6 ++++++ include/uapi/linux/uinput.h | 9 +++++++++ 2 files changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 883f045f37df..421e29e4cd81 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -723,6 +723,12 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd, } switch (cmd) { + case UI_GET_VERSION: + if (put_user(UINPUT_VERSION, + (unsigned int __user *)p)) + retval = -EFAULT; + goto out; + case UI_DEV_CREATE: retval = uinput_create_device(udev); goto out; diff --git a/include/uapi/linux/uinput.h b/include/uapi/linux/uinput.h index 0389b489bbba..baeab83deb64 100644 --- a/include/uapi/linux/uinput.h +++ b/include/uapi/linux/uinput.h @@ -84,6 +84,15 @@ struct uinput_ff_erase { */ #define UI_GET_SYSNAME(len) _IOC(_IOC_READ, UINPUT_IOCTL_BASE, 300, len) +/** + * UI_GET_VERSION - Return version of uinput protocol + * + * This writes uinput protocol version implemented by the kernel into + * the integer pointed to by the ioctl argument. The protocol version + * is hard-coded in the kernel and is independent of the uinput device. + */ +#define UI_GET_VERSION _IOR(UINPUT_IOCTL_BASE, 301, unsigned int) + /* * To write a force-feedback-capable driver, the upload_effect * and erase_effect callbacks in input_dev must be implemented. -- cgit v1.2.3 From 9e9e3927ea76c95873f153c19b7c0d7f319dabc4 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Fri, 27 Jun 2014 17:42:18 -0300 Subject: [media] v4l: uapi: add SDR format RU12LE V4L2_SDR_FMT_RU12LE - Real unsigned 12-bit little endian sample inside 16-bit (2 byte). V4L2 FourCC: RU12. Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 1f1a65c1fe8e..e8f01cd8a17d 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -453,6 +453,7 @@ struct v4l2_pix_format { /* SDR formats - used only for Software Defined Radio devices */ #define V4L2_SDR_FMT_CU8 v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */ #define V4L2_SDR_FMT_CU16LE v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */ +#define V4L2_SDR_FMT_RU12LE v4l2_fourcc('R', 'U', '1', '2') /* real u12le */ /* priv field value to indicates that subsequent fields are valid. */ #define V4L2_PIX_FMT_PRIV_MAGIC 0xfeedcafe -- cgit v1.2.3 From c0c03886763f4d5ccfc5d854fd8dfe77814753be Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Fri, 11 Jul 2014 13:25:14 -0300 Subject: [media] v4l: uapi: add SDR format CS8 V4L2_SDR_FMT_CS8 - Complex signed 8-bit IQ sample Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index e8f01cd8a17d..e943e4cc8df6 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -453,6 +453,7 @@ struct v4l2_pix_format { /* SDR formats - used only for Software Defined Radio devices */ #define V4L2_SDR_FMT_CU8 v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */ #define V4L2_SDR_FMT_CU16LE v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */ +#define V4L2_SDR_FMT_CS8 v4l2_fourcc('C', 'S', '0', '8') /* complex s8 */ #define V4L2_SDR_FMT_RU12LE v4l2_fourcc('R', 'U', '1', '2') /* real u12le */ /* priv field value to indicates that subsequent fields are valid. */ -- cgit v1.2.3 From 5c5be570043f864b8373ed26fd68698fe352a0ef Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Fri, 11 Jul 2014 16:35:47 -0300 Subject: [media] v4l: uapi: add SDR format CS14 V4L2_SDR_FMT_CS14LE - Complex signed 14-bit IQ sample Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index e943e4cc8df6..6da302d46145 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -454,6 +454,7 @@ struct v4l2_pix_format { #define V4L2_SDR_FMT_CU8 v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */ #define V4L2_SDR_FMT_CU16LE v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */ #define V4L2_SDR_FMT_CS8 v4l2_fourcc('C', 'S', '0', '8') /* complex s8 */ +#define V4L2_SDR_FMT_CS14LE v4l2_fourcc('C', 'S', '1', '4') /* complex s14le */ #define V4L2_SDR_FMT_RU12LE v4l2_fourcc('R', 'U', '1', '2') /* real u12le */ /* priv field value to indicates that subsequent fields are valid. */ -- cgit v1.2.3 From aaa968b636550042023bffa075adcdb01e6d257c Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Fri, 18 Jul 2014 15:28:24 -0300 Subject: [media] v4l: videodev2: add buffer size to SDR format Add buffer size field to struct v4l2_sdr_format. It is used for negotiate streaming buffer size between application and driver. Signed-off-by: Antti Palosaari Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 6da302d46145..8fc4822c7883 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1783,10 +1783,12 @@ struct v4l2_pix_format_mplane { /** * struct v4l2_sdr_format - SDR format definition * @pixelformat: little endian four character code (fourcc) + * @buffersize: maximum size in bytes required for data */ struct v4l2_sdr_format { __u32 pixelformat; - __u8 reserved[28]; + __u32 buffersize; + __u8 reserved[24]; } __attribute__ ((packed)); /** -- cgit v1.2.3 From 273886b4f71a1a960a3625833df2f037efffab45 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 18 Jul 2014 02:58:41 -0300 Subject: [media] videodev2.h: add defines for the VBI field start lines While working with raw and sliced VBI support in several applications I noticed that you really need to know the start linenumbers for each video field in order to correctly convert the start line numbers reported by v4l2_vbi_format to the line numbers used in v4l2_sliced_vbi_format. This patch adds four defines that specify the start lines for each field for both 525 and 625 line standards. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 8fc4822c7883..5fd42027877a 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1642,6 +1642,12 @@ struct v4l2_vbi_format { #define V4L2_VBI_UNSYNC (1 << 0) #define V4L2_VBI_INTERLACED (1 << 1) +/* ITU-R start lines for each field */ +#define V4L2_VBI_ITU_525_F1_START (1) +#define V4L2_VBI_ITU_525_F2_START (264) +#define V4L2_VBI_ITU_625_F1_START (1) +#define V4L2_VBI_ITU_625_F2_START (314) + /* Sliced VBI * * This implements is a proposal V4L2 API to allow SLICED VBI -- cgit v1.2.3 From d7afaec0b564f0609e116f562983b8e72fc3e9c9 Mon Sep 17 00:00:00 2001 From: Andrew Gallagher Date: Tue, 22 Jul 2014 16:37:43 +0200 Subject: fuse: add FUSE_NO_OPEN_SUPPORT flag to INIT Here some additional changes to set a capability flag so that clients can detect when it's appropriate to return -ENOSYS from open. This amends the following commit introduced in 3.14: 7678ac50615d fuse: support clients that don't implement 'open' However we can only add the flag to 3.15 and later since there was no protocol version update in 3.14. Signed-off-by: Miklos Szeredi Cc: # v3.15+ --- fs/fuse/inode.c | 2 +- include/uapi/linux/fuse.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 5ca874f2415b..03246cd9d47a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -935,7 +935,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | - FUSE_WRITEBACK_CACHE; + FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 40b5ca8a1b1f..25084a052a1e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -101,6 +101,7 @@ * - add FATTR_CTIME * - add ctime and ctimensec to fuse_setattr_in * - add FUSE_RENAME2 request + * - add FUSE_NO_OPEN_SUPPORT flag */ #ifndef _LINUX_FUSE_H @@ -229,6 +230,7 @@ struct fuse_file_lock { * FUSE_READDIRPLUS_AUTO: adaptive readdirplus * FUSE_ASYNC_DIO: asynchronous direct I/O submission * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes + * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -247,6 +249,7 @@ struct fuse_file_lock { #define FUSE_READDIRPLUS_AUTO (1 << 14) #define FUSE_ASYNC_DIO (1 << 15) #define FUSE_WRITEBACK_CACHE (1 << 16) +#define FUSE_NO_OPEN_SUPPORT (1 << 17) /** * CUSE INIT request/reply flags -- cgit v1.2.3 From 5cd667b0a4567048bb555927d6ee564f4e5620a9 Mon Sep 17 00:00:00 2001 From: Alex Wang Date: Thu, 17 Jul 2014 15:14:13 -0700 Subject: openvswitch: Allow each vport to have an array of 'port_id's. In order to allow handlers directly read upcalls from datapath, we need to support per-handler netlink socket for each vport in datapath. This commit makes this happen. Also, it is guaranteed to be backward compatible with previous branch. Signed-off-by: Alex Wang Acked-by: Thomas Graf Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 13 +++-- net/openvswitch/datapath.c | 23 ++++++--- net/openvswitch/vport.c | 101 ++++++++++++++++++++++++++++++++++++++- net/openvswitch/vport.h | 27 +++++++++-- 4 files changed, 148 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 0b979ee4bfc0..a794d1dd7b40 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -118,6 +118,9 @@ struct ovs_vport_stats { /* Allow last Netlink attribute to be unaligned */ #define OVS_DP_F_UNALIGNED (1 << 0) +/* Allow datapath to associate multiple Netlink PIDs to each vport */ +#define OVS_DP_F_VPORT_PIDS (1 << 1) + /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) @@ -203,9 +206,10 @@ enum ovs_vport_type { * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes * plus a null terminator. * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information. - * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that - * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on - * this port. A value of zero indicates that upcalls should not be sent. + * @OVS_VPORT_ATTR_UPCALL_PID: The array of Netlink socket pids in userspace + * among which OVS_PACKET_CMD_MISS upcalls will be distributed for packets + * received on this port. If this is a single-element array of value 0, + * upcalls should not be sent. * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for * packets sent or received through the vport. * @@ -228,7 +232,8 @@ enum ovs_vport_attr { OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */ OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */ OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */ - OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */ + OVS_VPORT_ATTR_UPCALL_PID, /* array of u32 Netlink socket PIDs for */ + /* receiving upcalls */ OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ __OVS_VPORT_ATTR_MAX }; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 20f59b62721a..65a8e5c089e4 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -266,7 +266,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) upcall.cmd = OVS_PACKET_CMD_MISS; upcall.key = &key; upcall.userdata = NULL; - upcall.portid = p->upcall_portid; + upcall.portid = ovs_vport_find_upcall_portid(p, skb); ovs_dp_upcall(dp, skb, &upcall); consume_skb(skb); stats_counter = &stats->n_missed; @@ -1373,7 +1373,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = NULL; parms.dp = dp; parms.port_no = OVSP_LOCAL; - parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; ovs_dp_change(dp, a); @@ -1632,8 +1632,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || - nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) || - nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid)) + nla_put_string(skb, OVS_VPORT_ATTR_NAME, + vport->ops->get_name(vport))) goto nla_put_failure; ovs_vport_get_stats(vport, &vport_stats); @@ -1641,6 +1641,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, &vport_stats)) goto nla_put_failure; + if (ovs_vport_get_upcall_portids(vport, skb)) + goto nla_put_failure; + err = ovs_vport_get_options(vport, skb); if (err == -EMSGSIZE) goto error; @@ -1762,7 +1765,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = a[OVS_VPORT_ATTR_OPTIONS]; parms.dp = dp; parms.port_no = port_no; - parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID]; vport = new_vport(&parms); err = PTR_ERR(vport); @@ -1812,8 +1815,14 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock_free; } - if (a[OVS_VPORT_ATTR_UPCALL_PID]) - vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + + if (a[OVS_VPORT_ATTR_UPCALL_PID]) { + struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID]; + + err = ovs_vport_set_upcall_portids(vport, ids); + if (err) + goto exit_unlock_free; + } err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, info->snd_seq, 0, OVS_VPORT_CMD_NEW); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 42c0f4a0b78c..702fb21bfe15 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -134,10 +134,12 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->dp = parms->dp; vport->port_no = parms->port_no; - vport->upcall_portid = parms->upcall_portid; vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); + if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) + return ERR_PTR(-EINVAL); + vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!vport->percpu_stats) { kfree(vport); @@ -161,6 +163,10 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, */ void ovs_vport_free(struct vport *vport) { + /* vport is freed from RCU callback or error path, Therefore + * it is safe to use raw dereference. + */ + kfree(rcu_dereference_raw(vport->upcall_portids)); free_percpu(vport->percpu_stats); kfree(vport); } @@ -326,6 +332,99 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) return 0; } +/** + * ovs_vport_set_upcall_portids - set upcall portids of @vport. + * + * @vport: vport to modify. + * @ids: new configuration, an array of port ids. + * + * Sets the vport's upcall_portids to @ids. + * + * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed + * as an array of U32. + * + * Must be called with ovs_mutex. + */ +int ovs_vport_set_upcall_portids(struct vport *vport, struct nlattr *ids) +{ + struct vport_portids *old, *vport_portids; + + if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) + return -EINVAL; + + old = ovsl_dereference(vport->upcall_portids); + + vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids), + GFP_KERNEL); + if (!vport_portids) + return -ENOMEM; + + vport_portids->n_ids = nla_len(ids) / sizeof(u32); + vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids); + nla_memcpy(vport_portids->ids, ids, nla_len(ids)); + + rcu_assign_pointer(vport->upcall_portids, vport_portids); + + if (old) + kfree_rcu(old, rcu); + return 0; +} + +/** + * ovs_vport_get_upcall_portids - get the upcall_portids of @vport. + * + * @vport: vport from which to retrieve the portids. + * @skb: sk_buff where portids should be appended. + * + * Retrieves the configuration of the given vport, appending the + * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall + * portids to @skb. + * + * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room. + * If an error occurs, @skb is left unmodified. Must be called with + * ovs_mutex or rcu_read_lock. + */ +int ovs_vport_get_upcall_portids(const struct vport *vport, + struct sk_buff *skb) +{ + struct vport_portids *ids; + + ids = rcu_dereference_ovsl(vport->upcall_portids); + + if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS) + return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID, + ids->n_ids * sizeof(u32), (void *)ids->ids); + else + return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]); +} + +/** + * ovs_vport_find_upcall_portid - find the upcall portid to send upcall. + * + * @vport: vport from which the missed packet is received. + * @skb: skb that the missed packet was received. + * + * Uses the skb_get_hash() to select the upcall portid to send the + * upcall. + * + * Returns the portid of the target socket. Must be called with rcu_read_lock. + */ +u32 ovs_vport_find_upcall_portid(const struct vport *p, struct sk_buff *skb) +{ + struct vport_portids *ids; + u32 ids_index; + u32 hash; + + ids = rcu_dereference(p->upcall_portids); + + if (ids->n_ids == 1 && ids->ids[0] == 0) + return 0; + + hash = skb_get_hash(skb); + ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids); + return ids->ids[ids_index]; +} + /** * ovs_vport_receive - pass up received packet to the datapath for processing * diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 8d721e62f388..35f89d84b45e 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,10 @@ void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *); int ovs_vport_set_options(struct vport *, struct nlattr *options); int ovs_vport_get_options(const struct vport *, struct sk_buff *); +int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids); +int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); +u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); + int ovs_vport_send(struct vport *, struct sk_buff *); /* The following definitions are for implementers of vport devices: */ @@ -62,13 +67,27 @@ struct vport_err_stats { u64 tx_dropped; u64 tx_errors; }; +/** + * struct vport_portids - array of netlink portids of a vport. + * must be protected by rcu. + * @rn_ids: The reciprocal value of @n_ids. + * @rcu: RCU callback head for deferred destruction. + * @n_ids: Size of @ids array. + * @ids: Array storing the Netlink socket pids to be used for packets received + * on this port that miss the flow table. + */ +struct vport_portids { + struct reciprocal_value rn_ids; + struct rcu_head rcu; + u32 n_ids; + u32 ids[]; +}; /** * struct vport - one port within a datapath * @rcu: RCU callback head for deferred destruction. * @dp: Datapath to which this port belongs. - * @upcall_portid: The Netlink port to use for packets received on this port that - * miss the flow table. + * @upcall_portids: RCU protected 'struct vport_portids'. * @port_no: Index into @dp's @ports array. * @hash_node: Element in @dev_table hash table in vport.c. * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. @@ -80,7 +99,7 @@ struct vport_err_stats { struct vport { struct rcu_head rcu; struct datapath *dp; - u32 upcall_portid; + struct vport_portids __rcu *upcall_portids; u16 port_no; struct hlist_node hash_node; @@ -111,7 +130,7 @@ struct vport_parms { /* For ovs_vport_alloc(). */ struct datapath *dp; u16 port_no; - u32 upcall_portid; + struct nlattr *upcall_portids; }; /** -- cgit v1.2.3 From 811c508104d092683ea5fe86cdcfd23dded22934 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 21 Jul 2014 10:45:37 -0300 Subject: [media] v4l2-ctrls: add new RDS TX controls The si4713 supports several RDS features not yet implemented in the driver. This patch adds the missing RDS functionality to the list of RDS controls. The ALT_FREQS control is a compound control containing an array of up to 25 (the maximum according to the RDS standard) frequencies. To support that the V4L2_CTRL_TYPE_U32 was added. Signed-off-by: Hans Verkuil Cc: Eduardo Valentin Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-ctrls.c | 35 +++++++++++++++++++++++++++++++++++ include/media/v4l2-ctrls.h | 2 ++ include/uapi/linux/v4l2-controls.h | 9 +++++++++ include/uapi/linux/videodev2.h | 2 ++ 4 files changed, 48 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index 5db038525f99..4863cf05175d 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -805,6 +805,15 @@ const char *v4l2_ctrl_get_name(u32 id) case V4L2_CID_RDS_TX_PTY: return "RDS Program Type"; case V4L2_CID_RDS_TX_PS_NAME: return "RDS PS Name"; case V4L2_CID_RDS_TX_RADIO_TEXT: return "RDS Radio Text"; + case V4L2_CID_RDS_TX_MONO_STEREO: return "RDS Stereo"; + case V4L2_CID_RDS_TX_ARTIFICIAL_HEAD: return "RDS Artificial Head"; + case V4L2_CID_RDS_TX_COMPRESSED: return "RDS Compressed"; + case V4L2_CID_RDS_TX_DYNAMIC_PTY: return "RDS Dynamic PTY"; + case V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT: return "RDS Traffic Announcement"; + case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM: return "RDS Traffic Program"; + case V4L2_CID_RDS_TX_MUSIC_SPEECH: return "RDS Music"; + case V4L2_CID_RDS_TX_ALT_FREQS_ENABLE: return "RDS Enable Alt Frequencies"; + case V4L2_CID_RDS_TX_ALT_FREQS: return "RDS Alternate Frequencies"; case V4L2_CID_AUDIO_LIMITER_ENABLED: return "Audio Limiter Feature Enabled"; case V4L2_CID_AUDIO_LIMITER_RELEASE_TIME: return "Audio Limiter Release Time"; case V4L2_CID_AUDIO_LIMITER_DEVIATION: return "Audio Limiter Deviation"; @@ -946,6 +955,14 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, case V4L2_CID_RF_TUNER_IF_GAIN_AUTO: case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO: case V4L2_CID_RF_TUNER_PLL_LOCK: + case V4L2_CID_RDS_TX_MONO_STEREO: + case V4L2_CID_RDS_TX_ARTIFICIAL_HEAD: + case V4L2_CID_RDS_TX_COMPRESSED: + case V4L2_CID_RDS_TX_DYNAMIC_PTY: + case V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT: + case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM: + case V4L2_CID_RDS_TX_MUSIC_SPEECH: + case V4L2_CID_RDS_TX_ALT_FREQS_ENABLE: *type = V4L2_CTRL_TYPE_BOOLEAN; *min = 0; *max = *step = 1; @@ -1089,6 +1106,9 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, case V4L2_CID_DETECT_MD_THRESHOLD_GRID: *type = V4L2_CTRL_TYPE_U16; break; + case V4L2_CID_RDS_TX_ALT_FREQS: + *type = V4L2_CTRL_TYPE_U32; + break; default: *type = V4L2_CTRL_TYPE_INTEGER; break; @@ -1209,6 +1229,8 @@ static bool std_equal(const struct v4l2_ctrl *ctrl, u32 idx, return ptr1.p_u8[idx] == ptr2.p_u8[idx]; case V4L2_CTRL_TYPE_U16: return ptr1.p_u16[idx] == ptr2.p_u16[idx]; + case V4L2_CTRL_TYPE_U32: + return ptr1.p_u32[idx] == ptr2.p_u32[idx]; default: if (ctrl->is_int) return ptr1.p_s32[idx] == ptr2.p_s32[idx]; @@ -1242,6 +1264,9 @@ static void std_init(const struct v4l2_ctrl *ctrl, u32 idx, case V4L2_CTRL_TYPE_U16: ptr.p_u16[idx] = ctrl->default_value; break; + case V4L2_CTRL_TYPE_U32: + ptr.p_u32[idx] = ctrl->default_value; + break; default: idx *= ctrl->elem_size; memset(ptr.p + idx, 0, ctrl->elem_size); @@ -1289,6 +1314,9 @@ static void std_log(const struct v4l2_ctrl *ctrl) case V4L2_CTRL_TYPE_U16: pr_cont("%u", (unsigned)*ptr.p_u16); break; + case V4L2_CTRL_TYPE_U32: + pr_cont("%u", (unsigned)*ptr.p_u32); + break; default: pr_cont("unknown type %d", ctrl->type); break; @@ -1346,6 +1374,8 @@ static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx, return ROUND_TO_RANGE(ptr.p_u8[idx], u8, ctrl); case V4L2_CTRL_TYPE_U16: return ROUND_TO_RANGE(ptr.p_u16[idx], u16, ctrl); + case V4L2_CTRL_TYPE_U32: + return ROUND_TO_RANGE(ptr.p_u32[idx], u32, ctrl); case V4L2_CTRL_TYPE_BOOLEAN: ptr.p_s32[idx] = !!ptr.p_s32[idx]; @@ -1578,6 +1608,7 @@ static int check_range(enum v4l2_ctrl_type type, /* fall through */ case V4L2_CTRL_TYPE_U8: case V4L2_CTRL_TYPE_U16: + case V4L2_CTRL_TYPE_U32: case V4L2_CTRL_TYPE_INTEGER: case V4L2_CTRL_TYPE_INTEGER64: if (step == 0 || min > max || def < min || def > max) @@ -1893,6 +1924,9 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl, case V4L2_CTRL_TYPE_U16: elem_size = sizeof(u16); break; + case V4L2_CTRL_TYPE_U32: + elem_size = sizeof(u32); + break; default: if (type < V4L2_CTRL_COMPOUND_TYPES) elem_size = sizeof(s32); @@ -3248,6 +3282,7 @@ int __v4l2_ctrl_modify_range(struct v4l2_ctrl *ctrl, case V4L2_CTRL_TYPE_BITMASK: case V4L2_CTRL_TYPE_U8: case V4L2_CTRL_TYPE_U16: + case V4L2_CTRL_TYPE_U32: if (ctrl->is_array) return -EINVAL; ret = check_range(ctrl->type, min, max, step, def); diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index d6540d201764..b7cd7a665e35 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -41,6 +41,7 @@ struct poll_table_struct; * @p_s64: Pointer to a 64-bit signed value. * @p_u8: Pointer to a 8-bit unsigned value. * @p_u16: Pointer to a 16-bit unsigned value. + * @p_u32: Pointer to a 32-bit unsigned value. * @p_char: Pointer to a string. * @p: Pointer to a compound value. */ @@ -49,6 +50,7 @@ union v4l2_ctrl_ptr { s64 *p_s64; u8 *p_u8; u16 *p_u16; + u32 *p_u32; char *p_char; void *p; }; diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index db526d1c8309..a13e6fef987e 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -757,6 +757,15 @@ enum v4l2_auto_focus_range { #define V4L2_CID_RDS_TX_PTY (V4L2_CID_FM_TX_CLASS_BASE + 3) #define V4L2_CID_RDS_TX_PS_NAME (V4L2_CID_FM_TX_CLASS_BASE + 5) #define V4L2_CID_RDS_TX_RADIO_TEXT (V4L2_CID_FM_TX_CLASS_BASE + 6) +#define V4L2_CID_RDS_TX_MONO_STEREO (V4L2_CID_FM_TX_CLASS_BASE + 7) +#define V4L2_CID_RDS_TX_ARTIFICIAL_HEAD (V4L2_CID_FM_TX_CLASS_BASE + 8) +#define V4L2_CID_RDS_TX_COMPRESSED (V4L2_CID_FM_TX_CLASS_BASE + 9) +#define V4L2_CID_RDS_TX_DYNAMIC_PTY (V4L2_CID_FM_TX_CLASS_BASE + 10) +#define V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT (V4L2_CID_FM_TX_CLASS_BASE + 11) +#define V4L2_CID_RDS_TX_TRAFFIC_PROGRAM (V4L2_CID_FM_TX_CLASS_BASE + 12) +#define V4L2_CID_RDS_TX_MUSIC_SPEECH (V4L2_CID_FM_TX_CLASS_BASE + 13) +#define V4L2_CID_RDS_TX_ALT_FREQS_ENABLE (V4L2_CID_FM_TX_CLASS_BASE + 14) +#define V4L2_CID_RDS_TX_ALT_FREQS (V4L2_CID_FM_TX_CLASS_BASE + 15) #define V4L2_CID_AUDIO_LIMITER_ENABLED (V4L2_CID_FM_TX_CLASS_BASE + 64) #define V4L2_CID_AUDIO_LIMITER_RELEASE_TIME (V4L2_CID_FM_TX_CLASS_BASE + 65) diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 5fd42027877a..778a3298fb34 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1288,6 +1288,7 @@ struct v4l2_ext_control { char *string; __u8 *p_u8; __u16 *p_u16; + __u32 *p_u32; void *ptr; }; } __attribute__ ((packed)); @@ -1320,6 +1321,7 @@ enum v4l2_ctrl_type { V4L2_CTRL_COMPOUND_TYPES = 0x0100, V4L2_CTRL_TYPE_U8 = 0x0100, V4L2_CTRL_TYPE_U16 = 0x0101, + V4L2_CTRL_TYPE_U32 = 0x0102, }; /* Used in the VIDIOC_QUERYCTRL ioctl for querying controls */ -- cgit v1.2.3 From 9570a14847a9d334a0baf5a5921da2dbc6b9f6d2 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 21 Jul 2014 10:45:40 -0300 Subject: [media] v4l2-ctrls: add RX RDS controls The radio-miropcm20 driver has firmware that decodes the RDS signals. So in that case the RDS data becomes available in the form of controls. Add support for these controls to the control framework, allowing the miro driver to use them. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-ctrls.c | 19 +++++++++++++++++-- include/uapi/linux/v4l2-controls.h | 6 ++++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index 4863cf05175d..2d8ced8ad80b 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -881,7 +881,6 @@ const char *v4l2_ctrl_get_name(u32 id) case V4L2_CID_FM_RX_CLASS: return "FM Radio Receiver Controls"; case V4L2_CID_TUNE_DEEMPHASIS: return "De-Emphasis"; case V4L2_CID_RDS_RECEPTION: return "RDS Reception"; - case V4L2_CID_RF_TUNER_CLASS: return "RF Tuner Controls"; case V4L2_CID_RF_TUNER_LNA_GAIN_AUTO: return "LNA Gain, Auto"; case V4L2_CID_RF_TUNER_LNA_GAIN: return "LNA Gain"; @@ -892,6 +891,12 @@ const char *v4l2_ctrl_get_name(u32 id) case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO: return "Bandwidth, Auto"; case V4L2_CID_RF_TUNER_BANDWIDTH: return "Bandwidth"; case V4L2_CID_RF_TUNER_PLL_LOCK: return "PLL Lock"; + case V4L2_CID_RDS_RX_PTY: return "RDS Program Type"; + case V4L2_CID_RDS_RX_PS_NAME: return "RDS PS Name"; + case V4L2_CID_RDS_RX_RADIO_TEXT: return "RDS Radio Text"; + case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT: return "RDS Traffic Announcement"; + case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM: return "RDS Traffic Program"; + case V4L2_CID_RDS_RX_MUSIC_SPEECH: return "RDS Music"; /* Detection controls */ /* Keep the order of the 'case's the same as in v4l2-controls.h! */ @@ -900,7 +905,6 @@ const char *v4l2_ctrl_get_name(u32 id) case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD: return "MD Global Threshold"; case V4L2_CID_DETECT_MD_THRESHOLD_GRID: return "MD Threshold Grid"; case V4L2_CID_DETECT_MD_REGION_GRID: return "MD Region Grid"; - default: return NULL; } @@ -963,6 +967,9 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM: case V4L2_CID_RDS_TX_MUSIC_SPEECH: case V4L2_CID_RDS_TX_ALT_FREQS_ENABLE: + case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT: + case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM: + case V4L2_CID_RDS_RX_MUSIC_SPEECH: *type = V4L2_CTRL_TYPE_BOOLEAN; *min = 0; *max = *step = 1; @@ -1035,6 +1042,8 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, break; case V4L2_CID_RDS_TX_PS_NAME: case V4L2_CID_RDS_TX_RADIO_TEXT: + case V4L2_CID_RDS_RX_PS_NAME: + case V4L2_CID_RDS_RX_RADIO_TEXT: *type = V4L2_CTRL_TYPE_STRING; break; case V4L2_CID_ISO_SENSITIVITY: @@ -1166,6 +1175,12 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, case V4L2_CID_DV_TX_RXSENSE: case V4L2_CID_DV_TX_EDID_PRESENT: case V4L2_CID_DV_RX_POWER_PRESENT: + case V4L2_CID_RDS_RX_PTY: + case V4L2_CID_RDS_RX_PS_NAME: + case V4L2_CID_RDS_RX_RADIO_TEXT: + case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT: + case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM: + case V4L2_CID_RDS_RX_MUSIC_SPEECH: *flags |= V4L2_CTRL_FLAG_READ_ONLY; break; case V4L2_CID_RF_TUNER_PLL_LOCK: diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index a13e6fef987e..e946e43fb8d5 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -910,6 +910,12 @@ enum v4l2_deemphasis { }; #define V4L2_CID_RDS_RECEPTION (V4L2_CID_FM_RX_CLASS_BASE + 2) +#define V4L2_CID_RDS_RX_PTY (V4L2_CID_FM_RX_CLASS_BASE + 3) +#define V4L2_CID_RDS_RX_PS_NAME (V4L2_CID_FM_RX_CLASS_BASE + 4) +#define V4L2_CID_RDS_RX_RADIO_TEXT (V4L2_CID_FM_RX_CLASS_BASE + 5) +#define V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT (V4L2_CID_FM_RX_CLASS_BASE + 6) +#define V4L2_CID_RDS_RX_TRAFFIC_PROGRAM (V4L2_CID_FM_RX_CLASS_BASE + 7) +#define V4L2_CID_RDS_RX_MUSIC_SPEECH (V4L2_CID_FM_RX_CLASS_BASE + 8) #define V4L2_CID_RF_TUNER_CLASS_BASE (V4L2_CTRL_CLASS_RF_TUNER | 0x900) #define V4L2_CID_RF_TUNER_CLASS (V4L2_CTRL_CLASS_RF_TUNER | 1) -- cgit v1.2.3 From 699a0ea0823d32030b0666b28ff8633960f7ffa7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 2 Jun 2014 11:02:59 +1000 Subject: KVM: PPC: Book3S: Controls for in-kernel sPAPR hypercall handling This provides a way for userspace controls which sPAPR hcalls get handled in the kernel. Each hcall can be individually enabled or disabled for in-kernel handling, except for H_RTAS. The exception for H_RTAS is because userspace can already control whether individual RTAS functions are handled in-kernel or not via the KVM_PPC_RTAS_DEFINE_TOKEN ioctl, and because the numeric value for H_RTAS is out of the normal sequence of hcall numbers. Hcalls are enabled or disabled using the KVM_ENABLE_CAP ioctl for the KVM_CAP_PPC_ENABLE_HCALL capability on the file descriptor for the VM. The args field of the struct kvm_enable_cap specifies the hcall number in args[0] and the enable/disable flag in args[1]; 0 means disable in-kernel handling (so that the hcall will always cause an exit to userspace) and 1 means enable. Enabling or disabling in-kernel handling of an hcall is effective across the whole VM. The ability for KVM_ENABLE_CAP to be used on a VM file descriptor on PowerPC is new, added by this commit. The KVM_CAP_ENABLE_CAP_VM capability advertises that this ability exists. When a VM is created, an initial set of hcalls are enabled for in-kernel handling. The set that is enabled is the set that have an in-kernel implementation at this point. Any new hcall implementations from this point onwards should not be added to the default set without a good reason. No distinction is made between real-mode and virtual-mode hcall implementations; the one setting controls them both. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 41 ++++++++++++++++++++++++-- arch/powerpc/include/asm/kvm_book3s.h | 1 + arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/book3s_hv.c | 51 +++++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 11 +++++++ arch/powerpc/kvm/book3s_pr.c | 5 ++++ arch/powerpc/kvm/book3s_pr_papr.c | 37 ++++++++++++++++++++++++ arch/powerpc/kvm/powerpc.c | 45 +++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 10 files changed, 193 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0fe36497642c..5c54d196f4c8 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2863,8 +2863,8 @@ The fields in each entry are defined as follows: this function/index combination -6. Capabilities that can be enabled ------------------------------------ +6. Capabilities that can be enabled on vCPUs +-------------------------------------------- There are certain capabilities that change the behavior of the virtual CPU when enabled. To enable them, please see section 4.37. Below you can find a list of @@ -3002,3 +3002,40 @@ Parameters: args[0] is the XICS device fd args[1] is the XICS CPU number (server ID) for this vcpu This capability connects the vcpu to an in-kernel XICS device. + + +7. Capabilities that can be enabled on VMs +------------------------------------------ + +There are certain capabilities that change the behavior of the virtual +machine when enabled. To enable them, please see section 4.37. Below +you can find a list of capabilities and what their effect on the VM +is when enabling them. + +The following information is provided along with the description: + + Architectures: which instruction set architectures provide this ioctl. + x86 includes both i386 and x86_64. + + Parameters: what parameters are accepted by the capability. + + Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL) + are not detailed, but errors with specific meanings are. + + +7.1 KVM_CAP_PPC_ENABLE_HCALL + +Architectures: ppc +Parameters: args[0] is the sPAPR hcall number + args[1] is 0 to disable, 1 to enable in-kernel handling + +This capability controls whether individual sPAPR hypercalls (hcalls) +get handled by the kernel or not. Enabling or disabling in-kernel +handling of an hcall is effective across the VM. On creation, an +initial set of hcalls are enabled for in-kernel handling, which +consists of those hcalls for which in-kernel handlers were implemented +before this capability was implemented. If disabled, the kernel will +not to attempt to handle the hcall, but will always exit to userspace +to handle it. Note that it may not make sense to enable some and +disable others of a group of related hcalls, but KVM does not prevent +userspace from doing that. diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a20cc0bbd048..052ab2ad49b5 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -187,6 +187,7 @@ extern void kvmppc_hv_entry_trampoline(void); extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); +extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm); extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index f9ae69682ce1..62b2cee450a5 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -34,6 +34,7 @@ #include #include #include +#include #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -263,6 +264,7 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; struct list_head rtas_tokens; + DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); #endif #ifdef CONFIG_KVM_MPIC struct openpic *mpic; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f5995a912213..17ffcb4f27f9 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -493,6 +493,7 @@ int main(void) DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); + DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1562acfa05bf..cf445d22570f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -67,6 +67,8 @@ /* Used as a "null" value for timebase values */ #define TB_NIL (~(u64)0) +static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -562,6 +564,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) struct kvm_vcpu *tvcpu; int idx, rc; + if (req <= MAX_HCALL_OPCODE && + !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls)) + return RESUME_HOST; + switch (req) { case H_ENTER: idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -2269,6 +2275,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) */ cpumask_setall(&kvm->arch.need_tlb_flush); + /* Start out with the default set of hcalls enabled */ + memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, + sizeof(kvm->arch.enabled_hcalls)); + kvm->arch.rma = NULL; kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); @@ -2407,6 +2417,45 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, return r; } +/* + * List of hcall numbers to enable by default. + * For compatibility with old userspace, we enable by default + * all hcalls that were implemented before the hcall-enabling + * facility was added. Note this list should not include H_RTAS. + */ +static unsigned int default_hcall_list[] = { + H_REMOVE, + H_ENTER, + H_READ, + H_PROTECT, + H_BULK_REMOVE, + H_GET_TCE, + H_PUT_TCE, + H_SET_DABR, + H_SET_XDABR, + H_CEDE, + H_PROD, + H_CONFER, + H_REGISTER_VPA, +#ifdef CONFIG_KVM_XICS + H_EOI, + H_CPPR, + H_IPI, + H_IPOLL, + H_XIRR, + H_XIRR_X, +#endif + 0 +}; + +static void init_default_hcalls(void) +{ + int i; + + for (i = 0; default_hcall_list[i]; ++i) + __set_bit(default_hcall_list[i] / 4, default_enabled_hcalls); +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -2454,6 +2503,8 @@ static int kvmppc_book3s_init_hv(void) kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; + init_default_hcalls(); + r = kvmppc_mmu_hv_init(); return r; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 64ac56f6c3fe..33aaadef7139 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1909,6 +1909,17 @@ hcall_try_real_mode: clrrdi r3,r3,2 cmpldi r3,hcall_real_table_end - hcall_real_table bge guest_exit_cont + /* See if this hcall is enabled for in-kernel handling */ + ld r4, VCPU_KVM(r9) + srdi r0, r3, 8 /* r0 = (r3 / 4) >> 6 */ + sldi r0, r0, 3 /* index into kvm->arch.enabled_hcalls[] */ + add r4, r4, r0 + ld r0, KVM_ENABLED_HCALLS(r4) + rlwinm r4, r3, 32-2, 0x3f /* r4 = (r3 / 4) & 0x3f */ + srd r0, r0, r4 + andi. r0, r0, 1 + beq guest_exit_cont + /* Get pointer to handler, if any, and call it */ LOAD_REG_ADDR(r4, hcall_real_table) lwax r3,r3,r4 cmpwi r3,0 diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3b82e8616dfa..123ac7dc5e1f 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1597,6 +1597,11 @@ static int kvmppc_core_init_vm_pr(struct kvm *kvm) { mutex_init(&kvm->arch.hpt_mutex); +#ifdef CONFIG_PPC_BOOK3S_64 + /* Start out with the default set of hcalls enabled */ + kvmppc_pr_init_default_hcalls(kvm); +#endif + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { spin_lock(&kvm_global_user_count_lock); if (++kvm_global_user_count == 1) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index f7c25c625a5b..eacaa6e4876e 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -267,6 +267,10 @@ static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { + if (cmd <= MAX_HCALL_OPCODE && + !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls)) + return EMULATE_FAIL; + switch (cmd) { case H_ENTER: return kvmppc_h_pr_enter(vcpu); @@ -304,3 +308,36 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return EMULATE_FAIL; } + + +/* + * List of hcall numbers to enable by default. + * For compatibility with old userspace, we enable by default + * all hcalls that were implemented before the hcall-enabling + * facility was added. Note this list should not include H_RTAS. + */ +static unsigned int default_hcall_list[] = { + H_ENTER, + H_REMOVE, + H_PROTECT, + H_BULK_REMOVE, + H_PUT_TCE, + H_CEDE, +#ifdef CONFIG_KVM_XICS + H_XIRR, + H_CPPR, + H_EOI, + H_IPI, + H_IPOLL, + H_XIRR_X, +#endif + 0 +}; + +void kvmppc_pr_init_default_hcalls(struct kvm *kvm) +{ + int i; + + for (i = 0; default_hcall_list[i]; ++i) + __set_bit(default_hcall_list[i] / 4, kvm->arch.enabled_hcalls); +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 61c738ab1283..3222a4d08a6f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -387,6 +387,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: + case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: @@ -417,6 +418,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_ALLOC_HTAB: case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: + case KVM_CAP_PPC_ENABLE_HCALL: #ifdef CONFIG_KVM_XICS case KVM_CAP_IRQ_XICS: #endif @@ -1099,6 +1101,40 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, return 0; } + +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + case KVM_CAP_PPC_ENABLE_HCALL: { + unsigned long hcall = cap->args[0]; + + r = -EINVAL; + if (hcall > MAX_HCALL_OPCODE || (hcall & 3) || + cap->args[1] > 1) + break; + if (cap->args[1]) + set_bit(hcall / 4, kvm->arch.enabled_hcalls); + else + clear_bit(hcall / 4, kvm->arch.enabled_hcalls); + r = 0; + break; + } +#endif + default: + r = -EINVAL; + break; + } + + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1118,6 +1154,15 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } + case KVM_ENABLE_CAP: + { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e11d8f170a62..0418b746cb68 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -758,6 +758,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_VM_ATTRIBUTES 101 #define KVM_CAP_ARM_PSCI_0_2 102 #define KVM_CAP_PPC_FIXUP_HCALL 103 +#define KVM_CAP_PPC_ENABLE_HCALL 104 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 92b591a4c46b103ebd3fc0d03a084e1efd331253 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:33:08 +0200 Subject: KVM: Allow KVM_CHECK_EXTENSION on the vm fd The KVM_CHECK_EXTENSION is only available on the kvm fd today. Unfortunately on PPC some of the capabilities change depending on the way a VM was created. So instead we need a way to expose capabilities as VM ioctl, so that we can see which VM type we're using (HV or PR). To enable this, add the KVM_CHECK_EXTENSION ioctl to our vm ioctl portfolio. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 7 +++-- include/uapi/linux/kvm.h | 1 + virt/kvm/kvm_main.c | 58 +++++++++++++++++++++------------------ 3 files changed, 37 insertions(+), 29 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 884f819e1908..8898caf8c090 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl. 4.4 KVM_CHECK_EXTENSION -Capability: basic +Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl Architectures: all -Type: system ioctl +Type: system ioctl, vm ioctl Parameters: extension identifier (KVM_CAP_*) Returns: 0 if unsupported; 1 (or some other positive integer) if supported @@ -160,6 +160,9 @@ receives an integer that describes the extension availability. Generally 0 means no and 1 means yes, but some extensions may report additional information in the integer return value. +Based on their initialization different VMs may have different capabilities. +It is thus encouraged to use the vm ioctl to query for capabilities (available +with KVM_CAP_CHECK_EXTENSION_VM on the vm fd) 4.5 KVM_GET_VCPU_MMAP_SIZE diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0418b746cb68..51776cac6a9b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -759,6 +759,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_ARM_PSCI_0_2 102 #define KVM_CAP_PPC_FIXUP_HCALL 103 #define KVM_CAP_PPC_ENABLE_HCALL 104 +#define KVM_CAP_CHECK_EXTENSION_VM 105 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e28f3caa539d..1b95cc926cfc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2324,6 +2324,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return 0; } +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) +{ + switch (arg) { + case KVM_CAP_USER_MEMORY: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: +#ifdef CONFIG_KVM_APIC_ARCHITECTURE + case KVM_CAP_SET_BOOT_CPU_ID: +#endif + case KVM_CAP_INTERNAL_ERROR_DATA: +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_CAP_SIGNAL_MSI: +#endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQFD_RESAMPLE: +#endif + case KVM_CAP_CHECK_EXTENSION_VM: + return 1; +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQ_ROUTING: + return KVM_MAX_IRQ_ROUTES; +#endif + default: + break; + } + return kvm_vm_ioctl_check_extension(kvm, arg); +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2487,6 +2515,9 @@ static long kvm_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_CHECK_EXTENSION: + r = kvm_vm_ioctl_check_extension_generic(kvm, arg); + break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2571,33 +2602,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) -{ - switch (arg) { - case KVM_CAP_USER_MEMORY: - case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: - case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: -#ifdef CONFIG_KVM_APIC_ARCHITECTURE - case KVM_CAP_SET_BOOT_CPU_ID: -#endif - case KVM_CAP_INTERNAL_ERROR_DATA: -#ifdef CONFIG_HAVE_KVM_MSI - case KVM_CAP_SIGNAL_MSI: -#endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQFD_RESAMPLE: -#endif - return 1; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQ_ROUTING: - return KVM_MAX_IRQ_ROUTES; -#endif - default: - break; - } - return kvm_vm_ioctl_check_extension(kvm, arg); -} - static long kvm_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { -- cgit v1.2.3 From ce91ddc471b77ec75e5b2a43c803efac605f37b3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 28 Jul 2014 19:29:13 +0200 Subject: KVM: PPC: Remove DCR handling DCR handling was only needed for 440 KVM. Since we removed it, we can also remove handling of DCR accesses. Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 6 +++--- arch/powerpc/include/asm/kvm_host.h | 4 ---- arch/powerpc/include/asm/kvm_ppc.h | 1 - arch/powerpc/kvm/booke.c | 5 ----- arch/powerpc/kvm/powerpc.c | 10 ---------- arch/powerpc/kvm/timing.c | 1 - arch/powerpc/kvm/timing.h | 3 --- include/uapi/linux/kvm.h | 4 ++-- 8 files changed, 5 insertions(+), 29 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 8898caf8c090..a21ff2265c21 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2613,8 +2613,8 @@ The 'data' member contains, in its first 'len' bytes, the value as it would appear if the VCPU performed a load or store of the appropriate width directly to the byte array. -NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR, - KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding +NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI KVM_EXIT_PAPR and + KVM_EXIT_EPR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. Userspace @@ -2685,7 +2685,7 @@ Principles of Operation Book in the Chapter for Dynamic Address Translation __u8 is_write; } dcr; -powerpc specific. +Deprecated - was used for 440 KVM. /* KVM_EXIT_OSI */ struct { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 66f5b5914e6c..98d9dd50d063 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -94,7 +94,6 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 sum_exits; u32 mmio_exits; - u32 dcr_exits; u32 signal_exits; u32 light_exits; /* Account for special types of light exits: */ @@ -126,7 +125,6 @@ struct kvm_vcpu_stat { enum kvm_exit_types { MMIO_EXITS, - DCR_EXITS, SIGNAL_EXITS, ITLB_REAL_MISS_EXITS, ITLB_VIRT_MISS_EXITS, @@ -601,8 +599,6 @@ struct kvm_vcpu_arch { u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_is_bigendian; u8 mmio_sign_extend; - u8 dcr_needed; - u8 dcr_is_write; u8 osi_needed; u8 osi_enabled; u8 papr_enabled; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index cbee4538307f..8e36c1e2c631 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -41,7 +41,6 @@ enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ - EMULATE_DO_DCR, /* kvm_run filled with DCR request */ EMULATE_FAIL, /* can't emulate this instruction */ EMULATE_AGAIN, /* something went wrong. go again */ EMULATE_EXIT_USER, /* emulation requires exit to user-space */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index f30948a17c03..b4c89fa6f109 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -51,7 +51,6 @@ unsigned long kvmppc_booke_handlers; struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmio", VCPU_STAT(mmio_exits) }, - { "dcr", VCPU_STAT(dcr_exits) }, { "sig", VCPU_STAT(signal_exits) }, { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, @@ -709,10 +708,6 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) case EMULATE_AGAIN: return RESUME_GUEST; - case EMULATE_DO_DCR: - run->exit_reason = KVM_EXIT_DCR; - return RESUME_HOST; - case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", __func__, vcpu->arch.pc, vcpu->arch.last_inst); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c14ed15fd60b..288b4bb05cbd 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -743,12 +743,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #endif } -static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, - struct kvm_run *run) -{ - kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data); -} - static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -945,10 +939,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (!vcpu->mmio_is_write) kvmppc_complete_mmio_load(vcpu, run); vcpu->mmio_needed = 0; - } else if (vcpu->arch.dcr_needed) { - if (!vcpu->arch.dcr_is_write) - kvmppc_complete_dcr_load(vcpu, run); - vcpu->arch.dcr_needed = 0; } else if (vcpu->arch.osi_needed) { u64 *gprs = run->osi.gprs; int i; diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index 07b6110a4bb7..e44d2b2ea97e 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -110,7 +110,6 @@ void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { [MMIO_EXITS] = "MMIO", - [DCR_EXITS] = "DCR", [SIGNAL_EXITS] = "SIGNAL", [ITLB_REAL_MISS_EXITS] = "ITLBREAL", [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index bf191e72b2d8..3123690c82dc 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -63,9 +63,6 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) case EMULATED_INST_EXITS: vcpu->stat.emulated_inst_exits++; break; - case DCR_EXITS: - vcpu->stat.dcr_exits++; - break; case DSI_EXITS: vcpu->stat.dsi_exits++; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 51776cac6a9b..f6f24aeb9e1a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -162,7 +162,7 @@ struct kvm_pit_config { #define KVM_EXIT_TPR_ACCESS 12 #define KVM_EXIT_S390_SIEIC 13 #define KVM_EXIT_S390_RESET 14 -#define KVM_EXIT_DCR 15 +#define KVM_EXIT_DCR 15 /* deprecated */ #define KVM_EXIT_NMI 16 #define KVM_EXIT_INTERNAL_ERROR 17 #define KVM_EXIT_OSI 18 @@ -268,7 +268,7 @@ struct kvm_run { __u64 trans_exc_code; __u32 pgm_code; } s390_ucontrol; - /* KVM_EXIT_DCR */ + /* KVM_EXIT_DCR (deprecated) */ struct { __u32 dcrn; __u32 data; -- cgit v1.2.3 From 68a360e82e55c9b35097e7be7f7991d8f401032f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 25 Jul 2014 18:01:31 -0400 Subject: packet: remove deprecated syststamp timestamp No device driver will ever return an skb_shared_info structure with syststamp non-zero, so remove the branch that tests for this and optionally marks the packet timestamp as TP_STATUS_TS_SYS_HARDWARE. Do not remove the definition TP_STATUS_TS_SYS_HARDWARE, as processes may refer to it. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/packet_mmap.txt | 18 ++++++------------ include/uapi/linux/if_packet.h | 2 +- net/packet/af_packet.c | 12 ++++-------- 3 files changed, 11 insertions(+), 21 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 38112d512f47..a6d7cb91069e 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -1008,14 +1008,9 @@ hardware timestamps to be used. Note: you may need to enable the generation of hardware timestamps with SIOCSHWTSTAMP (see related information from Documentation/networking/timestamping.txt). -PACKET_TIMESTAMP accepts the same integer bit field as -SO_TIMESTAMPING. However, only the SOF_TIMESTAMPING_SYS_HARDWARE -and SOF_TIMESTAMPING_RAW_HARDWARE values are recognized by -PACKET_TIMESTAMP. SOF_TIMESTAMPING_SYS_HARDWARE takes precedence over -SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set. - - int req = 0; - req |= SOF_TIMESTAMPING_SYS_HARDWARE; +PACKET_TIMESTAMP accepts the same integer bit field as SO_TIMESTAMPING: + + int req = SOF_TIMESTAMPING_RAW_HARDWARE; setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req)) For the mmap(2)ed ring buffers, such timestamps are stored in the @@ -1023,14 +1018,13 @@ tpacket{,2,3}_hdr structure's tp_sec and tp_{n,u}sec members. To determine what kind of timestamp has been reported, the tp_status field is binary |'ed with the following possible bits ... - TP_STATUS_TS_SYS_HARDWARE TP_STATUS_TS_RAW_HARDWARE TP_STATUS_TS_SOFTWARE ... that are equivalent to its SOF_TIMESTAMPING_* counterparts. For the -RX_RING, if none of those 3 are set (i.e. PACKET_TIMESTAMP is not set), -then this means that a software fallback was invoked *within* PF_PACKET's -processing code (less precise). +RX_RING, if neither is set (i.e. PACKET_TIMESTAMP is not set), then a +software fallback was invoked *within* PF_PACKET's processing code (less +precise). Getting timestamps for the TX_RING works as follows: i) fill the ring frames, ii) call sendto() e.g. in blocking mode, iii) wait for status of relevant diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index bac27fa05f5b..da2d668b8cf1 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -108,7 +108,7 @@ struct tpacket_auxdata { /* Rx and Tx ring - header status */ #define TP_STATUS_TS_SOFTWARE (1 << 29) -#define TP_STATUS_TS_SYS_HARDWARE (1 << 30) +#define TP_STATUS_TS_SYS_HARDWARE (1 << 30) /* deprecated, never set */ #define TP_STATUS_TS_RAW_HARDWARE (1 << 31) /* Rx ring - feature request bits */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 614ca91f785a..8d9f8042705a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -441,14 +441,10 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, { struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); - if (shhwtstamps) { - if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->syststamp, ts)) - return TP_STATUS_TS_SYS_HARDWARE; - if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) - return TP_STATUS_TS_RAW_HARDWARE; - } + if (shhwtstamps && + (flags & SOF_TIMESTAMPING_RAW_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts)) + return TP_STATUS_TS_RAW_HARDWARE; if (ktime_to_timespec_cond(skb->tstamp, ts)) return TP_STATUS_TS_SOFTWARE; -- cgit v1.2.3 From 16eecd9be4b05e3216b8b12707aa6f51fb197903 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Mon, 28 Jul 2014 20:57:07 -0700 Subject: dcbnl : Fix misleading dcb_app->priority explanation Current explanation of dcb_app->priority is wrong. It says priority is expected to be a 3-bit unsigned integer which is only true when working with DCBx-IEEE. Use of dcb_app->priority by DCBx-CEE expects it to be 802.1p user priority bitmap. Updated accordingly This affects the cxgb4 driver, but I will post those changes as part of a larger changeset shortly. Fixes: 3e29027af4372 ("dcbnl: add support for ieee8021Qaz attributes") Signed-off-by: Anish Bhatt Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/uapi/linux/dcbnl.h | 3 ++- net/dcb/dcbnl.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h index 6bb43382f3f3..e711f20dc522 100644 --- a/include/uapi/linux/dcbnl.h +++ b/include/uapi/linux/dcbnl.h @@ -148,7 +148,8 @@ struct cee_pfc { * * @selector: protocol identifier type * @protocol: protocol of type indicated - * @priority: 3-bit unsigned integer indicating priority + * @priority: 3-bit unsigned integer indicating priority for IEEE + * 8-bit 802.1p user priority bitmap for CEE * * ---- * Selector field values diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index c34af7a1d2d4..ca11d283bbeb 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1776,7 +1776,7 @@ EXPORT_SYMBOL(dcb_getapp); * * Priority 0 is an invalid priority in CEE spec. This routine * removes applications from the app list if the priority is - * set to zero. + * set to zero. Priority is expected to be 8-bit 802.1p user priority bitmap */ int dcb_setapp(struct net_device *dev, struct dcb_app *new) { @@ -1837,7 +1837,8 @@ EXPORT_SYMBOL(dcb_ieee_getapp_mask); * * This adds Application data to the list. Multiple application * entries may exists for the same selector and protocol as long - * as the priorities are different. + * as the priorities are different. Priority is expected to be a + * 3-bit unsigned integer */ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) { -- cgit v1.2.3 From e10038a8ec06ac819b7552bb67aaa6d2d6f850c1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Tue, 29 Jul 2014 18:12:15 +0200 Subject: netfilter: xt_bpf: add mising opaque struct sk_filter definition This structure is not exposed to userspace, so fix this by defining struct sk_filter; so we skip the casting in kernelspace. This is safe since userspace has no way to lurk with that internal pointer. Fixes: e6f30c7 ("netfilter: x_tables: add xt_bpf match") Signed-off-by: Pablo Neira Ayuso Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/linux/netfilter/xt_bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index 5dda450eb55b..2ec9fbcd06f9 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -6,6 +6,8 @@ #define XT_BPF_MAX_NUM_INSTR 64 +struct sk_filter; + struct xt_bpf_info { __u16 bpf_program_num_elem; struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; -- cgit v1.2.3 From 7ae457c1e5b45a1b826fad9d62b32191d2bdcfdb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jul 2014 20:34:16 -0700 Subject: net: filter: split 'struct sk_filter' into socket and bpf parts clean up names related to socket filtering and bpf in the following way: - everything that deals with sockets keeps 'sk_*' prefix - everything that is pure BPF is changed to 'bpf_*' prefix split 'struct sk_filter' into struct sk_filter { atomic_t refcnt; struct rcu_head rcu; struct bpf_prog *prog; }; and struct bpf_prog { u32 jited:1, len:31; struct sock_fprog_kern *orig_prog; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); union { struct sock_filter insns[0]; struct bpf_insn insnsi[0]; struct work_struct work; }; }; so that 'struct bpf_prog' can be used independent of sockets and cleans up 'unattached' bpf use cases split SK_RUN_FILTER macro into: SK_RUN_FILTER to be used with 'struct sk_filter *' and BPF_PROG_RUN to be used with 'struct bpf_prog *' __sk_filter_release(struct sk_filter *) gains __bpf_prog_release(struct bpf_prog *) helper function also perform related renames for the functions that work with 'struct bpf_prog *', since they're on the same lines: sk_filter_size -> bpf_prog_size sk_filter_select_runtime -> bpf_prog_select_runtime sk_filter_free -> bpf_prog_free sk_unattached_filter_create -> bpf_prog_create sk_unattached_filter_destroy -> bpf_prog_destroy sk_store_orig_filter -> bpf_prog_store_orig_filter sk_release_orig_filter -> bpf_release_orig_filter __sk_migrate_filter -> bpf_migrate_filter __sk_prepare_filter -> bpf_prepare_filter API for attaching classic BPF to a socket stays the same: sk_attach_filter(prog, struct sock *)/sk_detach_filter(struct sock *) and SK_RUN_FILTER(struct sk_filter *, ctx) to execute a program which is used by sockets, tun, af_packet API for 'unattached' BPF programs becomes: bpf_prog_create(struct bpf_prog **)/bpf_prog_destroy(struct bpf_prog *) and BPF_PROG_RUN(struct bpf_prog *, ctx) to execute a program which is used by isdn, ppp, team, seccomp, ptp, xt_bpf, cls_bpf, test_bpf Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 10 ++-- arch/arm/net/bpf_jit_32.c | 8 +-- arch/mips/net/bpf_jit.c | 8 +-- arch/powerpc/net/bpf_jit_comp.c | 8 +-- arch/s390/net/bpf_jit_comp.c | 4 +- arch/sparc/net/bpf_jit_comp.c | 4 +- arch/x86/net/bpf_jit_comp.c | 12 ++--- drivers/isdn/i4l/isdn_ppp.c | 26 +++++---- drivers/net/ppp/ppp_generic.c | 28 +++++----- drivers/net/team/team_mode_loadbalance.c | 14 ++--- include/linux/filter.h | 40 ++++++++------ include/linux/isdn_ppp.h | 4 +- include/uapi/linux/netfilter/xt_bpf.h | 4 +- kernel/bpf/core.c | 30 +++++------ kernel/seccomp.c | 10 ++-- lib/test_bpf.c | 24 ++++----- net/core/filter.c | 92 ++++++++++++++++++-------------- net/core/ptp_classifier.c | 6 +-- net/core/sock_diag.c | 2 +- net/netfilter/xt_bpf.c | 6 +-- net/sched/cls_bpf.c | 12 ++--- 21 files changed, 183 insertions(+), 169 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 712068be8171..c48a9704bda8 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -586,11 +586,11 @@ team driver's classifier for its load-balancing mode, netfilter's xt_bpf extension, PTP dissector/classifier, and much more. They are all internally converted by the kernel into the new instruction set representation and run in the eBPF interpreter. For in-kernel handlers, this all works transparently -by using sk_unattached_filter_create() for setting up the filter, resp. -sk_unattached_filter_destroy() for destroying it. The macro -SK_RUN_FILTER(filter, ctx) transparently invokes eBPF interpreter or JITed -code to run the filter. 'filter' is a pointer to struct sk_filter that we -got from sk_unattached_filter_create(), and 'ctx' the given context (e.g. +by using bpf_prog_create() for setting up the filter, resp. +bpf_prog_destroy() for destroying it. The macro +BPF_PROG_RUN(filter, ctx) transparently invokes eBPF interpreter or JITed +code to run the filter. 'filter' is a pointer to struct bpf_prog that we +got from bpf_prog_create(), and 'ctx' the given context (e.g. skb pointer). All constraints and restrictions from bpf_check_classic() apply before a conversion to the new layout is being done behind the scenes! diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index fb5503ce016f..a37b989a2f91 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -56,7 +56,7 @@ #define FLAG_NEED_X_RESET (1 << 0) struct jit_ctx { - const struct sk_filter *skf; + const struct bpf_prog *skf; unsigned idx; unsigned prologue_bytes; int ret0_fp_idx; @@ -465,7 +465,7 @@ static inline void update_on_xread(struct jit_ctx *ctx) static int build_body(struct jit_ctx *ctx) { void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; - const struct sk_filter *prog = ctx->skf; + const struct bpf_prog *prog = ctx->skf; const struct sock_filter *inst; unsigned i, load_order, off, condt; int imm12; @@ -857,7 +857,7 @@ b_epilogue: } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct jit_ctx ctx; unsigned tmp_idx; @@ -926,7 +926,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index b87390a56a2f..05a56619ece2 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -131,7 +131,7 @@ * @target: Memory location for the compiled filter */ struct jit_ctx { - const struct sk_filter *skf; + const struct bpf_prog *skf; unsigned int prologue_bytes; u32 idx; u32 flags; @@ -789,7 +789,7 @@ static int pkt_type_offset(void) static int build_body(struct jit_ctx *ctx) { void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; - const struct sk_filter *prog = ctx->skf; + const struct bpf_prog *prog = ctx->skf; const struct sock_filter *inst; unsigned int i, off, load_order, condt; u32 k, b_off __maybe_unused; @@ -1369,7 +1369,7 @@ jmp_cmp: int bpf_jit_enable __read_mostly; -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct jit_ctx ctx; unsigned int alloc_size, tmp_idx; @@ -1423,7 +1423,7 @@ out: kfree(ctx.offsets); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 82e82cadcde5..3afa6f4c1957 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -25,7 +25,7 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } -static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image, +static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx) { int i; @@ -121,7 +121,7 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) /* Assemble the body code between the prologue & epilogue. */ -static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, +static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, unsigned int *addrs) { @@ -569,7 +569,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, return 0; } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { unsigned int proglen; unsigned int alloclen; @@ -693,7 +693,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index a2cbd875543a..61e45b7c04d7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -812,7 +812,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, return header; } -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { struct bpf_binary_header *header = NULL; unsigned long size, prg_len, lit_len; @@ -875,7 +875,7 @@ out: kfree(addrs); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 892a102671ad..1f76c22a6a75 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -354,7 +354,7 @@ do { *prog++ = BR_OPC | WDISP22(OFF); \ * emit_jump() calls with adjusted offsets. */ -void bpf_jit_compile(struct sk_filter *fp) +void bpf_jit_compile(struct bpf_prog *fp) { unsigned int cleanup_addr, proglen, oldproglen = 0; u32 temp[8], *prog, *func, seen = 0, pass; @@ -808,7 +808,7 @@ out: return; } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e2ecc1380b3d..5c8cb8043c5a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -211,7 +211,7 @@ struct jit_context { bool seen_ld_abs; }; -static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, +static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { struct bpf_insn *insn = bpf_prog->insnsi; @@ -841,7 +841,7 @@ common_load: ctx->seen_ld_abs = true; /* By design x64 JIT should support all BPF instructions * This error will be seen if new instruction was added * to interpreter, but not to JIT - * or if there is junk in sk_filter + * or if there is junk in bpf_prog */ pr_err("bpf_jit: unknown opcode %02x\n", insn->code); return -EINVAL; @@ -862,11 +862,11 @@ common_load: ctx->seen_ld_abs = true; return proglen; } -void bpf_jit_compile(struct sk_filter *prog) +void bpf_jit_compile(struct bpf_prog *prog) { } -void bpf_int_jit_compile(struct sk_filter *prog) +void bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; int proglen, oldproglen = 0; @@ -932,7 +932,7 @@ out: static void bpf_jit_free_deferred(struct work_struct *work) { - struct sk_filter *fp = container_of(work, struct sk_filter, work); + struct bpf_prog *fp = container_of(work, struct bpf_prog, work); unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; @@ -941,7 +941,7 @@ static void bpf_jit_free_deferred(struct work_struct *work) kfree(fp); } -void bpf_jit_free(struct sk_filter *fp) +void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) { INIT_WORK(&fp->work, bpf_jit_free_deferred); diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 62f0688d45a5..c4198fa490bf 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -379,12 +379,12 @@ isdn_ppp_release(int min, struct file *file) #endif #ifdef CONFIG_IPPP_FILTER if (is->pass_filter) { - sk_unattached_filter_destroy(is->pass_filter); + bpf_prog_destroy(is->pass_filter); is->pass_filter = NULL; } if (is->active_filter) { - sk_unattached_filter_destroy(is->active_filter); + bpf_prog_destroy(is->active_filter); is->active_filter = NULL; } #endif @@ -639,12 +639,11 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) fprog.filter = code; if (is->pass_filter) { - sk_unattached_filter_destroy(is->pass_filter); + bpf_prog_destroy(is->pass_filter); is->pass_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&is->pass_filter, - &fprog); + err = bpf_prog_create(&is->pass_filter, &fprog); else err = 0; kfree(code); @@ -664,12 +663,11 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) fprog.filter = code; if (is->active_filter) { - sk_unattached_filter_destroy(is->active_filter); + bpf_prog_destroy(is->active_filter); is->active_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&is->active_filter, - &fprog); + err = bpf_prog_create(&is->active_filter, &fprog); else err = 0; kfree(code); @@ -1174,14 +1172,14 @@ isdn_ppp_push_higher(isdn_net_dev *net_dev, isdn_net_local *lp, struct sk_buff * } if (is->pass_filter - && SK_RUN_FILTER(is->pass_filter, skb) == 0) { + && BPF_PROG_RUN(is->pass_filter, skb) == 0) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: inbound frame filtered.\n"); kfree_skb(skb); return; } if (!(is->active_filter - && SK_RUN_FILTER(is->active_filter, skb) == 0)) { + && BPF_PROG_RUN(is->active_filter, skb) == 0)) { if (is->debug & 0x2) printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); lp->huptimer = 0; @@ -1320,14 +1318,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev) } if (ipt->pass_filter - && SK_RUN_FILTER(ipt->pass_filter, skb) == 0) { + && BPF_PROG_RUN(ipt->pass_filter, skb) == 0) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: outbound frame filtered.\n"); kfree_skb(skb); goto unlock; } if (!(ipt->active_filter - && SK_RUN_FILTER(ipt->active_filter, skb) == 0)) { + && BPF_PROG_RUN(ipt->active_filter, skb) == 0)) { if (ipt->debug & 0x4) printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n"); lp->huptimer = 0; @@ -1517,9 +1515,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp) } drop |= is->pass_filter - && SK_RUN_FILTER(is->pass_filter, skb) == 0; + && BPF_PROG_RUN(is->pass_filter, skb) == 0; drop |= is->active_filter - && SK_RUN_FILTER(is->active_filter, skb) == 0; + && BPF_PROG_RUN(is->active_filter, skb) == 0; skb_push(skb, IPPP_MAX_HEADER - 4); return drop; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 765248b42a0a..fa0d71727894 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -143,8 +143,8 @@ struct ppp { struct sk_buff_head mrq; /* MP: receive reconstruction queue */ #endif /* CONFIG_PPP_MULTILINK */ #ifdef CONFIG_PPP_FILTER - struct sk_filter *pass_filter; /* filter for packets to pass */ - struct sk_filter *active_filter;/* filter for pkts to reset idle */ + struct bpf_prog *pass_filter; /* filter for packets to pass */ + struct bpf_prog *active_filter; /* filter for pkts to reset idle */ #endif /* CONFIG_PPP_FILTER */ struct net *ppp_net; /* the net we belong to */ struct ppp_link_stats stats64; /* 64 bit network stats */ @@ -762,12 +762,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ppp_lock(ppp); if (ppp->pass_filter) { - sk_unattached_filter_destroy(ppp->pass_filter); + bpf_prog_destroy(ppp->pass_filter); ppp->pass_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&ppp->pass_filter, - &fprog); + err = bpf_prog_create(&ppp->pass_filter, + &fprog); else err = 0; kfree(code); @@ -788,12 +788,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ppp_lock(ppp); if (ppp->active_filter) { - sk_unattached_filter_destroy(ppp->active_filter); + bpf_prog_destroy(ppp->active_filter); ppp->active_filter = NULL; } if (fprog.filter != NULL) - err = sk_unattached_filter_create(&ppp->active_filter, - &fprog); + err = bpf_prog_create(&ppp->active_filter, + &fprog); else err = 0; kfree(code); @@ -1205,7 +1205,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) a four-byte PPP header on each packet */ *skb_push(skb, 2) = 1; if (ppp->pass_filter && - SK_RUN_FILTER(ppp->pass_filter, skb) == 0) { + BPF_PROG_RUN(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) netdev_printk(KERN_DEBUG, ppp->dev, "PPP: outbound frame " @@ -1215,7 +1215,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) } /* if this packet passes the active filter, record the time */ if (!(ppp->active_filter && - SK_RUN_FILTER(ppp->active_filter, skb) == 0)) + BPF_PROG_RUN(ppp->active_filter, skb) == 0)) ppp->last_xmit = jiffies; skb_pull(skb, 2); #else @@ -1839,7 +1839,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) *skb_push(skb, 2) = 0; if (ppp->pass_filter && - SK_RUN_FILTER(ppp->pass_filter, skb) == 0) { + BPF_PROG_RUN(ppp->pass_filter, skb) == 0) { if (ppp->debug & 1) netdev_printk(KERN_DEBUG, ppp->dev, "PPP: inbound frame " @@ -1848,7 +1848,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) return; } if (!(ppp->active_filter && - SK_RUN_FILTER(ppp->active_filter, skb) == 0)) + BPF_PROG_RUN(ppp->active_filter, skb) == 0)) ppp->last_recv = jiffies; __skb_pull(skb, 2); } else @@ -2829,12 +2829,12 @@ static void ppp_destroy_interface(struct ppp *ppp) #endif /* CONFIG_PPP_MULTILINK */ #ifdef CONFIG_PPP_FILTER if (ppp->pass_filter) { - sk_unattached_filter_destroy(ppp->pass_filter); + bpf_prog_destroy(ppp->pass_filter); ppp->pass_filter = NULL; } if (ppp->active_filter) { - sk_unattached_filter_destroy(ppp->active_filter); + bpf_prog_destroy(ppp->active_filter); ppp->active_filter = NULL; } #endif /* CONFIG_PPP_FILTER */ diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index d7be9b36bce6..a1536d0d83a9 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -58,7 +58,7 @@ struct lb_priv_ex { }; struct lb_priv { - struct sk_filter __rcu *fp; + struct bpf_prog __rcu *fp; lb_select_tx_port_func_t __rcu *select_tx_port_func; struct lb_pcpu_stats __percpu *pcpu_stats; struct lb_priv_ex *ex; /* priv extension */ @@ -174,14 +174,14 @@ static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name) static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv, struct sk_buff *skb) { - struct sk_filter *fp; + struct bpf_prog *fp; uint32_t lhash; unsigned char *c; fp = rcu_dereference_bh(lb_priv->fp); if (unlikely(!fp)) return 0; - lhash = SK_RUN_FILTER(fp, skb); + lhash = BPF_PROG_RUN(fp, skb); c = (char *) &lhash; return c[0] ^ c[1] ^ c[2] ^ c[3]; } @@ -271,8 +271,8 @@ static void __fprog_destroy(struct sock_fprog_kern *fprog) static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) { struct lb_priv *lb_priv = get_lb_priv(team); - struct sk_filter *fp = NULL; - struct sk_filter *orig_fp = NULL; + struct bpf_prog *fp = NULL; + struct bpf_prog *orig_fp = NULL; struct sock_fprog_kern *fprog = NULL; int err; @@ -281,7 +281,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) ctx->data.bin_val.ptr); if (err) return err; - err = sk_unattached_filter_create(&fp, fprog); + err = bpf_prog_create(&fp, fprog); if (err) { __fprog_destroy(fprog); return err; @@ -300,7 +300,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) if (orig_fp) { synchronize_rcu(); - sk_unattached_filter_destroy(orig_fp); + bpf_prog_destroy(orig_fp); } return 0; } diff --git a/include/linux/filter.h b/include/linux/filter.h index 7cb9b40e9a2f..a5227ab8ccb1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -296,7 +296,8 @@ enum { }) /* Macro to invoke filter function. */ -#define SK_RUN_FILTER(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +#define SK_RUN_FILTER(filter, ctx) \ + (*filter->prog->bpf_func)(ctx, filter->prog->insnsi) struct bpf_insn { __u8 code; /* opcode */ @@ -323,12 +324,10 @@ struct sk_buff; struct sock; struct seccomp_data; -struct sk_filter { - atomic_t refcnt; +struct bpf_prog { u32 jited:1, /* Is our filter JIT'ed? */ len:31; /* Number of filter blocks */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ - struct rcu_head rcu; unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); union { @@ -338,25 +337,32 @@ struct sk_filter { }; }; -static inline unsigned int sk_filter_size(unsigned int proglen) +struct sk_filter { + atomic_t refcnt; + struct rcu_head rcu; + struct bpf_prog *prog; +}; + +#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) + +static inline unsigned int bpf_prog_size(unsigned int proglen) { - return max(sizeof(struct sk_filter), - offsetof(struct sk_filter, insns[proglen])); + return max(sizeof(struct bpf_prog), + offsetof(struct bpf_prog, insns[proglen])); } #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) int sk_filter(struct sock *sk, struct sk_buff *skb); -void sk_filter_select_runtime(struct sk_filter *fp); -void sk_filter_free(struct sk_filter *fp); +void bpf_prog_select_runtime(struct bpf_prog *fp); +void bpf_prog_free(struct bpf_prog *fp); int bpf_convert_filter(struct sock_filter *prog, int len, struct bpf_insn *new_prog, int *new_len); -int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog_kern *fprog); -void sk_unattached_filter_destroy(struct sk_filter *fp); +int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); +void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_detach_filter(struct sock *sk); @@ -369,7 +375,7 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -void bpf_int_jit_compile(struct sk_filter *fp); +void bpf_int_jit_compile(struct bpf_prog *fp); #define BPF_ANC BIT(15) @@ -423,8 +429,8 @@ static inline void *bpf_load_pointer(const struct sk_buff *skb, int k, #include #include -void bpf_jit_compile(struct sk_filter *fp); -void bpf_jit_free(struct sk_filter *fp); +void bpf_jit_compile(struct bpf_prog *fp); +void bpf_jit_free(struct bpf_prog *fp); static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) @@ -438,11 +444,11 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, #else #include -static inline void bpf_jit_compile(struct sk_filter *fp) +static inline void bpf_jit_compile(struct bpf_prog *fp) { } -static inline void bpf_jit_free(struct sk_filter *fp) +static inline void bpf_jit_free(struct bpf_prog *fp) { kfree(fp); } diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h index 8e10f57f109f..a0070c6dfaf8 100644 --- a/include/linux/isdn_ppp.h +++ b/include/linux/isdn_ppp.h @@ -180,8 +180,8 @@ struct ippp_struct { struct slcompress *slcomp; #endif #ifdef CONFIG_IPPP_FILTER - struct sk_filter *pass_filter; /* filter for packets to pass */ - struct sk_filter *active_filter; /* filter for pkts to reset idle */ + struct bpf_prog *pass_filter; /* filter for packets to pass */ + struct bpf_prog *active_filter; /* filter for pkts to reset idle */ #endif unsigned long debug; struct isdn_ppp_compressor *compressor,*decompressor; diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index 2ec9fbcd06f9..1fad2c27ac32 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -6,14 +6,14 @@ #define XT_BPF_MAX_NUM_INSTR 64 -struct sk_filter; +struct bpf_prog; struct xt_bpf_info { __u16 bpf_program_num_elem; struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; /* only used in the kernel */ - struct sk_filter *filter __attribute__((aligned(8))); + struct bpf_prog *filter __attribute__((aligned(8))); }; #endif /*_XT_BPF_H */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 188ac5ba3900..7f0dbcbb34af 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -73,15 +73,13 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) } /** - * __sk_run_filter - run a filter on a given context - * @ctx: buffer to run the filter on - * @insn: filter to apply + * __bpf_prog_run - run eBPF program on a given context + * @ctx: is the data we are operating on + * @insn: is the array of eBPF instructions * - * Decode and apply filter instructions to the skb->data. Return length to - * keep, 0 for none. @ctx is the data we are operating on, @insn is the - * array of filter instructions. + * Decode and execute eBPF instructions. */ -static unsigned int __sk_run_filter(void *ctx, const struct bpf_insn *insn) +static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) { u64 stack[MAX_BPF_STACK / sizeof(u64)]; u64 regs[MAX_BPF_REG], tmp; @@ -508,29 +506,29 @@ load_byte: return 0; } -void __weak bpf_int_jit_compile(struct sk_filter *prog) +void __weak bpf_int_jit_compile(struct bpf_prog *prog) { } /** - * sk_filter_select_runtime - select execution runtime for BPF program - * @fp: sk_filter populated with internal BPF program + * bpf_prog_select_runtime - select execution runtime for BPF program + * @fp: bpf_prog populated with internal BPF program * * try to JIT internal BPF program, if JIT is not available select interpreter - * BPF program will be executed via SK_RUN_FILTER() macro + * BPF program will be executed via BPF_PROG_RUN() macro */ -void sk_filter_select_runtime(struct sk_filter *fp) +void bpf_prog_select_runtime(struct bpf_prog *fp) { - fp->bpf_func = (void *) __sk_run_filter; + fp->bpf_func = (void *) __bpf_prog_run; /* Probe if internal BPF can be JITed */ bpf_int_jit_compile(fp); } -EXPORT_SYMBOL_GPL(sk_filter_select_runtime); +EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); /* free internal BPF program */ -void sk_filter_free(struct sk_filter *fp) +void bpf_prog_free(struct bpf_prog *fp) { bpf_jit_free(fp); } -EXPORT_SYMBOL_GPL(sk_filter_free); +EXPORT_SYMBOL_GPL(bpf_prog_free); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 33a3a97e2b58..2f3fa2cc2eac 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -54,7 +54,7 @@ struct seccomp_filter { atomic_t usage; struct seccomp_filter *prev; - struct sk_filter *prog; + struct bpf_prog *prog; }; /* Limit any path through the tree to 256KB worth of instructions. */ @@ -187,7 +187,7 @@ static u32 seccomp_run_filters(int syscall) * value always takes priority (ignoring the DATA). */ for (f = current->seccomp.filter; f; f = f->prev) { - u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd); + u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd); if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; @@ -260,7 +260,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) if (!filter) goto free_prog; - filter->prog = kzalloc(sk_filter_size(new_len), + filter->prog = kzalloc(bpf_prog_size(new_len), GFP_KERNEL|__GFP_NOWARN); if (!filter->prog) goto free_filter; @@ -273,7 +273,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) atomic_set(&filter->usage, 1); filter->prog->len = new_len; - sk_filter_select_runtime(filter->prog); + bpf_prog_select_runtime(filter->prog); /* * If there is an existing filter, make it the prev and don't drop its @@ -337,7 +337,7 @@ void put_seccomp_filter(struct task_struct *tsk) while (orig && atomic_dec_and_test(&orig->usage)) { struct seccomp_filter *freeme = orig; orig = orig->prev; - sk_filter_free(freeme->prog); + bpf_prog_free(freeme->prog); kfree(freeme); } } diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 5f48623ee1a7..89e0345733bd 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1761,9 +1761,9 @@ static int probe_filter_length(struct sock_filter *fp) return len + 1; } -static struct sk_filter *generate_filter(int which, int *err) +static struct bpf_prog *generate_filter(int which, int *err) { - struct sk_filter *fp; + struct bpf_prog *fp; struct sock_fprog_kern fprog; unsigned int flen = probe_filter_length(tests[which].u.insns); __u8 test_type = tests[which].aux & TEST_TYPE_MASK; @@ -1773,7 +1773,7 @@ static struct sk_filter *generate_filter(int which, int *err) fprog.filter = tests[which].u.insns; fprog.len = flen; - *err = sk_unattached_filter_create(&fp, &fprog); + *err = bpf_prog_create(&fp, &fprog); if (tests[which].aux & FLAG_EXPECTED_FAIL) { if (*err == -EINVAL) { pr_cont("PASS\n"); @@ -1798,7 +1798,7 @@ static struct sk_filter *generate_filter(int which, int *err) break; case INTERNAL: - fp = kzalloc(sk_filter_size(flen), GFP_KERNEL); + fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL); if (fp == NULL) { pr_cont("UNEXPECTED_FAIL no memory left\n"); *err = -ENOMEM; @@ -1809,7 +1809,7 @@ static struct sk_filter *generate_filter(int which, int *err) memcpy(fp->insnsi, tests[which].u.insns_int, fp->len * sizeof(struct bpf_insn)); - sk_filter_select_runtime(fp); + bpf_prog_select_runtime(fp); break; } @@ -1817,21 +1817,21 @@ static struct sk_filter *generate_filter(int which, int *err) return fp; } -static void release_filter(struct sk_filter *fp, int which) +static void release_filter(struct bpf_prog *fp, int which) { __u8 test_type = tests[which].aux & TEST_TYPE_MASK; switch (test_type) { case CLASSIC: - sk_unattached_filter_destroy(fp); + bpf_prog_destroy(fp); break; case INTERNAL: - sk_filter_free(fp); + bpf_prog_free(fp); break; } } -static int __run_one(const struct sk_filter *fp, const void *data, +static int __run_one(const struct bpf_prog *fp, const void *data, int runs, u64 *duration) { u64 start, finish; @@ -1840,7 +1840,7 @@ static int __run_one(const struct sk_filter *fp, const void *data, start = ktime_to_us(ktime_get()); for (i = 0; i < runs; i++) - ret = SK_RUN_FILTER(fp, data); + ret = BPF_PROG_RUN(fp, data); finish = ktime_to_us(ktime_get()); @@ -1850,7 +1850,7 @@ static int __run_one(const struct sk_filter *fp, const void *data, return ret; } -static int run_one(const struct sk_filter *fp, struct bpf_test *test) +static int run_one(const struct bpf_prog *fp, struct bpf_test *test) { int err_cnt = 0, i, runs = MAX_TESTRUNS; @@ -1884,7 +1884,7 @@ static __init int test_bpf(void) int i, err_cnt = 0, pass_cnt = 0; for (i = 0; i < ARRAY_SIZE(tests); i++) { - struct sk_filter *fp; + struct bpf_prog *fp; int err; pr_info("#%d %s ", i, tests[i].descr); diff --git a/net/core/filter.c b/net/core/filter.c index 6ac901613bee..d814b8a89d0f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -810,8 +810,8 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) } EXPORT_SYMBOL(bpf_check_classic); -static int sk_store_orig_filter(struct sk_filter *fp, - const struct sock_fprog *fprog) +static int bpf_prog_store_orig_filter(struct bpf_prog *fp, + const struct sock_fprog *fprog) { unsigned int fsize = bpf_classic_proglen(fprog); struct sock_fprog_kern *fkprog; @@ -831,7 +831,7 @@ static int sk_store_orig_filter(struct sk_filter *fp, return 0; } -static void sk_release_orig_filter(struct sk_filter *fp) +static void bpf_release_orig_filter(struct bpf_prog *fp) { struct sock_fprog_kern *fprog = fp->orig_prog; @@ -841,10 +841,16 @@ static void sk_release_orig_filter(struct sk_filter *fp) } } +static void __bpf_prog_release(struct bpf_prog *prog) +{ + bpf_release_orig_filter(prog); + bpf_prog_free(prog); +} + static void __sk_filter_release(struct sk_filter *fp) { - sk_release_orig_filter(fp); - sk_filter_free(fp); + __bpf_prog_release(fp->prog); + kfree(fp); } /** @@ -872,7 +878,7 @@ static void sk_filter_release(struct sk_filter *fp) void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) { - u32 filter_size = sk_filter_size(fp->len); + u32 filter_size = bpf_prog_size(fp->prog->len); atomic_sub(filter_size, &sk->sk_omem_alloc); sk_filter_release(fp); @@ -883,7 +889,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) */ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) { - u32 filter_size = sk_filter_size(fp->len); + u32 filter_size = bpf_prog_size(fp->prog->len); /* same check as in sock_kmalloc() */ if (filter_size <= sysctl_optmem_max && @@ -895,10 +901,10 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) return false; } -static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) +static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) { struct sock_filter *old_prog; - struct sk_filter *old_fp; + struct bpf_prog *old_fp; int err, new_len, old_len = fp->len; /* We are free to overwrite insns et al right here as it @@ -927,7 +933,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) /* Expand fp for appending the new filter representation. */ old_fp = fp; - fp = krealloc(old_fp, sk_filter_size(new_len), GFP_KERNEL); + fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL); if (!fp) { /* The old_fp is still around in case we couldn't * allocate new memory, so uncharge on that one. @@ -949,7 +955,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) */ goto out_err_free; - sk_filter_select_runtime(fp); + bpf_prog_select_runtime(fp); kfree(old_prog); return fp; @@ -957,11 +963,11 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp) out_err_free: kfree(old_prog); out_err: - __sk_filter_release(fp); + __bpf_prog_release(fp); return ERR_PTR(err); } -static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) +static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) { int err; @@ -970,7 +976,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) err = bpf_check_classic(fp->insns, fp->len); if (err) { - __sk_filter_release(fp); + __bpf_prog_release(fp); return ERR_PTR(err); } @@ -983,13 +989,13 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) * internal BPF translation for the optimized interpreter. */ if (!fp->jited) - fp = __sk_migrate_filter(fp); + fp = bpf_migrate_filter(fp); return fp; } /** - * sk_unattached_filter_create - create an unattached filter + * bpf_prog_create - create an unattached filter * @pfp: the unattached filter that is created * @fprog: the filter program * @@ -998,23 +1004,21 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp) * If an error occurs or there is insufficient memory for the filter * a negative errno code is returned. On success the return is zero. */ -int sk_unattached_filter_create(struct sk_filter **pfp, - struct sock_fprog_kern *fprog) +int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) { unsigned int fsize = bpf_classic_proglen(fprog); - struct sk_filter *fp; + struct bpf_prog *fp; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL); + fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL); if (!fp) return -ENOMEM; memcpy(fp->insns, fprog->filter, fsize); - atomic_set(&fp->refcnt, 1); fp->len = fprog->len; /* Since unattached filters are not copied back to user * space through sk_get_filter(), we do not need to hold @@ -1022,23 +1026,23 @@ int sk_unattached_filter_create(struct sk_filter **pfp, */ fp->orig_prog = NULL; - /* __sk_prepare_filter() already takes care of freeing + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = __sk_prepare_filter(fp); + fp = bpf_prepare_filter(fp); if (IS_ERR(fp)) return PTR_ERR(fp); *pfp = fp; return 0; } -EXPORT_SYMBOL_GPL(sk_unattached_filter_create); +EXPORT_SYMBOL_GPL(bpf_prog_create); -void sk_unattached_filter_destroy(struct sk_filter *fp) +void bpf_prog_destroy(struct bpf_prog *fp) { - __sk_filter_release(fp); + __bpf_prog_release(fp); } -EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); +EXPORT_SYMBOL_GPL(bpf_prog_destroy); /** * sk_attach_filter - attach a socket filter @@ -1054,7 +1058,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct sk_filter *fp, *old_fp; unsigned int fsize = bpf_classic_proglen(fprog); - unsigned int sk_fsize = sk_filter_size(fprog->len); + unsigned int bpf_fsize = bpf_prog_size(fprog->len); + struct bpf_prog *prog; int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) @@ -1064,29 +1069,36 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(sk_fsize, GFP_KERNEL); - if (!fp) + prog = kmalloc(bpf_fsize, GFP_KERNEL); + if (!prog) return -ENOMEM; - if (copy_from_user(fp->insns, fprog->filter, fsize)) { - kfree(fp); + if (copy_from_user(prog->insns, fprog->filter, fsize)) { + kfree(prog); return -EFAULT; } - fp->len = fprog->len; + prog->len = fprog->len; - err = sk_store_orig_filter(fp, fprog); + err = bpf_prog_store_orig_filter(prog, fprog); if (err) { - kfree(fp); + kfree(prog); return -ENOMEM; } - /* __sk_prepare_filter() already takes care of freeing + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = __sk_prepare_filter(fp); - if (IS_ERR(fp)) - return PTR_ERR(fp); + prog = bpf_prepare_filter(prog); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + fp = kmalloc(sizeof(*fp), GFP_KERNEL); + if (!fp) { + __bpf_prog_release(prog); + return -ENOMEM; + } + fp->prog = prog; atomic_set(&fp->refcnt, 0); @@ -1142,7 +1154,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, /* We're copying the filter that has been originally attached, * so no conversion/decode needed anymore. */ - fprog = filter->orig_prog; + fprog = filter->prog->orig_prog; ret = fprog->len; if (!len) diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 12ab7b4be609..4eab4a94a59d 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -107,11 +107,11 @@ #include #include -static struct sk_filter *ptp_insns __read_mostly; +static struct bpf_prog *ptp_insns __read_mostly; unsigned int ptp_classify_raw(const struct sk_buff *skb) { - return SK_RUN_FILTER(ptp_insns, skb); + return BPF_PROG_RUN(ptp_insns, skb); } EXPORT_SYMBOL_GPL(ptp_classify_raw); @@ -189,5 +189,5 @@ void __init ptp_classifier_init(void) .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, }; - BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog)); + BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog)); } diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 57d922320c59..ad704c757bb4 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -68,7 +68,7 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, if (!filter) goto out; - fprog = filter->orig_prog; + fprog = filter->prog->orig_prog; flen = bpf_classic_proglen(fprog); attr = nla_reserve(skb, attrtype, flen); diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index bbffdbdaf603..dffee9d47ec4 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -28,7 +28,7 @@ static int bpf_mt_check(const struct xt_mtchk_param *par) program.len = info->bpf_program_num_elem; program.filter = info->bpf_program; - if (sk_unattached_filter_create(&info->filter, &program)) { + if (bpf_prog_create(&info->filter, &program)) { pr_info("bpf: check failed: parse error\n"); return -EINVAL; } @@ -40,13 +40,13 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_bpf_info *info = par->matchinfo; - return SK_RUN_FILTER(info->filter, skb); + return BPF_PROG_RUN(info->filter, skb); } static void bpf_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_bpf_info *info = par->matchinfo; - sk_unattached_filter_destroy(info->filter); + bpf_prog_destroy(info->filter); } static struct xt_match bpf_mt_reg __read_mostly = { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 13f64df2c710..0e30d58149da 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -30,7 +30,7 @@ struct cls_bpf_head { }; struct cls_bpf_prog { - struct sk_filter *filter; + struct bpf_prog *filter; struct sock_filter *bpf_ops; struct tcf_exts exts; struct tcf_result res; @@ -54,7 +54,7 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, int ret; list_for_each_entry(prog, &head->plist, link) { - int filter_res = SK_RUN_FILTER(prog->filter, skb); + int filter_res = BPF_PROG_RUN(prog->filter, skb); if (filter_res == 0) continue; @@ -92,7 +92,7 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) tcf_unbind_filter(tp, &prog->res); tcf_exts_destroy(tp, &prog->exts); - sk_unattached_filter_destroy(prog->filter); + bpf_prog_destroy(prog->filter); kfree(prog->bpf_ops); kfree(prog); @@ -161,7 +161,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, struct sock_filter *bpf_ops, *bpf_old; struct tcf_exts exts; struct sock_fprog_kern tmp; - struct sk_filter *fp, *fp_old; + struct bpf_prog *fp, *fp_old; u16 bpf_size, bpf_len; u32 classid; int ret; @@ -193,7 +193,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tmp.len = bpf_len; tmp.filter = bpf_ops; - ret = sk_unattached_filter_create(&fp, &tmp); + ret = bpf_prog_create(&fp, &tmp); if (ret) goto errout_free; @@ -211,7 +211,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tcf_exts_change(tp, &prog->exts, &exts); if (fp_old) - sk_unattached_filter_destroy(fp_old); + bpf_prog_destroy(fp_old); if (bpf_old) kfree(bpf_old); -- cgit v1.2.3 From 1b69be5e8afc634f39ad695a6ab6aad0cf0975c7 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 10 Jun 2014 11:41:57 +1000 Subject: drivers/vfio: EEH support for VFIO PCI device The patch adds new IOCTL commands for sPAPR VFIO container device to support EEH functionality for PCI devices, which have been passed through from host to somebody else via VFIO. Signed-off-by: Gavin Shan Acked-by: Alexander Graf Acked-by: Alex Williamson Signed-off-by: Benjamin Herrenschmidt --- Documentation/vfio.txt | 87 +++++++++++++++++++++++++++++++++++-- drivers/vfio/Makefile | 1 + drivers/vfio/pci/vfio_pci.c | 18 ++++++-- drivers/vfio/vfio_iommu_spapr_tce.c | 17 +++++++- drivers/vfio/vfio_spapr_eeh.c | 87 +++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 23 ++++++++++ include/uapi/linux/vfio.h | 34 +++++++++++++++ 7 files changed, 259 insertions(+), 8 deletions(-) create mode 100644 drivers/vfio/vfio_spapr_eeh.c (limited to 'include/uapi/linux') diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index b9ca02370d46..96978eced341 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -305,7 +305,15 @@ faster, the map/unmap handling has been implemented in real mode which provides an excellent performance which has limitations such as inability to do locked pages accounting in real time. -So 3 additional ioctls have been added: +4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O +subtree that can be treated as a unit for the purposes of partitioning and +error recovery. A PE may be a single or multi-function IOA (IO Adapter), a +function of a multi-function IOA, or multiple IOAs (possibly including switch +and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors +and recover from them via EEH RTAS services, which works on the basis of +additional ioctl commands. + +So 4 additional ioctls have been added: VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start of the DMA window on the PCI bus. @@ -316,9 +324,12 @@ So 3 additional ioctls have been added: VFIO_IOMMU_DISABLE - disables the container. + VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery. The code flow from the example above should be slightly changed: + struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 }; + ..... /* Add the group to the container */ ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); @@ -342,9 +353,79 @@ The code flow from the example above should be slightly changed: dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; /* Check here is .iova/.size are within DMA window from spapr_iommu_info */ - ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); - ..... + + /* Get a file descriptor for the device */ + device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0"); + + .... + + /* Gratuitous device reset and go... */ + ioctl(device, VFIO_DEVICE_RESET); + + /* Make sure EEH is supported */ + ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH); + + /* Enable the EEH functionality on the device */ + pe_op.op = VFIO_EEH_PE_ENABLE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* You're suggested to create additional data struct to represent + * PE, and put child devices belonging to same IOMMU group to the + * PE instance for later reference. + */ + + /* Check the PE's state and make sure it's in functional state */ + pe_op.op = VFIO_EEH_PE_GET_STATE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Save device state using pci_save_state(). + * EEH should be enabled on the specified device. + */ + + .... + + /* When 0xFF's returned from reading PCI config space or IO BARs + * of the PCI device. Check the PE's state to see if that has been + * frozen. + */ + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Waiting for pending PCI transactions to be completed and don't + * produce any more PCI traffic from/to the affected PE until + * recovery is finished. + */ + + /* Enable IO for the affected PE and collect logs. Usually, the + * standard part of PCI config space, AER registers are dumped + * as logs for further analysis. + */ + pe_op.op = VFIO_EEH_PE_UNFREEZE_IO; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* + * Issue PE reset: hot or fundamental reset. Usually, hot reset + * is enough. However, the firmware of some PCI adapters would + * require fundamental reset. + */ + pe_op.op = VFIO_EEH_PE_RESET_HOT; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Configure the PCI bridges for the affected PE */ + pe_op.op = VFIO_EEH_PE_CONFIGURE; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + /* Restored state we saved at initialization time. pci_restore_state() + * is good enough as an example. + */ + + /* Hopefully, error is recovered successfully. Now, you can resume to + * start PCI traffic to/from the affected PE. + */ + + .... ------------------------------------------------------------------------------- diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 72bfabc8629e..50e30bc75e85 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o +obj-$(CONFIG_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 010e0f8b8e4f..e2ee80f36e3e 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -157,8 +157,10 @@ static void vfio_pci_release(void *device_data) { struct vfio_pci_device *vdev = device_data; - if (atomic_dec_and_test(&vdev->refcnt)) + if (atomic_dec_and_test(&vdev->refcnt)) { + vfio_spapr_pci_eeh_release(vdev->pdev); vfio_pci_disable(vdev); + } module_put(THIS_MODULE); } @@ -166,19 +168,27 @@ static void vfio_pci_release(void *device_data) static int vfio_pci_open(void *device_data) { struct vfio_pci_device *vdev = device_data; + int ret; if (!try_module_get(THIS_MODULE)) return -ENODEV; if (atomic_inc_return(&vdev->refcnt) == 1) { - int ret = vfio_pci_enable(vdev); + ret = vfio_pci_enable(vdev); + if (ret) + goto error; + + ret = vfio_spapr_pci_eeh_open(vdev->pdev); if (ret) { - module_put(THIS_MODULE); - return ret; + vfio_pci_disable(vdev); + goto error; } } return 0; +error: + module_put(THIS_MODULE); + return ret; } static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index a84788ba662c..730b4ef3e0cc 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -156,7 +156,16 @@ static long tce_iommu_ioctl(void *iommu_data, switch (cmd) { case VFIO_CHECK_EXTENSION: - return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0; + switch (arg) { + case VFIO_SPAPR_TCE_IOMMU: + ret = 1; + break; + default: + ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg); + break; + } + + return (ret < 0) ? 0 : ret; case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { struct vfio_iommu_spapr_tce_info info; @@ -283,6 +292,12 @@ static long tce_iommu_ioctl(void *iommu_data, tce_iommu_disable(container); mutex_unlock(&container->lock); return 0; + case VFIO_EEH_PE_OP: + if (!container->tbl || !container->tbl->it_group) + return -ENODEV; + + return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group, + cmd, arg); } return -ENOTTY; diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c new file mode 100644 index 000000000000..f834b4ce1431 --- /dev/null +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -0,0 +1,87 @@ +/* + * EEH functionality support for VFIO devices. The feature is only + * available on sPAPR compatible platforms. + * + * Copyright Gavin Shan, IBM Corporation 2014. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +/* We might build address mapping here for "fast" path later */ +int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +{ + return eeh_dev_open(pdev); +} + +void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) +{ + eeh_dev_release(pdev); +} + +long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, + unsigned int cmd, unsigned long arg) +{ + struct eeh_pe *pe; + struct vfio_eeh_pe_op op; + unsigned long minsz; + long ret = -EINVAL; + + switch (cmd) { + case VFIO_CHECK_EXTENSION: + if (arg == VFIO_EEH) + ret = eeh_enabled() ? 1 : 0; + else + ret = 0; + break; + case VFIO_EEH_PE_OP: + pe = eeh_iommu_group_to_pe(group); + if (!pe) + return -ENODEV; + + minsz = offsetofend(struct vfio_eeh_pe_op, op); + if (copy_from_user(&op, (void __user *)arg, minsz)) + return -EFAULT; + if (op.argsz < minsz || op.flags) + return -EINVAL; + + switch (op.op) { + case VFIO_EEH_PE_DISABLE: + ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE); + break; + case VFIO_EEH_PE_ENABLE: + ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE); + break; + case VFIO_EEH_PE_UNFREEZE_IO: + ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO); + break; + case VFIO_EEH_PE_UNFREEZE_DMA: + ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA); + break; + case VFIO_EEH_PE_GET_STATE: + ret = eeh_pe_get_state(pe); + break; + case VFIO_EEH_PE_RESET_DEACTIVATE: + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); + break; + case VFIO_EEH_PE_RESET_HOT: + ret = eeh_pe_reset(pe, EEH_RESET_HOT); + break; + case VFIO_EEH_PE_RESET_FUNDAMENTAL: + ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL); + break; + case VFIO_EEH_PE_CONFIGURE: + ret = eeh_pe_configure(pe); + break; + default: + ret = -EINVAL; + } + } + + return ret; +} diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 8ec980b5e3af..25a0fbd4b998 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -98,4 +98,27 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); +#ifdef CONFIG_EEH +extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); +extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); +extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, + unsigned int cmd, + unsigned long arg); +#else +static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +{ + return 0; +} + +static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) +{ +} + +static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, + unsigned int cmd, + unsigned long arg) +{ + return -ENOTTY; +} +#endif /* CONFIG_EEH */ #endif /* VFIO_H */ diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index cb9023d4f063..6612974c64bf 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -30,6 +30,9 @@ */ #define VFIO_DMA_CC_IOMMU 4 +/* Check if EEH is supported */ +#define VFIO_EEH 5 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -455,6 +458,37 @@ struct vfio_iommu_spapr_tce_info { #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) +/* + * EEH PE operation struct provides ways to: + * - enable/disable EEH functionality; + * - unfreeze IO/DMA for frozen PE; + * - read PE state; + * - reset PE; + * - configure PE. + */ +struct vfio_eeh_pe_op { + __u32 argsz; + __u32 flags; + __u32 op; +}; + +#define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ +#define VFIO_EEH_PE_ENABLE 1 /* Enable EEH functionality */ +#define VFIO_EEH_PE_UNFREEZE_IO 2 /* Enable IO for frozen PE */ +#define VFIO_EEH_PE_UNFREEZE_DMA 3 /* Enable DMA for frozen PE */ +#define VFIO_EEH_PE_GET_STATE 4 /* PE state retrieval */ +#define VFIO_EEH_PE_STATE_NORMAL 0 /* PE in functional state */ +#define VFIO_EEH_PE_STATE_RESET 1 /* PE reset in progress */ +#define VFIO_EEH_PE_STATE_STOPPED 2 /* Stopped DMA and IO */ +#define VFIO_EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */ +#define VFIO_EEH_PE_STATE_UNAVAIL 5 /* State unavailable */ +#define VFIO_EEH_PE_RESET_DEACTIVATE 5 /* Deassert PE reset */ +#define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ +#define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ +#define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ + +#define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3 From c6e9d6f38894798696f23c8084ca7edbf16ee895 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Jul 2014 04:13:05 -0400 Subject: random: introduce getrandom(2) system call The getrandom(2) system call was requested by the LibreSSL Portable developers. It is analoguous to the getentropy(2) system call in OpenBSD. The rationale of this system call is to provide resiliance against file descriptor exhaustion attacks, where the attacker consumes all available file descriptors, forcing the use of the fallback code where /dev/[u]random is not available. Since the fallback code is often not well-tested, it is better to eliminate this potential failure mode entirely. The other feature provided by this new system call is the ability to request randomness from the /dev/urandom entropy pool, but to block until at least 128 bits of entropy has been accumulated in the /dev/urandom entropy pool. Historically, the emphasis in the /dev/urandom development has been to ensure that urandom pool is initialized as quickly as possible after system boot, and preferably before the init scripts start execution. This is because changing /dev/urandom reads to block represents an interface change that could potentially break userspace which is not acceptable. In practice, on most x86 desktop and server systems, in general the entropy pool can be initialized before it is needed (and in modern kernels, we will printk a warning message if not). However, on an embedded system, this may not be the case. And so with this new interface, we can provide the functionality of blocking until the urandom pool has been initialized. Any userspace program which uses this new functionality must take care to assure that if it is used during the boot process, that it will not cause the init scripts or other portions of the system startup to hang indefinitely. SYNOPSIS #include int getrandom(void *buf, size_t buflen, unsigned int flags); DESCRIPTION The system call getrandom() fills the buffer pointed to by buf with up to buflen random bytes which can be used to seed user space random number generators (i.e., DRBG's) or for other cryptographic uses. It should not be used for Monte Carlo simulations or other programs/algorithms which are doing probabilistic sampling. If the GRND_RANDOM flags bit is set, then draw from the /dev/random pool instead of the /dev/urandom pool. The /dev/random pool is limited based on the entropy that can be obtained from environmental noise, so if there is insufficient entropy, the requested number of bytes may not be returned. If there is no entropy available at all, getrandom(2) will either block, or return an error with errno set to EAGAIN if the GRND_NONBLOCK bit is set in flags. If the GRND_RANDOM bit is not set, then the /dev/urandom pool will be used. Unlike using read(2) to fetch data from /dev/urandom, if the urandom pool has not been sufficiently initialized, getrandom(2) will block (or return -1 with the errno set to EAGAIN if the GRND_NONBLOCK bit is set in flags). The getentropy(2) system call in OpenBSD can be emulated using the following function: int getentropy(void *buf, size_t buflen) { int ret; if (buflen > 256) goto failure; ret = getrandom(buf, buflen, 0); if (ret < 0) return ret; if (ret == buflen) return 0; failure: errno = EIO; return -1; } RETURN VALUE On success, the number of bytes that was filled in the buf is returned. This may not be all the bytes requested by the caller via buflen if insufficient entropy was present in the /dev/random pool, or if the system call was interrupted by a signal. On error, -1 is returned, and errno is set appropriately. ERRORS EINVAL An invalid flag was passed to getrandom(2) EFAULT buf is outside the accessible address space. EAGAIN The requested entropy was not available, and getentropy(2) would have blocked if the GRND_NONBLOCK flag was not set. EINTR While blocked waiting for entropy, the call was interrupted by a signal handler; see the description of how interrupted read(2) calls on "slow" devices are handled with and without the SA_RESTART flag in the signal(7) man page. NOTES For small requests (buflen <= 256) getrandom(2) will not return EINTR when reading from the urandom pool once the entropy pool has been initialized, and it will return all of the bytes that have been requested. This is the recommended way to use getrandom(2), and is designed for compatibility with OpenBSD's getentropy() system call. However, if you are using GRND_RANDOM, then getrandom(2) may block until the entropy accounting determines that sufficient environmental noise has been gathered such that getrandom(2) will be operating as a NRBG instead of a DRBG for those people who are working in the NIST SP 800-90 regime. Since it may block for a long time, these guarantees do *not* apply. The user may want to interrupt a hanging process using a signal, so blocking until all of the requested bytes are returned would be unfriendly. For this reason, the user of getrandom(2) MUST always check the return value, in case it returns some error, or if fewer bytes than requested was returned. In the case of !GRND_RANDOM and small request, the latter should never happen, but the careful userspace code (and all crypto code should be careful) should check for this anyway! Finally, unless you are doing long-term key generation (and perhaps not even then), you probably shouldn't be using GRND_RANDOM. The cryptographic algorithms used for /dev/urandom are quite conservative, and so should be sufficient for all purposes. The disadvantage of GRND_RANDOM is that it can block, and the increased complexity required to deal with partially fulfilled getrandom(2) requests. Signed-off-by: Theodore Ts'o Reviewed-by: Zach Brown --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + drivers/char/random.c | 40 ++++++++++++++++++++++++++++++++++++--- include/linux/syscalls.h | 3 +++ include/uapi/asm-generic/unistd.h | 4 +++- include/uapi/linux/random.h | 9 +++++++++ 6 files changed, 54 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..5b46a618aeb1 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351 i386 sched_setattr sys_sched_setattr 352 i386 sched_getattr sys_sched_getattr 353 i386 renameat2 sys_renameat2 +355 i386 getrandom sys_getrandom diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..0dc4bf891460 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314 common sched_setattr sys_sched_setattr 315 common sched_getattr sys_sched_getattr 316 common renameat2 sys_renameat2 +318 common getrandom sys_getrandom # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/drivers/char/random.c b/drivers/char/random.c index aa22fe551c2a..7d1682ea1e86 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -258,6 +258,8 @@ #include #include #include +#include +#include #include #include @@ -404,6 +406,7 @@ static struct poolinfo { */ static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); +static DECLARE_WAIT_QUEUE_HEAD(urandom_init_wait); static struct fasync_struct *fasync; /********************************************************************** @@ -657,6 +660,7 @@ retry: r->entropy_total = 0; if (r == &nonblocking_pool) { prandom_reseed_late(); + wake_up_interruptible(&urandom_init_wait); pr_notice("random: %s pool is initialized\n", r->name); } } @@ -1174,13 +1178,14 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf, { ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; + int large_request = (nbytes > 256); trace_extract_entropy_user(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, 0, 0); while (nbytes) { - if (need_resched()) { + if (large_request && need_resched()) { if (signal_pending(current)) { if (ret == 0) ret = -ERESTARTSYS; @@ -1355,7 +1360,7 @@ static int arch_random_refill(void) } static ssize_t -random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) +_random_read(int nonblock, char __user *buf, size_t nbytes) { ssize_t n; @@ -1379,7 +1384,7 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) if (arch_random_refill()) continue; - if (file->f_flags & O_NONBLOCK) + if (nonblock) return -EAGAIN; wait_event_interruptible(random_read_wait, @@ -1390,6 +1395,12 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) } } +static ssize_t +random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) +{ + return _random_read(file->f_flags & O_NONBLOCK, buf, nbytes); +} + static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { @@ -1533,6 +1544,29 @@ const struct file_operations urandom_fops = { .llseek = noop_llseek, }; +SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, + unsigned int, flags) +{ + if (flags & ~(GRND_NONBLOCK|GRND_RANDOM)) + return -EINVAL; + + if (count > INT_MAX) + count = INT_MAX; + + if (flags & GRND_RANDOM) + return _random_read(flags & GRND_NONBLOCK, buf, count); + + if (unlikely(nonblocking_pool.initialized == 0)) { + if (flags & GRND_NONBLOCK) + return -EAGAIN; + wait_event_interruptible(urandom_init_wait, + nonblocking_pool.initialized); + if (signal_pending(current)) + return -ERESTARTSYS; + } + return urandom_read(NULL, buf, count, NULL); +} + /*************************************************************** * Random UUID interface * diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..43324a897cf2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,7 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_getrandom(char __user *buf, size_t count, + unsigned int flags); + #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 333640608087..1d104a2ca643 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 276 __SYSCALL(__NR_renameat2, sys_renameat2) +#define __NR_getrandom 278 +__SYSCALL(__NR_getrandom, sys_getrandom) #undef __NR_syscalls -#define __NR_syscalls 277 +#define __NR_syscalls 279 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h index fff3528a078f..3f93d1695e7f 100644 --- a/include/uapi/linux/random.h +++ b/include/uapi/linux/random.h @@ -40,4 +40,13 @@ struct rand_pool_info { __u32 buf[0]; }; +/* + * Flags for getrandom(2) + * + * GRND_NONBLOCK Don't block and return EAGAIN instead + * GRND_RANDOM Use the /dev/random pool instead of /dev/urandom + */ +#define GRND_NONBLOCK 0x0001 +#define GRND_RANDOM 0x0002 + #endif /* _UAPI_LINUX_RANDOM_H */ -- cgit v1.2.3 From f24b9be5957b38bb420b838115040dc2031b7d0c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 4 Aug 2014 22:11:45 -0400 Subject: net-timestamp: extend SCM_TIMESTAMPING ancillary data struct Applications that request kernel tx timestamps with SO_TIMESTAMPING read timestamps as recvmsg() ancillary data. The response is defined implicitly as timespec[3]. 1) define struct scm_timestamping explicitly and 2) add support for new tstamp types. On tx, scm_timestamping always accompanies a sock_extended_err. Define previously unused field ee_info to signal the type of ts[0]. Introduce SCM_TSTAMP_SND to define the existing behavior. The reception path is not modified. On rx, no struct similar to sock_extended_err is passed along with SCM_TIMESTAMPING. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ include/net/sock.h | 4 +++- include/uapi/linux/errqueue.h | 18 ++++++++++++++++++ net/core/skbuff.c | 1 + net/socket.c | 20 +++++++++++--------- 5 files changed, 36 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 281deced7469..477f0f60db45 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -249,6 +249,9 @@ enum { SKBTX_SHARED_FRAG = 1 << 5, }; +#define SKBTX_ANY_SW_TSTAMP SKBTX_SW_TSTAMP +#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) + /* * The callback notifies userspace to release buffers when skb DMA is done in * lower device, the skb last reference should be 0 when calling this. diff --git a/include/net/sock.h b/include/net/sock.h index b91c8868ab8d..02f5b35e65f1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2169,7 +2169,9 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) */ if (sock_flag(sk, SOCK_RCVTSTAMP) || sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) || - (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) || + (kt.tv64 && + (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) || + skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP)) || (hwtstamps->hwtstamp.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))) __sock_recv_timestamp(msg, sk, skb); diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index aacd4fb7102a..accee72cae7c 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -22,5 +22,23 @@ struct sock_extended_err { #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) +/** + * struct scm_timestamping - timestamps exposed through cmsg + * + * The timestamping interfaces SO_TIMESTAMPING, MSG_TSTAMP_* + * communicate network timestamps by passing this struct in a cmsg with + * recvmsg(). See Documentation/networking/timestamping.txt for details. + */ +struct scm_timestamping { + struct timespec ts[3]; +}; + +/* The type of scm_timestamping, passed in sock_extended_err ee_info. + * This defines the type of ts[0]. For SCM_TSTAMP_SND only, if ts[0] + * is zero, then this is a hardware timestamp and recorded in ts[2]. + */ +enum { + SCM_TSTAMP_SND, /* driver passed skb to NIC, or HW */ +}; #endif /* _UAPI_LINUX_ERRQUEUE_H */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c1a33033cbe2..c9f68802e992 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3521,6 +3521,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + serr->ee.ee_info = SCM_TSTAMP_SND; err = sock_queue_err_skb(sk, skb); diff --git a/net/socket.c b/net/socket.c index d8222c025061..dc0cc5d95ee5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -106,6 +106,7 @@ #include #include #include +#include #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; @@ -697,7 +698,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); - struct timespec ts[3]; + struct scm_timestamping tss; int empty = 1; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); @@ -714,24 +715,25 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); } else { - skb_get_timestampns(skb, &ts[0]); + struct timespec ts; + skb_get_timestampns(skb, &ts); put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, - sizeof(ts[0]), &ts[0]); + sizeof(ts), &ts); } } - - memset(ts, 0, sizeof(ts)); - if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && - ktime_to_timespec_cond(skb->tstamp, ts + 0)) + memset(&tss, 0, sizeof(tss)); + if ((sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) || + skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP) && + ktime_to_timespec_cond(skb->tstamp, tss.ts + 0)) empty = 0; if (shhwtstamps && sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) + ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) empty = 0; if (!empty) put_cmsg(msg, SOL_SOCKET, - SCM_TIMESTAMPING, sizeof(ts), &ts); + SCM_TIMESTAMPING, sizeof(tss), &tss); } EXPORT_SYMBOL_GPL(__sock_recv_timestamp); -- cgit v1.2.3 From 09c2d251b70723650ba47e83571ff49281320f7c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 4 Aug 2014 22:11:47 -0400 Subject: net-timestamp: add key to disambiguate concurrent datagrams Datagrams timestamped on transmission can coexist in the kernel stack and be reordered in packet scheduling. When reading looped datagrams from the socket error queue it is not always possible to unique correlate looped data with original send() call (for application level retransmits). Even if possible, it may be expensive and complex, requiring packet inspection. Introduce a data-independent ID mechanism to associate timestamps with send calls. Pass an ID alongside the timestamp in field ee_data of sock_extended_err. The ID is a simple 32 bit unsigned int that is associated with the socket and incremented on each send() call for which software tx timestamp generation is enabled. The feature is enabled only if SOF_TIMESTAMPING_OPT_ID is set, to avoid changing ee_data for existing applications that expect it 0. The counter is reset each time the flag is reenabled. Reenabling does not change the ID of already submitted data. It is possible to receive out of order IDs if the timestamp stream is not quiesced first. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + include/net/sock.h | 2 ++ include/uapi/linux/net_tstamp.h | 8 +++++--- net/core/skbuff.c | 2 ++ net/core/sock.c | 3 +++ net/ipv4/ip_output.c | 6 ++++++ net/ipv6/ip6_output.c | 9 ++++++++- 7 files changed, 27 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 477f0f60db45..0e35b3af7317 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -278,6 +278,7 @@ struct skb_shared_info { unsigned short gso_type; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; + u32 tskey; __be32 ip6_frag_id; /* diff --git a/include/net/sock.h b/include/net/sock.h index a21129716aae..52fe0bc5598a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -280,6 +280,7 @@ struct cg_proto; * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received * @sk_tsflags: SO_TIMESTAMPING socket options + * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_socket: Identd and reporting IO signals * @sk_user_data: RPC layer private data * @sk_frag: cached page frag @@ -414,6 +415,7 @@ struct sock { struct timer_list sk_timer; ktime_t sk_stamp; u16 sk_tsflags; + u32 sk_tskey; struct socket *sk_socket; void *sk_user_data; struct page_frag sk_frag; diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index f53879c0f590..1e861d2e1a31 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -20,9 +20,11 @@ enum { SOF_TIMESTAMPING_SOFTWARE = (1<<4), SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5), SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6), - SOF_TIMESTAMPING_MASK = - (SOF_TIMESTAMPING_RAW_HARDWARE - 1) | - SOF_TIMESTAMPING_RAW_HARDWARE + SOF_TIMESTAMPING_OPT_ID = (1<<7), + + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID, + SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | + SOF_TIMESTAMPING_LAST }; /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c9f68802e992..0df4f1d14c5a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3522,6 +3522,8 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = SCM_TSTAMP_SND; + if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + serr->ee.ee_data = skb_shinfo(skb)->tskey; err = sock_queue_err_skb(sk, skb); diff --git a/net/core/sock.c b/net/core/sock.c index 47c9377e14b9..1e0f1c63ad6b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -848,6 +848,9 @@ set_rcvbuf: ret = -EINVAL; break; } + if (val & SOF_TIMESTAMPING_OPT_ID && + !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) + sk->sk_tskey = 0; sk->sk_tsflags = val; if (val & SOF_TIMESTAMPING_RX_SOFTWARE) sock_enable_timestamp(sk, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b16556836d66..215af2b155cb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -855,11 +855,15 @@ static int __ip_append_data(struct sock *sk, unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; + u32 tskey = 0; skb = skb_peek_tail(queue); exthdrlen = !skb ? rt->dst.header_len : 0; mtu = cork->fragsize; + if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && + sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + tskey = sk->sk_tskey++; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -976,6 +980,8 @@ alloc_new_skb: /* only the initial fragment is time stamped */ skb_shinfo(skb)->tx_flags = cork->tx_flags; cork->tx_flags = 0; + skb_shinfo(skb)->tskey = tskey; + tskey = 0; /* * Find where to start putting bytes. diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f5dafe609f8b..315a55d66079 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1157,6 +1157,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int err; int offset = 0; __u8 tx_flags = 0; + u32 tskey = 0; if (flags&MSG_PROBE) return 0; @@ -1272,8 +1273,12 @@ emsgsize: } } - if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) + if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { sock_tx_timestamp(sk, &tx_flags); + if (tx_flags & SKBTX_ANY_SW_TSTAMP && + sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + tskey = sk->sk_tskey++; + } /* * Let's try using as much space as possible. @@ -1397,6 +1402,8 @@ alloc_new_skb: /* Only the initial fragment is time stamped */ skb_shinfo(skb)->tx_flags = tx_flags; tx_flags = 0; + skb_shinfo(skb)->tskey = tskey; + tskey = 0; /* * Find where to start putting bytes -- cgit v1.2.3 From e7fd2885385157d46c85f282fc6d7d297db43e1f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 4 Aug 2014 22:11:48 -0400 Subject: net-timestamp: SCHED timestamp on entering packet scheduler Kernel transmit latency is often incurred in the packet scheduler. Introduce a new timestamp on transmission just before entering the scheduler. When data travels through multiple devices (bonding, tunneling, ...) each device will export an individual timestamp. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 11 +++++++++-- include/uapi/linux/errqueue.h | 1 + include/uapi/linux/net_tstamp.h | 3 ++- net/core/dev.c | 4 ++++ net/core/skbuff.c | 16 ++++++++++++---- net/socket.c | 3 +++ 6 files changed, 31 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0e35b3af7317..50e1e9b3a5a5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -229,7 +229,7 @@ enum { /* generate hardware time stamp */ SKBTX_HW_TSTAMP = 1 << 0, - /* generate software time stamp */ + /* generate software time stamp when queueing packet to NIC */ SKBTX_SW_TSTAMP = 1 << 1, /* device driver is going to provide hardware time stamp */ @@ -247,9 +247,12 @@ enum { * all frags to avoid possible bad checksum */ SKBTX_SHARED_FRAG = 1 << 5, + + /* generate software time stamp when entering packet scheduling */ + SKBTX_SCHED_TSTAMP = 1 << 6, }; -#define SKBTX_ANY_SW_TSTAMP SKBTX_SW_TSTAMP +#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) /* @@ -2695,6 +2698,10 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps); +void __skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps, + struct sock *sk, int tstype); + /** * skb_tstamp_tx - queue clone of skb with send time stamps * @orig_skb: the original outgoing packet diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index accee72cae7c..17437cf297b7 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -39,6 +39,7 @@ struct scm_timestamping { */ enum { SCM_TSTAMP_SND, /* driver passed skb to NIC, or HW */ + SCM_TSTAMP_SCHED, /* data entered the packet scheduler */ }; #endif /* _UAPI_LINUX_ERRQUEUE_H */ diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 1e861d2e1a31..60733845fcdd 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -21,8 +21,9 @@ enum { SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5), SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6), SOF_TIMESTAMPING_OPT_ID = (1<<7), + SOF_TIMESTAMPING_TX_SCHED = (1<<8), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_SCHED, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/net/core/dev.c b/net/core/dev.c index b370230fe1d3..1c15b189c52b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -132,6 +132,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -2876,6 +2877,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) skb_reset_mac_header(skb); + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) + __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED); + /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0df4f1d14c5a..9705c0732aab 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3490,10 +3490,10 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(sock_queue_err_skb); -void skb_tstamp_tx(struct sk_buff *orig_skb, - struct skb_shared_hwtstamps *hwtstamps) +void __skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps, + struct sock *sk, int tstype) { - struct sock *sk = orig_skb->sk; struct sock_exterr_skb *serr; struct sk_buff *skb; int err; @@ -3521,7 +3521,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; - serr->ee.ee_info = SCM_TSTAMP_SND; + serr->ee.ee_info = tstype; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) serr->ee.ee_data = skb_shinfo(skb)->tskey; @@ -3530,6 +3530,14 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, if (err) kfree_skb(skb); } +EXPORT_SYMBOL_GPL(__skb_tstamp_tx); + +void skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps) +{ + return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk, + SCM_TSTAMP_SND); +} EXPORT_SYMBOL_GPL(skb_tstamp_tx); void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) diff --git a/net/socket.c b/net/socket.c index 255d9b802723..3a2778d71631 100644 --- a/net/socket.c +++ b/net/socket.c @@ -617,6 +617,9 @@ void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) *tx_flags |= SKBTX_HW_TSTAMP; if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) *tx_flags |= SKBTX_SW_TSTAMP; + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED) + *tx_flags |= SKBTX_SCHED_TSTAMP; + if (sock_flag(sk, SOCK_WIFI_STATUS)) *tx_flags |= SKBTX_WIFI_STATUS; } -- cgit v1.2.3 From e1c8a607b28190cd09a271508aa3025d3c2f312e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 4 Aug 2014 22:11:50 -0400 Subject: net-timestamp: ACK timestamp for bytestreams Add SOF_TIMESTAMPING_TX_ACK, a request for a tstamp when the last byte in the send() call is acknowledged. It implements the feature for TCP. The timestamp is generated when the TCP socket cumulative ACK is moved beyond the tracked seqno for the first time. The feature ignores SACK and FACK, because those acknowledge the specific byte, but not necessarily the entire contents of the buffer up to that byte. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ++++++- include/uapi/linux/errqueue.h | 1 + include/uapi/linux/net_tstamp.h | 3 ++- net/ipv4/tcp_input.c | 6 ++++++ net/socket.c | 2 ++ 5 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 50e1e9b3a5a5..11c270551d25 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -250,9 +250,14 @@ enum { /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, + + /* generate software timestamp on peer data acknowledgment */ + SKBTX_ACK_TSTAMP = 1 << 7, }; -#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | SKBTX_SCHED_TSTAMP) +#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ + SKBTX_SCHED_TSTAMP | \ + SKBTX_ACK_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) /* diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index 17437cf297b7..07bdce1f444a 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -40,6 +40,7 @@ struct scm_timestamping { enum { SCM_TSTAMP_SND, /* driver passed skb to NIC, or HW */ SCM_TSTAMP_SCHED, /* data entered the packet scheduler */ + SCM_TSTAMP_ACK, /* data acknowledged by peer */ }; #endif /* _UAPI_LINUX_ERRQUEUE_H */ diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 60733845fcdd..ff354021bb69 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -22,8 +22,9 @@ enum { SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6), SOF_TIMESTAMPING_OPT_ID = (1<<7), SOF_TIMESTAMPING_TX_SCHED = (1<<8), + SOF_TIMESTAMPING_TX_ACK = (1<<9), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_SCHED, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6a2984507755..a3d47af01906 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -74,6 +74,7 @@ #include #include #include +#include int sysctl_tcp_timestamps __read_mostly = 1; int sysctl_tcp_window_scaling __read_mostly = 1; @@ -3106,6 +3107,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->retrans_stamp = 0; } + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_ACK_TSTAMP) && + between(skb_shinfo(skb)->tskey, prior_snd_una, + tp->snd_una + 1)) + __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); + if (!fully_acked) break; diff --git a/net/socket.c b/net/socket.c index 3a2778d71631..ae89569a2db5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -619,6 +619,8 @@ void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) *tx_flags |= SKBTX_SW_TSTAMP; if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED) *tx_flags |= SKBTX_SCHED_TSTAMP; + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK) + *tx_flags |= SKBTX_ACK_TSTAMP; if (sock_flag(sk, SOCK_WIFI_STATUS)) *tx_flags |= SKBTX_WIFI_STATUS; -- cgit v1.2.3 From 753a2ad54ef45e3417a9d49537c2b42b04a2e1be Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Thu, 7 Aug 2014 00:17:09 +0200 Subject: net: reallocate new socket option number for IPV6_AUTOFLOWLABEL cb1ce2e ("ipv6: Implement automatic flow label generation on transmit") accidentally uses socket option 64, which is already used by ip6tables: IP6T_SO_SET_REPLACE / IP6T_SO_GET_INFO 64 IP6T_SO_SET_ADD_COUNTERS / IP6T_SO_GET_ENTRIES 65 There is comment include/uapi/linux/in6.h warning about that. Allocate 70 for this, which seems to be unused instead. Cc: Tom Herbert Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/uapi/linux/in6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 22b7a69619d8..74a2a1773494 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -233,7 +233,6 @@ struct in6_flowlabel_req { #if 0 /* not yet */ #define IPV6_USE_MIN_MTU 63 #endif -#define IPV6_AUTOFLOWLABEL 64 /* * Netfilter (1) @@ -262,6 +261,7 @@ struct in6_flowlabel_req { * IP6T_SO_ORIGINAL_DST 80 */ +#define IPV6_AUTOFLOWLABEL 70 /* RFC5014: Source address selection */ #define IPV6_ADDR_PREFERENCES 72 -- cgit v1.2.3 From 40e041a2c858b3caefc757e26cb85bfceae5062b Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 8 Aug 2014 14:25:27 -0700 Subject: shm: add sealing API If two processes share a common memory region, they usually want some guarantees to allow safe access. This often includes: - one side cannot overwrite data while the other reads it - one side cannot shrink the buffer while the other accesses it - one side cannot grow the buffer beyond previously set boundaries If there is a trust-relationship between both parties, there is no need for policy enforcement. However, if there's no trust relationship (eg., for general-purpose IPC) sharing memory-regions is highly fragile and often not possible without local copies. Look at the following two use-cases: 1) A graphics client wants to share its rendering-buffer with a graphics-server. The memory-region is allocated by the client for read/write access and a second FD is passed to the server. While scanning out from the memory region, the server has no guarantee that the client doesn't shrink the buffer at any time, requiring rather cumbersome SIGBUS handling. 2) A process wants to perform an RPC on another process. To avoid huge bandwidth consumption, zero-copy is preferred. After a message is assembled in-memory and a FD is passed to the remote side, both sides want to be sure that neither modifies this shared copy, anymore. The source may have put sensible data into the message without a separate copy and the target may want to parse the message inline, to avoid a local copy. While SIGBUS handling, POSIX mandatory locking and MAP_DENYWRITE provide ways to achieve most of this, the first one is unproportionally ugly to use in libraries and the latter two are broken/racy or even disabled due to denial of service attacks. This patch introduces the concept of SEALING. If you seal a file, a specific set of operations is blocked on that file forever. Unlike locks, seals can only be set, never removed. Hence, once you verified a specific set of seals is set, you're guaranteed that no-one can perform the blocked operations on this file, anymore. An initial set of SEALS is introduced by this patch: - SHRINK: If SEAL_SHRINK is set, the file in question cannot be reduced in size. This affects ftruncate() and open(O_TRUNC). - GROW: If SEAL_GROW is set, the file in question cannot be increased in size. This affects ftruncate(), fallocate() and write(). - WRITE: If SEAL_WRITE is set, no write operations (besides resizing) are possible. This affects fallocate(PUNCH_HOLE), mmap() and write(). - SEAL: If SEAL_SEAL is set, no further seals can be added to a file. This basically prevents the F_ADD_SEAL operation on a file and can be set to prevent others from adding further seals that you don't want. The described use-cases can easily use these seals to provide safe use without any trust-relationship: 1) The graphics server can verify that a passed file-descriptor has SEAL_SHRINK set. This allows safe scanout, while the client is allowed to increase buffer size for window-resizing on-the-fly. Concurrent writes are explicitly allowed. 2) For general-purpose IPC, both processes can verify that SEAL_SHRINK, SEAL_GROW and SEAL_WRITE are set. This guarantees that neither process can modify the data while the other side parses it. Furthermore, it guarantees that even with writable FDs passed to the peer, it cannot increase the size to hit memory-limits of the source process (in case the file-storage is accounted to the source). The new API is an extension to fcntl(), adding two new commands: F_GET_SEALS: Return a bitset describing the seals on the file. This can be called on any FD if the underlying file supports sealing. F_ADD_SEALS: Change the seals of a given file. This requires WRITE access to the file and F_SEAL_SEAL may not already be set. Furthermore, the underlying file must support sealing and there may not be any existing shared mapping of that file. Otherwise, EBADF/EPERM is returned. The given seals are _added_ to the existing set of seals on the file. You cannot remove seals again. The fcntl() handler is currently specific to shmem and disabled on all files. A file needs to explicitly support sealing for this interface to work. A separate syscall is added in a follow-up, which creates files that support sealing. There is no intention to support this on other file-systems. Semantics are unclear for non-volatile files and we lack any use-case right now. Therefore, the implementation is specific to shmem. Signed-off-by: David Herrmann Acked-by: Hugh Dickins Cc: Michael Kerrisk Cc: Ryan Lortie Cc: Lennart Poettering Cc: Daniel Mack Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fcntl.c | 5 ++ include/linux/shmem_fs.h | 17 ++++++ include/uapi/linux/fcntl.h | 15 +++++ mm/shmem.c | 143 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 180 insertions(+) (limited to 'include/uapi/linux') diff --git a/fs/fcntl.c b/fs/fcntl.c index 72c82f69b01b..22d1c3df61ac 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -336,6 +337,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_GETPIPE_SZ: err = pipe_fcntl(filp, cmd, arg); break; + case F_ADD_SEALS: + case F_GET_SEALS: + err = shmem_fcntl(filp, cmd, arg); + break; default: break; } diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 4d1771c2d29f..50777b5b1e4c 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -1,6 +1,7 @@ #ifndef __SHMEM_FS_H #define __SHMEM_FS_H +#include #include #include #include @@ -11,6 +12,7 @@ struct shmem_inode_info { spinlock_t lock; + unsigned int seals; /* shmem seals */ unsigned long flags; unsigned long alloced; /* data pages alloced to file */ union { @@ -65,4 +67,19 @@ static inline struct page *shmem_read_mapping_page( mapping_gfp_mask(mapping)); } +#ifdef CONFIG_TMPFS + +extern int shmem_add_seals(struct file *file, unsigned int seals); +extern int shmem_get_seals(struct file *file); +extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg); + +#else + +static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a) +{ + return -EINVAL; +} + +#endif + #endif diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 074b886c6be0..beed138bd359 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -27,6 +27,21 @@ #define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) #define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) +/* + * Set/Get seals + */ +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +/* + * Types of seals + */ +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +/* (1U << 31) is reserved for signed error codes */ + /* * Types of directory notifications that may be requested. */ diff --git a/mm/shmem.c b/mm/shmem.c index 6dc80d298f9d..8b43bb7a4efe 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -66,6 +66,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include #include @@ -547,6 +548,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range); static int shmem_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; + struct shmem_inode_info *info = SHMEM_I(inode); int error; error = inode_change_ok(inode, attr); @@ -557,6 +559,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) loff_t oldsize = inode->i_size; loff_t newsize = attr->ia_size; + /* protected by i_mutex */ + if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) || + (newsize > oldsize && (info->seals & F_SEAL_GROW))) + return -EPERM; + if (newsize != oldsize) { error = shmem_reacct_size(SHMEM_I(inode)->flags, oldsize, newsize); @@ -1412,6 +1419,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode info = SHMEM_I(inode); memset(info, 0, (char *)inode - (char *)info); spin_lock_init(&info->lock); + info->seals = F_SEAL_SEAL; info->flags = flags & VM_NORESERVE; INIT_LIST_HEAD(&info->swaplist); simple_xattrs_init(&info->xattrs); @@ -1470,7 +1478,17 @@ shmem_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; + struct shmem_inode_info *info = SHMEM_I(inode); pgoff_t index = pos >> PAGE_CACHE_SHIFT; + + /* i_mutex is held by caller */ + if (unlikely(info->seals)) { + if (info->seals & F_SEAL_WRITE) + return -EPERM; + if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size) + return -EPERM; + } + return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); } @@ -1808,11 +1826,125 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) return offset; } +static int shmem_wait_for_pins(struct address_space *mapping) +{ + return 0; +} + +#define F_ALL_SEALS (F_SEAL_SEAL | \ + F_SEAL_SHRINK | \ + F_SEAL_GROW | \ + F_SEAL_WRITE) + +int shmem_add_seals(struct file *file, unsigned int seals) +{ + struct inode *inode = file_inode(file); + struct shmem_inode_info *info = SHMEM_I(inode); + int error; + + /* + * SEALING + * Sealing allows multiple parties to share a shmem-file but restrict + * access to a specific subset of file operations. Seals can only be + * added, but never removed. This way, mutually untrusted parties can + * share common memory regions with a well-defined policy. A malicious + * peer can thus never perform unwanted operations on a shared object. + * + * Seals are only supported on special shmem-files and always affect + * the whole underlying inode. Once a seal is set, it may prevent some + * kinds of access to the file. Currently, the following seals are + * defined: + * SEAL_SEAL: Prevent further seals from being set on this file + * SEAL_SHRINK: Prevent the file from shrinking + * SEAL_GROW: Prevent the file from growing + * SEAL_WRITE: Prevent write access to the file + * + * As we don't require any trust relationship between two parties, we + * must prevent seals from being removed. Therefore, sealing a file + * only adds a given set of seals to the file, it never touches + * existing seals. Furthermore, the "setting seals"-operation can be + * sealed itself, which basically prevents any further seal from being + * added. + * + * Semantics of sealing are only defined on volatile files. Only + * anonymous shmem files support sealing. More importantly, seals are + * never written to disk. Therefore, there's no plan to support it on + * other file types. + */ + + if (file->f_op != &shmem_file_operations) + return -EINVAL; + if (!(file->f_mode & FMODE_WRITE)) + return -EPERM; + if (seals & ~(unsigned int)F_ALL_SEALS) + return -EINVAL; + + mutex_lock(&inode->i_mutex); + + if (info->seals & F_SEAL_SEAL) { + error = -EPERM; + goto unlock; + } + + if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) { + error = mapping_deny_writable(file->f_mapping); + if (error) + goto unlock; + + error = shmem_wait_for_pins(file->f_mapping); + if (error) { + mapping_allow_writable(file->f_mapping); + goto unlock; + } + } + + info->seals |= seals; + error = 0; + +unlock: + mutex_unlock(&inode->i_mutex); + return error; +} +EXPORT_SYMBOL_GPL(shmem_add_seals); + +int shmem_get_seals(struct file *file) +{ + if (file->f_op != &shmem_file_operations) + return -EINVAL; + + return SHMEM_I(file_inode(file))->seals; +} +EXPORT_SYMBOL_GPL(shmem_get_seals); + +long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg) +{ + long error; + + switch (cmd) { + case F_ADD_SEALS: + /* disallow upper 32bit */ + if (arg > UINT_MAX) + return -EINVAL; + + error = shmem_add_seals(file, arg); + break; + case F_GET_SEALS: + error = shmem_get_seals(file); + break; + default: + error = -EINVAL; + break; + } + + return error; +} + static long shmem_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_falloc shmem_falloc; pgoff_t start, index, end; int error; @@ -1828,6 +1960,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); + /* protected by i_mutex */ + if (info->seals & F_SEAL_WRITE) { + error = -EPERM; + goto out; + } + shmem_falloc.waitq = &shmem_falloc_waitq; shmem_falloc.start = unmap_start >> PAGE_SHIFT; shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; @@ -1854,6 +1992,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, if (error) goto out; + if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) { + error = -EPERM; + goto out; + } + start = offset >> PAGE_CACHE_SHIFT; end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; /* Try to avoid a swapstorm if len is impossible to satisfy */ -- cgit v1.2.3 From 9183df25fe7b194563db3fec6dc3202a5855839c Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 8 Aug 2014 14:25:29 -0700 Subject: shm: add memfd_create() syscall memfd_create() is similar to mmap(MAP_ANON), but returns a file-descriptor that you can pass to mmap(). It can support sealing and avoids any connection to user-visible mount-points. Thus, it's not subject to quotas on mounted file-systems, but can be used like malloc()'ed memory, but with a file-descriptor to it. memfd_create() returns the raw shmem file, so calls like ftruncate() can be used to modify the underlying inode. Also calls like fstat() will return proper information and mark the file as regular file. If you want sealing, you can specify MFD_ALLOW_SEALING. Otherwise, sealing is not supported (like on all other regular files). Compared to O_TMPFILE, it does not require a tmpfs mount-point and is not subject to a filesystem size limit. It is still properly accounted to memcg limits, though, and to the same overcommit or no-overcommit accounting as all user memory. Signed-off-by: David Herrmann Acked-by: Hugh Dickins Cc: Michael Kerrisk Cc: Ryan Lortie Cc: Lennart Poettering Cc: Daniel Mack Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 1 + include/uapi/linux/memfd.h | 8 +++++ kernel/sys_ni.c | 1 + mm/shmem.c | 73 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 85 insertions(+) create mode 100644 include/uapi/linux/memfd.h (limited to 'include/uapi/linux') diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d1b4a119d4a5..028b78168d85 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -362,3 +362,4 @@ 353 i386 renameat2 sys_renameat2 354 i386 seccomp sys_seccomp 355 i386 getrandom sys_getrandom +356 i386 memfd_create sys_memfd_create diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 252c804bb1aa..ca2b9aa78c81 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -325,6 +325,7 @@ 316 common renameat2 sys_renameat2 317 common seccomp sys_seccomp 318 common getrandom sys_getrandom +319 common memfd_create sys_memfd_create # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 701daff5d899..15a069425cbf 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -802,6 +802,7 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags, asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); asmlinkage long sys_eventfd(unsigned int count); asmlinkage long sys_eventfd2(unsigned int count, int flags); +asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h new file mode 100644 index 000000000000..534e364bda92 --- /dev/null +++ b/include/uapi/linux/memfd.h @@ -0,0 +1,8 @@ +#ifndef _UAPI_LINUX_MEMFD_H +#define _UAPI_LINUX_MEMFD_H + +/* flags for memfd_create(2) (unsigned int) */ +#define MFD_CLOEXEC 0x0001U +#define MFD_ALLOW_SEALING 0x0002U + +#endif /* _UAPI_LINUX_MEMFD_H */ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 2904a2105914..1f79e3714533 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -197,6 +197,7 @@ cond_syscall(compat_sys_timerfd_settime); cond_syscall(compat_sys_timerfd_gettime); cond_syscall(sys_eventfd); cond_syscall(sys_eventfd2); +cond_syscall(sys_memfd_create); /* performance counters: */ cond_syscall(sys_perf_event_open); diff --git a/mm/shmem.c b/mm/shmem.c index 8b43bb7a4efe..4a5498795a2b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -66,7 +66,9 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include +#include #include #include @@ -2732,6 +2734,77 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) shmem_show_mpol(seq, sbinfo->mpol); return 0; } + +#define MFD_NAME_PREFIX "memfd:" +#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1) +#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN) + +#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING) + +SYSCALL_DEFINE2(memfd_create, + const char __user *, uname, + unsigned int, flags) +{ + struct shmem_inode_info *info; + struct file *file; + int fd, error; + char *name; + long len; + + if (flags & ~(unsigned int)MFD_ALL_FLAGS) + return -EINVAL; + + /* length includes terminating zero */ + len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1); + if (len <= 0) + return -EFAULT; + if (len > MFD_NAME_MAX_LEN + 1) + return -EINVAL; + + name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY); + if (!name) + return -ENOMEM; + + strcpy(name, MFD_NAME_PREFIX); + if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) { + error = -EFAULT; + goto err_name; + } + + /* terminating-zero may have changed after strnlen_user() returned */ + if (name[len + MFD_NAME_PREFIX_LEN - 1]) { + error = -EFAULT; + goto err_name; + } + + fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0); + if (fd < 0) { + error = fd; + goto err_name; + } + + file = shmem_file_setup(name, 0, VM_NORESERVE); + if (IS_ERR(file)) { + error = PTR_ERR(file); + goto err_fd; + } + info = SHMEM_I(file_inode(file)); + file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; + file->f_flags |= O_RDWR | O_LARGEFILE; + if (flags & MFD_ALLOW_SEALING) + info->seals &= ~F_SEAL_SEAL; + + fd_install(fd, file); + kfree(name); + return fd; + +err_fd: + put_unused_fd(fd); +err_name: + kfree(name); + return error; +} + #endif /* CONFIG_TMPFS */ static void shmem_put_super(struct super_block *sb) -- cgit v1.2.3 From cb1052581e2bddd6096544f3f944f4e7fdad4c7f Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:57 -0700 Subject: kexec: implementation of new syscall kexec_file_load Previous patch provided the interface definition and this patch prvides implementation of new syscall. Previously segment list was prepared in user space. Now user space just passes kernel fd, initrd fd and command line and kernel will create a segment list internally. This patch contains generic part of the code. Actual segment preparation and loading is done by arch and image specific loader. Which comes in next patch. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Vivek Goyal Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Yinghai Lu Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/machine_kexec_64.c | 45 ++++ include/linux/kexec.h | 53 ++++ include/uapi/linux/kexec.h | 11 + kernel/kexec.c | 483 ++++++++++++++++++++++++++++++++++++- 4 files changed, 587 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 679cef0791cd..c8875b5545e1 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -22,6 +22,10 @@ #include #include +static struct kexec_file_ops *kexec_file_loaders[] = { + NULL, +}; + static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.pud); @@ -283,3 +287,44 @@ void arch_crash_save_vmcoreinfo(void) (unsigned long)&_text - __START_KERNEL); } +/* arch-dependent functionality related to kexec file-based syscall */ + +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int i, ret = -ENOEXEC; + struct kexec_file_ops *fops; + + for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { + fops = kexec_file_loaders[i]; + if (!fops || !fops->probe) + continue; + + ret = fops->probe(buf, buf_len); + if (!ret) { + image->fops = fops; + return ret; + } + } + + return ret; +} + +void *arch_kexec_kernel_image_load(struct kimage *image) +{ + if (!image->fops || !image->fops->load) + return ERR_PTR(-ENOEXEC); + + return image->fops->load(image, image->kernel_buf, + image->kernel_buf_len, image->initrd_buf, + image->initrd_buf_len, image->cmdline_buf, + image->cmdline_buf_len); +} + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + if (!image->fops || !image->fops->cleanup) + return 0; + + return image->fops->cleanup(image); +} diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 66d56ac0f64c..8e80901e466f 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -121,13 +121,57 @@ struct kimage { #define KEXEC_TYPE_DEFAULT 0 #define KEXEC_TYPE_CRASH 1 unsigned int preserve_context : 1; + /* If set, we are using file mode kexec syscall */ + unsigned int file_mode:1; #ifdef ARCH_HAS_KIMAGE_ARCH struct kimage_arch arch; #endif + + /* Additional fields for file based kexec syscall */ + void *kernel_buf; + unsigned long kernel_buf_len; + + void *initrd_buf; + unsigned long initrd_buf_len; + + char *cmdline_buf; + unsigned long cmdline_buf_len; + + /* File operations provided by image loader */ + struct kexec_file_ops *fops; + + /* Image loader handling the kernel can store a pointer here */ + void *image_loader_data; }; +/* + * Keeps track of buffer parameters as provided by caller for requesting + * memory placement of buffer. + */ +struct kexec_buf { + struct kimage *image; + char *buffer; + unsigned long bufsz; + unsigned long memsz; + unsigned long buf_align; + unsigned long buf_min; + unsigned long buf_max; + bool top_down; /* allocate from top of memory hole */ +}; +typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size); +typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len); +typedef int (kexec_cleanup_t)(struct kimage *image); + +struct kexec_file_ops { + kexec_probe_t *probe; + kexec_load_t *load; + kexec_cleanup_t *cleanup; +}; /* kexec interface functions */ extern void machine_kexec(struct kimage *image); @@ -138,6 +182,11 @@ extern asmlinkage long sys_kexec_load(unsigned long entry, struct kexec_segment __user *segments, unsigned long flags); extern int kernel_kexec(void); +extern int kexec_add_buffer(struct kimage *image, char *buffer, + unsigned long bufsz, unsigned long memsz, + unsigned long buf_align, unsigned long buf_min, + unsigned long buf_max, bool top_down, + unsigned long *load_addr); extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); extern void crash_kexec(struct pt_regs *); @@ -188,6 +237,10 @@ extern int kexec_load_disabled; #define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT) #endif +/* List of defined/legal kexec file flags */ +#define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ + KEXEC_FILE_NO_INITRAMFS) + #define VMCOREINFO_BYTES (4096) #define VMCOREINFO_NOTE_NAME "VMCOREINFO" #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index d6629d49a243..6925f5b42f89 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -13,6 +13,17 @@ #define KEXEC_PRESERVE_CONTEXT 0x00000002 #define KEXEC_ARCH_MASK 0xffff0000 +/* + * Kexec file load interface flags. + * KEXEC_FILE_UNLOAD : Unload already loaded kexec/kdump image. + * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image. + * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd + * fd field. + */ +#define KEXEC_FILE_UNLOAD 0x00000001 +#define KEXEC_FILE_ON_CRASH 0x00000002 +#define KEXEC_FILE_NO_INITRAMFS 0x00000004 + /* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. */ diff --git a/kernel/kexec.c b/kernel/kexec.c index ec4386c1b94f..9b46219254dd 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -6,6 +6,8 @@ * Version 2. See the file COPYING for more details. */ +#define pr_fmt(fmt) "kexec: " fmt + #include #include #include @@ -327,6 +329,221 @@ out_free_image: return ret; } +static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len) +{ + struct fd f = fdget(fd); + int ret; + struct kstat stat; + loff_t pos; + ssize_t bytes = 0; + + if (!f.file) + return -EBADF; + + ret = vfs_getattr(&f.file->f_path, &stat); + if (ret) + goto out; + + if (stat.size > INT_MAX) { + ret = -EFBIG; + goto out; + } + + /* Don't hand 0 to vmalloc, it whines. */ + if (stat.size == 0) { + ret = -EINVAL; + goto out; + } + + *buf = vmalloc(stat.size); + if (!*buf) { + ret = -ENOMEM; + goto out; + } + + pos = 0; + while (pos < stat.size) { + bytes = kernel_read(f.file, pos, (char *)(*buf) + pos, + stat.size - pos); + if (bytes < 0) { + vfree(*buf); + ret = bytes; + goto out; + } + + if (bytes == 0) + break; + pos += bytes; + } + + if (pos != stat.size) { + ret = -EBADF; + vfree(*buf); + goto out; + } + + *buf_len = pos; +out: + fdput(f); + return ret; +} + +/* Architectures can provide this probe function */ +int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + return -ENOEXEC; +} + +void * __weak arch_kexec_kernel_image_load(struct kimage *image) +{ + return ERR_PTR(-ENOEXEC); +} + +void __weak arch_kimage_file_post_load_cleanup(struct kimage *image) +{ +} + +/* + * Free up memory used by kernel, initrd, and comand line. This is temporary + * memory allocation which is not needed any more after these buffers have + * been loaded into separate segments and have been copied elsewhere. + */ +static void kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->kernel_buf); + image->kernel_buf = NULL; + + vfree(image->initrd_buf); + image->initrd_buf = NULL; + + kfree(image->cmdline_buf); + image->cmdline_buf = NULL; + + /* See if architecture has anything to cleanup post load */ + arch_kimage_file_post_load_cleanup(image); +} + +/* + * In file mode list of segments is prepared by kernel. Copy relevant + * data from user space, do error checking, prepare segment list + */ +static int +kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, + const char __user *cmdline_ptr, + unsigned long cmdline_len, unsigned flags) +{ + int ret = 0; + void *ldata; + + ret = copy_file_from_fd(kernel_fd, &image->kernel_buf, + &image->kernel_buf_len); + if (ret) + return ret; + + /* Call arch image probe handlers */ + ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, + image->kernel_buf_len); + + if (ret) + goto out; + + /* It is possible that there no initramfs is being loaded */ + if (!(flags & KEXEC_FILE_NO_INITRAMFS)) { + ret = copy_file_from_fd(initrd_fd, &image->initrd_buf, + &image->initrd_buf_len); + if (ret) + goto out; + } + + if (cmdline_len) { + image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL); + if (!image->cmdline_buf) { + ret = -ENOMEM; + goto out; + } + + ret = copy_from_user(image->cmdline_buf, cmdline_ptr, + cmdline_len); + if (ret) { + ret = -EFAULT; + goto out; + } + + image->cmdline_buf_len = cmdline_len; + + /* command line should be a string with last byte null */ + if (image->cmdline_buf[cmdline_len - 1] != '\0') { + ret = -EINVAL; + goto out; + } + } + + /* Call arch image load handlers */ + ldata = arch_kexec_kernel_image_load(image); + + if (IS_ERR(ldata)) { + ret = PTR_ERR(ldata); + goto out; + } + + image->image_loader_data = ldata; +out: + /* In case of error, free up all allocated memory in this function */ + if (ret) + kimage_file_post_load_cleanup(image); + return ret; +} + +static int +kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, + int initrd_fd, const char __user *cmdline_ptr, + unsigned long cmdline_len, unsigned long flags) +{ + int ret; + struct kimage *image; + + image = do_kimage_alloc_init(); + if (!image) + return -ENOMEM; + + image->file_mode = 1; + + ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd, + cmdline_ptr, cmdline_len, flags); + if (ret) + goto out_free_image; + + ret = sanity_check_segment_list(image); + if (ret) + goto out_free_post_load_bufs; + + ret = -ENOMEM; + image->control_code_page = kimage_alloc_control_pages(image, + get_order(KEXEC_CONTROL_PAGE_SIZE)); + if (!image->control_code_page) { + pr_err("Could not allocate control_code_buffer\n"); + goto out_free_post_load_bufs; + } + + image->swap_page = kimage_alloc_control_pages(image, 0); + if (!image->swap_page) { + pr_err(KERN_ERR "Could not allocate swap buffer\n"); + goto out_free_control_pages; + } + + *rimage = image; + return 0; +out_free_control_pages: + kimage_free_page_list(&image->control_pages); +out_free_post_load_bufs: + kimage_file_post_load_cleanup(image); + kfree(image->image_loader_data); +out_free_image: + kfree(image); + return ret; +} + static int kimage_is_destination_range(struct kimage *image, unsigned long start, unsigned long end) @@ -644,6 +861,16 @@ static void kimage_free(struct kimage *image) /* Free the kexec control pages... */ kimage_free_page_list(&image->control_pages); + + kfree(image->image_loader_data); + + /* + * Free up any temporary buffers allocated. This might hit if + * error occurred much later after buffer allocation. + */ + if (image->file_mode) + kimage_file_post_load_cleanup(image); + kfree(image); } @@ -772,10 +999,14 @@ static int kimage_load_normal_segment(struct kimage *image, unsigned long maddr; size_t ubytes, mbytes; int result; - unsigned char __user *buf; + unsigned char __user *buf = NULL; + unsigned char *kbuf = NULL; result = 0; - buf = segment->buf; + if (image->file_mode) + kbuf = segment->kbuf; + else + buf = segment->buf; ubytes = segment->bufsz; mbytes = segment->memsz; maddr = segment->mem; @@ -807,7 +1038,11 @@ static int kimage_load_normal_segment(struct kimage *image, PAGE_SIZE - (maddr & ~PAGE_MASK)); uchunk = min(ubytes, mchunk); - result = copy_from_user(ptr, buf, uchunk); + /* For file based kexec, source pages are in kernel memory */ + if (image->file_mode) + memcpy(ptr, kbuf, uchunk); + else + result = copy_from_user(ptr, buf, uchunk); kunmap(page); if (result) { result = -EFAULT; @@ -815,7 +1050,10 @@ static int kimage_load_normal_segment(struct kimage *image, } ubytes -= uchunk; maddr += mchunk; - buf += mchunk; + if (image->file_mode) + kbuf += mchunk; + else + buf += mchunk; mbytes -= mchunk; } out: @@ -1062,7 +1300,72 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags) { - return -ENOSYS; + int ret = 0, i; + struct kimage **dest_image, *image; + + /* We only trust the superuser with rebooting the system. */ + if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) + return -EPERM; + + /* Make sure we have a legal set of flags */ + if (flags != (flags & KEXEC_FILE_FLAGS)) + return -EINVAL; + + image = NULL; + + if (!mutex_trylock(&kexec_mutex)) + return -EBUSY; + + dest_image = &kexec_image; + if (flags & KEXEC_FILE_ON_CRASH) + dest_image = &kexec_crash_image; + + if (flags & KEXEC_FILE_UNLOAD) + goto exchange; + + /* + * In case of crash, new kernel gets loaded in reserved region. It is + * same memory where old crash kernel might be loaded. Free any + * current crash dump kernel before we corrupt it. + */ + if (flags & KEXEC_FILE_ON_CRASH) + kimage_free(xchg(&kexec_crash_image, NULL)); + + ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr, + cmdline_len, flags); + if (ret) + goto out; + + ret = machine_kexec_prepare(image); + if (ret) + goto out; + + for (i = 0; i < image->nr_segments; i++) { + struct kexec_segment *ksegment; + + ksegment = &image->segment[i]; + pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n", + i, ksegment->buf, ksegment->bufsz, ksegment->mem, + ksegment->memsz); + + ret = kimage_load_segment(image, &image->segment[i]); + if (ret) + goto out; + } + + kimage_terminate(image); + + /* + * Free up any temporary buffers allocated which are not needed + * after image has been loaded + */ + kimage_file_post_load_cleanup(image); +exchange: + image = xchg(dest_image, image); +out: + mutex_unlock(&kexec_mutex); + kimage_free(image); + return ret; } void crash_kexec(struct pt_regs *regs) @@ -1620,6 +1923,176 @@ static int __init crash_save_vmcoreinfo_init(void) subsys_initcall(crash_save_vmcoreinfo_init); +static int __kexec_add_segment(struct kimage *image, char *buf, + unsigned long bufsz, unsigned long mem, + unsigned long memsz) +{ + struct kexec_segment *ksegment; + + ksegment = &image->segment[image->nr_segments]; + ksegment->kbuf = buf; + ksegment->bufsz = bufsz; + ksegment->mem = mem; + ksegment->memsz = memsz; + image->nr_segments++; + + return 0; +} + +static int locate_mem_hole_top_down(unsigned long start, unsigned long end, + struct kexec_buf *kbuf) +{ + struct kimage *image = kbuf->image; + unsigned long temp_start, temp_end; + + temp_end = min(end, kbuf->buf_max); + temp_start = temp_end - kbuf->memsz; + + do { + /* align down start */ + temp_start = temp_start & (~(kbuf->buf_align - 1)); + + if (temp_start < start || temp_start < kbuf->buf_min) + return 0; + + temp_end = temp_start + kbuf->memsz - 1; + + /* + * Make sure this does not conflict with any of existing + * segments + */ + if (kimage_is_destination_range(image, temp_start, temp_end)) { + temp_start = temp_start - PAGE_SIZE; + continue; + } + + /* We found a suitable memory range */ + break; + } while (1); + + /* If we are here, we found a suitable memory range */ + __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start, + kbuf->memsz); + + /* Success, stop navigating through remaining System RAM ranges */ + return 1; +} + +static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end, + struct kexec_buf *kbuf) +{ + struct kimage *image = kbuf->image; + unsigned long temp_start, temp_end; + + temp_start = max(start, kbuf->buf_min); + + do { + temp_start = ALIGN(temp_start, kbuf->buf_align); + temp_end = temp_start + kbuf->memsz - 1; + + if (temp_end > end || temp_end > kbuf->buf_max) + return 0; + /* + * Make sure this does not conflict with any of existing + * segments + */ + if (kimage_is_destination_range(image, temp_start, temp_end)) { + temp_start = temp_start + PAGE_SIZE; + continue; + } + + /* We found a suitable memory range */ + break; + } while (1); + + /* If we are here, we found a suitable memory range */ + __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start, + kbuf->memsz); + + /* Success, stop navigating through remaining System RAM ranges */ + return 1; +} + +static int locate_mem_hole_callback(u64 start, u64 end, void *arg) +{ + struct kexec_buf *kbuf = (struct kexec_buf *)arg; + unsigned long sz = end - start + 1; + + /* Returning 0 will take to next memory range */ + if (sz < kbuf->memsz) + return 0; + + if (end < kbuf->buf_min || start > kbuf->buf_max) + return 0; + + /* + * Allocate memory top down with-in ram range. Otherwise bottom up + * allocation. + */ + if (kbuf->top_down) + return locate_mem_hole_top_down(start, end, kbuf); + return locate_mem_hole_bottom_up(start, end, kbuf); +} + +/* + * Helper function for placing a buffer in a kexec segment. This assumes + * that kexec_mutex is held. + */ +int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz, + unsigned long memsz, unsigned long buf_align, + unsigned long buf_min, unsigned long buf_max, + bool top_down, unsigned long *load_addr) +{ + + struct kexec_segment *ksegment; + struct kexec_buf buf, *kbuf; + int ret; + + /* Currently adding segment this way is allowed only in file mode */ + if (!image->file_mode) + return -EINVAL; + + if (image->nr_segments >= KEXEC_SEGMENT_MAX) + return -EINVAL; + + /* + * Make sure we are not trying to add buffer after allocating + * control pages. All segments need to be placed first before + * any control pages are allocated. As control page allocation + * logic goes through list of segments to make sure there are + * no destination overlaps. + */ + if (!list_empty(&image->control_pages)) { + WARN_ON(1); + return -EINVAL; + } + + memset(&buf, 0, sizeof(struct kexec_buf)); + kbuf = &buf; + kbuf->image = image; + kbuf->buffer = buffer; + kbuf->bufsz = bufsz; + + kbuf->memsz = ALIGN(memsz, PAGE_SIZE); + kbuf->buf_align = max(buf_align, PAGE_SIZE); + kbuf->buf_min = buf_min; + kbuf->buf_max = buf_max; + kbuf->top_down = top_down; + + /* Walk the RAM ranges and allocate a suitable range for the buffer */ + ret = walk_system_ram_res(0, -1, kbuf, locate_mem_hole_callback); + if (ret != 1) { + /* A suitable memory range could not be found for buffer */ + return -EADDRNOTAVAIL; + } + + /* Found a suitable memory range */ + ksegment = &image->segment[image->nr_segments - 1]; + *load_addr = ksegment->mem; + return 0; +} + + /* * Move into place and start executing a preloaded standalone * executable. If nothing was preloaded return an error. -- cgit v1.2.3