From 581319c58600b54612c417aff32ae9bbd79f4cdb Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Mar 2017 13:54:08 +0100 Subject: net/socket: use per af lockdep classes for sk queues Currently the sock queue's spin locks get their lockdep classes by the default init_spin_lock() initializer: all socket families get - usually, see below - a single class for rx, another specific class for tx, etc. This can lead to false positive lockdep splat, as reported by Andrey. Moreover there are two separate initialization points for the sock queues, one in sk_clone_lock() and one in sock_init_data(), so that e.g. the rx queue lock can get one of two possible, different classes, depending on the socket being cloned or not. This change tries to address the above, setting explicitly a per address family lockdep class for each queue's spinlock. Also, move the duplicated initialization code to a single location. v1 -> v2: - renamed the init helper rfc -> v1: - no changes, tested with several different workload Suggested-by: Cong Wang Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 18 deletions(-) (limited to 'net/core/sock.c') diff --git a/net/core/sock.c b/net/core/sock.c index f6fd79f33097..768aedf238f5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -258,12 +258,66 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , "clock-AF_QIPCRTR", "clock-AF_SMC" , "clock-AF_MAX" }; +static const char *const af_family_rlock_key_strings[AF_MAX+1] = { + "rlock-AF_UNSPEC", "rlock-AF_UNIX" , "rlock-AF_INET" , + "rlock-AF_AX25" , "rlock-AF_IPX" , "rlock-AF_APPLETALK", + "rlock-AF_NETROM", "rlock-AF_BRIDGE" , "rlock-AF_ATMPVC" , + "rlock-AF_X25" , "rlock-AF_INET6" , "rlock-AF_ROSE" , + "rlock-AF_DECnet", "rlock-AF_NETBEUI" , "rlock-AF_SECURITY" , + "rlock-AF_KEY" , "rlock-AF_NETLINK" , "rlock-AF_PACKET" , + "rlock-AF_ASH" , "rlock-AF_ECONET" , "rlock-AF_ATMSVC" , + "rlock-AF_RDS" , "rlock-AF_SNA" , "rlock-AF_IRDA" , + "rlock-AF_PPPOX" , "rlock-AF_WANPIPE" , "rlock-AF_LLC" , + "rlock-27" , "rlock-28" , "rlock-AF_CAN" , + "rlock-AF_TIPC" , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV" , + "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" , + "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" , + "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" , + "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_MAX" +}; +static const char *const af_family_wlock_key_strings[AF_MAX+1] = { + "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" , + "wlock-AF_AX25" , "wlock-AF_IPX" , "wlock-AF_APPLETALK", + "wlock-AF_NETROM", "wlock-AF_BRIDGE" , "wlock-AF_ATMPVC" , + "wlock-AF_X25" , "wlock-AF_INET6" , "wlock-AF_ROSE" , + "wlock-AF_DECnet", "wlock-AF_NETBEUI" , "wlock-AF_SECURITY" , + "wlock-AF_KEY" , "wlock-AF_NETLINK" , "wlock-AF_PACKET" , + "wlock-AF_ASH" , "wlock-AF_ECONET" , "wlock-AF_ATMSVC" , + "wlock-AF_RDS" , "wlock-AF_SNA" , "wlock-AF_IRDA" , + "wlock-AF_PPPOX" , "wlock-AF_WANPIPE" , "wlock-AF_LLC" , + "wlock-27" , "wlock-28" , "wlock-AF_CAN" , + "wlock-AF_TIPC" , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV" , + "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" , + "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" , + "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" , + "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_MAX" +}; +static const char *const af_family_elock_key_strings[AF_MAX+1] = { + "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" , + "elock-AF_AX25" , "elock-AF_IPX" , "elock-AF_APPLETALK", + "elock-AF_NETROM", "elock-AF_BRIDGE" , "elock-AF_ATMPVC" , + "elock-AF_X25" , "elock-AF_INET6" , "elock-AF_ROSE" , + "elock-AF_DECnet", "elock-AF_NETBEUI" , "elock-AF_SECURITY" , + "elock-AF_KEY" , "elock-AF_NETLINK" , "elock-AF_PACKET" , + "elock-AF_ASH" , "elock-AF_ECONET" , "elock-AF_ATMSVC" , + "elock-AF_RDS" , "elock-AF_SNA" , "elock-AF_IRDA" , + "elock-AF_PPPOX" , "elock-AF_WANPIPE" , "elock-AF_LLC" , + "elock-27" , "elock-28" , "elock-AF_CAN" , + "elock-AF_TIPC" , "elock-AF_BLUETOOTH", "elock-AF_IUCV" , + "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" , + "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" , + "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" , + "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_MAX" +}; /* - * sk_callback_lock locking rules are per-address-family, + * sk_callback_lock and sk queues locking rules are per-address-family, * so split the lock classes by using a per-AF key: */ static struct lock_class_key af_callback_keys[AF_MAX]; +static struct lock_class_key af_rlock_keys[AF_MAX]; +static struct lock_class_key af_wlock_keys[AF_MAX]; +static struct lock_class_key af_elock_keys[AF_MAX]; /* Take into consideration the size of the struct sk_buff overhead in the * determination of these values, since that is non-constant across @@ -1478,6 +1532,27 @@ void sk_free(struct sock *sk) } EXPORT_SYMBOL(sk_free); +static void sk_init_common(struct sock *sk) +{ + skb_queue_head_init(&sk->sk_receive_queue); + skb_queue_head_init(&sk->sk_write_queue); + skb_queue_head_init(&sk->sk_error_queue); + + rwlock_init(&sk->sk_callback_lock); + lockdep_set_class_and_name(&sk->sk_receive_queue.lock, + af_rlock_keys + sk->sk_family, + af_family_rlock_key_strings[sk->sk_family]); + lockdep_set_class_and_name(&sk->sk_write_queue.lock, + af_wlock_keys + sk->sk_family, + af_family_wlock_key_strings[sk->sk_family]); + lockdep_set_class_and_name(&sk->sk_error_queue.lock, + af_elock_keys + sk->sk_family, + af_family_elock_key_strings[sk->sk_family]); + lockdep_set_class_and_name(&sk->sk_callback_lock, + af_callback_keys + sk->sk_family, + af_family_clock_key_strings[sk->sk_family]); +} + /** * sk_clone_lock - clone a socket, and lock its clone * @sk: the socket to clone @@ -1511,13 +1586,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) */ atomic_set(&newsk->sk_wmem_alloc, 1); atomic_set(&newsk->sk_omem_alloc, 0); - skb_queue_head_init(&newsk->sk_receive_queue); - skb_queue_head_init(&newsk->sk_write_queue); - - rwlock_init(&newsk->sk_callback_lock); - lockdep_set_class_and_name(&newsk->sk_callback_lock, - af_callback_keys + newsk->sk_family, - af_family_clock_key_strings[newsk->sk_family]); + sk_init_common(newsk); newsk->sk_dst_cache = NULL; newsk->sk_dst_pending_confirm = 0; @@ -1528,7 +1597,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; sock_reset_flag(newsk, SOCK_DONE); - skb_queue_head_init(&newsk->sk_error_queue); filter = rcu_dereference_protected(newsk->sk_filter, 1); if (filter != NULL) @@ -2454,10 +2522,7 @@ EXPORT_SYMBOL(sk_stop_timer); void sock_init_data(struct socket *sock, struct sock *sk) { - skb_queue_head_init(&sk->sk_receive_queue); - skb_queue_head_init(&sk->sk_write_queue); - skb_queue_head_init(&sk->sk_error_queue); - + sk_init_common(sk); sk->sk_send_head = NULL; init_timer(&sk->sk_timer); @@ -2480,11 +2545,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); } - rwlock_init(&sk->sk_callback_lock); - lockdep_set_class_and_name(&sk->sk_callback_lock, - af_callback_keys + sk->sk_family, - af_family_clock_key_strings[sk->sk_family]); - sk->sk_state_change = sock_def_wakeup; sk->sk_data_ready = sock_def_readable; sk->sk_write_space = sock_def_write_space; -- cgit v1.2.3 From a2d133b1d465016d0d97560b11f54ba0ace56d3e Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Mon, 20 Mar 2017 15:22:03 -0400 Subject: sock: introduce SO_MEMINFO getsockopt Allows reading of SK_MEMINFO_VARS via socket option. This way an application can get all meminfo related information in single socket option call instead of multiple calls. Adds helper function, sk_get_meminfo(), and uses that for both getsockopt and sock_diag_put_meminfo(). Suggested by Eric Dumazet. Signed-off-by: Josh Hunt Reviewed-by: Jason Baron Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/avr32/include/uapi/asm/socket.h | 2 ++ arch/frv/include/uapi/asm/socket.h | 2 ++ arch/ia64/include/uapi/asm/socket.h | 2 ++ arch/m32r/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 3 +++ arch/mn10300/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/powerpc/include/uapi/asm/socket.h | 2 ++ arch/s390/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ arch/xtensa/include/uapi/asm/socket.h | 2 ++ include/net/sock.h | 2 ++ include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 30 ++++++++++++++++++++++++++++++ net/core/sock_diag.c | 10 +--------- 16 files changed, 60 insertions(+), 9 deletions(-) (limited to 'net/core/sock.c') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index afc901b7a6f6..089db42c1b40 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -99,4 +99,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 5a650426f357..6eabcbd2f82a 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -92,4 +92,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index 81e03530ed39..bd497f8356b9 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -92,5 +92,7 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 57feb0c1f7d7..f1bb54686168 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -101,4 +101,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 5853f8e92c20..459c46076f6f 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -92,4 +92,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 566ecdcb5b4b..688c18dd62ef 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -110,4 +110,7 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index 0e12527c4b0e..312d2c457a04 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -92,4 +92,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 7a109b73ddf7..b98ec38f2083 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -91,4 +91,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 0x402F +#define SO_MEMINFO 0x4030 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 44583a52f882..099a889240f6 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -99,4 +99,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index b24a64cbfeb1..6199bb34f7fa 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -98,4 +98,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index a25dc32f5d6a..12cd8c2ec422 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -88,6 +88,8 @@ #define SCM_TIMESTAMPING_OPT_STATS 0x0038 +#define SO_MEMINFO 0x0039 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 9fdbe1fe0473..d0b85f6c1484 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -103,4 +103,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 08142be8938e..cb241a0e8434 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2362,6 +2362,8 @@ bool sk_ns_capable(const struct sock *sk, bool sk_capable(const struct sock *sk, int cap); bool sk_net_capable(const struct sock *sk, int cap); +void sk_get_meminfo(const struct sock *sk, u32 *meminfo); + extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 2c748ddad5f8..8313702c1eae 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -94,4 +94,6 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/sock.c b/net/core/sock.c index a83731c36761..f8c0373a3a74 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1313,6 +1313,21 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_incoming_cpu; break; + case SO_MEMINFO: + { + u32 meminfo[SK_MEMINFO_VARS]; + + if (get_user(len, optlen)) + return -EFAULT; + + sk_get_meminfo(sk, meminfo); + + len = min_t(unsigned int, len, sizeof(meminfo)); + if (copy_to_user(optval, &meminfo, len)) + return -EFAULT; + + goto lenout; + } default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -2861,6 +2876,21 @@ void sk_common_release(struct sock *sk) } EXPORT_SYMBOL(sk_common_release); +void sk_get_meminfo(const struct sock *sk, u32 *mem) +{ + memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); + + mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); + mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; + mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); + mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; + mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; + mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; + mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); + mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; + mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); +} + #ifdef CONFIG_PROC_FS #define PROTO_INUSE_NR 64 /* should be enough for the first time */ struct prot_inuse { diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 6b10573cc9fa..8d11ee75a100 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -59,15 +59,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) { u32 mem[SK_MEMINFO_VARS]; - mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); - mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; - mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); - mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; - mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; - mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; - mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); - mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; - mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); + sk_get_meminfo(sk, mem); return nla_put(skb, attrtype, sizeof(mem), &mem); } -- cgit v1.2.3 From 7db6b048da3b9f84fe1d22fb29ff7e7c2ec6c0e5 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 24 Mar 2017 10:08:24 -0700 Subject: net: Commonize busy polling code to focus on napi_id instead of socket Move the core functionality in sk_busy_loop() to napi_busy_loop() and make it independent of sk. This enables re-using this function in epoll busy loop implementation. Signed-off-by: Sridhar Samudrala Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/busy_poll.h | 20 +++++++++++++++----- net/core/dev.c | 21 ++++++++------------- net/core/sock.c | 11 +++++++++++ 3 files changed, 34 insertions(+), 18 deletions(-) (limited to 'net/core/sock.c') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 72c82f2ea536..8ffd434676b7 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -51,7 +51,11 @@ static inline bool sk_can_busy_loop(const struct sock *sk) return sk->sk_ll_usec && !signal_pending(current); } -void sk_busy_loop(struct sock *sk, int nonblock); +bool sk_busy_loop_end(void *p, unsigned long start_time); + +void napi_busy_loop(unsigned int napi_id, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg); #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) @@ -64,10 +68,6 @@ static inline bool sk_can_busy_loop(struct sock *sk) return false; } -static inline void sk_busy_loop(struct sock *sk, int nonblock) -{ -} - #endif /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long busy_loop_current_time(void) @@ -111,6 +111,16 @@ static inline bool sk_busy_loop_timeout(struct sock *sk, return true; } +static inline void sk_busy_loop(struct sock *sk, int nonblock) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned int napi_id = READ_ONCE(sk->sk_napi_id); + + if (napi_id >= MIN_NAPI_ID) + napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk); +#endif +} + /* used in the NIC receive handler to mark the skb */ static inline void skb_mark_napi_id(struct sk_buff *skb, struct napi_struct *napi) diff --git a/net/core/dev.c b/net/core/dev.c index 2d1b5613b7fd..ef9fe60ee294 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5060,19 +5060,16 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) do_softirq(); } -void sk_busy_loop(struct sock *sk, int nonblock) +void napi_busy_loop(unsigned int napi_id, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg) { - unsigned long start_time = nonblock ? 0 : busy_loop_current_time(); + unsigned long start_time = loop_end ? busy_loop_current_time() : 0; int (*napi_poll)(struct napi_struct *napi, int budget); void *have_poll_lock = NULL; struct napi_struct *napi; - unsigned int napi_id; restart: - napi_id = READ_ONCE(sk->sk_napi_id); - if (napi_id < MIN_NAPI_ID) - return; - napi_poll = NULL; rcu_read_lock(); @@ -5106,12 +5103,11 @@ restart: trace_napi_poll(napi, work, BUSY_POLL_BUDGET); count: if (work > 0) - __NET_ADD_STATS(sock_net(sk), + __NET_ADD_STATS(dev_net(napi->dev), LINUX_MIB_BUSYPOLLRXPACKETS, work); local_bh_enable(); - if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) || - sk_busy_loop_timeout(sk, start_time)) + if (!loop_end || loop_end(loop_end_arg, start_time)) break; if (unlikely(need_resched())) { @@ -5120,8 +5116,7 @@ count: preempt_enable(); rcu_read_unlock(); cond_resched(); - if (!skb_queue_empty(&sk->sk_receive_queue) || - sk_busy_loop_timeout(sk, start_time)) + if (loop_end(loop_end_arg, start_time)) return; goto restart; } @@ -5133,7 +5128,7 @@ count: out: rcu_read_unlock(); } -EXPORT_SYMBOL(sk_busy_loop); +EXPORT_SYMBOL(napi_busy_loop); #endif /* CONFIG_NET_RX_BUSY_POLL */ diff --git a/net/core/sock.c b/net/core/sock.c index 1b9030ee6f4b..4b762f2a3552 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3237,3 +3237,14 @@ static int __init proto_init(void) subsys_initcall(proto_init); #endif /* PROC_FS */ + +#ifdef CONFIG_NET_RX_BUSY_POLL +bool sk_busy_loop_end(void *p, unsigned long start_time) +{ + struct sock *sk = p; + + return !skb_queue_empty(&sk->sk_receive_queue) || + sk_busy_loop_timeout(sk, start_time); +} +EXPORT_SYMBOL(sk_busy_loop_end); +#endif /* CONFIG_NET_RX_BUSY_POLL */ -- cgit v1.2.3 From 6d4339028b350efbf87c61e6d9e113e5373545c9 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 24 Mar 2017 10:08:36 -0700 Subject: net: Introduce SO_INCOMING_NAPI_ID This socket option returns the NAPI ID associated with the queue on which the last frame is received. This information can be used by the apps to split the incoming flows among the threads based on the Rx queue on which they are received. If the NAPI ID actually represents a sender_cpu then the value is ignored and 0 is returned. Signed-off-by: Sridhar Samudrala Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/avr32/include/uapi/asm/socket.h | 2 ++ arch/frv/include/uapi/asm/socket.h | 2 ++ arch/ia64/include/uapi/asm/socket.h | 2 ++ arch/m32r/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 1 + arch/mn10300/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/powerpc/include/uapi/asm/socket.h | 2 ++ arch/s390/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ arch/xtensa/include/uapi/asm/socket.h | 2 ++ include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 12 ++++++++++++ 14 files changed, 37 insertions(+) (limited to 'net/core/sock.c') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 089db42c1b40..1bb8cac61a28 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -101,4 +101,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 6eabcbd2f82a..f824eeb0f2e4 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -94,4 +94,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index bd497f8356b9..a8ad9bebfc47 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -94,5 +94,7 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index f1bb54686168..6af3253e4209 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -103,4 +103,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 459c46076f6f..e98b6bb897c0 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -94,4 +94,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 688c18dd62ef..ae2b62e39d4d 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -112,5 +112,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index 312d2c457a04..e4ac1843ee01 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -94,4 +94,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index b98ec38f2083..f754c793e82a 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -93,4 +93,6 @@ #define SO_MEMINFO 0x4030 +#define SO_INCOMING_NAPI_ID 0x4031 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 099a889240f6..5f84af7dcb2e 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -101,4 +101,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 6199bb34f7fa..25ac4960e707 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -100,4 +100,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 12cd8c2ec422..b05513acd589 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -90,6 +90,8 @@ #define SO_MEMINFO 0x0039 +#define SO_INCOMING_NAPI_ID 0x003a + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index d0b85f6c1484..786606c81edd 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -105,4 +105,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 8313702c1eae..c98a52fb572a 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -96,4 +96,6 @@ #define SO_MEMINFO 55 +#define SO_INCOMING_NAPI_ID 56 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/sock.c b/net/core/sock.c index 4b762f2a3552..1a58a9dc6888 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1328,6 +1328,18 @@ int sock_getsockopt(struct socket *sock, int level, int optname, goto lenout; } + +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_INCOMING_NAPI_ID: + v.val = READ_ONCE(sk->sk_napi_id); + + /* aggregate non-NAPI IDs down to 0 */ + if (v.val < MIN_NAPI_ID) + v.val = 0; + + break; +#endif + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). -- cgit v1.2.3 From 6c7c98bad4883a4a8710c96b2b44de482865eb6e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 30 Mar 2017 14:03:06 +0200 Subject: sock: avoid dirtying sk_stamp, if possible sock_recv_ts_and_drops() unconditionally set sk->sk_stamp for every packet, even if the SOCK_TIMESTAMP flag is not set in the related socket. If selinux is enabled, this cause a cache miss for every packet since sk->sk_stamp and sk->sk_security share the same cacheline. With this change sk_stamp is set only if the SOCK_TIMESTAMP flag is set, and is cleared for the first packet, so that the user perceived behavior is unchanged. This gives up to 5% speed-up under udp-flood with small packets. Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 5 ++++- net/core/sock.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net/core/sock.c') diff --git a/include/net/sock.h b/include/net/sock.h index cb241a0e8434..8e53158a7d95 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2239,6 +2239,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); +#define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC) static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { @@ -2249,8 +2250,10 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) __sock_recv_ts_and_drops(msg, sk, skb); - else + else if (unlikely(sk->sk_flags & SOCK_TIMESTAMP)) sk->sk_stamp = skb->tstamp; + else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) + sk->sk_stamp = 0; } void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); diff --git a/net/core/sock.c b/net/core/sock.c index 1a58a9dc6888..392f9b6f96e2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2613,7 +2613,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; - sk->sk_stamp = ktime_set(-1L, 0); + sk->sk_stamp = SK_DEFAULT_STAMP; #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; -- cgit v1.2.3 From 5daab9db7b65df87da26fd8cfa695fb9546a1ddb Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 5 Apr 2017 19:00:55 -0700 Subject: New getsockopt option to get socket cookie Introduce a new getsockopt operation to retrieve the socket cookie for a specific socket based on the socket fd. It returns a unique non-decreasing cookie for each socket. Tested: https://android-review.googlesource.com/#/c/358163/ Acked-by: Willem de Bruijn Signed-off-by: Chenbo Feng Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/avr32/include/uapi/asm/socket.h | 2 ++ arch/frv/include/uapi/asm/socket.h | 2 ++ arch/ia64/include/uapi/asm/socket.h | 2 ++ arch/m32r/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/mn10300/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/powerpc/include/uapi/asm/socket.h | 2 ++ arch/s390/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ arch/xtensa/include/uapi/asm/socket.h | 2 ++ include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 8 ++++++++ 14 files changed, 34 insertions(+) (limited to 'net/core/sock.c') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 1bb8cac61a28..148d7a32754e 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -103,4 +103,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index f824eeb0f2e4..2434d08ad8d6 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -96,4 +96,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index a8ad9bebfc47..1ccf45657472 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -96,5 +96,7 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 6af3253e4209..2c3f4b48042a 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -105,4 +105,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index e98b6bb897c0..ae6548d29a18 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -96,4 +96,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index ae2b62e39d4d..3418ec9c1c50 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -114,4 +114,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index e4ac1843ee01..4526e92301a6 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -96,4 +96,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index f754c793e82a..514701840bd9 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -95,4 +95,6 @@ #define SO_INCOMING_NAPI_ID 0x4031 +#define SO_COOKIE 0x4032 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 5f84af7dcb2e..58e2ec0310fc 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -103,4 +103,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 25ac4960e707..e8e5ecf673fd 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -102,4 +102,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index b05513acd589..3f4ad19d9ec7 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -92,6 +92,8 @@ #define SO_INCOMING_NAPI_ID 0x003a +#define SO_COOKIE 0x003b + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 786606c81edd..1eb6d2fe70d3 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -107,4 +107,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index c98a52fb572a..2b488565599d 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -98,4 +98,6 @@ #define SO_INCOMING_NAPI_ID 56 +#define SO_COOKIE 57 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/sock.c b/net/core/sock.c index 392f9b6f96e2..a06bb7a2a689 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1083,6 +1083,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, union { int val; + u64 val64; struct linger ling; struct timeval tm; } v; @@ -1340,6 +1341,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; #endif + case SO_COOKIE: + lv = sizeof(u64); + if (len < lv) + return -EINVAL; + v.val64 = sock_gen_cookie(sk); + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). -- cgit v1.2.3