From e11d4284e2f4de5048c6d1787c82226f0a198292 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Apr 2018 13:43:52 +0200 Subject: y2038: socket: Add compat_sys_recvmmsg_time64 recvmmsg() takes two arguments to pointers of structures that differ between 32-bit and 64-bit architectures: mmsghdr and timespec. For y2038 compatbility, we are changing the native system call from timespec to __kernel_timespec with a 64-bit time_t (in another patch), and use the existing compat system call on both 32-bit and 64-bit architectures for compatibility with traditional 32-bit user space. As we now have two variants of recvmmsg() for 32-bit tasks that are both different from the variant that we use on 64-bit tasks, this means we also require two compat system calls! The solution I picked is to flip things around: The existing compat_sys_recvmmsg() call gets moved from net/compat.c into net/socket.c and now handles the case for old user space on all architectures that have set CONFIG_COMPAT_32BIT_TIME. A new compat_sys_recvmmsg_time64() call gets added in the old place for 64-bit architectures only, this one handles the case of a compat mmsghdr structure combined with __kernel_timespec. In the indirect sys_socketcall(), we now need to call either do_sys_recvmmsg() or __compat_sys_recvmmsg(), depending on what kind of architecture we are on. For compat_sys_socketcall(), no such change is needed, we always call __compat_sys_recvmmsg(). I decided to not add a new SYS_RECVMMSG_TIME64 socketcall: Any libc implementation for 64-bit time_t will need significant changes including an updated asm/unistd.h, and it seems better to consistently use the separate syscalls that configuration, leaving the socketcall only for backward compatibility with 32-bit time_t based libc. The naming is asymmetric for the moment, so both existing syscalls entry points keep their names, while the new ones are recvmmsg_time32 and compat_recvmmsg_time64 respectively. I expect that we will rename the compat syscalls later as we start using generated syscall tables everywhere and add these entry points. Signed-off-by: Arnd Bergmann --- include/linux/socket.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux/socket.h') diff --git a/include/linux/socket.h b/include/linux/socket.h index 8b571e9b9f76..333b5df8a1b2 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -348,7 +348,8 @@ struct ucred { extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); -struct timespec64; +struct __kernel_timespec; +struct old_timespec32; /* The __sys_...msg variants allow MSG_CMSG_COMPAT iff * forbid_cmsg_compat==false @@ -357,8 +358,10 @@ extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, bool forbid_cmsg_compat); extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, bool forbid_cmsg_compat); -extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, - unsigned int flags, struct timespec64 *timeout); +extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, + unsigned int vlen, unsigned int flags, + struct __kernel_timespec __user *timeout, + struct old_timespec32 __user *timeout32); extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, bool forbid_cmsg_compat); -- cgit v1.2.3 From 0608c69c9a805c6264689d7eab4203eab88cf1da Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 20 Dec 2018 11:35:35 -0800 Subject: bpf: sk_msg, sock{map|hash} redirect through ULP A sockmap program that redirects through a kTLS ULP enabled socket will not work correctly because the ULP layer is skipped. This fixes the behavior to call through the ULP layer on redirect to ensure any operations required on the data stream at the ULP layer continue to be applied. To do this we add an internal flag MSG_SENDPAGE_NOPOLICY to avoid calling the BPF layer on a redirected message. This is required to avoid calling the BPF layer multiple times (possibly recursively) which is not the current/expected behavior without ULPs. In the future we may add a redirect flag if users _do_ want the policy applied again but this would need to work for both ULP and non-ULP sockets and be opt-in to avoid breaking existing programs. Also to avoid polluting the flag space with an internal flag we reuse the flag space overlapping MSG_SENDPAGE_NOPOLICY with MSG_WAITFORONE. Here WAITFORONE is specific to recv path and SENDPAGE_NOPOLICY is only used for sendpage hooks. The last thing to verify is user space API is masked correctly to ensure the flag can not be set by user. (Note this needs to be true regardless because we have internal flags already in-use that user space should not be able to set). But for completeness we have two UAPI paths into sendpage, sendfile and splice. In the sendfile case the function do_sendfile() zero's flags, ./fs/read_write.c: static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, loff_t max) { ... fl = 0; #if 0 /* * We need to debate whether we can enable this or not. The * man page documents EAGAIN return for the output at least, * and the application is arguably buggy if it doesn't expect * EAGAIN on a non-blocking file descriptor. */ if (in.file->f_flags & O_NONBLOCK) fl = SPLICE_F_NONBLOCK; #endif file_start_write(out.file); retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); } In the splice case the pipe_to_sendpage "actor" is used which masks flags with SPLICE_F_MORE. ./fs/splice.c: static int pipe_to_sendpage(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { ... more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; ... } Confirming what we expect that internal flags are in fact internal to socket side. Fixes: d3b18ad31f93 ("tls: add bpf support to sk_msg handling") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/linux/socket.h | 1 + include/net/tls.h | 9 +++++++++ net/ipv4/tcp_bpf.c | 13 ++++++++++++- net/tls/tls_sw.c | 43 ++++++++++++++++++++++++++++++------------- 4 files changed, 52 insertions(+), 14 deletions(-) (limited to 'include/linux/socket.h') diff --git a/include/linux/socket.h b/include/linux/socket.h index 8b571e9b9f76..84c48a3c0227 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -286,6 +286,7 @@ struct ucred { #define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ #define MSG_MORE 0x8000 /* Sender will send more */ #define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */ +#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */ #define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ #define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */ #define MSG_EOF MSG_FIN diff --git a/include/net/tls.h b/include/net/tls.h index bab5627ff5e3..23601f3b02ee 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -454,6 +454,15 @@ tls_offload_ctx_tx(const struct tls_context *tls_ctx) return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx; } +static inline bool tls_sw_has_ctx_tx(const struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (!ctx) + return false; + return !!tls_sw_ctx_tx(ctx); +} + static inline struct tls_offload_context_rx * tls_offload_ctx_rx(const struct tls_context *tls_ctx) { diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 87503343743d..1bb7321a256d 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -8,6 +8,7 @@ #include #include +#include static bool tcp_bpf_stream_read(const struct sock *sk) { @@ -218,6 +219,8 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes, u32 off; while (1) { + bool has_tx_ulp; + sge = sk_msg_elem(msg, msg->sg.start); size = (apply && apply_bytes < sge->length) ? apply_bytes : sge->length; @@ -226,7 +229,15 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes, tcp_rate_check_app_limited(sk); retry: - ret = do_tcp_sendpages(sk, page, off, size, flags); + has_tx_ulp = tls_sw_has_ctx_tx(sk); + if (has_tx_ulp) { + flags |= MSG_SENDPAGE_NOPOLICY; + ret = kernel_sendpage_locked(sk, + page, off, size, flags); + } else { + ret = do_tcp_sendpages(sk, page, off, size, flags); + } + if (ret <= 0) return ret; if (apply) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d4ecc66464e6..5aee9ae5ca53 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -686,12 +686,13 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, struct sk_psock *psock; struct sock *sk_redir; struct tls_rec *rec; + bool enospc, policy; int err = 0, send; u32 delta = 0; - bool enospc; + policy = !(flags & MSG_SENDPAGE_NOPOLICY); psock = sk_psock_get(sk); - if (!psock) + if (!psock || !policy) return tls_push_record(sk, flags, record_type); more_data: enospc = sk_msg_full(msg); @@ -1017,8 +1018,8 @@ send_end: return copied ? copied : ret; } -int tls_sw_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) +int tls_sw_do_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) { long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -1033,15 +1034,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, int ret = 0; bool eor; - if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_SENDPAGE_NOTLAST)) - return -ENOTSUPP; - - /* No MSG_EOR from splice, only look at MSG_MORE */ eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST)); - - lock_sock(sk); - sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); /* Wait till there is any pending write on socket */ @@ -1145,10 +1138,34 @@ wait_for_memory: } sendpage_end: ret = sk_stream_error(sk, flags, ret); - release_sock(sk); return copied ? copied : ret; } +int tls_sw_sendpage_locked(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) + return -ENOTSUPP; + + return tls_sw_do_sendpage(sk, page, offset, size, flags); +} + +int tls_sw_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + int ret; + + if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) + return -ENOTSUPP; + + lock_sock(sk); + ret = tls_sw_do_sendpage(sk, page, offset, size, flags); + release_sock(sk); + return ret; +} + static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock, int flags, long timeo, int *err) { -- cgit v1.2.3 From 9718475e69084de15c3930ce35672a7dc6da866b Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sat, 2 Feb 2019 07:34:51 -0800 Subject: socket: Add SO_TIMESTAMPING_NEW Add SO_TIMESTAMPING_NEW variant of socket timestamp options. This is the y2038 safe versions of the SO_TIMESTAMPING_OLD for all architectures. Signed-off-by: Deepa Dinamani Acked-by: Willem de Bruijn Cc: chris@zankel.net Cc: fenghua.yu@intel.com Cc: rth@twiddle.net Cc: tglx@linutronix.de Cc: ubraun@linux.ibm.com Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linux-s390@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 5 +++-- arch/mips/include/uapi/asm/socket.h | 5 +++-- arch/parisc/include/uapi/asm/socket.h | 5 +++-- arch/sparc/include/uapi/asm/socket.h | 5 +++-- include/linux/socket.h | 8 ++++++++ include/uapi/asm-generic/socket.h | 5 +++-- include/uapi/linux/errqueue.h | 4 ++++ net/core/scm.c | 27 +++++++++++++++++++++++++++ net/core/sock.c | 8 +++++++- net/ipv4/tcp.c | 30 +++++++++++++++++------------- net/smc/af_smc.c | 3 ++- net/socket.c | 13 ++++++++----- 12 files changed, 88 insertions(+), 30 deletions(-) (limited to 'include/linux/socket.h') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index aab11eec7c22..934ea6268f1a 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -117,19 +117,20 @@ #define SO_TIMESTAMP_NEW 63 #define SO_TIMESTAMPNS_NEW 64 +#define SO_TIMESTAMPING_NEW 65 #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 #define SO_TIMESTAMP SO_TIMESTAMP_OLD #define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #else #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) #endif -#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD - #define SCM_TIMESTAMP SO_TIMESTAMP #define SCM_TIMESTAMPNS SO_TIMESTAMPNS #define SCM_TIMESTAMPING SO_TIMESTAMPING diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 11014f684d9f..110f9506d64f 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -128,19 +128,20 @@ #define SO_TIMESTAMP_NEW 63 #define SO_TIMESTAMPNS_NEW 64 +#define SO_TIMESTAMPING_NEW 65 #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 #define SO_TIMESTAMP SO_TIMESTAMP_OLD #define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #else #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) #endif -#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD - #define SCM_TIMESTAMP SO_TIMESTAMP #define SCM_TIMESTAMPNS SO_TIMESTAMPNS #define SCM_TIMESTAMPING SO_TIMESTAMPING diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index cbc4b89c2fe4..bee2a9dde656 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -109,19 +109,20 @@ #define SO_TIMESTAMP_NEW 0x4038 #define SO_TIMESTAMPNS_NEW 0x4039 +#define SO_TIMESTAMPING_NEW 0x403A #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 #define SO_TIMESTAMP SO_TIMESTAMP_OLD #define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #else #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) #endif -#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD - #define SCM_TIMESTAMP SO_TIMESTAMP #define SCM_TIMESTAMPNS SO_TIMESTAMPNS #define SCM_TIMESTAMPING SO_TIMESTAMPING diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 85127425b294..2b38dda51426 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -110,19 +110,20 @@ #define SO_TIMESTAMP_NEW 0x0041 #define SO_TIMESTAMPNS_NEW 0x0042 +#define SO_TIMESTAMPING_NEW 0x0043 #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 #define SO_TIMESTAMP SO_TIMESTAMP_OLD #define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #else #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) #endif -#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD - #define SCM_TIMESTAMP SO_TIMESTAMP #define SCM_TIMESTAMPNS SO_TIMESTAMPNS #define SCM_TIMESTAMPING SO_TIMESTAMPING diff --git a/include/linux/socket.h b/include/linux/socket.h index ab2041a00e01..6016daeecee4 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -349,9 +349,17 @@ struct ucred { extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); +struct timespec64; struct __kernel_timespec; struct old_timespec32; +struct scm_timestamping_internal { + struct timespec64 ts[3]; +}; + +extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss); +extern void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss); + /* The __sys_...msg variants allow MSG_CMSG_COMPAT iff * forbid_cmsg_compat==false */ diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index f22d3f7162f8..2713e0fa68ef 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -112,6 +112,7 @@ #define SO_TIMESTAMP_NEW 63 #define SO_TIMESTAMPNS_NEW 64 +#define SO_TIMESTAMPING_NEW 65 #if !defined(__KERNEL__) @@ -119,13 +120,13 @@ /* on 64-bit and x32, avoid the ?: operator */ #define SO_TIMESTAMP SO_TIMESTAMP_OLD #define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD +#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD #else #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW) #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW) +#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW) #endif -#define SO_TIMESTAMPING SO_TIMESTAMPING_OLD - #define SCM_TIMESTAMP SO_TIMESTAMP #define SCM_TIMESTAMPNS SO_TIMESTAMPNS #define SCM_TIMESTAMPING SO_TIMESTAMPING diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index c0151200f7d1..d955b9e32288 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -41,6 +41,10 @@ struct scm_timestamping { struct timespec ts[3]; }; +struct scm_timestamping64 { + struct __kernel_timespec ts[3]; +}; + /* The type of scm_timestamping, passed in sock_extended_err ee_info. * This defines the type of ts[0]. For SCM_TSTAMP_SND only, if ts[0] * is zero, then this is a hardware timestamp and recorded in ts[2]. diff --git a/net/core/scm.c b/net/core/scm.c index b1ff8a441748..52ef219cf6df 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -252,6 +253,32 @@ out: } EXPORT_SYMBOL(put_cmsg); +void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) +{ + struct scm_timestamping64 tss; + int i; + + for (i = 0; i < ARRAY_SIZE(tss.ts); i++) { + tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec; + tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec; + } + + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_NEW, sizeof(tss), &tss); +} +EXPORT_SYMBOL(put_cmsg_scm_timestamping64); + +void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) +{ + struct scm_timestamping tss; + int i; + + for (i = 0; i < ARRAY_SIZE(tss.ts); i++) + tss.ts[i] = timespec64_to_timespec(tss_internal->ts[i]); + + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD, sizeof(tss), &tss); +} +EXPORT_SYMBOL(put_cmsg_scm_timestamping); + void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { struct cmsghdr __user *cm diff --git a/net/core/sock.c b/net/core/sock.c index 14b987eab10c..a9d1ecce96e5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -890,6 +890,8 @@ set_rcvbuf: } break; + case SO_TIMESTAMPING_NEW: + sock_set_flag(sk, SOCK_TSTAMP_NEW); case SO_TIMESTAMPING_OLD: if (val & ~SOF_TIMESTAMPING_MASK) { ret = -EINVAL; @@ -921,9 +923,13 @@ set_rcvbuf: if (val & SOF_TIMESTAMPING_RX_SOFTWARE) sock_enable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE); - else + else { + if (optname == SO_TIMESTAMPING_NEW) + sock_reset_flag(sk, SOCK_TSTAMP_NEW); + sock_disable_timestamp(sk, (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); + } break; case SO_RCVLOWAT: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4e9388bf104a..cab6b2f2f61d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1844,22 +1844,22 @@ out: #endif static void tcp_update_recv_tstamps(struct sk_buff *skb, - struct scm_timestamping *tss) + struct scm_timestamping_internal *tss) { if (skb->tstamp) - tss->ts[0] = ktime_to_timespec(skb->tstamp); + tss->ts[0] = ktime_to_timespec64(skb->tstamp); else - tss->ts[0] = (struct timespec) {0}; + tss->ts[0] = (struct timespec64) {0}; if (skb_hwtstamps(skb)->hwtstamp) - tss->ts[2] = ktime_to_timespec(skb_hwtstamps(skb)->hwtstamp); + tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); else - tss->ts[2] = (struct timespec) {0}; + tss->ts[2] = (struct timespec64) {0}; } /* Similar to __sock_recv_timestamp, but does not require an skb */ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, - struct scm_timestamping *tss) + struct scm_timestamping_internal *tss) { int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); bool has_timestamping = false; @@ -1873,8 +1873,10 @@ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW, sizeof(kts), &kts); } else { + struct timespec ts_old = timespec64_to_timespec(tss->ts[0]); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, - sizeof(tss->ts[0]), &tss->ts[0]); + sizeof(ts_old), &ts_old); } } else { if (new_tstamp) { @@ -1898,20 +1900,22 @@ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) has_timestamping = true; else - tss->ts[0] = (struct timespec) {0}; + tss->ts[0] = (struct timespec64) {0}; } if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) { if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) has_timestamping = true; else - tss->ts[2] = (struct timespec) {0}; + tss->ts[2] = (struct timespec64) {0}; } if (has_timestamping) { - tss->ts[1] = (struct timespec) {0}; - put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD, - sizeof(*tss), tss); + tss->ts[1] = (struct timespec64) {0}; + if (sock_flag(sk, SOCK_TSTAMP_NEW)) + put_cmsg_scm_timestamping64(msg, tss); + else + put_cmsg_scm_timestamping(msg, tss); } } @@ -1952,7 +1956,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, long timeo; struct sk_buff *skb, *last; u32 urg_hole = 0; - struct scm_timestamping tss; + struct scm_timestamping_internal tss; bool has_tss = false; bool has_cmsg; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c4e56602e0c6..369870b0ef79 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -291,7 +291,8 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, (1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_WIFI_STATUS) | \ (1UL << SOCK_NOFCS) | \ - (1UL << SOCK_FILTER_LOCKED)) + (1UL << SOCK_FILTER_LOCKED) | \ + (1UL << SOCK_TSTAMP_NEW)) /* copy only relevant settings and flags of SOL_SOCKET level from smc to * clc socket (since smc is not called for these options from net/core) */ diff --git a/net/socket.c b/net/socket.c index 1de96abd78d3..d51930689b98 100644 --- a/net/socket.c +++ b/net/socket.c @@ -706,7 +706,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); - struct scm_timestamping tss; + struct scm_timestamping_internal tss; + int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); @@ -752,20 +753,22 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, memset(&tss, 0, sizeof(tss)); if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) && - ktime_to_timespec_cond(skb->tstamp, tss.ts + 0)) + ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0)) empty = 0; if (shhwtstamps && (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && !skb_is_swtx_tstamp(skb, false_tstamp) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { + ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { empty = 0; if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && !skb_is_err_queue(skb)) put_ts_pktinfo(msg, skb); } if (!empty) { - put_cmsg(msg, SOL_SOCKET, - SO_TIMESTAMPING_OLD, sizeof(tss), &tss); + if (sock_flag(sk, SOCK_TSTAMP_NEW)) + put_cmsg_scm_timestamping64(msg, &tss); + else + put_cmsg_scm_timestamping(msg, &tss); if (skb_is_err_queue(skb) && skb->len && SKB_EXT_ERR(skb)->opt_stats) -- cgit v1.2.3 From 8a3c245c031944f2176118270e7bc5d4fd4a1075 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Thu, 14 Mar 2019 10:45:23 -0300 Subject: net: add documentation to socket.c Adds missing sphinx documentation to the socket.c's functions. Also fixes some whitespaces. I also changed the style of older documentation as an effort to have an uniform documentation style. Signed-off-by: Pedro Tammela Signed-off-by: David S. Miller --- include/linux/net.h | 6 ++ include/linux/socket.h | 12 +-- net/socket.c | 277 +++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 271 insertions(+), 24 deletions(-) (limited to 'include/linux/socket.h') diff --git a/include/linux/net.h b/include/linux/net.h index 651fca72286c..c606c72311d0 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -83,6 +83,12 @@ enum sock_type { #endif /* ARCH_HAS_SOCKET_TYPES */ +/** + * enum sock_shutdown_cmd - Shutdown types + * @SHUT_RD: shutdown receptions + * @SHUT_WR: shutdown transmissions + * @SHUT_RDWR: shutdown receptions/transmissions + */ enum sock_shutdown_cmd { SHUT_RD, SHUT_WR, diff --git a/include/linux/socket.h b/include/linux/socket.h index 6016daeecee4..b57cd8bf96e2 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -26,7 +26,7 @@ typedef __kernel_sa_family_t sa_family_t; /* * 1003.1g requires sa_family_t and that sa_data is char. */ - + struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ char sa_data[14]; /* 14 bytes of protocol address */ @@ -44,7 +44,7 @@ struct linger { * system, not 4.3. Thus msg_accrights(len) are now missing. They * belong in an obscure libc emulation or the bin. */ - + struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ @@ -54,7 +54,7 @@ struct msghdr { unsigned int msg_flags; /* flags on received message */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ }; - + struct user_msghdr { void __user *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ @@ -122,7 +122,7 @@ struct cmsghdr { * inside range, given by msg->msg_controllen before using * ancillary object DATA. --ANK (980731) */ - + static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, struct cmsghdr *__cmsg) { @@ -264,10 +264,10 @@ struct ucred { /* Maximum queue length specifiable by listen. */ #define SOMAXCONN 128 -/* Flags we can use with send/ and recv. +/* Flags we can use with send/ and recv. Added those for 1003.1g not all are supported yet */ - + #define MSG_OOB 1 #define MSG_PEEK 2 #define MSG_DONTROUTE 4 diff --git a/net/socket.c b/net/socket.c index 3c176a12fe48..8255f5bda0aa 100644 --- a/net/socket.c +++ b/net/socket.c @@ -384,6 +384,18 @@ static struct file_system_type sock_fs_type = { * but we take care of internal coherence yet. */ +/** + * sock_alloc_file - Bind a &socket to a &file + * @sock: socket + * @flags: file status flags + * @dname: protocol name + * + * Returns the &file bound with @sock, implicitly storing it + * in sock->file. If dname is %NULL, sets to "". + * On failure the return is a ERR pointer (see linux/err.h). + * This function uses GFP_KERNEL internally. + */ + struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) { struct file *file; @@ -424,6 +436,14 @@ static int sock_map_fd(struct socket *sock, int flags) return PTR_ERR(newfile); } +/** + * sock_from_file - Return the &socket bounded to @file. + * @file: file + * @err: pointer to an error code return + * + * On failure returns %NULL and assigns -ENOTSOCK to @err. + */ + struct socket *sock_from_file(struct file *file, int *err) { if (file->f_op == &socket_file_ops) @@ -532,11 +552,11 @@ static const struct inode_operations sockfs_inode_ops = { }; /** - * sock_alloc - allocate a socket + * sock_alloc - allocate a socket * * Allocate a new inode and socket object. The two are bound together * and initialised. The socket is then returned. If we are out of inodes - * NULL is returned. + * NULL is returned. This functions uses GFP_KERNEL internally. */ struct socket *sock_alloc(void) @@ -561,7 +581,7 @@ struct socket *sock_alloc(void) EXPORT_SYMBOL(sock_alloc); /** - * sock_release - close a socket + * sock_release - close a socket * @sock: socket to close * * The socket is released from the protocol stack if it has a release @@ -617,6 +637,15 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) } EXPORT_SYMBOL(__sock_tx_timestamp); +/** + * sock_sendmsg - send a message through @sock + * @sock: socket + * @msg: message to send + * + * Sends @msg through @sock, passing through LSM. + * Returns the number of bytes sent, or an error code. + */ + static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); @@ -633,6 +662,18 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg) } EXPORT_SYMBOL(sock_sendmsg); +/** + * kernel_sendmsg - send a message through @sock (kernel-space) + * @sock: socket + * @msg: message header + * @vec: kernel vec + * @num: vec array length + * @size: total message data size + * + * Builds the message data with @vec and sends it through @sock. + * Returns the number of bytes sent, or an error code. + */ + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -641,6 +682,19 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); +/** + * kernel_sendmsg_locked - send a message through @sock (kernel-space) + * @sk: sock + * @msg: message header + * @vec: output s/g array + * @num: output s/g array length + * @size: total message data size + * + * Builds the message data with @vec and sends it through @sock. + * Returns the number of bytes sent, or an error code. + * Caller must hold @sk. + */ + int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -811,6 +865,16 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); +/** + * sock_recvmsg - receive a message from @sock + * @sock: socket + * @msg: message to receive + * @flags: message flags + * + * Receives @msg from @sock, passing through LSM. Returns the total number + * of bytes received, or an error. + */ + static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, int flags) { @@ -826,20 +890,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) EXPORT_SYMBOL(sock_recvmsg); /** - * kernel_recvmsg - Receive a message from a socket (kernel space) - * @sock: The socket to receive the message from - * @msg: Received message - * @vec: Input s/g array for message data - * @num: Size of input s/g array - * @size: Number of bytes to read - * @flags: Message flags (MSG_DONTWAIT, etc...) + * kernel_recvmsg - Receive a message from a socket (kernel space) + * @sock: The socket to receive the message from + * @msg: Received message + * @vec: Input s/g array for message data + * @num: Size of input s/g array + * @size: Number of bytes to read + * @flags: Message flags (MSG_DONTWAIT, etc...) * - * On return the msg structure contains the scatter/gather array passed in the - * vec argument. The array is modified so that it consists of the unfilled - * portion of the original array. + * On return the msg structure contains the scatter/gather array passed in the + * vec argument. The array is modified so that it consists of the unfilled + * portion of the original array. * - * The returned value is the total number of bytes received, or an error. + * The returned value is the total number of bytes received, or an error. */ + int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { @@ -1005,6 +1070,13 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. */ +/** + * get_net_ns - increment the refcount of the network namespace + * @ns: common namespace (net) + * + * Returns the net's common namespace. + */ + struct ns_common *get_net_ns(struct ns_common *ns) { return &get_net(container_of(ns, struct net, ns))->ns; @@ -1099,6 +1171,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) return err; } +/** + * sock_create_lite - creates a socket + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * Creates a new socket and assigns it to @res, passing through LSM. + * The new socket initialization is not complete, see kernel_accept(). + * Returns 0 or an error. On failure @res is set to %NULL. + * This function internally uses GFP_KERNEL. + */ + int sock_create_lite(int family, int type, int protocol, struct socket **res) { int err; @@ -1224,6 +1309,21 @@ call_kill: } EXPORT_SYMBOL(sock_wake_async); +/** + * __sock_create - creates a socket + * @net: net namespace + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * @kern: boolean for kernel space sockets + * + * Creates a new socket and assigns it to @res, passing through LSM. + * Returns 0 or an error. On failure @res is set to %NULL. @kern must + * be set to true if the socket resides in kernel space. + * This function internally uses GFP_KERNEL. + */ + int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { @@ -1333,12 +1433,35 @@ out_release: } EXPORT_SYMBOL(__sock_create); +/** + * sock_create - creates a socket + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * A wrapper around __sock_create(). + * Returns 0 or an error. This function internally uses GFP_KERNEL. + */ + int sock_create(int family, int type, int protocol, struct socket **res) { return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } EXPORT_SYMBOL(sock_create); +/** + * sock_create_kern - creates a socket (kernel space) + * @net: net namespace + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * A wrapper around __sock_create(). + * Returns 0 or an error. This function internally uses GFP_KERNEL. + */ + int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res) { return __sock_create(net, family, type, protocol, res, 1); @@ -3322,18 +3445,46 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, } #endif +/** + * kernel_bind - bind an address to a socket (kernel space) + * @sock: socket + * @addr: address + * @addrlen: length of address + * + * Returns 0 or an error. + */ + int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { return sock->ops->bind(sock, addr, addrlen); } EXPORT_SYMBOL(kernel_bind); +/** + * kernel_listen - move socket to listening state (kernel space) + * @sock: socket + * @backlog: pending connections queue size + * + * Returns 0 or an error. + */ + int kernel_listen(struct socket *sock, int backlog) { return sock->ops->listen(sock, backlog); } EXPORT_SYMBOL(kernel_listen); +/** + * kernel_accept - accept a connection (kernel space) + * @sock: listening socket + * @newsock: new connected socket + * @flags: flags + * + * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0. + * If it fails, @newsock is guaranteed to be %NULL. + * Returns 0 or an error. + */ + int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { struct sock *sk = sock->sk; @@ -3359,6 +3510,19 @@ done: } EXPORT_SYMBOL(kernel_accept); +/** + * kernel_connect - connect a socket (kernel space) + * @sock: socket + * @addr: address + * @addrlen: address length + * @flags: flags (O_NONBLOCK, ...) + * + * For datagram sockets, @addr is the addres to which datagrams are sent + * by default, and the only address from which datagrams are received. + * For stream sockets, attempts to connect to @addr. + * Returns 0 or an error code. + */ + int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags) { @@ -3366,18 +3530,48 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, } EXPORT_SYMBOL(kernel_connect); +/** + * kernel_getsockname - get the address which the socket is bound (kernel space) + * @sock: socket + * @addr: address holder + * + * Fills the @addr pointer with the address which the socket is bound. + * Returns 0 or an error code. + */ + int kernel_getsockname(struct socket *sock, struct sockaddr *addr) { return sock->ops->getname(sock, addr, 0); } EXPORT_SYMBOL(kernel_getsockname); +/** + * kernel_peername - get the address which the socket is connected (kernel space) + * @sock: socket + * @addr: address holder + * + * Fills the @addr pointer with the address which the socket is connected. + * Returns 0 or an error code. + */ + int kernel_getpeername(struct socket *sock, struct sockaddr *addr) { return sock->ops->getname(sock, addr, 1); } EXPORT_SYMBOL(kernel_getpeername); +/** + * kernel_getsockopt - get a socket option (kernel space) + * @sock: socket + * @level: API level (SOL_SOCKET, ...) + * @optname: option tag + * @optval: option value + * @optlen: option length + * + * Assigns the option length to @optlen. + * Returns 0 or an error. + */ + int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { @@ -3400,6 +3594,17 @@ int kernel_getsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(kernel_getsockopt); +/** + * kernel_setsockopt - set a socket option (kernel space) + * @sock: socket + * @level: API level (SOL_SOCKET, ...) + * @optname: option tag + * @optval: option value + * @optlen: option length + * + * Returns 0 or an error. + */ + int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen) { @@ -3420,6 +3625,17 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(kernel_setsockopt); +/** + * kernel_sendpage - send a &page through a socket (kernel space) + * @sock: socket + * @page: page + * @offset: page offset + * @size: total size in bytes + * @flags: flags (MSG_DONTWAIT, ...) + * + * Returns the total amount sent in bytes or an error. + */ + int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { @@ -3430,6 +3646,18 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage); +/** + * kernel_sendpage_locked - send a &page through the locked sock (kernel space) + * @sk: sock + * @page: page + * @offset: page offset + * @size: total size in bytes + * @flags: flags (MSG_DONTWAIT, ...) + * + * Returns the total amount sent in bytes or an error. + * Caller must hold @sk. + */ + int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -3443,17 +3671,30 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage_locked); +/** + * kernel_shutdown - shut down part of a full-duplex connection (kernel space) + * @sock: socket + * @how: connection part + * + * Returns 0 or an error. + */ + int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); } EXPORT_SYMBOL(kernel_sock_shutdown); -/* This routine returns the IP overhead imposed by a socket i.e. - * the length of the underlying IP header, depending on whether - * this is an IPv4 or IPv6 socket and the length from IP options turned - * on at the socket. Assumes that the caller has a lock on the socket. +/** + * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket + * @sk: socket + * + * This routine returns the IP overhead imposed by a socket i.e. + * the length of the underlying IP header, depending on whether + * this is an IPv4 or IPv6 socket and the length from IP options turned + * on at the socket. Assumes that the caller has a lock on the socket. */ + u32 kernel_sock_ip_overhead(struct sock *sk) { struct inet_sock *inet; -- cgit v1.2.3