summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Documentation/networking/timestamping.txt 21
-rw-r--r-- Documentation/networking/timestamping/txtimestamp.c 28
-rw-r--r-- Documentation/sysctl/net.txt 8
-rw-r--r-- include/net/sock.h 1
-rw-r--r-- include/uapi/linux/net_tstamp.h 3
-rw-r--r-- net/core/skbuff.c 40
-rw-r--r-- net/core/sock.c 3
-rw-r--r-- net/core/sysctl_net_core.c 9
-rw-r--r-- net/ipv4/ip_sockglue.c 7
-rw-r--r-- net/ipv6/datagram.c 5
-rw-r--r-- net/rxrpc/ar-error.c 5
11 files changed, 113 insertions, 17 deletions
diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index a5c784c89312..5f0922613f1a 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -162,6 +162,27 @@ SOF_TIMESTAMPING_OPT_CMSG:
option IP_PKTINFO simultaneously.
+SOF_TIMESTAMPING_OPT_TSONLY:
+
+ Applies to transmit timestamps only. Makes the kernel return the
+ timestamp as a cmsg alongside an empty packet, as opposed to
+ alongside the original packet. This reduces the amount of memory
+ charged to the socket's receive budget (SO_RCVBUF) and delivers
+ the timestamp even if sysctl net.core.tstamp_allow_data is 0.
+ This option disables SOF_TIMESTAMPING_OPT_CMSG.
+
+
+New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
+disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
+regardless of the setting of sysctl net.core.tstamp_allow_data.
+
+An exception is when a process needs additional cmsg data, for
+instance SOL_IP/IP_PKTINFO to detect the egress network interface.
+Then pass option SOF_TIMESTAMPING_OPT_CMSG. This option depends on
+having access to the contents of the original packet, so cannot be
+combined with SOF_TIMESTAMPING_OPT_TSONLY.
+
+
1.4 Bytestream Timestamps
The SO_TIMESTAMPING interface supports timestamping of bytes in a
diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c
index 05694febc238..8217510d3842 100644
--- a/Documentation/networking/timestamping/txtimestamp.c
+++ b/Documentation/networking/timestamping/txtimestamp.c
@@ -70,6 +70,7 @@ static int do_ipv6 = 1;
static int cfg_payload_len = 10;
static bool cfg_show_payload;
static bool cfg_do_pktinfo;
+static bool cfg_loop_nodata;
static uint16_t dest_port = 9000;
static struct sockaddr_in daddr;
@@ -141,6 +142,9 @@ static void print_payload(char *data, int len)
{
int i;
+ if (!len)
+ return;
+
if (len > 70)
len = 70;
@@ -177,6 +181,7 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
struct sock_extended_err *serr = NULL;
struct scm_timestamping *tss = NULL;
struct cmsghdr *cm;
+ int batch = 0;
for (cm = CMSG_FIRSTHDR(msg);
cm && cm->cmsg_len;
@@ -209,10 +214,18 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
} else
fprintf(stderr, "unknown cmsg %d,%d\n",
cm->cmsg_level, cm->cmsg_type);
+
+ if (serr && tss) {
+ print_timestamp(tss, serr->ee_info, serr->ee_data,
+ payload_len);
+ serr = NULL;
+ tss = NULL;
+ batch++;
+ }
}
- if (serr && tss)
- print_timestamp(tss, serr->ee_info, serr->ee_data, payload_len);
+ if (batch > 1)
+ fprintf(stderr, "batched %d timestamps\n", batch);
}
static int recv_errmsg(int fd)
@@ -244,7 +257,7 @@ static int recv_errmsg(int fd)
if (ret == -1 && errno != EAGAIN)
error(1, errno, "recvmsg");
- if (ret > 0) {
+ if (ret >= 0) {
__recv_errmsg_cmsg(&msg, ret);
if (cfg_show_payload)
print_payload(data, cfg_payload_len);
@@ -309,6 +322,9 @@ static void do_test(int family, unsigned int opt)
opt |= SOF_TIMESTAMPING_SOFTWARE |
SOF_TIMESTAMPING_OPT_CMSG |
SOF_TIMESTAMPING_OPT_ID;
+ if (cfg_loop_nodata)
+ opt |= SOF_TIMESTAMPING_OPT_TSONLY;
+
if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
(char *) &opt, sizeof(opt)))
error(1, 0, "setsockopt timestamping");
@@ -378,6 +394,7 @@ static void __attribute__((noreturn)) usage(const char *filepath)
" -h: show this message\n"
" -I: request PKTINFO\n"
" -l N: send N bytes at a time\n"
+ " -n: set no-payload option\n"
" -r: use raw\n"
" -R: use raw (IP_HDRINCL)\n"
" -p N: connect to port N\n"
@@ -392,7 +409,7 @@ static void parse_opt(int argc, char **argv)
int proto_count = 0;
char c;
- while ((c = getopt(argc, argv, "46hIl:p:rRux")) != -1) {
+ while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) {
switch (c) {
case '4':
do_ipv6 = 0;
@@ -403,6 +420,9 @@ static void parse_opt(int argc, char **argv)
case 'I':
cfg_do_pktinfo = true;
break;
+ case 'n':
+ cfg_loop_nodata = true;
+ break;
case 'r':
proto_count++;
cfg_proto = SOCK_RAW;
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 666594b43cff..6294b5186ae5 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -97,6 +97,14 @@ rmem_max
The maximum receive socket buffer size in bytes.
+tstamp_allow_data
+-----------------
+Allow processes to receive tx timestamps looped together with the original
+packet contents. If disabled, transmit timestamp requests from unprivileged
+processes are dropped unless socket option SOF_TIMESTAMPING_OPT_TSONLY is set.
+Default: 1 (on)
+
+
wmem_default
------------
diff --git a/include/net/sock.h b/include/net/sock.h
index 15341499786c..511ef7c8889b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2239,6 +2239,7 @@ bool sk_net_capable(const struct sock *sk, int cap);
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
+extern int sysctl_tstamp_allow_data;
extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index edbc888ceb51..6d1abea9746e 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -24,8 +24,9 @@ enum {
SOF_TIMESTAMPING_TX_SCHED = (1<<8),
SOF_TIMESTAMPING_TX_ACK = (1<<9),
SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
+ SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
- SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG,
+ SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
SOF_TIMESTAMPING_LAST
};
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 56db472e9b86..a5bff2767f15 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -74,6 +74,8 @@
#include <asm/uaccess.h>
#include <trace/events/skb.h>
#include <linux/highmem.h>
+#include <linux/capability.h>
+#include <linux/user_namespace.h>
struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
@@ -3690,11 +3692,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
kfree_skb(skb);
}
+static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
+{
+ bool ret;
+ /* Allowed outright if the sysctl permits data or the caller is tsonly. */
+ if (likely(sysctl_tstamp_allow_data || tsonly))
+ return true;
+ /* Else require CAP_NET_RAW on the socket's file; read under callback lock. */
+ read_lock_bh(&sk->sk_callback_lock);
+ ret = sk->sk_socket && sk->sk_socket->file &&
+ file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW);
+ read_unlock_bh(&sk->sk_callback_lock);
+ return ret;
+}
+
void skb_complete_tx_timestamp(struct sk_buff *skb,
struct skb_shared_hwtstamps *hwtstamps)
{
struct sock *sk = skb->sk;
+ if (!skb_may_tx_timestamp(sk, false))
+ return;
+
/* take a reference to prevent skb_orphan() from freeing the socket */
sock_hold(sk);
@@ -3710,19 +3729,28 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
struct sock *sk, int tstype)
{
struct sk_buff *skb;
+ bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
- if (!sk)
+ if (!sk || !skb_may_tx_timestamp(sk, tsonly))
return;
- if (hwtstamps)
- *skb_hwtstamps(orig_skb) = *hwtstamps;
+ if (tsonly)
+ skb = alloc_skb(0, GFP_ATOMIC);
else
- orig_skb->tstamp = ktime_get_real();
-
- skb = skb_clone(orig_skb, GFP_ATOMIC);
+ skb = skb_clone(orig_skb, GFP_ATOMIC);
if (!skb)
return;
+ if (tsonly) {
+ skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags;
+ skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
+ }
+
+ if (hwtstamps)
+ *skb_hwtstamps(skb) = *hwtstamps;
+ else
+ skb->tstamp = ktime_get_real();
+
__skb_complete_tx_timestamp(skb, sk, tstype);
}
EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
diff --git a/net/core/sock.c b/net/core/sock.c
index 1c7a33db1314..93c8b20c91e4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -325,6 +325,8 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);
+int sysctl_tstamp_allow_data __read_mostly = 1;
+
struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);
@@ -840,6 +842,7 @@ set_rcvbuf:
ret = -EINVAL;
break;
}
+
if (val & SOF_TIMESTAMPING_OPT_ID &&
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
if (sk->sk_protocol == IPPROTO_TCP) {
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 31baba2a71ce..fde21d19e61b 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -321,6 +321,15 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "tstamp_allow_data",
+ .data = &sysctl_tstamp_allow_data,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one
+ },
#ifdef CONFIG_RPS
{
.procname = "rps_sock_flow_entries",
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index db5e0f81ce0a..31d8c71986b4 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -483,7 +483,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
serr = SKB_EXT_ERR(skb);
- if (sin) {
+ if (sin && skb->len) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
serr->addr_offset);
@@ -496,8 +496,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
sin = &errhdr.offender;
memset(sin, 0, sizeof(*sin));
- if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
- ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) {
+ if (skb->len &&
+ (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+ ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
if (inet_sk(sk)->cmsg_flags)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 49f5e73db122..c215be70cac0 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -369,7 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
serr = SKB_EXT_ERR(skb);
- if (sin) {
+ if (sin && skb->len) {
const unsigned char *nh = skb_network_header(skb);
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
@@ -394,8 +394,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
sin = &errhdr.offender;
memset(sin, 0, sizeof(*sin));
-
- if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
+ if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) {
sin->sin6_family = AF_INET6;
if (np->rxopt.all) {
if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP &&
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index 74c0fcd36838..5394b6be46ec 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -42,6 +42,11 @@ void rxrpc_UDP_error_report(struct sock *sk)
_leave("UDP socket errqueue empty");
return;
}
+ if (!skb->len) {
+ _leave("UDP empty message");
+ kfree_skb(skb);
+ return;
+ }
rxrpc_new_skb(skb);