diff options
Diffstat (limited to 'Documentation/networking')
-rw-r--r-- | Documentation/networking/filter.txt | 4 | ||||
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 31 | ||||
-rw-r--r-- | Documentation/networking/netlink_mmap.txt | 13 | ||||
-rw-r--r-- | Documentation/networking/nf_conntrack-sysctl.txt | 3 | ||||
-rw-r--r-- | Documentation/networking/openvswitch.txt | 13 | ||||
-rw-r--r-- | Documentation/networking/timestamping.txt | 21 | ||||
-rw-r--r-- | Documentation/networking/timestamping/txtimestamp.c | 38 |
7 files changed, 98 insertions, 25 deletions
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 58d08f8d8d80..9930ecfbb465 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -279,8 +279,8 @@ Possible BPF extensions are shown in the following table: hatype skb->dev->type rxhash skb->hash cpu raw_smp_processor_id() - vlan_tci vlan_tx_tag_get(skb) - vlan_pr vlan_tx_tag_present(skb) + vlan_tci skb_vlan_tag_get(skb) + vlan_pr skb_vlan_tag_present(skb) rand prandom_u32() These extensions can also be prefixed with '#'. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 9bffdfc648dc..1b8c964b0d17 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -66,6 +66,8 @@ fwmark_reflect - BOOLEAN route/max_size - INTEGER Maximum number of routes allowed in the kernel. Increase this when using large numbers of interfaces and/or routes. + From linux kernel 3.6 onwards, this is deprecated for ipv4 + as route cache is no longer used. neigh/default/gc_thresh1 - INTEGER Minimum number of entries to keep. Garbage collector will not @@ -288,6 +290,28 @@ tcp_frto - INTEGER By default it's enabled with a non-zero value. 0 disables F-RTO. +tcp_invalid_ratelimit - INTEGER + Limit the maximal rate for sending duplicate acknowledgments + in response to incoming TCP packets that are for an existing + connection but that are invalid due to any of these reasons: + + (a) out-of-window sequence number, + (b) out-of-window acknowledgment number, or + (c) PAWS (Protection Against Wrapped Sequence numbers) check failure + + This can help mitigate simple "ack loop" DoS attacks, wherein + a buggy or malicious middlebox or man-in-the-middle can + rewrite TCP header fields in manner that causes each endpoint + to think that the other is sending invalid TCP segments, thus + causing each side to send an unterminating stream of duplicate + acknowledgments for invalid segments. + + Using 0 disables rate-limiting of dupacks in response to + invalid segments; otherwise this value specifies the minimal + space between sending such dupacks, in milliseconds. + + Default: 500 (milliseconds). + tcp_keepalive_time - INTEGER How often TCP sends out keepalive messages when keepalive is enabled. Default: 2hours. @@ -1285,6 +1309,13 @@ accept_ra_rtr_pref - BOOLEAN Functional default: enabled if accept_ra is enabled. disabled if accept_ra is disabled. +accept_ra_mtu - BOOLEAN + Apply the MTU value specified in RA option 5 (RFC4861). If + disabled, the MTU specified in the RA will be ignored. + + Functional default: enabled if accept_ra is enabled. + disabled if accept_ra is disabled. + accept_redirects - BOOLEAN Accept Redirects. diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt index c6af4bac5aa8..54f10478e8e3 100644 --- a/Documentation/networking/netlink_mmap.txt +++ b/Documentation/networking/netlink_mmap.txt @@ -199,16 +199,9 @@ frame header. TX limitations -------------- -Kernel processing usually involves validation of the message received by -user-space, then processing its contents. The kernel must assure that -userspace is not able to modify the message contents after they have been -validated. In order to do so, the message is copied from the ring frame -to an allocated buffer if either of these conditions is false: - -- only a single mapping of the ring exists -- the file descriptor is not shared between processes - -This means that for threaded programs, the kernel will fall back to copying. +As of Jan 2015 the message is always copied from the ring frame to an +allocated buffer due to unresolved security concerns. +See commit 4682a0358639b29cf ("netlink: Always copy on mmap TX."). Example ------- diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index 70da5086153d..f55599c62c9d 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -11,7 +11,8 @@ nf_conntrack_buckets - INTEGER (read-only) Size of hash table. If not specified as parameter during module loading, the default size is calculated by dividing total memory by 16384 to determine the number of buckets but the hash table will - never have fewer than 32 or more than 16384 buckets. + never have fewer than 32 and limited to 16384 buckets. For systems + with more than 4GB of memory it will be 65536 buckets. nf_conntrack_checksum - BOOLEAN 0 - disabled diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt index 37c20ee2455e..b3b9ac61d29d 100644 --- a/Documentation/networking/openvswitch.txt +++ b/Documentation/networking/openvswitch.txt @@ -131,6 +131,19 @@ performs best-effort detection of overlapping wildcarded flows and may reject some but not all of them. However, this behavior may change in future versions. +Unique flow identifiers +----------------------- + +An alternative to using the original match portion of a key as the handle for +flow identification is a unique flow identifier, or "UFID". UFIDs are optional +for both the kernel and user space program. + +User space programs that support UFID are expected to provide it during flow +setup in addition to the flow, then refer to the flow using the UFID for all +future operations. The kernel is not required to index flows by the original +flow key if a UFID is specified. + + Basic rule for evolving flow keys --------------------------------- diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index a5c784c89312..5f0922613f1a 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -162,6 +162,27 @@ SOF_TIMESTAMPING_OPT_CMSG: option IP_PKTINFO simultaneously. +SOF_TIMESTAMPING_OPT_TSONLY: + + Applies to transmit timestamps only. Makes the kernel return the + timestamp as a cmsg alongside an empty packet, as opposed to + alongside the original packet. This reduces the amount of memory + charged to the socket's receive budget (SO_RCVBUF) and delivers + the timestamp even if sysctl net.core.tstamp_allow_data is 0. + This option disables SOF_TIMESTAMPING_OPT_CMSG. + + +New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to +disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate +regardless of the setting of sysctl net.core.tstamp_allow_data. + +An exception is when a process needs additional cmsg data, for +instance SOL_IP/IP_PKTINFO to detect the egress network interface. +Then pass option SOF_TIMESTAMPING_OPT_CMSG. This option depends on +having access to the contents of the original packet, so cannot be +combined with SOF_TIMESTAMPING_OPT_TSONLY. + + 1.4 Bytestream Timestamps The SO_TIMESTAMPING interface supports timestamping of bytes in a diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c index 876f71c5625a..8217510d3842 100644 --- a/Documentation/networking/timestamping/txtimestamp.c +++ b/Documentation/networking/timestamping/txtimestamp.c @@ -30,6 +30,8 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ +#define _GNU_SOURCE + #include <arpa/inet.h> #include <asm/types.h> #include <error.h> @@ -59,14 +61,6 @@ #include <time.h> #include <unistd.h> -/* ugly hack to work around netinet/in.h and linux/ipv6.h conflicts */ -#ifndef in6_pktinfo -struct in6_pktinfo { - struct in6_addr ipi6_addr; - int ipi6_ifindex; -}; -#endif - /* command line parameters */ static int cfg_proto = SOCK_STREAM; static int cfg_ipproto = IPPROTO_TCP; @@ -76,6 +70,7 @@ static int do_ipv6 = 1; static int cfg_payload_len = 10; static bool cfg_show_payload; static bool cfg_do_pktinfo; +static bool cfg_loop_nodata; static uint16_t dest_port = 9000; static struct sockaddr_in daddr; @@ -147,6 +142,9 @@ static void print_payload(char *data, int len) { int i; + if (!len) + return; + if (len > 70) len = 70; @@ -183,6 +181,7 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) struct sock_extended_err *serr = NULL; struct scm_timestamping *tss = NULL; struct cmsghdr *cm; + int batch = 0; for (cm = CMSG_FIRSTHDR(msg); cm && cm->cmsg_len; @@ -215,10 +214,18 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) } else fprintf(stderr, "unknown cmsg %d,%d\n", cm->cmsg_level, cm->cmsg_type); + + if (serr && tss) { + print_timestamp(tss, serr->ee_info, serr->ee_data, + payload_len); + serr = NULL; + tss = NULL; + batch++; + } } - if (serr && tss) - print_timestamp(tss, serr->ee_info, serr->ee_data, payload_len); + if (batch > 1) + fprintf(stderr, "batched %d timestamps\n", batch); } static int recv_errmsg(int fd) @@ -250,7 +257,7 @@ static int recv_errmsg(int fd) if (ret == -1 && errno != EAGAIN) error(1, errno, "recvmsg"); - if (ret > 0) { + if (ret >= 0) { __recv_errmsg_cmsg(&msg, ret); if (cfg_show_payload) print_payload(data, cfg_payload_len); @@ -315,6 +322,9 @@ static void do_test(int family, unsigned int opt) opt |= SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_CMSG | SOF_TIMESTAMPING_OPT_ID; + if (cfg_loop_nodata) + opt |= SOF_TIMESTAMPING_OPT_TSONLY; + if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (char *) &opt, sizeof(opt))) error(1, 0, "setsockopt timestamping"); @@ -384,6 +394,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -h: show this message\n" " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" + " -n: set no-payload option\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" " -p N: connect to port N\n" @@ -398,7 +409,7 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; char c; - while ((c = getopt(argc, argv, "46hIl:p:rRux")) != -1) { + while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -409,6 +420,9 @@ static void parse_opt(int argc, char **argv) case 'I': cfg_do_pktinfo = true; break; + case 'n': + cfg_loop_nodata = true; + break; case 'r': proto_count++; cfg_proto = SOCK_RAW; |