From 133800d1f0288b9ddfc0d0aded10d9efa82d5b8c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 8 Mar 2016 10:34:28 -0300 Subject: sctp: fix copying more bytes than expected in sctp_add_bind_addr Dmitry reported that sctp_add_bind_addr may read more bytes than expected in case the parameter is a IPv4 addr supplied by the user through calls such as sctp_bindx_add(), because it always copies sizeof(union sctp_addr) while the buffer may be just a struct sockaddr_in, which is smaller. This patch then fixes it by limiting the memcpy to the min between the union size and a (new parameter) provided addr size. Where possible this parameter still is the size of that union, except for reading from user-provided buffers, which then it accounts for protocol type. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 205630bb5010..f816344f65f2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1098,7 +1098,7 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, gfp_t gfp); int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *, - __u8 addr_state, gfp_t gfp); + int new_size, __u8 addr_state, gfp_t gfp); int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *); int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *); -- cgit v1.2.3 From eaa93bf4c6090809395605d1775a0db9970eda5e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 18 Mar 2016 18:37:57 +0100 Subject: vxlan: fix populating tclass in vxlan6_get_route Jiri mentioned that flowi6_tos of struct flowi6 is never used/read anywhere. In fact, rest of the kernel uses the flowi6's flowlabel, where the traffic class _and_ the flowlabel (aka flowinfo) is encoded. For example, for policy routing, fib6_rule_match() uses ip6_tclass() that is applied on the flowlabel member for matching on tclass. Similar fix is needed for geneve, where flowi6_tos is set as well. Installing a v6 blackhole rule that f.e. matches on tos is now working with vxlan. Fixes: 1400615d64cf ("vxlan: allow setting ipv6 traffic class") Reported-by: Jiri Benc Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 3 +-- include/net/ipv6.h | 6 ++++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 800106a7246c..7bfcb9a62a5d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1810,10 +1810,9 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; - fl6.flowi6_tos = RT_TOS(tos); fl6.daddr = *daddr; fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr; - fl6.flowlabel = label; + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = IPPROTO_UDP; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f3c9857c645d..d0aeb97aec5d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -835,6 +835,12 @@ static inline u8 ip6_tclass(__be32 flowinfo) { return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT; } + +static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel) +{ + return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel; +} + /* * Prototypes exported by ipv6 */ -- cgit v1.2.3 From 69716a2b51aeb68fe295c0d09e26c8781eacebde Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 18 Mar 2016 18:37:59 +0100 Subject: ipv6, trace: fix tos reporting on fib6_table_lookup flowi6_tos of struct flowi6 is unused in IPv6, therefore dumping tos on that tracepoint will also give incorrect information wrt traffic class. If we want to fix it, we need to extract it via ip6_tclass(flp->flowlabel). While for the same test case I get a count of 0 non-zero tos values before the change, they now start to show up after the change: # ./perf record -e fib6:fib6_table_lookup -a sleep 10 # ./perf script | grep -v "tos 0" | wc -l 60 Since there's no user in the kernel tree anymore of flowi6_tos, remove the define to avoid any future confusion on this. Fixes: b811580d91e9 ("net: IPv6 fib lookup tracepoint") Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/flow.h | 2 +- include/trace/events/fib6.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 83969eebebf3..d47ef4bb5423 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -127,7 +127,6 @@ struct flowi6 { #define flowi6_oif __fl_common.flowic_oif #define flowi6_iif __fl_common.flowic_iif #define flowi6_mark __fl_common.flowic_mark -#define flowi6_tos __fl_common.flowic_tos #define flowi6_scope __fl_common.flowic_scope #define flowi6_proto __fl_common.flowic_proto #define flowi6_flags __fl_common.flowic_flags @@ -135,6 +134,7 @@ struct flowi6 { #define flowi6_tun_key __fl_common.flowic_tun_key struct in6_addr daddr; struct in6_addr saddr; + /* Note: flowi6_tos is encoded in flowlabel, too. */ __be32 flowlabel; union flowi_uli uli; #define fl6_sport uli.ports.sport diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index 4cf6bac4686d..d60096cddb2a 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -37,7 +37,7 @@ TRACE_EVENT(fib6_table_lookup, __entry->tb_id = tb_id; __entry->oif = flp->flowi6_oif; __entry->iif = flp->flowi6_iif; - __entry->tos = flp->flowi6_tos; + __entry->tos = ip6_tclass(flp->flowlabel); __entry->scope = flp->flowi6_scope; __entry->flags = flp->flowi6_flags; -- cgit v1.2.3 From 3822a5ff4bc32043fa9c7b6d6f125bcdca6da39c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Sat, 19 Mar 2016 12:17:20 -0300 Subject: sctp: align MTU to a word SCTP is a protocol that is aligned to a word (4 bytes). Thus using bare MTU can sometimes return values that are not aligned, like for loopback, which is 65536 but ipv4_mtu() limits that to 65535. This mis-alignment will cause the last non-aligned bytes to never be used and can cause issues with congestion control. So it's better to just consider a lower MTU and keep congestion control calcs saner as they are based on PMTU. Same applies to icmp frag needed messages, which is also fixed by this patch. One other effect of this is the inability to send MTU-sized packet without queueing or fragmentation and without hitting Nagle. As the check performed at sctp_packet_can_append_data(): if (chunk->skb->len + q->out_qlen >= transport->pathmtu - packet->overhead) /* Enough data queued to fill a packet */ return SCTP_XMIT_OK; with the above example of MTU, if there are no other messages queued, one cannot send a packet that just fits one packet (65532 bytes) and without causing DATA chunk fragmentation or a delay. v2: - Added WORD_TRUNC macro Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 8 +++++--- net/sctp/associola.c | 3 ++- net/sctp/input.c | 3 ++- net/sctp/transport.c | 4 ++-- 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 835aa2ed9870..ad2136caa7d6 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -82,6 +82,11 @@ #define SCTP_PROTOSW_FLAG INET_PROTOSW_PERMANENT #endif +/* Round an int up to the next multiple of 4. */ +#define WORD_ROUND(s) (((s)+3)&~3) +/* Truncate to the previous multiple of 4. */ +#define WORD_TRUNC(s) ((s)&~3) + /* * Function declarations. */ @@ -475,9 +480,6 @@ for (pos = chunk->subh.fwdtsn_hdr->skip;\ (void *)pos <= (void *)chunk->subh.fwdtsn_hdr->skip + end - sizeof(struct sctp_fwdtsn_skip);\ pos++) -/* Round an int up to the next multiple of 4. */ -#define WORD_ROUND(s) (((s)+3)&~3) - /* External references. */ extern struct proto sctp_prot; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index a19b3e607703..e1849f3714ad 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1406,7 +1406,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(sk, t, dst_mtu(t->dst)); + sctp_transport_update_pmtu(sk, t, + WORD_TRUNC(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/input.c b/net/sctp/input.c index db76f1ab4ac2..00b8445364e3 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -606,7 +606,8 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) /* PMTU discovery (RFC1191) */ if (ICMP_FRAG_NEEDED == code) { - sctp_icmp_frag_needed(sk, asoc, transport, info); + sctp_icmp_frag_needed(sk, asoc, transport, + WORD_TRUNC(info)); goto out_unlock; } else { if (ICMP_PROT_UNREACH == code) { diff --git a/net/sctp/transport.c b/net/sctp/transport.c index d517153891a6..9b6b48c7524e 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -226,7 +226,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) } if (transport->dst) { - transport->pathmtu = dst_mtu(transport->dst); + transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); } else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } @@ -280,7 +280,7 @@ void sctp_transport_route(struct sctp_transport *transport, return; } if (transport->dst) { - transport->pathmtu = dst_mtu(transport->dst); + transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). -- cgit v1.2.3 From 659e0bcaebc4ca36e64eac6e9f39c1904b17472c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Sat, 19 Mar 2016 12:17:43 -0300 Subject: sctp: keep fragmentation point aligned to word size If the user supply a different fragmentation point or if there is a network header that cause it to not be aligned, force it to be aligned. Fragmentation point at a value that is not aligned is not optimal. It causes extra padding to be used and has just no pros. v2: - Make use of the new WORD_TRUNC macro Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ad2136caa7d6..65521cfdcade 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -431,7 +431,7 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); - frag = min_t(int, frag, SCTP_MAX_CHUNK_LEN); + frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); return frag; } -- cgit v1.2.3 From fac8e0f579695a3ecbc4d3cac369139d7f819971 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sat, 19 Mar 2016 09:32:01 -0700 Subject: tunnels: Don't apply GRO to multiple layers of encapsulation. When drivers express support for TSO of encapsulated packets, they only mean that they can do it for one layer of encapsulation. Supporting additional levels would mean updating, at a minimum, more IP length fields and they are unaware of this. No encapsulation device expresses support for handling offloaded encapsulated packets, so we won't generate these types of frames in the transmit path. However, GRO doesn't have a check for multiple levels of encapsulation and will attempt to build them. UDP tunnel GRO actually does prevent this situation but it only handles multiple UDP tunnels stacked on top of each other. This generalizes that solution to prevent any kind of tunnel stacking that would cause problems. Fixes: bf5a755f ("net-gre-gro: Add GRE support to the GRO stack") Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- net/core/dev.c | 2 +- net/ipv4/af_inet.c | 15 ++++++++++++++- net/ipv4/gre_offload.c | 5 +++++ net/ipv4/udp_offload.c | 6 +++--- net/ipv6/ip6_offload.c | 15 ++++++++++++++- 6 files changed, 39 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index be693b34662f..f9eebd518545 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2096,8 +2096,8 @@ struct napi_gro_cb { /* This is non-zero if the packet may be of the same flow. */ u8 same_flow:1; - /* Used in udp_gro_receive */ - u8 udp_mark:1; + /* Used in tunnel GRO receive */ + u8 encap_mark:1; /* GRO checksum is valid */ u8 csum_valid:1; diff --git a/net/core/dev.c b/net/core/dev.c index edb7179bc051..43c74cad25bc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4438,7 +4438,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->encap_mark = 0; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 965923325535..0fefba64ee79 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1380,6 +1380,19 @@ out: return pp; } +static struct sk_buff **ipip_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return inet_gro_receive(head, skb); +} + #define SECONDS_PER_DAY 86400 /* inet_current_timestamp - Return IP network timestamp @@ -1682,7 +1695,7 @@ static struct packet_offload ip_packet_offload __read_mostly = { static const struct net_offload ipip_offload = { .callbacks = { .gso_segment = inet_gso_segment, - .gro_receive = inet_gro_receive, + .gro_receive = ipip_gro_receive, .gro_complete = ipip_gro_complete, }, }; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 540866dbd27d..dd031617160a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -126,6 +126,11 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, struct packet_offload *ptype; __be16 type; + if (NAPI_GRO_CB(skb)->encap_mark) + goto out; + + NAPI_GRO_CB(skb)->encap_mark = 1; + off = skb_gro_offset(skb); hlen = off + sizeof(*greh); greh = skb_gro_header_fast(skb, off); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 8a3405a80260..8007f73b8981 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -311,14 +311,14 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - if (NAPI_GRO_CB(skb)->udp_mark || + if (NAPI_GRO_CB(skb)->encap_mark || (skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && !NAPI_GRO_CB(skb)->csum_valid)) goto out; - /* mark that this skb passed once through the udp gro layer */ - NAPI_GRO_CB(skb)->udp_mark = 1; + /* mark that this skb passed once through the tunnel gro layer */ + NAPI_GRO_CB(skb)->encap_mark = 1; rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index eeca943f12dc..82e9f3076028 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -258,6 +258,19 @@ out: return pp; } +static struct sk_buff **sit_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return ipv6_gro_receive(head, skb); +} + static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; @@ -302,7 +315,7 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, + .gro_receive = sit_gro_receive, .gro_complete = sit_gro_complete, }, }; -- cgit v1.2.3 From a09a4c8dd1ec7f830e1fb9e59eb72bddc965d168 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sat, 19 Mar 2016 09:32:02 -0700 Subject: tunnels: Remove encapsulation offloads on decap. If a packet is either locally encapsulated or processed through GRO it is marked with the offloads that it requires. However, when it is decapsulated these tunnel offload indications are not removed. This means that if we receive an encapsulated TCP packet, aggregate it with GRO, decapsulate, and retransmit the resulting frame on a NIC that does not support encapsulation, we won't be able to take advantage of hardware offloads even though it is just a simple TCP packet at this point. This fixes the problem by stripping off encapsulation offload indications when packets are decapsulated. The performance impacts of this bug are significant. In a test where a Geneve encapsulated TCP stream is sent to a hypervisor, GRO'ed, decapsulated, and bridged to a VM performance is improved by 60% (5Gbps->8Gbps) as a result of avoiding unnecessary segmentation at the VM tap interface. Reported-by: Ramu Ramamurthy Fixes: 68c33163 ("v4 GRE: Add TCP segmentation offload for GRE") Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 16 ++++++++++++++++ net/ipv4/fou.c | 13 +++++++++++-- net/ipv4/ip_tunnel_core.c | 3 ++- net/ipv6/sit.c | 6 ++++-- 4 files changed, 33 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index c35dda9ec991..56050f913339 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -305,6 +305,22 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); +static inline int iptunnel_pull_offloads(struct sk_buff *skb) +{ + if (skb_is_gso(skb)) { + int err; + + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + return err; + skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >> + NETIF_F_GSO_SHIFT); + } + + skb->encapsulation = 0; + return 0; +} + static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { if (pkt_len > 0) { diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 780484243e14..a0586b4a197d 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -48,7 +48,7 @@ static inline struct fou *fou_from_sock(struct sock *sk) return sk->sk_user_data; } -static void fou_recv_pull(struct sk_buff *skb, size_t len) +static int fou_recv_pull(struct sk_buff *skb, size_t len) { struct iphdr *iph = ip_hdr(skb); @@ -59,6 +59,7 @@ static void fou_recv_pull(struct sk_buff *skb, size_t len) __skb_pull(skb, len); skb_postpull_rcsum(skb, udp_hdr(skb), len); skb_reset_transport_header(skb); + return iptunnel_pull_offloads(skb); } static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) @@ -68,9 +69,14 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) if (!fou) return 1; - fou_recv_pull(skb, sizeof(struct udphdr)); + if (fou_recv_pull(skb, sizeof(struct udphdr))) + goto drop; return -fou->protocol; + +drop: + kfree_skb(skb); + return 0; } static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, @@ -170,6 +176,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); + if (iptunnel_pull_offloads(skb)) + goto drop; + return -guehdr->proto_ctype; drop: diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index d27276f6f8dd..02dd990af542 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -114,7 +114,8 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); skb_scrub_packet(skb, xnet); - return 0; + + return iptunnel_pull_offloads(skb); } EXPORT_SYMBOL_GPL(iptunnel_pull_header); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index f45b8ffc2840..83384308d032 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -681,14 +681,16 @@ static int ipip6_rcv(struct sk_buff *skb) skb->mac_header = skb->network_header; skb_reset_network_header(skb); IPCB(skb)->flags = 0; - skb->protocol = htons(ETH_P_IPV6); + skb->dev = tunnel->dev; if (packet_is_spoofed(skb, iph, tunnel)) { tunnel->dev->stats.rx_errors++; goto out; } - __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); + if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6), + !net_eq(tunnel->net, dev_net(tunnel->dev)))) + goto out; err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { -- cgit v1.2.3 From ebf918cf9922748b9d4d9bf6a2620530cdc5a1d0 Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Sat, 19 Mar 2016 11:21:14 -0700 Subject: isdn: Use ktime_t instead of 'struct timeval' 'struct timeval' uses 32-bit representation for seconds which will overflow in year 2038 and beyond. mISDN/clock.c needs to compute and store elapsed time in intervals of 125 microseconds. This patch replaces the usage of 'struct timeval' with 64-bit ktime_t which is y2038 safe. The patch also replaces do_gettimeofday() (wall-clock time) with ktime_get() (monotonic time) since we only care about elapsed time here. Signed-off-by: Tina Ruchandani Suggested-by: Arnd Bergmnann Suggested-by: David Miller Signed-off-by: David S. Miller --- drivers/isdn/mISDN/clock.c | 69 +++++++++++++++++++--------------------------- include/linux/mISDNif.h | 2 +- 2 files changed, 30 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/drivers/isdn/mISDN/clock.c b/drivers/isdn/mISDN/clock.c index 693fb7c9b59a..f8f659f1ce1b 100644 --- a/drivers/isdn/mISDN/clock.c +++ b/drivers/isdn/mISDN/clock.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -45,15 +46,15 @@ static u_int *debug; static LIST_HEAD(iclock_list); static DEFINE_RWLOCK(iclock_lock); static u16 iclock_count; /* counter of last clock */ -static struct timeval iclock_tv; /* time stamp of last clock */ -static int iclock_tv_valid; /* already received one timestamp */ +static ktime_t iclock_timestamp; /* time stamp of last clock */ +static int iclock_timestamp_valid; /* already received one timestamp */ static struct mISDNclock *iclock_current; void mISDN_init_clock(u_int *dp) { debug = dp; - do_gettimeofday(&iclock_tv); + iclock_timestamp = ktime_get(); } static void @@ -86,7 +87,7 @@ select_iclock(void) } if (bestclock != iclock_current) { /* no clock received yet */ - iclock_tv_valid = 0; + iclock_timestamp_valid = 0; } iclock_current = bestclock; } @@ -139,12 +140,11 @@ mISDN_unregister_clock(struct mISDNclock *iclock) EXPORT_SYMBOL(mISDN_unregister_clock); void -mISDN_clock_update(struct mISDNclock *iclock, int samples, struct timeval *tv) +mISDN_clock_update(struct mISDNclock *iclock, int samples, ktime_t *timestamp) { u_long flags; - struct timeval tv_now; - time_t elapsed_sec; - int elapsed_8000th; + ktime_t timestamp_now; + u16 delta; write_lock_irqsave(&iclock_lock, flags); if (iclock_current != iclock) { @@ -156,33 +156,27 @@ mISDN_clock_update(struct mISDNclock *iclock, int samples, struct timeval *tv) write_unlock_irqrestore(&iclock_lock, flags); return; } - if (iclock_tv_valid) { + if (iclock_timestamp_valid) { /* increment sample counter by given samples */ iclock_count += samples; - if (tv) { /* tv must be set, if function call is delayed */ - iclock_tv.tv_sec = tv->tv_sec; - iclock_tv.tv_usec = tv->tv_usec; - } else - do_gettimeofday(&iclock_tv); + if (timestamp) { /* timestamp must be set, if function call is delayed */ + iclock_timestamp = *timestamp; + } else { + iclock_timestamp = ktime_get(); + } } else { /* calc elapsed time by system clock */ - if (tv) { /* tv must be set, if function call is delayed */ - tv_now.tv_sec = tv->tv_sec; - tv_now.tv_usec = tv->tv_usec; - } else - do_gettimeofday(&tv_now); - elapsed_sec = tv_now.tv_sec - iclock_tv.tv_sec; - elapsed_8000th = (tv_now.tv_usec / 125) - - (iclock_tv.tv_usec / 125); - if (elapsed_8000th < 0) { - elapsed_sec -= 1; - elapsed_8000th += 8000; + if (timestamp) { /* timestamp must be set, if function call is delayed */ + timestamp_now = *timestamp; + } else { + timestamp_now = ktime_get(); } + delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp), + (NSEC_PER_SEC / 8000)); /* add elapsed time to counter and set new timestamp */ - iclock_count += elapsed_sec * 8000 + elapsed_8000th; - iclock_tv.tv_sec = tv_now.tv_sec; - iclock_tv.tv_usec = tv_now.tv_usec; - iclock_tv_valid = 1; + iclock_count += delta; + iclock_timestamp = timestamp_now; + iclock_timestamp_valid = 1; if (*debug & DEBUG_CLOCK) printk("Received first clock from source '%s'.\n", iclock_current ? iclock_current->name : "nothing"); @@ -195,22 +189,17 @@ unsigned short mISDN_clock_get(void) { u_long flags; - struct timeval tv_now; - time_t elapsed_sec; - int elapsed_8000th; + ktime_t timestamp_now; + u16 delta; u16 count; read_lock_irqsave(&iclock_lock, flags); /* calc elapsed time by system clock */ - do_gettimeofday(&tv_now); - elapsed_sec = tv_now.tv_sec - iclock_tv.tv_sec; - elapsed_8000th = (tv_now.tv_usec / 125) - (iclock_tv.tv_usec / 125); - if (elapsed_8000th < 0) { - elapsed_sec -= 1; - elapsed_8000th += 8000; - } + timestamp_now = ktime_get(); + delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp), + (NSEC_PER_SEC / 8000)); /* add elapsed time to counter */ - count = iclock_count + elapsed_sec * 8000 + elapsed_8000th; + count = iclock_count + delta; read_unlock_irqrestore(&iclock_lock, flags); return count; } diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 246a3529ecf6..ac02c54520e9 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -596,7 +596,7 @@ static inline struct mISDNdevice *dev_to_mISDN(struct device *dev) } extern void set_channel_address(struct mISDNchannel *, u_int, u_int); -extern void mISDN_clock_update(struct mISDNclock *, int, struct timeval *); +extern void mISDN_clock_update(struct mISDNclock *, int, ktime_t *); extern unsigned short mISDN_clock_get(void); extern const char *mISDNDevName4ch(struct mISDNchannel *); -- cgit v1.2.3 From 5692d7ea4183b8dd5a49d73d6a4436aa22929b7b Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Mon, 21 Mar 2016 17:39:18 +0100 Subject: vxlan: fix sparse warnings Sparse reports false positives for the header manipulation inlines. Annotate them correctly. Tested by sparse on a little endian and big endian machine. Fixes: 54bfd872bf16d ("vxlan: keep flags and vni in network byte order") Reported-by: kbuild test robot Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/vxlan.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index a763c96ecde4..73ed2e951c02 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -271,36 +271,36 @@ static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb) static inline __be32 vxlan_vni(__be32 vni_field) { #if defined(__BIG_ENDIAN) - return vni_field >> 8; + return (__force __be32)((__force u32)vni_field >> 8); #else - return (vni_field & VXLAN_VNI_MASK) << 8; + return (__force __be32)((__force u32)(vni_field & VXLAN_VNI_MASK) << 8); #endif } static inline __be32 vxlan_vni_field(__be32 vni) { #if defined(__BIG_ENDIAN) - return vni << 8; + return (__force __be32)((__force u32)vni << 8); #else - return vni >> 8; + return (__force __be32)((__force u32)vni >> 8); #endif } static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id) { #if defined(__BIG_ENDIAN) - return tun_id; + return (__force __be32)tun_id; #else - return tun_id >> 32; + return (__force __be32)((__force u64)tun_id >> 32); #endif } static inline __be64 vxlan_vni_to_tun_id(__be32 vni) { #if defined(__BIG_ENDIAN) - return (__be64)vni; + return (__force __be64)vni; #else - return (__be64)vni << 32; + return (__force __be64)((u64)(__force u32)vni << 32); #endif } -- cgit v1.2.3 From c70ce028e834f8e51306217dbdbd441d851c64d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Mar 2016 09:55:10 -0700 Subject: net/rtnetlink: add IFLA_GSO_MAX_SEGS and IFLA_GSO_MAX_SIZE attributes It can be useful to report dev->gso_max_segs and dev->gso_max_size so that "ip -d link" can display them to help debugging. For the moment, these attributes are read-only. Signed-off-by: Eric Dumazet Cc: Petri Gynther Cc: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 2 ++ net/core/rtnetlink.c | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8e3f88fa5b59..76d315ca597d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -153,6 +153,8 @@ enum { IFLA_LINK_NETNSID, IFLA_PHYS_PORT_NAME, IFLA_PROTO_DOWN, + IFLA_GSO_MAX_SEGS, + IFLA_GSO_MAX_SIZE, __IFLA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d2d9e5ebf58e..a69cd0c097b5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -895,6 +895,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_PROMISCUITY */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + + nla_total_size(4) /* IFLA_MAX_GSO_SEGS */ + + nla_total_size(4) /* IFLA_MAX_GSO_SIZE */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ @@ -1223,6 +1225,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u32(skb, IFLA_GROUP, dev->group) || nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || + nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || + nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif -- cgit v1.2.3 From 6d0e24cd07f7e97015d958299fda535e459d2c99 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Mon, 21 Mar 2016 20:58:28 +0000 Subject: net: add missing descriptions in net_device_priv_flags The flags IFF_XMIT_DST_RELEASE_PERM, IFF_IPVLAN_MASTER and IFF_IPVLAN_SLAVE are missing descriptions for the Documentation. Adding them. Signed-off-by: Luis de Bethencourt Suggested-by: Benjamin Poirier Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f9eebd518545..fd07734fb277 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1320,6 +1320,10 @@ struct net_device_ops { * @IFF_LIVE_ADDR_CHANGE: device supports hardware address * change when it's running * @IFF_MACVLAN: Macvlan device + * @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account + * underlying stacked devices + * @IFF_IPVLAN_MASTER: IPvlan master device + * @IFF_IPVLAN_SLAVE: IPvlan slave device * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master -- cgit v1.2.3 From 62d885fe73e0032004e756b25101ac6c9ebf85dd Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 21 Mar 2016 14:08:28 -0700 Subject: net: Add missing kernel-doc for netdev ptype lists .//include/linux/netdevice.h:1826: warning: No description found for parameter 'ptype_all' .//include/linux/netdevice.h:1826: warning: No description found for parameter 'ptype_specific' Introduced by commit 7866a621043f ("dev: add per net_device packet type chains") Cc: Salam Noureddine Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fd07734fb277..3f6385d27b81 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1418,6 +1418,8 @@ enum netdev_priv_flags { * @unreg_list: List entry, that is used, when we are unregistering the * device, see the function unregister_netdev * @close_list: List entry, that is used, when we are closing the device + * @ptype_all: Device-specific packet handlers for all protocols + * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding * @all_adj_list: All linked devices, *including* neighbours -- cgit v1.2.3 From 5f2d472450c94402627f54b603d271076f3f97c2 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Mon, 21 Mar 2016 10:15:34 -0700 Subject: ethtool: minor doc update Updates: commit 793cf87de9d1 ("ethtool: Set cmd field in ETHTOOL_GLINKSETTINGS response to wrong nwords") Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 2835b07416b7..9222db8ccccc 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1648,9 +1648,9 @@ enum ethtool_reset_flags { * %ETHTOOL_GLINKSETTINGS: on entry, number of words passed by user * (>= 0); on return, if handshake in progress, negative if * request size unsupported by kernel: absolute value indicates - * kernel recommended size and cmd field is 0, as well as all the - * other fields; otherwise (handshake completed), strictly - * positive to indicate size used by kernel and cmd field is + * kernel expected size and all the other fields but cmd + * are 0; otherwise (handshake completed), strictly positive + * to indicate size used by kernel and cmd field stays * %ETHTOOL_GLINKSETTINGS, all other fields populated by driver. For * %ETHTOOL_SLINKSETTINGS: must be valid on entry, ie. a positive * value returned previously by %ETHTOOL_GLINKSETTINGS, otherwise -- cgit v1.2.3 From db219baf19a7024ec1db71f7c3c2663d49de1818 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Mon, 21 Mar 2016 17:37:28 +0000 Subject: ipv6: remove unused in6_addr struct struct in6_addr isn't used anymore in inet6_connection_sock.h, removing the forward declaration. Fixes: 1b33bc3e9e90 ("ipv6: remove obsolete inet6 functions") Signed-off-by: Luis de Bethencourt Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 064cfbe639d0..954ad6bfb56a 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -15,7 +15,6 @@ #include -struct in6_addr; struct inet_bind_bucket; struct request_sock; struct sk_buff; -- cgit v1.2.3 From 5e82b4b2a0015ed0a659b22ef2ee3409a3c39e54 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 23 Mar 2016 13:47:23 -0500 Subject: net: Fix typos and whitespace. Fix typos. Capitalize CPU, NAPI, RCU consistently. Align structure indentation. No functional change intended; only comment and whitespace changes. Signed-off-by: Bjorn Helgaas Signed-off-by: David S. Miller --- include/linux/netdevice.h | 215 +++++++++++++++++++++++----------------------- 1 file changed, 106 insertions(+), 109 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3f6385d27b81..a675205df0f1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -81,8 +81,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, * function. Real network devices commonly used with qdiscs should only return * the driver transmit return codes though - when qdiscs are used, the actual * transmission happens asynchronously, so the value is not propagated to - * higher layers. Virtual network devices transmit synchronously, in this case - * the driver transmit return codes are consumed by dev_queue_xmit(), all + * higher layers. Virtual network devices transmit synchronously; in this case + * the driver transmit return codes are consumed by dev_queue_xmit(), and all * others are propagated to higher layers. */ @@ -129,7 +129,7 @@ static inline bool dev_xmit_complete(int rc) } /* - * Compute the worst case header length according to the protocols + * Compute the worst-case header length according to the protocols * used. */ @@ -246,7 +246,7 @@ struct hh_cache { unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; }; -/* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much. +/* Reserve HH_DATA_MOD byte-aligned hard_header_len, but at least that much. * Alternative is: * dev->hard_header_len ? (dev->hard_header_len + * (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0 @@ -272,7 +272,7 @@ struct header_ops { }; /* These flag bits are private to the generic network queueing - * layer, they may not be explicitly referenced by any other + * layer; they may not be explicitly referenced by any other * code. */ @@ -286,7 +286,7 @@ enum netdev_state_t { /* - * This structure holds at boot time configured netdevice settings. They + * This structure holds boot-time configured netdevice settings. They * are then used in the device probing. */ struct netdev_boot_setup { @@ -304,7 +304,7 @@ struct napi_struct { /* The poll_list must only be managed by the entity which * changes the state of the NAPI_STATE_SCHED bit. This means * whoever atomically sets that bit can add this napi_struct - * to the per-cpu poll_list, and whoever clears that bit + * to the per-CPU poll_list, and whoever clears that bit * can remove from the list right before clearing the bit. */ struct list_head poll_list; @@ -350,7 +350,7 @@ typedef enum gro_result gro_result_t; * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in * case skb->dev was changed by rx_handler. * @RX_HANDLER_EXACT: Force exact delivery, no wildcard. - * @RX_HANDLER_PASS: Do nothing, passe the skb as if no rx_handler was called. + * @RX_HANDLER_PASS: Do nothing, pass the skb as if no rx_handler was called. * * rx_handlers are functions called from inside __netif_receive_skb(), to do * special processing of the skb, prior to delivery to protocol handlers. @@ -365,19 +365,19 @@ typedef enum gro_result gro_result_t; * Upon return, rx_handler is expected to tell __netif_receive_skb() what to * do with the skb. * - * If the rx_handler consumed to skb in some way, it should return + * If the rx_handler consumed the skb in some way, it should return * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for - * the skb to be delivered in some other ways. + * the skb to be delivered in some other way. * * If the rx_handler changed skb->dev, to divert the skb to another * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the * new device will be called if it exists. * - * If the rx_handler consider the skb should be ignored, it should return + * If the rx_handler decides the skb should be ignored, it should return * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that * are registered on exact device (ptype->dev == skb->dev). * - * If the rx_handler didn't changed skb->dev, but want the skb to be normally + * If the rx_handler didn't change skb->dev, but wants the skb to be normally * delivered, it should return RX_HANDLER_PASS. * * A device without a registered rx_handler will behave as if rx_handler @@ -402,11 +402,11 @@ static inline bool napi_disable_pending(struct napi_struct *n) } /** - * napi_schedule_prep - check if napi can be scheduled - * @n: napi context + * napi_schedule_prep - check if NAPI can be scheduled + * @n: NAPI context * * Test if NAPI routine is already running, and if not mark - * it as running. This is used as a condition variable + * it as running. This is used as a condition variable to * insure only one NAPI poll instance runs. We also make * sure there is no pending NAPI disable. */ @@ -418,7 +418,7 @@ static inline bool napi_schedule_prep(struct napi_struct *n) /** * napi_schedule - schedule NAPI poll - * @n: napi context + * @n: NAPI context * * Schedule NAPI poll routine to be called if it is not already * running. @@ -431,7 +431,7 @@ static inline void napi_schedule(struct napi_struct *n) /** * napi_schedule_irqoff - schedule NAPI poll - * @n: napi context + * @n: NAPI context * * Variant of napi_schedule(), assuming hard irqs are masked. */ @@ -455,7 +455,7 @@ void __napi_complete(struct napi_struct *n); void napi_complete_done(struct napi_struct *n, int work_done); /** * napi_complete - NAPI processing complete - * @n: napi context + * @n: NAPI context * * Mark NAPI processing as complete. * Consider using napi_complete_done() instead. @@ -467,32 +467,32 @@ static inline void napi_complete(struct napi_struct *n) /** * napi_hash_add - add a NAPI to global hashtable - * @napi: napi context + * @napi: NAPI context * - * generate a new napi_id and store a @napi under it in napi_hash - * Used for busy polling (CONFIG_NET_RX_BUSY_POLL) + * Generate a new napi_id and store a @napi under it in napi_hash. + * Used for busy polling (CONFIG_NET_RX_BUSY_POLL). * Note: This is normally automatically done from netif_napi_add(), - * so might disappear in a future linux version. + * so might disappear in a future Linux version. */ void napi_hash_add(struct napi_struct *napi); /** * napi_hash_del - remove a NAPI from global table - * @napi: napi context + * @napi: NAPI context * - * Warning: caller must observe rcu grace period + * Warning: caller must observe RCU grace period * before freeing memory containing @napi, if * this function returns true. * Note: core networking stack automatically calls it - * from netif_napi_del() + * from netif_napi_del(). * Drivers might want to call this helper to combine all - * the needed rcu grace periods into a single one. + * the needed RCU grace periods into a single one. */ bool napi_hash_del(struct napi_struct *napi); /** * napi_disable - prevent NAPI from scheduling - * @n: napi context + * @n: NAPI context * * Stop NAPI from being scheduled on this context. * Waits till any outstanding processing completes. @@ -501,7 +501,7 @@ void napi_disable(struct napi_struct *n); /** * napi_enable - enable NAPI scheduling - * @n: napi context + * @n: NAPI context * * Resume NAPI from being scheduled on this context. * Must be paired with napi_disable. @@ -516,7 +516,7 @@ static inline void napi_enable(struct napi_struct *n) /** * napi_synchronize - wait until NAPI is not running - * @n: napi context + * @n: NAPI context * * Wait until NAPI is done being scheduled on this context. * Waits till any outstanding processing completes but @@ -559,7 +559,7 @@ enum netdev_queue_state_t { struct netdev_queue { /* - * read mostly part + * read-mostly part */ struct net_device *dev; struct Qdisc __rcu *qdisc; @@ -571,7 +571,7 @@ struct netdev_queue { int numa_node; #endif /* - * write mostly part + * write-mostly part */ spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; @@ -648,11 +648,11 @@ struct rps_dev_flow_table { /* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). - * Each entry is a 32bit value. Upper part is the high order bits - * of flow hash, lower part is cpu number. + * Each entry is a 32bit value. Upper part is the high-order bits + * of flow hash, lower part is CPU number. * rps_cpu_mask is used to partition the space, depending on number of - * possible cpus : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 - * For example, if 64 cpus are possible, rps_cpu_mask = 0x3f, + * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 + * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { @@ -674,7 +674,7 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, unsigned int index = hash & table->mask; u32 val = hash & ~rps_cpu_mask; - /* We only give a hint, preemption can change cpu under us */ + /* We only give a hint, preemption can change CPU under us */ val |= raw_smp_processor_id(); if (table->ents[index] != val) @@ -807,21 +807,21 @@ struct tc_to_netdev { * optional and can be filled with a null pointer. * * int (*ndo_init)(struct net_device *dev); - * This function is called once when network device is registered. - * The network device can use this to any late stage initializaton - * or semantic validattion. It can fail with an error code which will - * be propogated back to register_netdev + * This function is called once when a network device is registered. + * The network device can use this for any late stage initialization + * or semantic validation. It can fail with an error code which will + * be propagated back to register_netdev. * * void (*ndo_uninit)(struct net_device *dev); * This function is called when device is unregistered or when registration * fails. It is not called if init fails. * * int (*ndo_open)(struct net_device *dev); - * This function is called when network device transistions to the up + * This function is called when a network device transitions to the up * state. * * int (*ndo_stop)(struct net_device *dev); - * This function is called when network device transistions to the down + * This function is called when a network device transitions to the down * state. * * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, @@ -832,7 +832,7 @@ struct tc_to_netdev { * corner cases, but the stack really does a non-trivial amount * of useless work if you return NETDEV_TX_BUSY. * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) - * Required can not be NULL. + * Required; cannot be NULL. * * netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); @@ -842,34 +842,34 @@ struct tc_to_netdev { * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); - * Called to decide which queue to when device supports multiple + * Called to decide which queue to use when device supports multiple * transmit queues. * * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); * This function is called to allow device receiver to make - * changes to configuration when multicast or promiscious is enabled. + * changes to configuration when multicast or promiscuous is enabled. * * void (*ndo_set_rx_mode)(struct net_device *dev); * This function is called device changes address list filtering. * If driver handles unicast address filtering, it should set - * IFF_UNICAST_FLT to its priv_flags. + * IFF_UNICAST_FLT in its priv_flags. * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address * needs to be changed. If this interface is not defined, the - * mac address can not be changed. + * MAC address can not be changed. * * int (*ndo_validate_addr)(struct net_device *dev); * Test if Media Access Control address is valid for the device. * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); - * Called when a user request an ioctl which can't be handled by - * the generic interface code. If not defined ioctl's return + * Called when a user requests an ioctl which can't be handled by + * the generic interface code. If not defined ioctls return * not supported error code. * * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); * Used to set network devices bus interface parameters. This interface - * is retained for legacy reason, new devices should use the bus + * is retained for legacy reasons; new devices should use the bus * interface (PCI) for low level management. * * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); @@ -878,7 +878,7 @@ struct tc_to_netdev { * will return an error. * * void (*ndo_tx_timeout)(struct net_device *dev); - * Callback uses when the transmitter has not made any progress + * Callback used when the transmitter has not made any progress * for dev->watchdog ticks. * * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, @@ -896,11 +896,11 @@ struct tc_to_netdev { * neither operation. * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); - * If device support VLAN filtering this function is called when a + * If device supports VLAN filtering this function is called when a * VLAN id is registered. * * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid); - * If device support VLAN filtering this function is called when a + * If device supports VLAN filtering this function is called when a * VLAN id is unregistered. * * void (*ndo_poll_controller)(struct net_device *dev); @@ -920,7 +920,7 @@ struct tc_to_netdev { * * Enable or disable the VF ability to query its RSS Redirection Table and * Hash Key. This is needed since on some devices VF share this information - * with PF and querying it may adduce a theoretical security risk. + * with PF and querying it may introduce a theoretical security risk. * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) @@ -1030,20 +1030,20 @@ struct tc_to_netdev { * * void (*ndo_add_vxlan_port)(struct net_device *dev, * sa_family_t sa_family, __be16 port); - * Called by vxlan to notiy a driver about the UDP port and socket - * address family that vxlan is listnening to. It is called only when + * Called by vxlan to notify a driver about the UDP port and socket + * address family that vxlan is listening to. It is called only when * a new port starts listening. The operation is protected by the * vxlan_net->sock_lock. * * void (*ndo_add_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); + * sa_family_t sa_family, __be16 port); * Called by geneve to notify a driver about the UDP port and socket * address family that geneve is listnening to. It is called only when * a new port starts listening. The operation is protected by the * geneve_net->sock_lock. * * void (*ndo_del_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); + * sa_family_t sa_family, __be16 port); * Called by geneve to notify the driver about a UDP port and socket * address family that geneve is not listening to anymore. The operation * is protected by the geneve_net->sock_lock. @@ -1072,9 +1072,9 @@ struct tc_to_netdev { * Callback to use for xmit over the accelerated station. This * is used in place of ndo_start_xmit on accelerated net * devices. - * netdev_features_t (*ndo_features_check) (struct sk_buff *skb, - * struct net_device *dev - * netdev_features_t features); + * netdev_features_t (*ndo_features_check)(struct sk_buff *skb, + * struct net_device *dev + * netdev_features_t features); * Called by core transmit path to determine if device is capable of * performing offload operations on a given packet. This is to give * the device an opportunity to implement any restrictions that cannot @@ -1088,7 +1088,7 @@ struct tc_to_netdev { * int (*ndo_get_iflink)(const struct net_device *dev); * Called to get the iflink value of this device. * void (*ndo_change_proto_down)(struct net_device *dev, - * bool proto_down); + * bool proto_down); * This function is used to pass protocol port error state information * to the switch driver. The switch driver can react to the proto_down * by doing a phys down on the associated switch port. @@ -1100,7 +1100,7 @@ struct tc_to_netdev { * This function is used to specify the headroom that the skb must * consider when allocation skb during packet reception. Setting * appropriate rx headroom value allows avoiding skb head copy on - * forward. Setting a negative value reset the rx headroom to the + * forward. Setting a negative value resets the rx headroom to the * default value. * */ @@ -1296,7 +1296,7 @@ struct net_device_ops { * * These are the &struct net_device, they are only set internally * by drivers and used in the kernel. These flags are invisible to - * userspace, this means that the order of these flags can change + * userspace; this means that the order of these flags can change * during any kernel release. * * You should have a pretty good reason to be extending these flags. @@ -1414,10 +1414,10 @@ enum netdev_priv_flags { * * @state: Generic network queuing layer state, see netdev_state_t * @dev_list: The global list of network devices - * @napi_list: List entry, that is used for polling napi devices - * @unreg_list: List entry, that is used, when we are unregistering the - * device, see the function unregister_netdev - * @close_list: List entry, that is used, when we are closing the device + * @napi_list: List entry used for polling NAPI devices + * @unreg_list: List entry when we are unregistering the + * device; see the function unregister_netdev + * @close_list: List entry used when we are closing the device * @ptype_all: Device-specific packet handlers for all protocols * @ptype_specific: Device-specific, protocol-specific packet handlers * @@ -1437,7 +1437,7 @@ enum netdev_priv_flags { * @mpls_features: Mask of features inheritable by MPLS * * @ifindex: interface index - * @group: The group, that the device belongs to + * @group: The group the device belongs to * * @stats: Statistics struct, which was left as a legacy, use * rtnl_link_stats64 instead @@ -1491,7 +1491,7 @@ enum netdev_priv_flags { * @dev_port: Used to differentiate devices that share * the same function * @addr_list_lock: XXX: need comments on this one - * @uc_promisc: Counter, that indicates, that promiscuous mode + * @uc_promisc: Counter that indicates promiscuous mode * has been enabled due to the need to listen to * additional unicast addresses in a device that * does not implement ndo_set_rx_mode() @@ -1499,9 +1499,9 @@ enum netdev_priv_flags { * @mc: multicast mac addresses * @dev_addrs: list of device hw addresses * @queues_kset: Group of all Kobjects in the Tx and RX queues - * @promiscuity: Number of times, the NIC is told to work in - * Promiscuous mode, if it becomes 0 the NIC will - * exit from working in Promiscuous mode + * @promiscuity: Number of times the NIC is told to work in + * promiscuous mode; if it becomes 0 the NIC will + * exit promiscuous mode * @allmulti: Counter, enables or disables allmulticast mode * * @vlan_info: VLAN info @@ -1547,7 +1547,7 @@ enum netdev_priv_flags { * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo: Represents the timeout that is used by - * the watchdog ( see dev_watchdog() ) + * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers * * @pcpu_refcnt: Number of references to this device @@ -1664,8 +1664,8 @@ struct net_device { atomic_long_t rx_nohandler; #ifdef CONFIG_WIRELESS_EXT - const struct iw_handler_def * wireless_handlers; - struct iw_public_data * wireless_data; + const struct iw_handler_def *wireless_handlers; + struct iw_public_data *wireless_data; #endif const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; @@ -1718,7 +1718,7 @@ struct net_device { unsigned int allmulti; - /* Protocol specific pointers */ + /* Protocol-specific pointers */ #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; @@ -1748,13 +1748,11 @@ struct net_device { /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; - #ifdef CONFIG_SYSFS struct netdev_rx_queue *_rx; unsigned int num_rx_queues; unsigned int real_num_rx_queues; - #endif unsigned long gro_flush_timeout; @@ -1846,7 +1844,7 @@ struct net_device { struct garp_port __rcu *garp_port; struct mrp_port __rcu *mrp_port; - struct device dev; + struct device dev; const struct attribute_group *sysfs_groups[4]; const struct attribute_group *sysfs_rx_queue_group; @@ -1861,9 +1859,9 @@ struct net_device { #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif - u8 num_tc; - struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; - u8 prio_tc_map[TC_BITMASK + 1]; + u8 num_tc; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; + u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) unsigned int fcoe_ddp_xid; @@ -1871,9 +1869,9 @@ struct net_device { #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif - struct phy_device *phydev; - struct lock_class_key *qdisc_tx_busylock; - bool proto_down; + struct phy_device *phydev; + struct lock_class_key *qdisc_tx_busylock; + bool proto_down; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2021,7 +2019,7 @@ static inline void *netdev_priv(const struct net_device *dev) /* Set the sysfs device type for the network logical device to allow * fine-grained identification of different network device types. For - * example Ethernet, Wirelss LAN, Bluetooth, WiMAX etc. + * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc. */ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) @@ -2031,22 +2029,22 @@ static inline void *netdev_priv(const struct net_device *dev) #define NAPI_POLL_WEIGHT 64 /** - * netif_napi_add - initialize a napi context + * netif_napi_add - initialize a NAPI context * @dev: network device - * @napi: napi context + * @napi: NAPI context * @poll: polling function * @weight: default weight * - * netif_napi_add() must be used to initialize a napi context prior to calling - * *any* of the other napi related functions. + * netif_napi_add() must be used to initialize a NAPI context prior to calling + * *any* of the other NAPI-related functions. */ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight); /** - * netif_tx_napi_add - initialize a napi context + * netif_tx_napi_add - initialize a NAPI context * @dev: network device - * @napi: napi context + * @napi: NAPI context * @poll: polling function * @weight: default weight * @@ -2064,22 +2062,22 @@ static inline void netif_tx_napi_add(struct net_device *dev, } /** - * netif_napi_del - remove a napi context - * @napi: napi context + * netif_napi_del - remove a NAPI context + * @napi: NAPI context * - * netif_napi_del() removes a napi context from the network device napi list + * netif_napi_del() removes a NAPI context from the network device NAPI list */ void netif_napi_del(struct napi_struct *napi); struct napi_gro_cb { /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ - void *frag0; + void *frag0; /* Length of frag0. */ unsigned int frag0_len; /* This indicates where we are processing relative to skb->data. */ - int data_offset; + int data_offset; /* This is non-zero if the packet cannot be merged with the new skb. */ u16 flush; @@ -2175,7 +2173,7 @@ struct udp_offload { struct udp_offload_callbacks callbacks; }; -/* often modified stats are per cpu, other are shared (netdev->stats) */ +/* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; u64 rx_bytes; @@ -2272,7 +2270,7 @@ struct netdev_notifier_changeupper_info { struct netdev_notifier_info info; /* must be first */ struct net_device *upper_dev; /* new upper dev */ bool master; /* is upper dev master */ - bool linking; /* is the nofication for link or unlink */ + bool linking; /* is the notification for link or unlink */ void *upper_info; /* upper dev info */ }; @@ -2737,7 +2735,7 @@ extern int netdev_flow_limit_table_len; #endif /* CONFIG_NET_FLOW_LIMIT */ /* - * Incoming packets are placed on per-cpu queues + * Incoming packets are placed on per-CPU queues */ struct softnet_data { struct list_head poll_list; @@ -2907,7 +2905,7 @@ netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue) * @dev_queue: pointer to transmit queue * * BQL enabled drivers might use this helper in their ndo_start_xmit(), - * to give appropriate hint to the cpu. + * to give appropriate hint to the CPU. */ static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_queue) { @@ -2921,7 +2919,7 @@ static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_que * @dev_queue: pointer to transmit queue * * BQL enabled drivers might use this helper in their TX completion path, - * to give appropriate hint to the cpu. + * to give appropriate hint to the CPU. */ static inline void netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_queue) { @@ -3060,7 +3058,7 @@ static inline bool netif_running(const struct net_device *dev) } /* - * Routines to manage the subqueues on a device. We only need start + * Routines to manage the subqueues on a device. We only need start, * stop, and a check if it's stopped. All other device management is * done at the overall netdevice level. * Also test the device if we're multiqueue. @@ -3344,7 +3342,6 @@ void netif_carrier_off(struct net_device *dev); * in a "pending" state, waiting for some external event. For "on- * demand" interfaces, this new state identifies the situation where the * interface is waiting for events to place it in the up state. - * */ static inline void netif_dormant_on(struct net_device *dev) { @@ -3679,7 +3676,7 @@ void dev_uc_init(struct net_device *dev); * * Add newly added addresses to the interface, and release * addresses that have been deleted. - **/ + */ static inline int __dev_uc_sync(struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), @@ -3695,7 +3692,7 @@ static inline int __dev_uc_sync(struct net_device *dev, * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_uc_sync(). - **/ + */ static inline void __dev_uc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) @@ -3723,7 +3720,7 @@ void dev_mc_init(struct net_device *dev); * * Add newly added addresses to the interface, and release * addresses that have been deleted. - **/ + */ static inline int __dev_mc_sync(struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), @@ -3739,7 +3736,7 @@ static inline int __dev_mc_sync(struct net_device *dev, * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_mc_sync(). - **/ + */ static inline void __dev_mc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) -- cgit v1.2.3 From 6579a023a881e0592ce9a98fdfcbcc0a2a096aa7 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 23 Mar 2016 17:59:51 +0800 Subject: net: ping: make ping_v6_sendmsg static As ping_v6_sendmsg is used only in this file, making it static The body of "pingv6_prot" and "pingv6_protosw" were moved at the middle of the file, to avoid having to declare some static prototypes. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/ping.h | 1 - net/ipv6/ping.c | 59 +++++++++++++++++++++++++++--------------------------- 2 files changed, 29 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/net/ping.h b/include/net/ping.h index 5fd7cc244833..4cd90d6b5c25 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -79,7 +79,6 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); -int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); bool ping_rcv(struct sk_buff *skb); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 263a5164a6f5..c382db7a2e73 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -26,35 +26,6 @@ #include #include -struct proto pingv6_prot = { - .name = "PINGv6", - .owner = THIS_MODULE, - .init = ping_init_sock, - .close = ping_close, - .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, - .setsockopt = ipv6_setsockopt, - .getsockopt = ipv6_getsockopt, - .sendmsg = ping_v6_sendmsg, - .recvmsg = ping_recvmsg, - .bind = ping_bind, - .backlog_rcv = ping_queue_rcv_skb, - .hash = ping_hash, - .unhash = ping_unhash, - .get_port = ping_get_port, - .obj_size = sizeof(struct raw6_sock), -}; -EXPORT_SYMBOL_GPL(pingv6_prot); - -static struct inet_protosw pingv6_protosw = { - .type = SOCK_DGRAM, - .protocol = IPPROTO_ICMPV6, - .prot = &pingv6_prot, - .ops = &inet6_dgram_ops, - .flags = INET_PROTOSW_REUSE, -}; - - /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -77,7 +48,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } -int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) +static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -192,6 +163,34 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return len; } +struct proto pingv6_prot = { + .name = "PINGv6", + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, + .connect = ip6_datagram_connect_v6_only, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .flags = INET_PROTOSW_REUSE, +}; + #ifdef CONFIG_PROC_FS static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) { -- cgit v1.2.3