diff options
Diffstat (limited to 'include/net/inet_hashtables.h')
-rw-r--r-- | include/net/inet_hashtables.h | 145 |
1 files changed, 84 insertions, 61 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f72ec113ae56..a0887b70967b 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -90,11 +90,32 @@ struct inet_bind_bucket { struct hlist_head owners; }; +struct inet_bind2_bucket { + possible_net_t ib_net; + int l3mdev; + unsigned short port; + union { +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr v6_rcv_saddr; +#endif + __be32 rcv_saddr; + }; + /* Node in the inet2_bind_hashbucket chain */ + struct hlist_node node; + /* List of sockets hashed to this bucket */ + struct hlist_head owners; +}; + static inline struct net *ib_net(struct inet_bind_bucket *ib) { return read_pnet(&ib->ib_net); } +static inline struct net *ib2_net(struct inet_bind2_bucket *ib) +{ + return read_pnet(&ib->ib_net); +} + #define inet_bind_bucket_for_each(tb, head) \ hlist_for_each_entry(tb, head, node) @@ -103,6 +124,15 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; +/* This is synchronized using the inet_bind_hashbucket's spinlock. + * Instead of having separate spinlocks, the inet_bind2_hashbucket can share + * the inet_bind_hashbucket's given that in every case where the bhash2 table + * is useful, a lookup in the bhash table also occurs. + */ +struct inet_bind2_hashbucket { + struct hlist_head chain; +}; + /* Sockets can be hashed in established or listening table. * We must use different 'nulls' end-of-chain value for all hash buckets : * A socket might transition from ESTABLISH to LISTEN state without @@ -111,11 +141,7 @@ struct inet_bind_hashbucket { #define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; - unsigned int count; - union { - struct hlist_head head; - struct hlist_nulls_head nulls_head; - }; + struct hlist_nulls_head nulls_head; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ @@ -138,37 +164,19 @@ struct inet_hashinfo { */ struct kmem_cache *bind_bucket_cachep; struct inet_bind_hashbucket *bhash; + /* The 2nd binding table hashed by port and address. + * This is used primarily for expediting the resolution of bind + * conflicts. + */ + struct kmem_cache *bind2_bucket_cachep; + struct inet_bind2_hashbucket *bhash2; unsigned int bhash_size; /* The 2nd listener table hashed by local port and address */ unsigned int lhash2_mask; struct inet_listen_hashbucket *lhash2; - - /* All the above members are written once at bootup and - * never written again _or_ are predominantly read-access. - * - * Now align to a new cache line as all the following members - * might be often dirty. - */ - /* All sockets in TCP_LISTEN state will be in listening_hash. - * This is the only table where wildcard'd TCP sockets can - * exist. listening_hash is only hashed by local port number. - * If lhash2 is initialized, the same socket will also be hashed - * to lhash2 by port and address. - */ - struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] - ____cacheline_aligned_in_smp; }; -#define inet_lhash2_for_each_icsk_continue(__icsk) \ - hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node) - -#define inet_lhash2_for_each_icsk(__icsk, list) \ - hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node) - -#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ - hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) - static inline struct inet_listen_hashbucket * inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash) { @@ -221,32 +229,50 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); -static inline u32 inet_bhashfn(const struct net *net, const __u16 lport, - const u32 bhash_size) +static inline bool check_bind_bucket_match(struct inet_bind_bucket *tb, + struct net *net, + const unsigned short port, + int l3mdev) { - return (lport + net_hash_mix(net)) & (bhash_size - 1); + return net_eq(ib_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev; } -void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, - const unsigned short snum); +struct inet_bind2_bucket * +inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net, + struct inet_bind2_hashbucket *head, + const unsigned short port, int l3mdev, + const struct sock *sk); -/* These can have wildcards, don't try too hard. */ -static inline u32 inet_lhashfn(const struct net *net, const unsigned short num) -{ - return (num + net_hash_mix(net)) & (INET_LHTABLE_SIZE - 1); -} +void inet_bind2_bucket_destroy(struct kmem_cache *cachep, + struct inet_bind2_bucket *tb); + +struct inet_bind2_bucket * +inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net, + const unsigned short port, int l3mdev, + struct sock *sk, + struct inet_bind2_hashbucket **head); -static inline int inet_sk_listen_hashfn(const struct sock *sk) +bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb, + struct net *net, + const unsigned short port, + int l3mdev, + const struct sock *sk); + +static inline u32 inet_bhashfn(const struct net *net, const __u16 lport, + const u32 bhash_size) { - return inet_lhashfn(sock_net(sk), inet_sk(sk)->inet_num); + return (lport + net_hash_mix(net)) & (bhash_size - 1); } +void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + struct inet_bind2_bucket *tb2, const unsigned short snum); + /* Caller must disable local BH processing. */ int __inet_inherit_port(const struct sock *sk, struct sock *child); void inet_put_port(struct sock *sk); -void inet_hashinfo_init(struct inet_hashinfo *h); void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, @@ -295,7 +321,6 @@ static inline struct sock *inet_lookup_listener(struct net *net, ((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport))) #endif -#if (BITS_PER_LONG == 64) #ifdef __BIG_ENDIAN #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const __addrpair __name = (__force __addrpair) ( \ @@ -307,24 +332,22 @@ static inline struct sock *inet_lookup_listener(struct net *net, (((__force __u64)(__be32)(__daddr)) << 32) | \ ((__force __u64)(__be32)(__saddr))) #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_addrpair == (__cookie)) && \ - (((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ - net_eq(sock_net(__sk), (__net))) -#else /* 32-bit arch */ -#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ - const int __name __deprecated __attribute__((unused)) -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ - (((__sk)->sk_portpair == (__ports)) && \ - ((__sk)->sk_daddr == (__saddr)) && \ - ((__sk)->sk_rcv_saddr == (__daddr)) && \ - (((__sk)->sk_bound_dev_if == (__dif)) || \ - ((__sk)->sk_bound_dev_if == (__sdif))) && \ - net_eq(sock_net(__sk), (__net))) -#endif /* 64-bit arch */ +static inline bool inet_match(struct net *net, const struct sock *sk, + const __addrpair cookie, const __portpair ports, + int dif, int sdif) +{ + int bound_dev_if; + + if (!net_eq(sock_net(sk), net) || + sk->sk_portpair != ports || + sk->sk_addrpair != cookie) + return false; + + /* Paired with WRITE_ONCE() from sock_bindtoindex_locked() */ + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + return bound_dev_if == dif || bound_dev_if == sdif; +} /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need * not check it for lookups anymore, thanks Alexey. -DaveM @@ -425,7 +448,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) } int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)); |