diff options
Diffstat (limited to 'include/net')
65 files changed, 1049 insertions, 370 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index cfa2ae33da9a..26ffd8333f50 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -42,6 +42,7 @@ struct tc_action { struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie *act_cookie; + struct tcf_chain *goto_chain; }; #define tcf_head common.tcfa_head #define tcf_index common.tcfa_index @@ -180,12 +181,12 @@ int tcf_unregister_action(struct tc_action_ops *a, int tcf_action_destroy(struct list_head *actions, int bind); int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); -int tcf_action_init(struct net *net, struct nlattr *nla, - struct nlattr *est, char *n, int ovr, - int bind, struct list_head *); -struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, - struct nlattr *est, char *n, int ovr, - int bind); +int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, + struct nlattr *est, char *name, int ovr, int bind, + struct list_head *actions); +struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, + struct nlattr *nla, struct nlattr *est, + char *name, int ovr, int bind); int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index b43a4eec3cec..6df79e96a780 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -48,11 +48,15 @@ struct prefix_info { struct in6_addr prefix; }; - #include <linux/netdevice.h> #include <net/if_inet6.h> #include <net/ipv6.h> +struct in6_validator_info { + struct in6_addr i6vi_addr; + struct inet6_dev *i6vi_dev; +}; + #define IN6_ADDR_HSIZE_SHIFT 4 #define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) @@ -278,6 +282,10 @@ int register_inet6addr_notifier(struct notifier_block *nb); int unregister_inet6addr_notifier(struct notifier_block *nb); int inet6addr_notifier_call_chain(unsigned long val, void *v); +int register_inet6addr_validator_notifier(struct notifier_block *nb); +int unregister_inet6addr_validator_notifier(struct notifier_block *nb); +int inet6addr_validator_notifier_call_chain(unsigned long val, void *v); + void inet6_netconf_notify_devconf(struct net *net, int event, int type, int ifindex, struct ipv6_devconf *devconf); @@ -308,7 +316,7 @@ static inline struct inet6_dev *in6_dev_get(const struct net_device *dev) rcu_read_lock(); idev = rcu_dereference(dev->ip6_ptr); if (idev) - atomic_inc(&idev->refcnt); + refcount_inc(&idev->refcnt); rcu_read_unlock(); return idev; } @@ -324,36 +332,36 @@ void in6_dev_finish_destroy(struct inet6_dev *idev); static inline void in6_dev_put(struct inet6_dev *idev) { - if (atomic_dec_and_test(&idev->refcnt)) + if (refcount_dec_and_test(&idev->refcnt)) in6_dev_finish_destroy(idev); } static inline void __in6_dev_put(struct inet6_dev *idev) { - atomic_dec(&idev->refcnt); + refcount_dec(&idev->refcnt); } static inline void in6_dev_hold(struct inet6_dev *idev) { - atomic_inc(&idev->refcnt); + refcount_inc(&idev->refcnt); } void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); static inline void in6_ifa_put(struct inet6_ifaddr *ifp) { - if (atomic_dec_and_test(&ifp->refcnt)) + if (refcount_dec_and_test(&ifp->refcnt)) inet6_ifa_finish_destroy(ifp); } static inline void __in6_ifa_put(struct inet6_ifaddr *ifp) { - atomic_dec(&ifp->refcnt); + refcount_dec(&ifp->refcnt); } static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) { - atomic_inc(&ifp->refcnt); + refcount_inc(&ifp->refcnt); } diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index b5f5187f488c..c172709787af 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -33,6 +33,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct sockaddr_rxrpc *, struct key *, unsigned long, + s64, gfp_t, rxrpc_notify_rx_t); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, @@ -46,5 +47,6 @@ void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t); +void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); #endif /* _NET_RXRPC_H */ diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 75e612a45824..678e4d6fa317 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -4,6 +4,7 @@ #include <linux/socket.h> #include <linux/un.h> #include <linux/mutex.h> +#include <linux/refcount.h> #include <net/sock.h> void unix_inflight(struct user_struct *user, struct file *fp); @@ -21,7 +22,7 @@ extern spinlock_t unix_table_lock; extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { - atomic_t refcnt; + refcount_t refcnt; int len; unsigned int hash; struct sockaddr_un name[0]; diff --git a/include/net/arp.h b/include/net/arp.h index 65619a2de6f4..17d90e4e8dc5 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -28,7 +28,7 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 rcu_read_lock_bh(); n = __ipv4_neigh_lookup_noref(dev, key); - if (n && !atomic_inc_not_zero(&n->refcnt)) + if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock_bh(); diff --git a/include/net/ax25.h b/include/net/ax25.h index e602f8177ebf..c4a0cf6f0810 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -11,7 +11,7 @@ #include <linux/timer.h> #include <linux/list.h> #include <linux/slab.h> -#include <linux/atomic.h> +#include <linux/refcount.h> #include <net/neighbour.h> #include <net/sock.h> @@ -158,7 +158,7 @@ enum { typedef struct ax25_uid_assoc { struct hlist_node uid_node; - atomic_t refcount; + refcount_t refcount; kuid_t uid; ax25_address call; } ax25_uid_assoc; @@ -167,11 +167,11 @@ typedef struct ax25_uid_assoc { hlist_for_each_entry(__ax25, list, uid_node) #define ax25_uid_hold(ax25) \ - atomic_inc(&((ax25)->refcount)) + refcount_inc(&((ax25)->refcount)) static inline void ax25_uid_put(ax25_uid_assoc *assoc) { - if (atomic_dec_and_test(&assoc->refcount)) { + if (refcount_dec_and_test(&assoc->refcount)) { kfree(assoc); } } @@ -185,7 +185,7 @@ typedef struct { typedef struct ax25_route { struct ax25_route *next; - atomic_t refcount; + refcount_t refcount; ax25_address callsign; struct net_device *dev; ax25_digi *digipeat; @@ -194,14 +194,14 @@ typedef struct ax25_route { static inline void ax25_hold_route(ax25_route *ax25_rt) { - atomic_inc(&ax25_rt->refcount); + refcount_inc(&ax25_rt->refcount); } void __ax25_put_route(ax25_route *ax25_rt); static inline void ax25_put_route(ax25_route *ax25_rt) { - if (atomic_dec_and_test(&ax25_rt->refcount)) + if (refcount_dec_and_test(&ax25_rt->refcount)) __ax25_put_route(ax25_rt); } @@ -244,7 +244,7 @@ typedef struct ax25_cb { unsigned char window; struct timer_list timer, dtimer; struct sock *sk; /* Backlink to socket */ - atomic_t refcount; + refcount_t refcount; } ax25_cb; struct ax25_sock { @@ -266,11 +266,11 @@ static inline struct ax25_cb *sk_to_ax25(const struct sock *sk) hlist_for_each_entry(__ax25, list, ax25_node) #define ax25_cb_hold(__ax25) \ - atomic_inc(&((__ax25)->refcount)) + refcount_inc(&((__ax25)->refcount)) static __inline__ void ax25_cb_put(ax25_cb *ax25) { - if (atomic_dec_and_test(&ax25->refcount)) { + if (refcount_dec_and_test(&ax25->refcount)) { kfree(ax25->digipeat); kfree(ax25); } diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 99aa5e5e3100..fe98f0a5bef0 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -399,6 +399,7 @@ enum { #define HCI_LE_PING 0x10 #define HCI_LE_DATA_LEN_EXT 0x20 #define HCI_LE_EXT_SCAN_POLICY 0x80 +#define HCI_LE_CHAN_SEL_ALG2 0x40 /* Connection modes */ #define HCI_CM_ACTIVE 0x0000 @@ -1498,6 +1499,13 @@ struct hci_rp_le_read_max_data_len { __le16 rx_time; } __packed; +#define HCI_OP_LE_SET_DEFAULT_PHY 0x2031 +struct hci_cp_le_set_default_phy { + __u8 all_phys; + __u8 tx_phys; + __u8 rx_phys; +} __packed; + /* ---- HCI Events ---- */ #define HCI_EV_INQUIRY_COMPLETE 0x01 diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 1797235cd590..d79d28f5318c 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -104,6 +104,8 @@ struct bond_option { int __bond_opt_set(struct bonding *bond, unsigned int option, struct bond_opt_value *val); +int __bond_opt_set_notify(struct bonding *bond, unsigned int option, + struct bond_opt_value *val); int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf); const struct bond_opt_value *bond_opt_parse(const struct bond_option *opt, diff --git a/include/net/calipso.h b/include/net/calipso.h index b1b30cd36601..5f95b11a04bf 100644 --- a/include/net/calipso.h +++ b/include/net/calipso.h @@ -38,7 +38,7 @@ #include <linux/skbuff.h> #include <net/netlabel.h> #include <net/request_sock.h> -#include <linux/atomic.h> +#include <linux/refcount.h> #include <asm/unaligned.h> /* known doi values */ @@ -57,7 +57,7 @@ struct calipso_doi { u32 doi; u32 type; - atomic_t refcount; + refcount_t refcount; struct list_head list; struct rcu_head rcu; }; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b083e6cbae8c..f12fa5245a45 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -649,6 +649,7 @@ struct survey_info { * @wep_keys: static WEP keys, if not NULL points to an array of * CFG80211_MAX_WEP_KEYS WEP keys * @wep_tx_key: key index (0..3) of the default TX static WEP key + * @psk: PSK (for devices supporting 4-way-handshake offload) */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -662,6 +663,7 @@ struct cfg80211_crypto_settings { bool control_port_no_encrypt; struct key_params *wep_keys; int wep_tx_key; + const u8 *psk; }; /** @@ -1441,6 +1443,9 @@ struct mesh_config { * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a] * @basic_rates: basic rates to use when creating the mesh * @beacon_rate: bitrate to be used for beacons + * @userspace_handles_dfs: whether user space controls DFS operation, i.e. + * changes the channel when a radar is detected. This is required + * to operate on DFS channels. * * These parameters are fixed when the mesh is created. */ @@ -1462,6 +1467,7 @@ struct mesh_setup { int mcast_rate[NUM_NL80211_BANDS]; u32 basic_rates; struct cfg80211_bitrate_mask beacon_rate; + bool userspace_handles_dfs; }; /** @@ -2106,6 +2112,8 @@ struct cfg80211_bss_selection { * @fils_erp_rrk: ERP re-authentication Root Key (rRK) used to derive additional * keys in FILS or %NULL if not specified. * @fils_erp_rrk_len: Length of @fils_erp_rrk in octets. + * @want_1x: indicates user-space supports and wants to use 802.1X driver + * offload of 4-way handshake. */ struct cfg80211_connect_params { struct ieee80211_channel *channel; @@ -2138,6 +2146,7 @@ struct cfg80211_connect_params { u16 fils_erp_next_seq_num; const u8 *fils_erp_rrk; size_t fils_erp_rrk_len; + bool want_1x; }; /** @@ -2560,6 +2569,23 @@ struct cfg80211_nan_func { }; /** + * struct cfg80211_pmk_conf - PMK configuration + * + * @aa: authenticator address + * @pmk_len: PMK length in bytes. + * @pmk: the PMK material + * @pmk_r0_name: PMK-R0 Name. NULL if not applicable (i.e., the PMK + * is not PMK-R0). When pmk_r0_name is not NULL, the pmk field + * holds PMK-R0. + */ +struct cfg80211_pmk_conf { + const u8 *aa; + u8 pmk_len; + const u8 *pmk; + const u8 *pmk_r0_name; +}; + +/** * struct cfg80211_ops - backend description for wireless configuration * * This struct is registered by fullmac card drivers and/or wireless stacks @@ -2875,6 +2901,13 @@ struct cfg80211_nan_func { * All other parameters must be ignored. * * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS + * + * @set_pmk: configure the PMK to be used for offloaded 802.1X 4-Way handshake. + * If not deleted through @del_pmk the PMK remains valid until disconnect + * upon which the driver should clear it. + * (invoked with the wireless_dev mutex held) + * @del_pmk: delete the previously configured PMK for the given authenticator. + * (invoked with the wireless_dev mutex held) */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -3163,6 +3196,11 @@ struct cfg80211_ops { int (*set_multicast_to_unicast)(struct wiphy *wiphy, struct net_device *dev, const bool enabled); + + int (*set_pmk)(struct wiphy *wiphy, struct net_device *dev, + const struct cfg80211_pmk_conf *conf); + int (*del_pmk)(struct wiphy *wiphy, struct net_device *dev, + const u8 *aa); }; /* @@ -5403,6 +5441,9 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length + * @authorized: true if the 802.1X authentication was done by the driver or is + * not needed (e.g., when Fast Transition protocol was used), false + * otherwise. Ignored for networks that don't use 802.1X authentication. */ struct cfg80211_roam_info { struct ieee80211_channel *channel; @@ -5412,6 +5453,7 @@ struct cfg80211_roam_info { size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; + bool authorized; }; /** diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index a34b141f125f..880adb2f2afd 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -41,6 +41,7 @@ #include <net/netlabel.h> #include <net/request_sock.h> #include <linux/atomic.h> +#include <linux/refcount.h> #include <asm/unaligned.h> /* known doi values */ @@ -85,7 +86,7 @@ struct cipso_v4_doi { } map; u8 tags[CIPSO_V4_TAG_MAXCNT]; - atomic_t refcount; + refcount_t refcount; struct list_head list; struct rcu_head rcu; }; diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index f2ca135ddcc9..81210a8b8d7c 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -2,6 +2,7 @@ #define _NET_DN_FIB_H #include <linux/netlink.h> +#include <linux/refcount.h> extern const struct nla_policy rtm_dn_policy[]; @@ -28,7 +29,7 @@ struct dn_fib_info { struct dn_fib_info *fib_next; struct dn_fib_info *fib_prev; int fib_treeref; - atomic_t fib_clntref; + refcount_t fib_clntref; int fib_dead; unsigned int fib_flags; int fib_protocol; @@ -130,7 +131,7 @@ void dn_fib_free_info(struct dn_fib_info *fi); static inline void dn_fib_info_put(struct dn_fib_info *fi) { - if (atomic_dec_and_test(&fi->fib_clntref)) + if (refcount_dec_and_test(&fi->fib_clntref)) dn_fib_free_info(fi); } diff --git a/include/net/dsa.h b/include/net/dsa.h index 8e24677b1c62..58969b9a090c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -20,6 +20,7 @@ #include <linux/of.h> #include <linux/ethtool.h> #include <net/devlink.h> +#include <net/switchdev.h> struct tc_action; struct phy_device; @@ -27,13 +28,14 @@ struct fixed_phy_status; enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = 0, + DSA_TAG_PROTO_BRCM, DSA_TAG_PROTO_DSA, - DSA_TAG_PROTO_TRAILER, DSA_TAG_PROTO_EDSA, - DSA_TAG_PROTO_BRCM, - DSA_TAG_PROTO_QCA, - DSA_TAG_PROTO_MTK, + DSA_TAG_PROTO_KSZ, DSA_TAG_PROTO_LAN9303, + DSA_TAG_PROTO_MTK, + DSA_TAG_PROTO_QCA, + DSA_TAG_PROTO_TRAILER, DSA_TAG_LAST, /* MUST BE LAST */ }; @@ -120,27 +122,16 @@ struct dsa_switch_tree { */ struct dsa_platform_data *pd; - /* - * Reference to network device to use, and which tagging - * protocol to use. - */ - struct net_device *master_netdev; + /* Copy of tag_ops->rcv for faster access in hot path */ struct sk_buff * (*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); /* - * Original copy of the master netdev ethtool_ops - */ - struct ethtool_ops master_ethtool_ops; - const struct ethtool_ops *master_orig_ethtool_ops; - - /* - * The switch and port to which the CPU is attached. + * The switch port to which the CPU is attached. */ - struct dsa_switch *cpu_switch; - s8 cpu_port; + struct dsa_port *cpu_dp; /* * Data for the individual switch chips. @@ -180,12 +171,18 @@ struct dsa_port { struct dsa_switch *ds; unsigned int index; const char *name; + struct dsa_port *cpu_dp; struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; u8 stp_state; struct net_device *bridge_dev; struct devlink_port devlink_port; + /* + * Original copy of the master netdev ethtool_ops + */ + struct ethtool_ops ethtool_ops; + const struct ethtool_ops *orig_ethtool_ops; }; struct dsa_switch { @@ -224,11 +221,6 @@ struct dsa_switch { s8 rtable[DSA_MAX_SWITCHES]; /* - * The lower device this switch uses to talk to the host - */ - struct net_device *master_netdev; - - /* * Slave mii_bus and devices for the individual ports. */ u32 dsa_port_mask; @@ -251,7 +243,7 @@ struct dsa_switch { static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) { - return !!(ds == ds->dst->cpu_switch && p == ds->dst->cpu_port); + return !!(ds->cpu_port_mask & (1 << p)); } static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) @@ -279,28 +271,12 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) * Else return the (DSA) port number that connects to the * switch that is one hop closer to the cpu. */ - if (dst->cpu_switch == ds) - return dst->cpu_port; + if (dst->cpu_dp->ds == ds) + return dst->cpu_dp->index; else - return ds->rtable[dst->cpu_switch->index]; + return ds->rtable[dst->cpu_dp->ds->index]; } -struct switchdev_trans; -struct switchdev_obj; -struct switchdev_obj_port_fdb; -struct switchdev_obj_port_mdb; -struct switchdev_obj_port_vlan; - -#define DSA_NOTIFIER_BRIDGE_JOIN 1 -#define DSA_NOTIFIER_BRIDGE_LEAVE 2 - -/* DSA_NOTIFIER_BRIDGE_* */ -struct dsa_notifier_bridge_info { - struct net_device *br; - int sw_index; - int port; -}; - struct dsa_switch_ops { /* * Legacy probing. @@ -410,7 +386,7 @@ struct dsa_switch_ops { const struct switchdev_obj_port_vlan *vlan); int (*port_vlan_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_vlan *vlan, - int (*cb)(struct switchdev_obj *obj)); + switchdev_obj_dump_cb_t *cb); /* * Forwarding database @@ -425,7 +401,7 @@ struct dsa_switch_ops { const struct switchdev_obj_port_fdb *fdb); int (*port_fdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, - int (*cb)(struct switchdev_obj *obj)); + switchdev_obj_dump_cb_t *cb); /* * Multicast database @@ -440,7 +416,7 @@ struct dsa_switch_ops { const struct switchdev_obj_port_mdb *mdb); int (*port_mdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_mdb *mdb, - int (*cb)(struct switchdev_obj *obj)); + switchdev_obj_dump_cb_t *cb); /* * RXNFC @@ -480,23 +456,18 @@ struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); struct net_device *dsa_dev_to_net_device(struct device *dev); -static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) -{ - return dst->rcv != NULL; -} - +/* Keep inline for faster access in hot path */ static inline bool netdev_uses_dsa(struct net_device *dev) { #if IS_ENABLED(CONFIG_NET_DSA) - if (dev->dsa_ptr != NULL) - return dsa_uses_tagged_protocol(dev->dsa_ptr); + return dev->dsa_ptr && dev->dsa_ptr->rcv; #endif return false; } struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n); void dsa_unregister_switch(struct dsa_switch *ds); -int dsa_register_switch(struct dsa_switch *ds, struct device *dev); +int dsa_register_switch(struct dsa_switch *ds); #ifdef CONFIG_PM_SLEEP int dsa_switch_suspend(struct dsa_switch *ds); int dsa_switch_resume(struct dsa_switch *ds); diff --git a/include/net/dst.h b/include/net/dst.h index cfc043784166..f73611ec4017 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -31,9 +31,9 @@ struct sk_buff; struct dst_entry { + struct net_device *dev; struct rcu_head rcu_head; struct dst_entry *child; - struct net_device *dev; struct dst_ops *ops; unsigned long _metrics; unsigned long expires; @@ -51,13 +51,11 @@ struct dst_entry { #define DST_HOST 0x0001 #define DST_NOXFRM 0x0002 #define DST_NOPOLICY 0x0004 -#define DST_NOHASH 0x0008 -#define DST_NOCACHE 0x0010 -#define DST_NOCOUNT 0x0020 -#define DST_FAKE_RTABLE 0x0040 -#define DST_XFRM_TUNNEL 0x0080 -#define DST_XFRM_QUEUE 0x0100 -#define DST_METADATA 0x0200 +#define DST_NOCOUNT 0x0008 +#define DST_FAKE_RTABLE 0x0010 +#define DST_XFRM_TUNNEL 0x0020 +#define DST_XFRM_QUEUE 0x0040 +#define DST_METADATA 0x0080 short error; @@ -253,7 +251,7 @@ static inline void dst_hold(struct dst_entry *dst) * __pad_to_align_refcnt declaration in struct dst_entry */ BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); - atomic_inc(&dst->__refcnt); + WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0); } static inline void dst_use(struct dst_entry *dst, unsigned long time) @@ -278,6 +276,8 @@ static inline struct dst_entry *dst_clone(struct dst_entry *dst) void dst_release(struct dst_entry *dst); +void dst_release_immediate(struct dst_entry *dst); + static inline void refdst_drop(unsigned long refdst) { if (!(refdst & SKB_DST_NOREF)) @@ -334,10 +334,7 @@ static inline void skb_dst_force(struct sk_buff *skb) */ static inline bool dst_hold_safe(struct dst_entry *dst) { - if (dst->flags & DST_NOCACHE) - return atomic_inc_not_zero(&dst->__refcnt); - dst_hold(dst); - return true; + return atomic_inc_not_zero(&dst->__refcnt); } /** @@ -423,26 +420,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, void dst_init(struct dst_entry *dst, struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags); -void __dst_free(struct dst_entry *dst); struct dst_entry *dst_destroy(struct dst_entry *dst); - -static inline void dst_free(struct dst_entry *dst) -{ - if (dst->obsolete > 0) - return; - if (!atomic_read(&dst->__refcnt)) { - dst = dst_destroy(dst); - if (!dst) - return; - } - __dst_free(dst); -} - -static inline void dst_rcu_free(struct rcu_head *head) -{ - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst_free(dst); -} +void dst_dev_put(struct dst_entry *dst); static inline void dst_confirm(struct dst_entry *dst) { @@ -505,8 +484,6 @@ static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) return dst; } -void dst_subsys_init(void); - /* Flags for xfrm_lookup flags argument. */ enum { XFRM_LOOKUP_ICMP = 1 << 0, diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 701fc814d0af..a803129a4849 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -5,10 +5,22 @@ #include <net/ip_tunnels.h> #include <net/dst.h> +enum metadata_type { + METADATA_IP_TUNNEL, + METADATA_HW_PORT_MUX, +}; + +struct hw_port_info { + struct net_device *lower_dev; + u32 port_id; +}; + struct metadata_dst { struct dst_entry dst; + enum metadata_type type; union { struct ip_tunnel_info tun_info; + struct hw_port_info port_info; } u; }; @@ -27,7 +39,7 @@ static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb) struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; - if (md_dst) + if (md_dst && md_dst->type == METADATA_IP_TUNNEL) return &md_dst->u.tun_info; dst = skb_dst(skb); @@ -55,22 +67,33 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, a = (const struct metadata_dst *) skb_dst(skb_a); b = (const struct metadata_dst *) skb_dst(skb_b); - if (!a != !b || a->u.tun_info.options_len != b->u.tun_info.options_len) + if (!a != !b || a->type != b->type) return 1; - return memcmp(&a->u.tun_info, &b->u.tun_info, - sizeof(a->u.tun_info) + a->u.tun_info.options_len); + switch (a->type) { + case METADATA_HW_PORT_MUX: + return memcmp(&a->u.port_info, &b->u.port_info, + sizeof(a->u.port_info)); + case METADATA_IP_TUNNEL: + return memcmp(&a->u.tun_info, &b->u.tun_info, + sizeof(a->u.tun_info) + + a->u.tun_info.options_len); + default: + return 1; + } } void metadata_dst_free(struct metadata_dst *); -struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); -struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); +struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, + gfp_t flags); +struct metadata_dst __percpu * +metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags); static inline struct metadata_dst *tun_rx_dst(int md_size) { struct metadata_dst *tun_dst; - tun_dst = metadata_dst_alloc(md_size, GFP_ATOMIC); + tun_dst = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!tun_dst) return NULL; @@ -85,11 +108,11 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) int md_size; struct metadata_dst *new_md; - if (!md_dst) + if (!md_dst || md_dst->type != METADATA_IP_TUNNEL) return ERR_PTR(-EINVAL); md_size = md_dst->u.tun_info.options_len; - new_md = metadata_dst_alloc(md_size, GFP_ATOMIC); + new_md = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!new_md) return ERR_PTR(-ENOMEM); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 76c7300626d6..c487bfa2f479 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -5,6 +5,7 @@ #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/fib_rules.h> +#include <linux/refcount.h> #include <net/flow.h> #include <net/rtnetlink.h> @@ -29,7 +30,7 @@ struct fib_rule { struct fib_rule __rcu *ctarget; struct net *fr_net; - atomic_t refcnt; + refcount_t refcnt; u32 pref; int suppress_ifgroup; int suppress_prefixlen; @@ -103,12 +104,12 @@ struct fib_rules_ops { static inline void fib_rule_get(struct fib_rule *rule) { - atomic_inc(&rule->refcnt); + refcount_inc(&rule->refcnt); } static inline void fib_rule_put(struct fib_rule *rule) { - if (atomic_dec_and_test(&rule->refcnt)) + if (refcount_dec_and_test(&rule->refcnt)) kfree_rcu(rule, rcu); } diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 8d21d448daa9..e2663e900b0a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -157,6 +157,24 @@ struct flow_dissector_key_eth_addrs { unsigned char src[ETH_ALEN]; }; +/** + * struct flow_dissector_key_tcp: + * @flags: flags + */ +struct flow_dissector_key_tcp { + __be16 flags; +}; + +/** + * struct flow_dissector_key_ip: + * @tos: tos + * @ttl: ttl + */ +struct flow_dissector_key_ip { + __u8 tos; + __u8 ttl; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -177,6 +195,8 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */ + FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */ + FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 68b88192b00c..c59a098221db 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -128,7 +128,6 @@ static inline int genl_err_attr(struct genl_info *info, int err, * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps - * @ops_list: operations list */ struct genl_ops { const struct nla_policy *policy; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f656f9051aca..d4088d1a688d 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -17,6 +17,7 @@ #include <net/snmp.h> #include <linux/ipv6.h> +#include <linux/refcount.h> /* inet6_dev.if_flags */ @@ -45,7 +46,7 @@ struct inet6_ifaddr { /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */ __u32 valid_lft; __u32 prefered_lft; - atomic_t refcnt; + refcount_t refcnt; spinlock_t lock; int state; @@ -126,7 +127,7 @@ struct ifmcaddr6 { struct timer_list mca_timer; unsigned int mca_flags; int mca_users; - atomic_t mca_refcnt; + refcount_t mca_refcnt; spinlock_t mca_lock; unsigned long mca_cstamp; unsigned long mca_tstamp; @@ -146,7 +147,7 @@ struct ifacaddr6 { struct rt6_info *aca_rt; struct ifacaddr6 *aca_next; int aca_users; - atomic_t aca_refcnt; + refcount_t aca_refcnt; unsigned long aca_cstamp; unsigned long aca_tstamp; }; @@ -187,7 +188,7 @@ struct inet6_dev { struct ifacaddr6 *ac_list; rwlock_t lock; - atomic_t refcnt; + refcount_t refcnt; __u32 if_flags; int dead; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c7a577976bec..13e4c89a8231 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -75,6 +75,8 @@ struct inet_connection_sock_af_ops { * @icsk_pmtu_cookie Last pmtu seen by socket * @icsk_ca_ops Pluggable congestion control hook * @icsk_af_ops Operations which are AF_INET{4,6} specific + * @icsk_ulp_ops Pluggable ULP control hook + * @icsk_ulp_data ULP private data * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -97,6 +99,8 @@ struct inet_connection_sock { __u32 icsk_pmtu_cookie; const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; + const struct tcp_ulp_ops *icsk_ulp_ops; + void *icsk_ulp_data; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:6, icsk_ca_setsockopt:1, diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 5894730ec82a..6fdcd2427776 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -50,7 +50,7 @@ struct inet_frag_queue { spinlock_t lock; struct timer_list timer; struct hlist_node list; - atomic_t refcnt; + refcount_t refcnt; struct sk_buff *fragments; struct sk_buff *fragments_tail; ktime_t stamp; @@ -92,7 +92,7 @@ struct inet_frags { */ u32 rnd; seqlock_t rnd_seqlock; - int qsize; + unsigned int qsize; unsigned int (*hashfn)(const struct inet_frag_queue *); bool (*match)(const struct inet_frag_queue *q, @@ -129,7 +129,7 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { - if (atomic_dec_and_test(&q->refcnt)) + if (refcount_dec_and_test(&q->refcnt)) inet_frag_destroy(q, f); } @@ -154,12 +154,12 @@ static inline int frag_mem_limit(struct netns_frags *nf) static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch); + percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch); } static inline void add_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch); + percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch); } static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1178931288cb..5026b1f08bb8 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -32,7 +32,7 @@ #include <net/tcp_states.h> #include <net/netns/hash.h> -#include <linux/atomic.h> +#include <linux/refcount.h> #include <asm/byteorder.h> /* This is for all connections with a full identity, no wildcards. @@ -334,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net, sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, dport, dif, &refcounted); - if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt)) + if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } @@ -359,7 +359,6 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, refcounted); } -u32 sk_ehashfn(const struct sock *sk); u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport); diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 235c7811a86a..f2a215fc78e4 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -46,7 +46,7 @@ struct inet_peer { struct rcu_head gc_rcu; }; /* - * Once inet_peer is queued for deletion (refcnt == -1), following field + * Once inet_peer is queued for deletion (refcnt == 0), following field * is not available: rid * We can share memory with rcu_head to help keep inet_peer small. */ @@ -60,7 +60,7 @@ struct inet_peer { /* following fields might be frequently dirtied */ __u32 dtime; /* the time of last use of not referenced entries */ - atomic_t refcnt; + refcount_t refcnt; }; struct inet_peer_base { diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index c979c878df1c..1a88008cc6f5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -170,7 +170,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) static inline u32 rt6_get_cookie(const struct rt6_info *rt) { if (rt->rt6i_flags & RTF_PCPU || - (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from)) + (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from)) rt = (struct rt6_info *)(rt->dst.from); return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; @@ -277,7 +277,8 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), void *arg); int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc); + struct nl_info *info, struct mx6_config *mxc, + struct netlink_ext_ack *extack); int fib6_del(struct rt6_info *rt, struct nl_info *info); void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f5e625f53367..199056933dcb 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -22,6 +22,7 @@ struct route_info { #include <net/flow.h> #include <net/ip6_fib.h> #include <net/sock.h> +#include <net/lwtunnel.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/route.h> @@ -90,7 +91,7 @@ void ip6_route_cleanup(void); int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg); -int ip6_route_add(struct fib6_config *cfg); +int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack); int ip6_ins_rt(struct rt6_info *); int ip6_del_rt(struct rt6_info *); @@ -116,7 +117,6 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); -int icmp6_dst_gc(void); void fib6_force_start_gc(struct net *net); @@ -233,4 +233,11 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, return daddr; } +static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b) +{ + return a->dst.dev == b->dst.dev && + a->rt6i_idev == b->rt6i_idev && + ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) && + !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate); +} #endif diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f7f6aa789c61..41d580c6185f 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -23,6 +23,7 @@ #include <net/inetpeer.h> #include <linux/percpu.h> #include <linux/notifier.h> +#include <linux/refcount.h> struct fib_config { u8 fc_dst_len; @@ -105,7 +106,7 @@ struct fib_info { struct hlist_node fib_lhash; struct net *fib_net; int fib_treeref; - atomic_t fib_clntref; + refcount_t fib_clntref; unsigned int fib_flags; unsigned char fib_dead; unsigned char fib_protocol; @@ -136,6 +137,7 @@ struct fib_rule; struct fib_table; struct fib_result { + __be32 prefix; unsigned char prefixlen; unsigned char nh_sel; unsigned char type; @@ -263,8 +265,10 @@ struct fib_table { int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); -int fib_table_insert(struct net *, struct fib_table *, struct fib_config *); -int fib_table_delete(struct net *, struct fib_table *, struct fib_config *); +int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, + struct netlink_ext_ack *extack); +int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, + struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); @@ -427,12 +431,12 @@ void free_fib_info(struct fib_info *fi); static inline void fib_info_hold(struct fib_info *fi) { - atomic_inc(&fi->fib_clntref); + refcount_inc(&fi->fib_clntref); } static inline void fib_info_put(struct fib_info *fi) { - if (atomic_dec_and_test(&fi->fib_clntref)) + if (refcount_dec_and_test(&fi->fib_clntref)) free_fib_info(fi); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3e505bbff8ca..6eac5cf8f1e6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -16,6 +16,7 @@ #include <linux/ipv6.h> #include <linux/hardirq.h> #include <linux/jhash.h> +#include <linux/refcount.h> #include <net/if_inet6.h> #include <net/ndisc.h> #include <net/flow.h> @@ -203,7 +204,7 @@ extern rwlock_t ip6_ra_lock; */ struct ipv6_txoptions { - atomic_t refcnt; + refcount_t refcnt; /* Length of this structure */ int tot_len; @@ -265,7 +266,7 @@ static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) rcu_read_lock(); opt = rcu_dereference(np->opt); if (opt) { - if (!atomic_inc_not_zero(&opt->refcnt)) + if (!refcount_inc_not_zero(&opt->refcnt)) opt = NULL; else opt = rcu_pointer_handoff(opt); @@ -276,7 +277,7 @@ static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) static inline void txopt_put(struct ipv6_txoptions *opt) { - if (opt && atomic_dec_and_test(&opt->refcnt)) + if (opt && refcount_dec_and_test(&opt->refcnt)) kfree_rcu(opt, rcu); } diff --git a/include/net/ipx.h b/include/net/ipx.h index e5cff6811b30..af32b97b5ddd 100644 --- a/include/net/ipx.h +++ b/include/net/ipx.h @@ -14,6 +14,7 @@ #include <linux/ipx.h> #include <linux/list.h> #include <linux/slab.h> +#include <linux/refcount.h> struct ipx_address { __be32 net; @@ -54,7 +55,7 @@ struct ipx_interface { /* IPX address */ __be32 if_netnum; unsigned char if_node[IPX_NODE_LEN]; - atomic_t refcnt; + refcount_t refcnt; /* physical device info */ struct net_device *if_dev; @@ -80,7 +81,7 @@ struct ipx_route { unsigned char ir_routed; unsigned char ir_router_node[IPX_NODE_LEN]; struct list_head node; /* node in ipx_routes list */ - atomic_t refcnt; + refcount_t refcnt; }; struct ipx_cb { @@ -139,7 +140,7 @@ const char *ipx_device_name(struct ipx_interface *intrfc); static __inline__ void ipxitf_hold(struct ipx_interface *intrfc) { - atomic_inc(&intrfc->refcnt); + refcount_inc(&intrfc->refcnt); } void ipxitf_down(struct ipx_interface *intrfc); @@ -157,18 +158,18 @@ int ipxrtr_ioctl(unsigned int cmd, void __user *arg); static __inline__ void ipxitf_put(struct ipx_interface *intrfc) { - if (atomic_dec_and_test(&intrfc->refcnt)) + if (refcount_dec_and_test(&intrfc->refcnt)) ipxitf_down(intrfc); } static __inline__ void ipxrtr_hold(struct ipx_route *rt) { - atomic_inc(&rt->refcnt); + refcount_inc(&rt->refcnt); } static __inline__ void ipxrtr_put(struct ipx_route *rt) { - if (atomic_dec_and_test(&rt->refcnt)) + if (refcount_dec_and_test(&rt->refcnt)) kfree(rt); } #endif /* _NET_INET_IPX_H_ */ diff --git a/include/net/lapb.h b/include/net/lapb.h index 9510f8725f03..85e773742f4e 100644 --- a/include/net/lapb.h +++ b/include/net/lapb.h @@ -1,6 +1,7 @@ #ifndef _LAPB_H #define _LAPB_H #include <linux/lapb.h> +#include <linux/refcount.h> #define LAPB_HEADER_LEN 20 /* LAPB over Ethernet + a bit more */ @@ -101,7 +102,7 @@ struct lapb_cb { struct lapb_frame frmr_data; unsigned char frmr_type; - atomic_t refcnt; + refcount_t refcnt; }; /* lapb_iface.c */ diff --git a/include/net/llc.h b/include/net/llc.h index e8e61d4fb458..dc35f25eb679 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -55,7 +55,7 @@ struct llc_sap { unsigned char state; unsigned char p_bit; unsigned char f_bit; - atomic_t refcnt; + refcount_t refcnt; int (*rcv_func)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, @@ -113,14 +113,14 @@ struct llc_sap *llc_sap_open(unsigned char lsap, struct net_device *orig_dev)); static inline void llc_sap_hold(struct llc_sap *sap) { - atomic_inc(&sap->refcnt); + refcount_inc(&sap->refcnt); } void llc_sap_close(struct llc_sap *sap); static inline void llc_sap_put(struct llc_sap *sap) { - if (atomic_dec_and_test(&sap->refcnt)) + if (refcount_dec_and_test(&sap->refcnt)) llc_sap_close(sap); } diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index ebfe237aad7e..7c26863b8cf4 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -35,7 +35,8 @@ struct lwtunnel_state { struct lwtunnel_encap_ops { int (*build_state)(struct nlattr *encap, unsigned int family, const void *cfg, - struct lwtunnel_state **ts); + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack); void (*destroy_state)(struct lwtunnel_state *lws); int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*input)(struct sk_buff *skb); @@ -107,12 +108,15 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num); -int lwtunnel_valid_encap_type(u16 encap_type); -int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len); +int lwtunnel_valid_encap_type(u16 encap_type, + struct netlink_ext_ack *extack); +int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, + struct netlink_ext_ack *extack); int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, - struct lwtunnel_state **lws); + struct lwtunnel_state **lws, + struct netlink_ext_ack *extack); int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate); int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); @@ -172,11 +176,14 @@ static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, return -EOPNOTSUPP; } -static inline int lwtunnel_valid_encap_type(u16 encap_type) +static inline int lwtunnel_valid_encap_type(u16 encap_type, + struct netlink_ext_ack *extack) { + NL_SET_ERR_MSG(extack, "CONFIG_LWTUNNEL is not enabled in this kernel"); return -EOPNOTSUPP; } -static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len) +static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, + struct netlink_ext_ack *extack) { /* return 0 since we are not walking attr looking for * RTA_ENCAP_TYPE attribute on nexthops. @@ -187,7 +194,8 @@ static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len) static inline int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, - struct lwtunnel_state **lws) + struct lwtunnel_state **lws, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 76ed24a201eb..b2b5419467cc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4205,6 +4205,22 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, int max_rates); /** + * ieee80211_sta_set_expected_throughput - set the expected tpt for a station + * + * Call this function to notify mac80211 about a change in expected throughput + * to a station. A driver for a device that does rate control in firmware can + * call this function when the expected throughput estimate towards a station + * changes. The information is used to tune the CoDel AQM applied to traffic + * going towards that station (which can otherwise be too aggressive and cause + * slow stations to starve). + * + * @pubsta: the station to set throughput for. + * @thr: the current expected throughput in kbps. + */ +void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, + u32 thr); + +/** * ieee80211_tx_status - transmit status callback * * Call this function for all transmitted frames after they have been @@ -5436,6 +5452,9 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, */ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn); +void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, const u8 *addr, + unsigned int bit); + /** * ieee80211_start_rx_ba_session_offl - start a Rx BA session * @@ -5450,8 +5469,13 @@ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn); * @addr: station mac address * @tid: the rx tid */ -void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif, - const u8 *addr, u16 tid); +static inline void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif, + const u8 *addr, u16 tid) +{ + if (WARN_ON(tid >= IEEE80211_NUM_TIDS)) + return; + ieee80211_manage_rx_ba_offl(vif, addr, tid); +} /** * ieee80211_stop_rx_ba_session_offl - stop a Rx BA session @@ -5467,8 +5491,13 @@ void ieee80211_start_rx_ba_session_offl(struct ieee80211_vif *vif, * @addr: station mac address * @tid: the rx tid */ -void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, - const u8 *addr, u16 tid); +static inline void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, + const u8 *addr, u16 tid) +{ + if (WARN_ON(tid >= IEEE80211_NUM_TIDS)) + return; + ieee80211_manage_rx_ba_offl(vif, addr, tid + IEEE80211_NUM_TIDS); +} /* Rate control API */ diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 1036c902d2c9..31b1bb11ba3f 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -384,7 +384,7 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons rcu_read_lock_bh(); n = __ipv6_neigh_lookup_noref(dev, pkey); - if (n && !atomic_inc_not_zero(&n->refcnt)) + if (n && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock_bh(); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index e4dd3a214034..afc39e3a3f7c 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -17,6 +17,7 @@ */ #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rcupdate.h> @@ -76,7 +77,7 @@ struct neigh_parms { void *sysctl_table; int dead; - atomic_t refcnt; + refcount_t refcnt; struct rcu_head rcu_head; int reachable_time; @@ -137,7 +138,7 @@ struct neighbour { unsigned long confirmed; unsigned long updated; rwlock_t lock; - atomic_t refcnt; + refcount_t refcnt; struct sk_buff_head arp_queue; unsigned int arp_queue_len_bytes; struct timer_list timer; @@ -317,6 +318,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); +bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb); @@ -394,12 +396,12 @@ void neigh_sysctl_unregister(struct neigh_parms *p); static inline void __neigh_parms_put(struct neigh_parms *parms) { - atomic_dec(&parms->refcnt); + refcount_dec(&parms->refcnt); } static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) { - atomic_inc(&parms->refcnt); + refcount_inc(&parms->refcnt); return parms; } @@ -409,18 +411,18 @@ static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) static inline void neigh_release(struct neighbour *neigh) { - if (atomic_dec_and_test(&neigh->refcnt)) + if (refcount_dec_and_test(&neigh->refcnt)) neigh_destroy(neigh); } static inline struct neighbour * neigh_clone(struct neighbour *neigh) { if (neigh) - atomic_inc(&neigh->refcnt); + refcount_inc(&neigh->refcnt); return neigh; } -#define neigh_hold(n) atomic_inc(&(n)->refcnt) +#define neigh_hold(n) refcount_inc(&(n)->refcnt) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fe80bb48ab1f..31a2b51bef2c 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -5,6 +5,7 @@ #define __NET_NET_NAMESPACE_H #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/workqueue.h> #include <linux/list.h> #include <linux/sysctl.h> @@ -46,7 +47,7 @@ struct netns_ipvs; #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) struct net { - atomic_t passive; /* To decided when the network + refcount_t passive; /* To decided when the network * namespace should be freed. */ atomic_t count; /* To decided when the network @@ -158,6 +159,7 @@ extern struct net init_net; struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, struct net *old_net); +void net_ns_barrier(void); #else /* CONFIG_NET_NS */ #include <linux/sched.h> #include <linux/nsproxy.h> @@ -168,6 +170,8 @@ static inline struct net *copy_net_ns(unsigned long flags, return ERR_PTR(-EINVAL); return old_net; } + +static inline void net_ns_barrier(void) {} #endif /* CONFIG_NET_NS */ diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 0b0c35c37125..925524ede6c8 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -8,7 +8,7 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); if (likely(skb->nf_bridge)) - atomic_set(&(skb->nf_bridge->use), 1); + refcount_set(&(skb->nf_bridge->use), 1); return skb->nf_bridge; } diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8ece3612d0cd..48407569585d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -225,9 +225,13 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, u32 seq); /* Iterate over all conntracks: if iter returns true, it's deleted. */ -void nf_ct_iterate_cleanup(struct net *net, - int (*iter)(struct nf_conn *i, void *data), - void *data, u32 portid, int report); +void nf_ct_iterate_cleanup_net(struct net *net, + int (*iter)(struct nf_conn *i, void *data), + void *data, u32 portid, int report); + +/* also set unconfirmed conntracks as dying. Only use in module exit path. */ +void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), + void *data); struct nf_conntrack_zone; diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index e01559b4d781..6d14b36e3a49 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -71,7 +71,7 @@ struct nf_conntrack_l3proto { struct module *me; }; -extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; +extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL /* Protocol pernet registration. */ @@ -100,7 +100,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; static inline struct nf_conntrack_l3proto * __nf_ct_l3proto_find(u_int16_t l3proto) { - if (unlikely(l3proto >= AF_MAX)) + if (unlikely(l3proto >= NFPROTO_NUMPROTO)) return &nf_conntrack_l3proto_generic; return rcu_dereference(nf_ct_l3protos[l3proto]); } diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8a8bab8d7b15..bd5be0d691d5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -281,6 +281,23 @@ struct nft_set_estimate { enum nft_set_class space; }; +/** + * struct nft_set_type - nf_tables set type + * + * @select_ops: function to select nft_set_ops + * @ops: default ops, used when no select_ops functions is present + * @list: used internally + * @owner: module reference + */ +struct nft_set_type { + const struct nft_set_ops *(*select_ops)(const struct nft_ctx *, + const struct nft_set_desc *desc, + u32 flags); + const struct nft_set_ops *ops; + struct list_head list; + struct module *owner; +}; + struct nft_set_ext; struct nft_expr; @@ -297,8 +314,6 @@ struct nft_expr; * @privsize: function to return size of set private data * @init: initialize private data of new set instance * @destroy: destroy private data of set instance - * @list: nf_tables_set_ops list node - * @owner: module reference * @elemsize: element private size * @features: features supported by the implementation */ @@ -336,7 +351,8 @@ struct nft_set_ops { struct nft_set *set, struct nft_set_iter *iter); - unsigned int (*privsize)(const struct nlattr * const nla[]); + unsigned int (*privsize)(const struct nlattr * const nla[], + const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est); @@ -345,14 +361,13 @@ struct nft_set_ops { const struct nlattr * const nla[]); void (*destroy)(const struct nft_set *set); - struct list_head list; - struct module *owner; unsigned int elemsize; u32 features; + const struct nft_set_type *type; }; -int nft_register_set(struct nft_set_ops *ops); -void nft_unregister_set(struct nft_set_ops *ops); +int nft_register_set(struct nft_set_type *type); +void nft_unregister_set(struct nft_set_type *type); /** * struct nft_set - nf_tables set instance diff --git a/include/net/netlabel.h b/include/net/netlabel.h index efe98068880f..72d6435fc16c 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -37,7 +37,7 @@ #include <linux/in6.h> #include <net/netlink.h> #include <net/request_sock.h> -#include <linux/atomic.h> +#include <linux/refcount.h> struct cipso_v4_doi; struct calipso_doi; @@ -136,7 +136,7 @@ struct netlbl_audit { * */ struct netlbl_lsm_cache { - atomic_t refcount; + refcount_t refcount; void (*free) (const void *data); void *data; }; @@ -295,7 +295,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags) cache = kzalloc(sizeof(*cache), flags); if (cache) - atomic_set(&cache->refcount, 1); + refcount_set(&cache->refcount, 1); return cache; } @@ -309,7 +309,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags) */ static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache) { - if (!atomic_dec_and_test(&cache->refcount)) + if (!refcount_dec_and_test(&cache->refcount)) return; if (cache->free) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index cd686c4fb32d..9a14a0850b0e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -122,6 +122,9 @@ struct netns_ipv4 { int sysctl_tcp_fin_timeout; unsigned int sysctl_tcp_notsent_lowat; int sysctl_tcp_tw_reuse; + int sysctl_tcp_sack; + int sysctl_tcp_window_scaling; + int sysctl_tcp_timestamps; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; diff --git a/include/net/netrom.h b/include/net/netrom.h index 110350aca3df..443a4ffca7aa 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -11,6 +11,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <net/sock.h> +#include <linux/refcount.h> #define NR_NETWORK_LEN 15 #define NR_TRANSPORT_LEN 5 @@ -93,7 +94,7 @@ struct nr_neigh { unsigned short count; unsigned int number; unsigned char failed; - atomic_t refcount; + refcount_t refcount; }; struct nr_route { @@ -109,7 +110,7 @@ struct nr_node { unsigned char which; unsigned char count; struct nr_route routes[3]; - atomic_t refcount; + refcount_t refcount; spinlock_t node_lock; }; @@ -118,21 +119,21 @@ struct nr_node { *********************************************************************/ #define nr_node_hold(__nr_node) \ - atomic_inc(&((__nr_node)->refcount)) + refcount_inc(&((__nr_node)->refcount)) static __inline__ void nr_node_put(struct nr_node *nr_node) { - if (atomic_dec_and_test(&nr_node->refcount)) { + if (refcount_dec_and_test(&nr_node->refcount)) { kfree(nr_node); } } #define nr_neigh_hold(__nr_neigh) \ - atomic_inc(&((__nr_neigh)->refcount)) + refcount_inc(&((__nr_neigh)->refcount)) static __inline__ void nr_neigh_put(struct nr_neigh *nr_neigh) { - if (atomic_dec_and_test(&nr_neigh->refcount)) { + if (refcount_dec_and_test(&nr_neigh->refcount)) { if (nr_neigh->ax25) ax25_cb_put(nr_neigh->ax25); kfree(nr_neigh->digipeat); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 269fd78bb0ae..537d0a0ad4c4 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -18,11 +18,32 @@ int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); #ifdef CONFIG_NET_CLS -void tcf_destroy_chain(struct tcf_proto __rcu **fl); +struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, + bool create); +void tcf_chain_put(struct tcf_chain *chain); +int tcf_block_get(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain); +void tcf_block_put(struct tcf_block *block); +int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode); + #else -static inline void tcf_destroy_chain(struct tcf_proto __rcu **fl) +static inline +int tcf_block_get(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain) +{ + return 0; +} + +static inline void tcf_block_put(struct tcf_block *block) { } + +static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) +{ + return TC_ACT_UNSPEC; +} #endif static inline unsigned long @@ -136,6 +157,25 @@ static inline void tcf_exts_to_list(const struct tcf_exts *exts, #endif } +static inline void +tcf_exts_stats_update(const struct tcf_exts *exts, + u64 bytes, u64 packets, u64 lastuse) +{ +#ifdef CONFIG_NET_CLS_ACT + int i; + + preempt_disable(); + + for (i = 0; i < exts->nr_actions; i++) { + struct tc_action *a = exts->actions[i]; + + tcf_action_stats_update(a, bytes, packets, lastuse); + } + + preempt_enable(); +#endif +} + /** * tcf_exts_exec - execute tc filter extensions * @skb: socket buffer diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index bec46f63f10c..2579c209ea51 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -113,9 +113,6 @@ static inline void qdisc_run(struct Qdisc *q) __qdisc_run(q); } -int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res, bool compat_mode); - static inline __be16 tc_skb_protocol(const struct sk_buff *skb) { /* We need to take extra care in case the skb came via diff --git a/include/net/request_sock.h b/include/net/request_sock.h index a12a5d25b27e..23e22054aa60 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -19,6 +19,7 @@ #include <linux/spinlock.h> #include <linux/types.h> #include <linux/bug.h> +#include <linux/refcount.h> #include <net/sock.h> @@ -29,7 +30,7 @@ struct proto; struct request_sock_ops { int family; - int obj_size; + unsigned int obj_size; struct kmem_cache *slab; char *slab_name; int (*rtx_syn_ack)(const struct sock *sk, @@ -89,7 +90,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, return NULL; req->rsk_listener = NULL; if (attach_listener) { - if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) { + if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { kmem_cache_free(ops->slab, req); return NULL; } @@ -100,7 +101,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, sk_node_init(&req_to_sk(req)->sk_node); sk_tx_queue_clear(req_to_sk(req)); req->saved_syn = NULL; - atomic_set(&req->rsk_refcnt, 0); + refcount_set(&req->rsk_refcnt, 0); return req; } @@ -108,7 +109,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, static inline void reqsk_free(struct request_sock *req) { /* temporary debugging */ - WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0); + WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); req->rsk_ops->destructor(req); if (req->rsk_listener) @@ -119,7 +120,7 @@ static inline void reqsk_free(struct request_sock *req) static inline void reqsk_put(struct request_sock *req) { - if (atomic_dec_and_test(&req->rsk_refcnt)) + if (refcount_dec_and_test(&req->rsk_refcnt)) reqsk_free(req); } diff --git a/include/net/route.h b/include/net/route.h index 2cc0e14c6359..cb0a76d9dde1 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -113,13 +113,16 @@ struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); -struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *flp, - const struct sk_buff *skb); +struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp, + const struct sk_buff *skb); +struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp, + struct fib_result *res, + const struct sk_buff *skb); static inline struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp) { - return __ip_route_output_key_hash(net, flp, NULL); + return ip_route_output_key_hash(net, flp, NULL); } struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, @@ -175,6 +178,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin); +int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin, + struct fib_result *res); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin) @@ -184,7 +190,9 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, rcu_read_lock(); err = ip_route_input_noref(skb, dst, src, tos, devin); if (!err) - skb_dst_force(skb); + skb_dst_force_safe(skb); + if (!skb_dst(skb)) + err = -EINVAL; rcu_read_unlock(); return err; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 78fa5fe32947..abe6b733d473 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -63,15 +63,18 @@ struct rtnl_link_ops { int maxtype; const struct nla_policy *policy; int (*validate)(struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); int (*newlink)(struct net *src_net, struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); int (*changelink)(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); void (*dellink)(struct net_device *dev, struct list_head *head); @@ -88,11 +91,13 @@ struct rtnl_link_ops { int slave_maxtype; const struct nla_policy *slave_policy; int (*slave_validate)(struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); int (*slave_changelink)(struct net_device *dev, struct net_device *slave_dev, struct nlattr *tb[], - struct nlattr *data[]); + struct nlattr *data[], + struct netlink_ext_ack *extack); size_t (*get_slave_size)(const struct net_device *dev, const struct net_device *slave_dev); int (*fill_slave_info)(struct sk_buff *skb, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 22e52093bfda..1c123e2b2415 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -8,6 +8,8 @@ #include <linux/pkt_cls.h> #include <linux/percpu.h> #include <linux/dynamic_queue_limits.h> +#include <linux/list.h> +#include <linux/refcount.h> #include <net/gen_stats.h> #include <net/rtnetlink.h> @@ -94,7 +96,7 @@ struct Qdisc { struct sk_buff *skb_bad_txq; struct rcu_head rcu_head; int padded; - atomic_t refcnt; + refcount_t refcnt; spinlock_t busylock ____cacheline_aligned_in_smp; }; @@ -153,7 +155,7 @@ struct Qdisc_class_ops { void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ - struct tcf_proto __rcu ** (*tcf_chain)(struct Qdisc *, unsigned long); + struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long); bool (*tcf_cl_offload)(u32 classid); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); @@ -192,8 +194,13 @@ struct Qdisc_ops { struct tcf_result { - unsigned long class; - u32 classid; + union { + struct { + unsigned long class; + u32 classid; + }; + const struct tcf_proto *goto_tp; + }; }; struct tcf_proto_ops { @@ -236,6 +243,7 @@ struct tcf_proto { struct Qdisc *q; void *data; const struct tcf_proto_ops *ops; + struct tcf_chain *chain; struct rcu_head rcu; }; @@ -247,6 +255,19 @@ struct qdisc_skb_cb { unsigned char data[QDISC_CB_PRIV_LEN]; }; +struct tcf_chain { + struct tcf_proto __rcu *filter_chain; + struct tcf_proto __rcu **p_filter_chain; + struct list_head list; + struct tcf_block *block; + u32 index; /* chain index */ + unsigned int refcnt; +}; + +struct tcf_block { + struct list_head chain_list; +}; + static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h index 9b9fb122b31f..e5c57d0a082d 100644 --- a/include/net/sctp/auth.h +++ b/include/net/sctp/auth.h @@ -31,6 +31,7 @@ #define __sctp_auth_h__ #include <linux/list.h> +#include <linux/refcount.h> struct sctp_endpoint; struct sctp_association; @@ -53,7 +54,7 @@ struct sctp_hmac { * over SCTP-AUTH */ struct sctp_auth_bytes { - atomic_t refcnt; + refcount_t refcnt; __u32 len; __u8 data[]; }; @@ -76,7 +77,7 @@ static inline void sctp_auth_key_hold(struct sctp_auth_bytes *key) if (!key) return; - atomic_inc(&key->refcnt); + refcount_inc(&key->refcnt); } void sctp_auth_key_put(struct sctp_auth_bytes *key); @@ -97,8 +98,10 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc, struct sctp_hmac_algo_param *hmacs); int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc, __be16 hmac_id); -int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc); -int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc); +int sctp_auth_send_cid(enum sctp_cid chunk, + const struct sctp_association *asoc); +int sctp_auth_recv_cid(enum sctp_cid chunk, + const struct sctp_association *asoc); void sctp_auth_calculate_hmac(const struct sctp_association *asoc, struct sk_buff *skb, struct sctp_auth_chunk *auth, gfp_t gfp); diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index d4a20d00461c..d4679e7a5ed5 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -132,7 +132,7 @@ typedef union { struct sctp_association *asoc; struct sctp_transport *transport; struct sctp_bind_addr *bp; - sctp_init_chunk_t *init; + struct sctp_init_chunk *init; struct sctp_ulpevent *ulpevent; struct sctp_packet *packet; sctp_sackhdr_t *sackh; @@ -173,7 +173,7 @@ SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc) SCTP_ARG_CONSTRUCTOR(TRANSPORT, struct sctp_transport *, transport) SCTP_ARG_CONSTRUCTOR(BA, struct sctp_bind_addr *, bp) -SCTP_ARG_CONSTRUCTOR(PEER_INIT, sctp_init_chunk_t *, init) +SCTP_ARG_CONSTRUCTOR(PEER_INIT, struct sctp_init_chunk *, init) SCTP_ARG_CONSTRUCTOR(ULPEVENT, struct sctp_ulpevent *, ulpevent) SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet) SCTP_ARG_CONSTRUCTOR(SACKH, sctp_sackhdr_t *, sackh) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index b07a745ab69f..9b18044c551e 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -130,7 +130,7 @@ typedef enum { */ typedef union { - sctp_cid_t chunk; + enum sctp_cid chunk; sctp_event_timeout_t timeout; sctp_event_other_t other; sctp_event_primitive_t primitive; @@ -141,7 +141,7 @@ static inline sctp_subtype_t \ SCTP_ST_## _name (_type _arg) \ { sctp_subtype_t _retval; _retval._elt = _arg; return _retval; } -SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, sctp_cid_t, chunk) +SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) SCTP_SUBTYPE_CONSTRUCTOR(OTHER, sctp_event_other_t, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) @@ -152,7 +152,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) /* Calculate the actual data size in a data chunk */ #define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\ - (unsigned long)(c->chunk_hdr)\ - - sizeof(sctp_data_chunk_t))) + - sizeof(struct sctp_data_chunk))) /* Internal error codes */ typedef enum { diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 069582ee5d7f..a9519a06a23b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -470,7 +470,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ - ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ + ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\ pos.v += SCTP_PAD4(ntohs(pos.p->length))) #define sctp_walk_errors(err, chunk_hdr)\ @@ -478,7 +478,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ - sizeof(sctp_chunkhdr_t));\ + sizeof(struct sctp_chunkhdr));\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 47113f2c4b0a..860f378333b5 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -325,19 +325,17 @@ void sctp_generate_heartbeat_event(unsigned long peer); void sctp_generate_reconf_event(unsigned long peer); void sctp_generate_proto_unreach_event(unsigned long peer); -void sctp_ootb_pkt_free(struct sctp_packet *); +void sctp_ootb_pkt_free(struct sctp_packet *packet); -struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *, - const struct sctp_association *, - struct sctp_chunk *, +struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + struct sctp_chunk *chunk, gfp_t gfp, int *err, struct sctp_chunk **err_chk_p); -int sctp_addip_addr_config(struct sctp_association *, sctp_param_t, - struct sockaddr_storage*, int); /* 3rd level prototypes */ -__u32 sctp_generate_tag(const struct sctp_endpoint *); -__u32 sctp_generate_tsn(const struct sctp_endpoint *); +__u32 sctp_generate_tag(const struct sctp_endpoint *ep); +__u32 sctp_generate_tsn(const struct sctp_endpoint *ep); /* Extern declarations for major data structures. */ extern sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES]; @@ -349,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) __u16 size; size = ntohs(chunk->chunk_hdr->length); - size -= sizeof(sctp_data_chunk_t); + size -= sizeof(struct sctp_data_chunk); return size; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a8b38e123f97..5ab29af8ca8a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -310,9 +310,10 @@ struct sctp_cookie { __u32 adaptation_ind; - __u8 auth_random[sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH]; + __u8 auth_random[sizeof(struct sctp_paramhdr) + + SCTP_AUTH_RANDOM_LENGTH]; __u8 auth_hmacs[SCTP_AUTH_NUM_HMACS * sizeof(__u16) + 2]; - __u8 auth_chunks[sizeof(sctp_paramhdr_t) + SCTP_AUTH_MAX_CHUNKS]; + __u8 auth_chunks[sizeof(struct sctp_paramhdr) + SCTP_AUTH_MAX_CHUNKS]; /* This is a shim for my peer's INIT packet, followed by * a copy of the raw address list of the association. @@ -377,10 +378,11 @@ typedef struct sctp_sender_hb_info { __u64 hb_nonce; } sctp_sender_hb_info_t; -int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp); -int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp); +int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + gfp_t gfp); void sctp_stream_free(struct sctp_stream *stream); void sctp_stream_clear(struct sctp_stream *stream); +void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new); /* What is the current SSN number for this stream? */ #define sctp_ssn_peek(stream, type, sid) \ @@ -494,7 +496,7 @@ struct sctp_datamsg { /* Chunks waiting to be submitted to lower layer. */ struct list_head chunks; /* Reference counting. */ - atomic_t refcnt; + refcount_t refcnt; /* When is this message no longer interesting to the peer? */ unsigned long expires_at; /* Did the messenge fail to send? */ @@ -522,7 +524,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *); struct sctp_chunk { struct list_head list; - atomic_t refcnt; + refcount_t refcnt; /* How many times this chunk have been sent, for prsctp RTX policy */ int sent_count; @@ -733,7 +735,7 @@ struct sctp_transport { struct rhlist_head node; /* Reference counting. */ - atomic_t refcnt; + refcount_t refcnt; /* RTO-Pending : A flag used to track if one of the DATA * chunks sent to this address is currently being * used to compute a RTT. If this flag is 0, @@ -1172,7 +1174,7 @@ struct sctp_ep_common { * refcnt - Reference count access to this object. * dead - Do not attempt to use this object. */ - atomic_t refcnt; + refcount_t refcnt; bool dead; /* What socket does this endpoint belong to? */ @@ -1296,11 +1298,11 @@ int sctp_has_association(struct net *net, const union sctp_addr *laddr, int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - sctp_cid_t, sctp_init_chunk_t *peer_init, + enum sctp_cid cid, struct sctp_init_chunk *peer_init, struct sctp_chunk *chunk, struct sctp_chunk **err_chunk); int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk, const union sctp_addr *peer, - sctp_init_chunk_t *init, gfp_t gfp); + struct sctp_init_chunk *init, gfp_t gfp); __u32 sctp_generate_tag(const struct sctp_endpoint *); __u32 sctp_generate_tsn(const struct sctp_endpoint *); @@ -1750,7 +1752,7 @@ struct sctp_association { __u32 default_rcv_context; /* Stream arrays */ - struct sctp_stream *stream; + struct sctp_stream stream; /* All outbound chunks go through this structure. */ struct sctp_outq outqueue; @@ -1952,8 +1954,8 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *, const union sctp_addr *, const union sctp_addr *); void sctp_assoc_migrate(struct sctp_association *, struct sock *); -void sctp_assoc_update(struct sctp_association *old, - struct sctp_association *new); +int sctp_assoc_update(struct sctp_association *old, + struct sctp_association *new); __u32 sctp_association_get_next_tsn(struct sctp_association *); diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index b94006f6fbdd..031bf16d1521 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -8,10 +8,11 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); u32 secure_tcp_seq(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); -u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr); +u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr); u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport); -u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr); +u32 secure_tcpv6_ts_off(const struct net *net, + const __be32 *saddr, const __be32 *daddr); u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, diff --git a/include/net/sock.h b/include/net/sock.h index f33e3d134e0b..8c85791fc196 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -66,6 +66,7 @@ #include <linux/poll.h> #include <linux/atomic.h> +#include <linux/refcount.h> #include <net/dst.h> #include <net/checksum.h> #include <net/tcp_states.h> @@ -219,7 +220,7 @@ struct sock_common { u32 skc_tw_rcv_nxt; /* struct tcp_timewait_sock */ }; - atomic_t skc_refcnt; + refcount_t skc_refcnt; /* private: */ int skc_dontcopy_end[0]; union { @@ -253,6 +254,7 @@ struct sock_common { * @sk_ll_usec: usecs to busypoll when there is no data * @sk_allocation: allocation mode * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) + * @sk_pacing_status: Pacing status (requested, handled by sch_fq) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes * @sk_padding: unused element for alignment @@ -389,14 +391,14 @@ struct sock { /* ===== cache line for TX ===== */ int sk_wmem_queued; - atomic_t sk_wmem_alloc; + refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; struct sk_buff *sk_send_head; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; __u32 sk_dst_pending_confirm; - /* Note: 32bit hole on 64bit arches */ + u32 sk_pacing_status; /* see enum sk_pacing */ long sk_sndtimeo; struct timer_list sk_timer; __u32 sk_priority; @@ -475,6 +477,12 @@ struct sock { struct rcu_head sk_rcu; }; +enum sk_pacing { + SK_PACING_NONE = 0, + SK_PACING_NEEDED = 1, + SK_PACING_FQ = 2, +}; + #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) #define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk))) @@ -604,7 +612,7 @@ static inline bool __sk_del_node_init(struct sock *sk) static __always_inline void sock_hold(struct sock *sk) { - atomic_inc(&sk->sk_refcnt); + refcount_inc(&sk->sk_refcnt); } /* Ungrab socket in the context, which assumes that socket refcnt @@ -612,7 +620,7 @@ static __always_inline void sock_hold(struct sock *sk) */ static __always_inline void __sock_put(struct sock *sk) { - atomic_dec(&sk->sk_refcnt); + refcount_dec(&sk->sk_refcnt); } static inline bool sk_del_node_init(struct sock *sk) @@ -621,7 +629,7 @@ static inline bool sk_del_node_init(struct sock *sk) if (rc) { /* paranoid for a while -acme */ - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } return rc; @@ -643,7 +651,7 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk) if (rc) { /* paranoid for a while -acme */ - WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); __sock_put(sk); } return rc; @@ -900,7 +908,10 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) static inline void sk_incoming_cpu_update(struct sock *sk) { - sk->sk_incoming_cpu = raw_smp_processor_id(); + int cpu = raw_smp_processor_id(); + + if (unlikely(sk->sk_incoming_cpu != cpu)) + sk->sk_incoming_cpu = cpu; } static inline void sock_rps_record_flow_hash(__u32 hash) @@ -1073,6 +1084,7 @@ struct proto { bool (*stream_memory_free)(const struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); + void (*leave_memory_pressure)(struct sock *sk); atomic_long_t *memory_allocated; /* Current allocated memory. */ struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* @@ -1081,7 +1093,7 @@ struct proto { * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ - int *memory_pressure; + unsigned long *memory_pressure; long *sysctl_mem; int *sysctl_wmem; int *sysctl_rmem; @@ -1133,9 +1145,9 @@ static inline void sk_refcnt_debug_dec(struct sock *sk) static inline void sk_refcnt_debug_release(const struct sock *sk) { - if (atomic_read(&sk->sk_refcnt) != 1) + if (refcount_read(&sk->sk_refcnt) != 1) printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", - sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt)); + sk->sk_prot->name, sk, refcount_read(&sk->sk_refcnt)); } #else /* SOCK_REFCNT_DEBUG */ #define sk_refcnt_debug_inc(sk) do { } while (0) @@ -1186,25 +1198,6 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) return !!*sk->sk_prot->memory_pressure; } -static inline void sk_leave_memory_pressure(struct sock *sk) -{ - int *memory_pressure = sk->sk_prot->memory_pressure; - - if (!memory_pressure) - return; - - if (*memory_pressure) - *memory_pressure = 0; -} - -static inline void sk_enter_memory_pressure(struct sock *sk) -{ - if (!sk->sk_prot->enter_memory_pressure) - return; - - sk->sk_prot->enter_memory_pressure(sk); -} - static inline long sk_memory_allocated(const struct sock *sk) { @@ -1644,7 +1637,7 @@ void sock_init_data(struct socket *sock, struct sock *sk); /* Ungrab socket and destroy it, if it was the last reference. */ static inline void sock_put(struct sock *sk) { - if (atomic_dec_and_test(&sk->sk_refcnt)) + if (refcount_dec_and_test(&sk->sk_refcnt)) sk_free(sk); } /* Generic version of sock_put(), dealing with all sockets @@ -1704,6 +1697,7 @@ static inline void sock_orphan(struct sock *sk) static inline void sock_graft(struct sock *sk, struct socket *parent) { + WARN_ON(parent->sk); write_lock_bh(&sk->sk_callback_lock); sk->sk_wq = parent->wq; parent->sk = sk; @@ -1919,7 +1913,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro */ static inline int sk_wmem_alloc_get(const struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) - 1; + return refcount_read(&sk->sk_wmem_alloc) - 1; } /** @@ -1953,11 +1947,10 @@ static inline bool sk_has_allocations(const struct sock *sk) * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory * barrier call. They were added due to the race found within the tcp code. * - * Consider following tcp code paths: - * - * CPU1 CPU2 + * Consider following tcp code paths:: * - * sys_select receive packet + * CPU1 CPU2 + * sys_select receive packet * ... ... * __add_wait_queue update tp->rcv_nxt * ... ... @@ -2035,8 +2028,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer, void sk_stop_timer(struct sock *sk, struct timer_list *timer); -int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, - unsigned int flags, +int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, + struct sk_buff *skb, unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb)); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); @@ -2063,7 +2056,7 @@ static inline unsigned long sock_wspace(struct sock *sk) int amt = 0; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { - amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc); if (amt < 0) amt = 0; } @@ -2144,7 +2137,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); */ static inline bool sock_writeable(const struct sock *sk) { - return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); + return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); } static inline gfp_t gfp_any(void) @@ -2264,7 +2257,7 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); * @tsflags: timestamping flags to use * @tx_flags: completed with instructions for time stamping * - * Note : callers should take care of initial *tx_flags value (usually 0) + * Note: callers should take care of initial ``*tx_flags`` value (usually 0) */ static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags, __u8 *tx_flags) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 929d6af321cd..8ae9e3b6392e 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -46,6 +46,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_PORT_PARENT_ID, SWITCHDEV_ATTR_ID_PORT_STP_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, + SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT, SWITCHDEV_ATTR_ID_PORT_MROUTER, SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, @@ -62,6 +63,7 @@ struct switchdev_attr { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ + unsigned long brport_flags_support; /* PORT_BRIDGE_FLAGS_SUPPORT */ bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ @@ -153,8 +155,11 @@ struct switchdev_ops { }; enum switchdev_notifier_type { - SWITCHDEV_FDB_ADD = 1, - SWITCHDEV_FDB_DEL, + SWITCHDEV_FDB_ADD_TO_BRIDGE = 1, + SWITCHDEV_FDB_DEL_TO_BRIDGE, + SWITCHDEV_FDB_ADD_TO_DEVICE, + SWITCHDEV_FDB_DEL_TO_DEVICE, + SWITCHDEV_FDB_OFFLOADED, }; struct switchdev_notifier_info { @@ -212,6 +217,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b); + +#define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops)) #else static inline void switchdev_deferred_process(void) @@ -317,6 +324,8 @@ static inline bool switchdev_port_same_parent_id(struct net_device *a, return false; } +#define SWITCHDEV_SET_OPS(netdev, ops) do {} while (0) + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index b6f173910226..d576374c4d6f 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -15,7 +15,7 @@ struct tcf_gact { }; #define to_gact(a) ((struct tcf_gact *)a) -static inline bool is_tcf_gact_shot(const struct tc_action *a) +static inline bool __is_tcf_gact_act(const struct tc_action *a, int act) { #ifdef CONFIG_NET_CLS_ACT struct tcf_gact *gact; @@ -24,10 +24,21 @@ static inline bool is_tcf_gact_shot(const struct tc_action *a) return false; gact = to_gact(a); - if (gact->tcf_action == TC_ACT_SHOT) + if (gact->tcf_action == act) return true; #endif return false; } + +static inline bool is_tcf_gact_shot(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_SHOT); +} + +static inline bool is_tcf_gact_trap(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_TRAP); +} + #endif /* __NET_TC_GACT_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index be6223c586fa..70483296157f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -46,6 +46,10 @@ #include <linux/seq_file.h> #include <linux/memcontrol.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/bpf-cgroup.h> + extern struct inet_hashinfo tcp_hashinfo; extern struct percpu_counter tcp_orphan_count; @@ -237,9 +241,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_timestamps; -extern int sysctl_tcp_window_scaling; -extern int sysctl_tcp_sack; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; @@ -279,7 +280,7 @@ extern int sysctl_tcp_pacing_ca_ratio; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; -extern int tcp_memory_pressure; +extern unsigned long tcp_memory_pressure; /* optimized version of sk_under_memory_pressure() for TCP sockets */ static inline bool tcp_under_memory_pressure(const struct sock *sk) @@ -353,6 +354,8 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, + size_t size, int flags); void tcp_release_cb(struct sock *sk); void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); @@ -427,7 +430,7 @@ void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -void tcp_parse_options(const struct sk_buff *skb, +void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); @@ -519,8 +522,9 @@ static inline u32 tcp_cookie_time(void) u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); -__u32 cookie_init_timestamp(struct request_sock *req); -bool cookie_timestamp_decode(struct tcp_options_received *opt); +u64 cookie_init_timestamp(struct request_sock *req); +bool cookie_timestamp_decode(const struct net *net, + struct tcp_options_received *opt); bool cookie_ecn_ok(const struct tcp_options_received *opt, const struct net *net, const struct dst_entry *dst); @@ -574,6 +578,7 @@ void tcp_fin(struct sock *sk); void tcp_init_xmit_timers(struct sock *); static inline void tcp_clear_xmit_timers(struct sock *sk) { + hrtimer_cancel(&tcp_sk(sk)->pacing_timer); inet_csk_clear_xmit_timers(sk); } @@ -699,17 +704,61 @@ u32 __tcp_select_window(struct sock *sk); void tcp_send_window_probe(struct sock *sk); -/* TCP timestamps are only 32-bits, this causes a slight - * complication on 64-bit systems since we store a snapshot - * of jiffies in the buffer control blocks below. We decided - * to use only the low 32-bits of jiffies and hide the ugly - * casts with the following macro. +/* TCP uses 32bit jiffies to save some space. + * Note that this is different from tcp_time_stamp, which + * historically has been the same until linux-4.13. + */ +#define tcp_jiffies32 ((u32)jiffies) + +/* + * Deliver a 32bit value for TCP timestamp option (RFC 7323) + * It is no longer tied to jiffies, but to 1 ms clock. + * Note: double check if you want to use tcp_jiffies32 instead of this. + */ +#define TCP_TS_HZ 1000 + +static inline u64 tcp_clock_ns(void) +{ + return local_clock(); +} + +static inline u64 tcp_clock_us(void) +{ + return div_u64(tcp_clock_ns(), NSEC_PER_USEC); +} + +/* This should only be used in contexts where tp->tcp_mstamp is up to date */ +static inline u32 tcp_time_stamp(const struct tcp_sock *tp) +{ + return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ); +} + +/* Could use tcp_clock_us() / 1000, but this version uses a single divide */ +static inline u32 tcp_time_stamp_raw(void) +{ + return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ); +} + + +/* Refresh 1us clock of a TCP socket, + * ensuring monotically increasing values. */ -#define tcp_time_stamp ((__u32)(jiffies)) +static inline void tcp_mstamp_refresh(struct tcp_sock *tp) +{ + u64 val = tcp_clock_us(); + + if (val > tp->tcp_mstamp) + tp->tcp_mstamp = val; +} + +static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) +{ + return max_t(s64, t1 - t0, 0); +} static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) { - return skb->skb_mstamp.stamp_jiffies; + return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ); } @@ -774,9 +823,9 @@ struct tcp_skb_cb { /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ - struct skb_mstamp first_tx_mstamp; + u64 first_tx_mstamp; /* when we reached the "delivered" count */ - struct skb_mstamp delivered_mstamp; + u64 delivered_mstamp; } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -892,7 +941,7 @@ struct ack_sample { * A sample is invalid if "delivered" or "interval_us" is negative. */ struct rate_sample { - struct skb_mstamp prior_mstamp; /* starting timestamp for interval */ + u64 prior_mstamp; /* starting timestamp for interval */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ s32 delivered; /* number of packets delivered over interval */ long interval_us; /* time for tp->delivered to incr "delivered" */ @@ -955,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name); void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); -int tcp_set_congestion_control(struct sock *sk, const char *name); +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load); +void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca); u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); @@ -1241,7 +1292,7 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) if (!sysctl_tcp_slow_start_after_idle || tp->packets_out || ca_ops->cong_control) return; - delta = tcp_time_stamp - tp->lsndtime; + delta = tcp_jiffies32 - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) tcp_cwnd_restart(sk, delta); } @@ -1277,6 +1328,7 @@ extern void tcp_openreq_init_rwin(struct request_sock *req, const struct dst_entry *dst); void tcp_enter_memory_pressure(struct sock *sk); +void tcp_leave_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) { @@ -1303,8 +1355,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) { const struct inet_connection_sock *icsk = &tp->inet_conn; - return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime, - tcp_time_stamp - tp->rcv_tstamp); + return min_t(u32, tcp_jiffies32 - icsk->icsk_ack.lrcvtime, + tcp_jiffies32 - tp->rcv_tstamp); } static inline int tcp_fin_time(const struct sock *sk) @@ -1395,6 +1447,7 @@ struct tcp_md5sig_key { u8 keylen; u8 family; /* AF_INET or AF_INET6 */ union tcp_md5_addr addr; + u8 prefixlen; u8 key[TCP_MD5SIG_MAXKEYLEN]; struct rcu_head rcu; }; @@ -1438,9 +1491,10 @@ struct tcp_md5sig_pool { int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, - int family, const u8 *newkey, u8 newkeylen, gfp_t gfp); + int family, u8 prefixlen, const u8 *newkey, u8 newkeylen, + gfp_t gfp); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, - int family); + int family, u8 prefixlen); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk); @@ -1800,6 +1854,7 @@ struct tcp_sock_af_ops { const struct sock *sk, const struct sk_buff *skb); int (*md5_parse)(struct sock *sk, + int optname, char __user *optval, int optlen); #endif @@ -1825,7 +1880,7 @@ struct tcp_request_sock_ops { struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, const struct request_sock *req); u32 (*init_seq)(const struct sk_buff *skb); - u32 (*init_ts_off)(const struct sk_buff *skb); + u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, @@ -1858,7 +1913,7 @@ void tcp_init(void); /* tcp_recovery.c */ extern void tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, - const struct skb_mstamp *xmit_time); + u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); /* @@ -1945,4 +2000,89 @@ static inline void tcp_listendrop(const struct sock *sk) __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); } +enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer); + +/* + * Interface for adding Upper Level Protocols over TCP + */ + +#define TCP_ULP_NAME_MAX 16 +#define TCP_ULP_MAX 128 +#define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX) + +struct tcp_ulp_ops { + struct list_head list; + + /* initialize ulp */ + int (*init)(struct sock *sk); + /* cleanup ulp */ + void (*release)(struct sock *sk); + + char name[TCP_ULP_NAME_MAX]; + struct module *owner; +}; +int tcp_register_ulp(struct tcp_ulp_ops *type); +void tcp_unregister_ulp(struct tcp_ulp_ops *type); +int tcp_set_ulp(struct sock *sk, const char *name); +void tcp_get_available_ulp(char *buf, size_t len); +void tcp_cleanup_ulp(struct sock *sk); + +/* Call BPF_SOCK_OPS program that returns an int. If the return value + * is < 0, then the BPF op failed (for example if the loaded BPF + * program does not support the chosen operation or there is no BPF + * program loaded). + */ +#ifdef CONFIG_BPF +static inline int tcp_call_bpf(struct sock *sk, int op) +{ + struct bpf_sock_ops_kern sock_ops; + int ret; + + if (sk_fullsock(sk)) + sock_owned_by_me(sk); + + memset(&sock_ops, 0, sizeof(sock_ops)); + sock_ops.sk = sk; + sock_ops.op = op; + + ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); + if (ret == 0) + ret = sock_ops.reply; + else + ret = -1; + return ret; +} +#else +static inline int tcp_call_bpf(struct sock *sk, int op) +{ + return -EPERM; +} +#endif + +static inline u32 tcp_timeout_init(struct sock *sk) +{ + int timeout; + + timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT); + + if (timeout <= 0) + timeout = TCP_TIMEOUT_INIT; + return timeout; +} + +static inline u32 tcp_rwnd_init_bpf(struct sock *sk) +{ + int rwnd; + + rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT); + + if (rwnd < 0) + rwnd = 0; + return rwnd; +} + +static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) +{ + return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1); +} #endif /* _TCP_H */ diff --git a/include/net/tls.h b/include/net/tls.h new file mode 100644 index 000000000000..b89d397dd62f --- /dev/null +++ b/include/net/tls.h @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _TLS_OFFLOAD_H +#define _TLS_OFFLOAD_H + +#include <linux/types.h> + +#include <uapi/linux/tls.h> + + +/* Maximum data size carried in a TLS record */ +#define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14) + +#define TLS_HEADER_SIZE 5 +#define TLS_NONCE_OFFSET TLS_HEADER_SIZE + +#define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type) + +#define TLS_RECORD_TYPE_DATA 0x17 + +#define TLS_AAD_SPACE_SIZE 13 + +struct tls_sw_context { + struct crypto_aead *aead_send; + + /* Sending context */ + char aad_space[TLS_AAD_SPACE_SIZE]; + + unsigned int sg_plaintext_size; + int sg_plaintext_num_elem; + struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS]; + + unsigned int sg_encrypted_size; + int sg_encrypted_num_elem; + struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS]; + + /* AAD | sg_plaintext_data | sg_tag */ + struct scatterlist sg_aead_in[2]; + /* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */ + struct scatterlist sg_aead_out[2]; +}; + +enum { + TLS_PENDING_CLOSED_RECORD +}; + +struct tls_context { + union { + struct tls_crypto_info crypto_send; + struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128; + }; + + void *priv_ctx; + + u16 prepend_size; + u16 tag_size; + u16 overhead_size; + u16 iv_size; + char *iv; + u16 rec_seq_size; + char *rec_seq; + + struct scatterlist *partially_sent_record; + u16 partially_sent_offset; + unsigned long flags; + + u16 pending_open_record_frags; + int (*push_pending_record)(struct sock *sk, int flags); + void (*free_resources)(struct sock *sk); + + void (*sk_write_space)(struct sock *sk); + void (*sk_proto_close)(struct sock *sk, long timeout); + + int (*setsockopt)(struct sock *sk, int level, + int optname, char __user *optval, + unsigned int optlen); + int (*getsockopt)(struct sock *sk, int level, + int optname, char __user *optval, + int __user *optlen); +}; + +int wait_on_pending_writer(struct sock *sk, long *timeo); +int tls_sk_query(struct sock *sk, int optname, char __user *optval, + int __user *optlen); +int tls_sk_attach(struct sock *sk, int optname, char __user *optval, + unsigned int optlen); + + +int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx); +int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int tls_sw_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags); +void tls_sw_close(struct sock *sk, long timeout); + +void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); +void tls_icsk_clean_acked(struct sock *sk); + +int tls_push_sg(struct sock *sk, struct tls_context *ctx, + struct scatterlist *sg, u16 first_offset, + int flags); +int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx, + int flags, long *timeo); + +static inline bool tls_is_pending_closed_record(struct tls_context *ctx) +{ + return test_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); +} + +static inline int tls_complete_pending_work(struct sock *sk, + struct tls_context *ctx, + int flags, long *timeo) +{ + int rc = 0; + + if (unlikely(sk->sk_write_pending)) + rc = wait_on_pending_writer(sk, timeo); + + if (!rc && tls_is_pending_closed_record(ctx)) + rc = tls_push_pending_closed_record(sk, ctx, flags, timeo); + + return rc; +} + +static inline bool tls_is_partially_sent_record(struct tls_context *ctx) +{ + return !!ctx->partially_sent_record; +} + +static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) +{ + return tls_ctx->pending_open_record_frags; +} + +static inline void tls_err_abort(struct sock *sk) +{ + sk->sk_err = -EBADMSG; + sk->sk_error_report(sk); +} + +static inline bool tls_bigint_increment(unsigned char *seq, int len) +{ + int i; + + for (i = len - 1; i >= 0; i--) { + ++seq[i]; + if (seq[i] != 0) + break; + } + + return (i == -1); +} + +static inline void tls_advance_record_sn(struct sock *sk, + struct tls_context *ctx) +{ + if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size)) + tls_err_abort(sk); + tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + ctx->iv_size); +} + +static inline void tls_fill_prepend(struct tls_context *ctx, + char *buf, + size_t plaintext_len, + unsigned char record_type) +{ + size_t pkt_len, iv_size = ctx->iv_size; + + pkt_len = plaintext_len + iv_size + ctx->tag_size; + + /* we cover nonce explicit here as well, so buf should be of + * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE + */ + buf[0] = record_type; + buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.version); + buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.version); + /* we can use IV for nonce explicit according to spec */ + buf[3] = pkt_len >> 8; + buf[4] = pkt_len & 0xFF; + memcpy(buf + TLS_NONCE_OFFSET, + ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size); +} + +static inline struct tls_context *tls_get_ctx(const struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ulp_data; +} + +static inline struct tls_sw_context *tls_sw_ctx( + const struct tls_context *tls_ctx) +{ + return (struct tls_sw_context *)tls_ctx->priv_ctx; +} + +static inline struct tls_offload_context *tls_offload_ctx( + const struct tls_context *tls_ctx) +{ + return (struct tls_offload_context *)tls_ctx->priv_ctx; +} + +int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, + unsigned char *record_type); + +#endif /* _TLS_OFFLOAD_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 3391dbd73959..972ce4baab6b 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -249,13 +249,8 @@ void udp_destruct_sock(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); -static inline struct sk_buff * -__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked, - int *off, int *err) -{ - return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - udp_skb_destructor, peeked, off, err); -} +struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, + int noblock, int *peeked, int *off, int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *err) { @@ -307,6 +302,67 @@ struct sock *__udp6_lib_lookup(struct net *net, struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport); +/* UDP uses skb->dev_scratch to cache as much information as possible and avoid + * possibly multiple cache miss on dequeue() + */ +#if BITS_PER_LONG == 64 + +/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on + * cold cache lines at recvmsg time. + * skb->len can be stored on 16 bits since the udp header has been already + * validated and pulled. + */ +struct udp_dev_scratch { + u32 truesize; + u16 len; + bool is_linear; + bool csum_unnecessary; +}; + +static inline unsigned int udp_skb_len(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->len; +} + +static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary; +} + +static inline bool udp_skb_is_linear(struct sk_buff *skb) +{ + return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear; +} + +#else +static inline unsigned int udp_skb_len(struct sk_buff *skb) +{ + return skb->len; +} + +static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) +{ + return skb_csum_unnecessary(skb); +} + +static inline bool udp_skb_is_linear(struct sk_buff *skb) +{ + return !skb_is_nonlinear(skb); +} +#endif + +static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, + struct iov_iter *to) +{ + int n, copy = len - off; + + n = copy_to_iter(skb->data + off, copy, to); + if (n == copy) + return 0; + + return -EFAULT; +} + /* * SNMP statistics for UDP and UDP-Lite */ diff --git a/include/net/udplite.h b/include/net/udplite.h index ea340524f99b..b7a18f63d86d 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -26,8 +26,8 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, /* Designate sk as UDP-Lite socket */ static inline int udplite_sk_init(struct sock *sk) { + udp_init_sock(sk); udp_sk(sk)->pcflag = UDPLITE_BIT; - sk->sk_destruct = udp_destruct_sock; return 0; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 49a59202f85e..3f430e38ab82 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -183,7 +183,7 @@ struct vxlan_sock { struct hlist_node hlist; struct socket *sock; struct hlist_head vni_list[VNI_HASH_SIZE]; - atomic_t refcnt; + refcount_t refcnt; u32 flags; }; @@ -221,9 +221,17 @@ struct vxlan_config { bool no_share; }; +struct vxlan_dev_node { + struct hlist_node hlist; + struct vxlan_dev *vxlan; +}; + /* Pseudo network device */ struct vxlan_dev { - struct hlist_node hlist; /* vni hash table */ + struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */ +#if IS_ENABLED(CONFIG_IPV6) + struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */ +#endif struct list_head next; /* vxlan's per namespace list */ struct vxlan_sock __rcu *vn4_sock; /* listening socket for IPv4 */ #if IS_ENABLED(CONFIG_IPV6) @@ -232,7 +240,6 @@ struct vxlan_dev { struct net_device *dev; struct net *net; /* netns for packet i/o */ struct vxlan_rdst default_dst; /* default destination */ - u32 flags; /* VXLAN_F_* in vxlan.h */ struct timer_list age_timer; spinlock_t hash_lock; @@ -259,6 +266,7 @@ struct vxlan_dev { #define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 #define VXLAN_F_COLLECT_METADATA 0x2000 #define VXLAN_F_GPE 0x4000 +#define VXLAN_F_IPV6_LINKLOCAL 0x8000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable @@ -273,6 +281,7 @@ struct vxlan_dev { /* Flags that can be set together with VXLAN_F_GPE. */ #define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ VXLAN_F_IPV6 | \ + VXLAN_F_IPV6_LINKLOCAL | \ VXLAN_F_UDP_ZERO_CSUM_TX | \ VXLAN_F_UDP_ZERO_CSUM6_TX | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ diff --git a/include/net/x25.h b/include/net/x25.h index 6d30a01d281d..2609b57bd459 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -11,6 +11,7 @@ #define _X25_H #include <linux/x25.h> #include <linux/slab.h> +#include <linux/refcount.h> #include <net/sock.h> #define X25_ADDR_LEN 16 @@ -129,7 +130,7 @@ struct x25_route { struct x25_address address; unsigned int sigdigits; struct net_device *dev; - atomic_t refcnt; + refcount_t refcnt; }; struct x25_neigh { @@ -141,7 +142,7 @@ struct x25_neigh { unsigned long t20; struct timer_list t20timer; unsigned long global_facil_mask; - atomic_t refcnt; + refcount_t refcnt; }; struct x25_sock { @@ -242,12 +243,12 @@ void x25_link_free(void); /* x25_neigh.c */ static __inline__ void x25_neigh_hold(struct x25_neigh *nb) { - atomic_inc(&nb->refcnt); + refcount_inc(&nb->refcnt); } static __inline__ void x25_neigh_put(struct x25_neigh *nb) { - if (atomic_dec_and_test(&nb->refcnt)) + if (refcount_dec_and_test(&nb->refcnt)) kfree(nb); } @@ -265,12 +266,12 @@ void x25_route_free(void); static __inline__ void x25_route_hold(struct x25_route *rt) { - atomic_inc(&rt->refcnt); + refcount_inc(&rt->refcnt); } static __inline__ void x25_route_put(struct x25_route *rt) { - if (atomic_dec_and_test(&rt->refcnt)) + if (refcount_dec_and_test(&rt->refcnt)) kfree(rt); } diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 62f5a259e597..c0916ab18d32 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -13,6 +13,7 @@ #include <linux/mutex.h> #include <linux/audit.h> #include <linux/slab.h> +#include <linux/refcount.h> #include <net/sock.h> #include <net/dst.h> @@ -137,7 +138,7 @@ struct xfrm_state { struct hlist_node bysrc; struct hlist_node byspi; - atomic_t refcnt; + refcount_t refcnt; spinlock_t lock; struct xfrm_id id; @@ -559,7 +560,7 @@ struct xfrm_policy { /* This lock only affects elements except for entry. */ rwlock_t lock; - atomic_t refcnt; + refcount_t refcnt; struct timer_list timer; struct flow_cache_object flo; @@ -631,7 +632,8 @@ struct xfrm_mgr { u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k); + const struct xfrm_kmaddress *k, + const struct xfrm_encap_tmpl *encap); bool (*is_alive)(const struct km_event *c); }; @@ -814,14 +816,14 @@ static inline void xfrm_audit_state_icvfail(struct xfrm_state *x, static inline void xfrm_pol_hold(struct xfrm_policy *policy) { if (likely(policy != NULL)) - atomic_inc(&policy->refcnt); + refcount_inc(&policy->refcnt); } void xfrm_policy_destroy(struct xfrm_policy *policy); static inline void xfrm_pol_put(struct xfrm_policy *policy) { - if (atomic_dec_and_test(&policy->refcnt)) + if (refcount_dec_and_test(&policy->refcnt)) xfrm_policy_destroy(policy); } @@ -836,18 +838,18 @@ void __xfrm_state_destroy(struct xfrm_state *); static inline void __xfrm_state_put(struct xfrm_state *x) { - atomic_dec(&x->refcnt); + refcount_dec(&x->refcnt); } static inline void xfrm_state_put(struct xfrm_state *x) { - if (atomic_dec_and_test(&x->refcnt)) + if (refcount_dec_and_test(&x->refcnt)) __xfrm_state_destroy(x); } static inline void xfrm_state_hold(struct xfrm_state *x) { - atomic_inc(&x->refcnt); + refcount_inc(&x->refcnt); } static inline bool addr_match(const void *token1, const void *token2, @@ -1028,7 +1030,7 @@ struct xfrm_offload { }; struct sec_path { - atomic_t refcnt; + refcount_t refcnt; int len; int olen; @@ -1049,7 +1051,7 @@ static inline struct sec_path * secpath_get(struct sec_path *sp) { if (sp) - atomic_inc(&sp->refcnt); + refcount_inc(&sp->refcnt); return sp; } @@ -1058,7 +1060,7 @@ void __secpath_destroy(struct sec_path *sp); static inline void secpath_put(struct sec_path *sp) { - if (sp && atomic_dec_and_test(&sp->refcnt)) + if (sp && refcount_dec_and_test(&sp->refcnt)) __secpath_destroy(sp); } @@ -1675,13 +1677,16 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); #ifdef CONFIG_XFRM_MIGRATE int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k); + const struct xfrm_kmaddress *k, + const struct xfrm_encap_tmpl *encap); struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net); struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, - struct xfrm_migrate *m); + struct xfrm_migrate *m, + struct xfrm_encap_tmpl *encap); int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, - struct xfrm_kmaddress *k, struct net *net); + struct xfrm_kmaddress *k, struct net *net, + struct xfrm_encap_tmpl *encap); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); |