summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c27
-rw-r--r--net/9p/trans_fd.c4
-rw-r--r--net/Kconfig5
-rw-r--r--net/ax25/Kconfig16
-rw-r--r--net/ax25/sysctl_net_ax25.c3
-rw-r--r--net/batman-adv/bat_iv_ogm.c1
-rw-r--r--net/batman-adv/bat_v.c23
-rw-r--r--net/batman-adv/bat_v_elp.c3
-rw-r--r--net/batman-adv/bat_v_ogm.c7
-rw-r--r--net/batman-adv/gateway_common.c162
-rw-r--r--net/batman-adv/gateway_common.h7
-rw-r--r--net/batman-adv/hard-interface.c34
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/netlink.c18
-rw-r--r--net/batman-adv/netlink.h6
-rw-r--r--net/batman-adv/routing.h4
-rw-r--r--net/batman-adv/soft-interface.c5
-rw-r--r--net/batman-adv/translation-table.c1
-rw-r--r--net/batman-adv/types.h13
-rw-r--r--net/bluetooth/af_bluetooth.c53
-rw-r--r--net/bluetooth/amp.h1
-rw-r--r--net/bluetooth/bnep/sock.c10
-rw-r--r--net/bluetooth/coredump.c3
-rw-r--r--net/bluetooth/eir.c2
-rw-r--r--net/bluetooth/hci_conn.c728
-rw-r--r--net/bluetooth/hci_core.c34
-rw-r--r--net/bluetooth/hci_debugfs.c3
-rw-r--r--net/bluetooth/hci_event.c265
-rw-r--r--net/bluetooth/hci_request.c21
-rw-r--r--net/bluetooth/hci_sock.c77
-rw-r--r--net/bluetooth/hci_sync.c310
-rw-r--r--net/bluetooth/hidp/sock.c10
-rw-r--r--net/bluetooth/iso.c302
-rw-r--r--net/bluetooth/l2cap_sock.c29
-rw-r--r--net/bluetooth/mgmt.c33
-rw-r--r--net/bluetooth/msft.c412
-rw-r--r--net/bluetooth/rfcomm/sock.c13
-rw-r--r--net/bluetooth/rfcomm/tty.c31
-rw-r--r--net/bluetooth/sco.c34
-rw-r--r--net/bpf/test_run.c22
-rw-r--r--net/bridge/br.c8
-rw-r--r--net/bridge/br_forward.c5
-rw-r--r--net/bridge/br_input.c4
-rw-r--r--net/bridge/br_netfilter_hooks.c3
-rw-r--r--net/bridge/br_netlink.c12
-rw-r--r--net/bridge/br_private.h20
-rw-r--r--net/bridge/br_switchdev.c15
-rw-r--r--net/bridge/br_vlan_tunnel.c15
-rw-r--r--net/bridge/netfilter/ebtables.c3
-rw-r--r--net/can/isotp.c22
-rw-r--r--net/can/j1939/socket.c10
-rw-r--r--net/can/raw.c35
-rw-r--r--net/ceph/messenger.c78
-rw-r--r--net/ceph/messenger_v1.c98
-rw-r--r--net/ceph/messenger_v2.c289
-rw-r--r--net/ceph/osd_client.c334
-rw-r--r--net/core/dev.c376
-rw-r--r--net/core/dev_ioctl.c187
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/filter.c15
-rw-r--r--net/core/flow_dissector.c60
-rw-r--r--net/core/flow_offload.c7
-rw-r--r--net/core/lwt_bpf.c7
-rw-r--r--net/core/neighbour.c8
-rw-r--r--net/core/net-sysfs.c1
-rw-r--r--net/core/netdev-genl.c54
-rw-r--r--net/core/of_net.c1
-rw-r--r--net/core/page_pool.c87
-rw-r--r--net/core/rtnetlink.c36
-rw-r--r--net/core/scm.c3
-rw-r--r--net/core/skbuff.c228
-rw-r--r--net/core/skmsg.c20
-rw-r--r--net/core/sock.c90
-rw-r--r--net/core/sock_map.c36
-rw-r--r--net/core/sysctl_net_core.c3
-rw-r--r--net/core/xdp.c2
-rw-r--r--net/dccp/feat.h1
-rw-r--r--net/dccp/ipv4.c23
-rw-r--r--net/dccp/ipv6.c15
-rw-r--r--net/dccp/ipv6.h4
-rw-r--r--net/dccp/proto.c20
-rw-r--r--net/devlink/Makefile3
-rw-r--r--net/devlink/core.c6
-rw-r--r--net/devlink/dev.c79
-rw-r--r--net/devlink/devl_internal.h143
-rw-r--r--net/devlink/dpipe.c917
-rw-r--r--net/devlink/health.c42
-rw-r--r--net/devlink/leftover.c9507
-rw-r--r--net/devlink/linecard.c606
-rw-r--r--net/devlink/netlink.c393
-rw-r--r--net/devlink/netlink_gen.c481
-rw-r--r--net/devlink/netlink_gen.h79
-rw-r--r--net/devlink/param.c865
-rw-r--r--net/devlink/port.c1515
-rw-r--r--net/devlink/rate.c722
-rw-r--r--net/devlink/region.c1260
-rw-r--r--net/devlink/resource.c579
-rw-r--r--net/devlink/sb.c996
-rw-r--r--net/devlink/trap.c1861
-rw-r--r--net/dsa/port.c53
-rw-r--r--net/dsa/slave.c9
-rw-r--r--net/dsa/tag_qca.c8
-rw-r--r--net/ethtool/channels.c2
-rw-r--r--net/ethtool/coalesce.c6
-rw-r--r--net/ethtool/common.c3
-rw-r--r--net/ethtool/debug.c2
-rw-r--r--net/ethtool/eee.c2
-rw-r--r--net/ethtool/eeprom.c9
-rw-r--r--net/ethtool/features.c2
-rw-r--r--net/ethtool/fec.c2
-rw-r--r--net/ethtool/ioctl.c91
-rw-r--r--net/ethtool/linkinfo.c2
-rw-r--r--net/ethtool/linkmodes.c2
-rw-r--r--net/ethtool/linkstate.c2
-rw-r--r--net/ethtool/mm.c2
-rw-r--r--net/ethtool/module.c5
-rw-r--r--net/ethtool/netlink.c96
-rw-r--r--net/ethtool/netlink.h2
-rw-r--r--net/ethtool/pause.c5
-rw-r--r--net/ethtool/phc_vclocks.c2
-rw-r--r--net/ethtool/plca.c4
-rw-r--r--net/ethtool/privflags.c2
-rw-r--r--net/ethtool/pse-pd.c6
-rw-r--r--net/ethtool/rings.c5
-rw-r--r--net/ethtool/rss.c3
-rw-r--r--net/ethtool/stats.c5
-rw-r--r--net/ethtool/strset.c2
-rw-r--r--net/ethtool/tsinfo.c2
-rw-r--r--net/ethtool/tunnels.c73
-rw-r--r--net/ethtool/wol.c5
-rw-r--r--net/handshake/Makefile2
-rw-r--r--net/handshake/alert.c110
-rw-r--r--net/handshake/handshake-test.c14
-rw-r--r--net/handshake/handshake.h6
-rw-r--r--net/handshake/netlink.c18
-rw-r--r--net/handshake/tlshd.c23
-rw-r--r--net/handshake/trace.c2
-rw-r--r--net/hsr/hsr_forward.c1
-rw-r--r--net/hsr/hsr_framereg.c4
-rw-r--r--net/hsr/hsr_main.h2
-rw-r--r--net/hsr/hsr_netlink.h2
-rw-r--r--net/ieee802154/6lowpan/reassembly.c8
-rw-r--r--net/ieee802154/nl802154.c4
-rw-r--r--net/ipv4/af_inet.c64
-rw-r--r--net/ipv4/bpf_tcp_ca.c2
-rw-r--r--net/ipv4/cipso_ipv4.c4
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/devinet.c36
-rw-r--r--net/ipv4/fib_semantics.c5
-rw-r--r--net/ipv4/fib_trie.c3
-rw-r--r--net/ipv4/igmp.c5
-rw-r--r--net/ipv4/inet_diag.c22
-rw-r--r--net/ipv4/inet_hashtables.c102
-rw-r--r--net/ipv4/inet_timewait_sock.c2
-rw-r--r--net/ipv4/ip_forward.c1
-rw-r--r--net/ipv4/ip_fragment.c3
-rw-r--r--net/ipv4/ip_input.c3
-rw-r--r--net/ipv4/ip_output.c20
-rw-r--r--net/ipv4/ip_sockglue.c405
-rw-r--r--net/ipv4/ipmr.c1
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c19
-rw-r--r--net/ipv4/nexthop.c65
-rw-r--r--net/ipv4/ping.c7
-rw-r--r--net/ipv4/raw.c26
-rw-r--r--net/ipv4/route.c21
-rw-r--r--net/ipv4/sysctl_net_ipv4.c3
-rw-r--r--net/ipv4/tcp.c128
-rw-r--r--net/ipv4/tcp_fastopen.c2
-rw-r--r--net/ipv4/tcp_input.c69
-rw-r--r--net/ipv4/tcp_ipv4.c12
-rw-r--r--net/ipv4/tcp_metrics.c19
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv4/tcp_output.c42
-rw-r--r--net/ipv4/tcp_timer.c89
-rw-r--r--net/ipv4/udp.c103
-rw-r--r--net/ipv4/udp_tunnel_core.c2
-rw-r--r--net/ipv4/xfrm4_policy.c14
-rw-r--r--net/ipv6/addrconf.c95
-rw-r--r--net/ipv6/af_inet6.c22
-rw-r--r--net/ipv6/anycast.c2
-rw-r--r--net/ipv6/datagram.c9
-rw-r--r--net/ipv6/exthdrs.c7
-rw-r--r--net/ipv6/icmp.c11
-rw-r--r--net/ipv6/ila/ila_main.c1
-rw-r--r--net/ipv6/ila/ila_xlat.c1
-rw-r--r--net/ipv6/inet6_hashtables.c69
-rw-r--r--net/ipv6/ip6_fib.c55
-rw-r--r--net/ipv6/ip6_input.c3
-rw-r--r--net/ipv6/ip6_output.c21
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c22
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/ndisc.c17
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c3
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c11
-rw-r--r--net/ipv6/ping.c3
-rw-r--r--net/ipv6/raw.c19
-rw-r--r--net/ipv6/reassembly.c3
-rw-r--r--net/ipv6/route.c35
-rw-r--r--net/ipv6/rpl_iptunnel.c3
-rw-r--r--net/ipv6/seg6_local.c108
-rw-r--r--net/ipv6/sysctl_net_ipv6.c16
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/ipv6/udp.c101
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_policy.c9
-rw-r--r--net/kcm/kcmsock.c15
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/l2tp/l2tp_ip6.c4
-rw-r--r--net/llc/llc_conn.c11
-rw-r--r--net/mac80211/cfg.c27
-rw-r--r--net/mac80211/fils_aead.c2
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/key.c2
-rw-r--r--net/mac80211/mesh.h1
-rw-r--r--net/mac80211/rx.c16
-rw-r--r--net/mac80211/wpa.c2
-rw-r--r--net/mpls/af_mpls.c6
-rw-r--r--net/mptcp/Makefile2
-rw-r--r--net/mptcp/bpf.c15
-rw-r--r--net/mptcp/ctrl.c17
-rw-r--r--net/mptcp/options.c5
-rw-r--r--net/mptcp/pm.c9
-rw-r--r--net/mptcp/pm_netlink.c33
-rw-r--r--net/mptcp/protocol.c685
-rw-r--r--net/mptcp/protocol.h65
-rw-r--r--net/mptcp/sched.c173
-rw-r--r--net/mptcp/sockopt.c77
-rw-r--r--net/mptcp/subflow.c41
-rw-r--r--net/ncsi/ncsi-aen.c5
-rw-r--r--net/ncsi/ncsi-netlink.c2
-rw-r--r--net/ncsi/ncsi-netlink.h2
-rw-r--r--net/netfilter/core.c6
-rw-r--r--net/netfilter/ipset/ip_set_core.c22
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c4
-rw-r--r--net/netfilter/nf_bpf_link.c125
-rw-r--r--net/netfilter/nf_conntrack_bpf.c3
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_expect.c4
-rw-r--r--net/netfilter/nf_conntrack_extend.c4
-rw-r--r--net/netfilter/nf_conntrack_netlink.c8
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c2
-rw-r--r--net/netfilter/nf_conntrack_standalone.c4
-rw-r--r--net/netfilter/nf_flow_table_offload.c22
-rw-r--r--net/netfilter/nf_log.c7
-rw-r--r--net/netfilter/nf_nat_core.c6
-rw-r--r--net/netfilter/nf_tables_api.c158
-rw-r--r--net/netfilter/nf_tables_offload.c13
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nfnetlink_osf.c8
-rw-r--r--net/netfilter/nft_cmp.c2
-rw-r--r--net/netfilter/nft_ct.c4
-rw-r--r--net/netfilter/nft_exthdr.c42
-rw-r--r--net/netfilter/nft_fib.c15
-rw-r--r--net/netfilter/nft_lookup.c6
-rw-r--r--net/netfilter/nft_masq.c8
-rw-r--r--net/netfilter/nft_meta.c6
-rw-r--r--net/netfilter/nft_nat.c8
-rw-r--r--net/netfilter/nft_osf.c6
-rw-r--r--net/netfilter/nft_redir.c8
-rw-r--r--net/netfilter/nft_set_hash.c14
-rw-r--r--net/netfilter/nft_set_pipapo.c17
-rw-r--r--net/netfilter/nft_set_rbtree.c19
-rw-r--r--net/netfilter/x_tables.c5
-rw-r--r--net/netfilter/xt_repldata.h2
-rw-r--r--net/netfilter/xt_sctp.c2
-rw-r--r--net/netfilter/xt_u32.c21
-rw-r--r--net/netlabel/netlabel_cipso_v4.h3
-rw-r--r--net/netlink/af_netlink.c128
-rw-r--r--net/netlink/af_netlink.h26
-rw-r--r--net/netlink/diag.c10
-rw-r--r--net/netlink/genetlink.c125
-rw-r--r--net/netrom/af_netrom.c5
-rw-r--r--net/nfc/nci/uart.c23
-rw-r--r--net/nfc/netlink.c4
-rw-r--r--net/openvswitch/actions.c42
-rw-r--r--net/openvswitch/conntrack.c83
-rw-r--r--net/openvswitch/datapath.c45
-rw-r--r--net/openvswitch/drop.h41
-rw-r--r--net/openvswitch/flow_netlink.c10
-rw-r--r--net/openvswitch/meter.c10
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/qrtr/af_qrtr.c5
-rw-r--r--net/qrtr/ns.c139
-rw-r--r--net/rds/rdma_transport.c12
-rw-r--r--net/rds/rdma_transport.h1
-rw-r--r--net/rds/rds.h3
-rw-r--r--net/rds/tcp.c3
-rw-r--r--net/rds/tcp.h1
-rw-r--r--net/sched/Kconfig4
-rw-r--r--net/sched/act_ct.c3
-rw-r--r--net/sched/cls_flower.c35
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/sched/sch_api.c53
-rw-r--r--net/sched/sch_drr.c11
-rw-r--r--net/sched/sch_fq_pie.c27
-rw-r--r--net/sched/sch_hfsc.c14
-rw-r--r--net/sched/sch_htb.c17
-rw-r--r--net/sched/sch_ingress.c61
-rw-r--r--net/sched/sch_netem.c49
-rw-r--r--net/sched/sch_plug.c2
-rw-r--r--net/sched/sch_qfq.c34
-rw-r--r--net/sched/sch_taprio.c68
-rw-r--r--net/sctp/input.c2
-rw-r--r--net/sctp/proc.c2
-rw-r--r--net/sctp/protocol.c5
-rw-r--r--net/sctp/socket.c17
-rw-r--r--net/sctp/sysctl.c4
-rw-r--r--net/smc/af_smc.c88
-rw-r--r--net/smc/smc.h5
-rw-r--r--net/smc/smc_clc.c147
-rw-r--r--net/smc/smc_clc.h53
-rw-r--r--net/smc/smc_core.c15
-rw-r--r--net/smc/smc_core.h26
-rw-r--r--net/smc/smc_ib.h1
-rw-r--r--net/smc/smc_llc.c25
-rw-r--r--net/smc/smc_stats.h3
-rw-r--r--net/smc/smc_sysctl.c3
-rw-r--r--net/socket.c184
-rw-r--r--net/sunrpc/.kunitconfig1
-rw-r--r--net/sunrpc/Kconfig35
-rw-r--r--net/sunrpc/auth_gss/Makefile2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_internal.h23
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c84
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c257
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c69
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c106
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_test.c196
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c77
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c287
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c7
-rw-r--r--net/sunrpc/clnt.c22
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/svc.c99
-rw-r--r--net/sunrpc/svc_xprt.c126
-rw-r--r--net/sunrpc/svcauth.c35
-rw-r--r--net/sunrpc/svcauth_unix.c9
-rw-r--r--net/sunrpc/svcsock.c181
-rw-r--r--net/sunrpc/xdr.c77
-rw-r--r--net/sunrpc/xprtrdma/verbs.c9
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h1
-rw-r--r--net/sunrpc/xprtsock.c100
-rw-r--r--net/switchdev/switchdev.c25
-rw-r--r--net/sysctl_net.c26
-rw-r--r--net/tipc/addr.h1
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/core.h2
-rw-r--r--net/tipc/link.c10
-rw-r--r--net/tipc/link.h2
-rw-r--r--net/tipc/name_distr.h1
-rw-r--r--net/tipc/net.h1
-rw-r--r--net/tipc/netlink_compat.c4
-rw-r--r--net/tipc/node.c4
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/tipc/udp_media.c2
-rw-r--r--net/tls/tls.h60
-rw-r--r--net/tls/tls_device.c58
-rw-r--r--net/tls/tls_device_fallback.c62
-rw-r--r--net/tls/tls_main.c274
-rw-r--r--net/tls/tls_strp.c3
-rw-r--r--net/tls/tls_sw.c322
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/unix/scm.c9
-rw-r--r--net/unix/sysctl_net_unix.c3
-rw-r--r--net/vmw_vsock/virtio_transport_common.c104
-rw-r--r--net/vmw_vsock/vmci_transport.h3
-rw-r--r--net/wireless/core.h2
-rw-r--r--net/wireless/mlme.c13
-rw-r--r--net/wireless/nl80211.c8
-rw-r--r--net/wireless/nl80211.h1
-rw-r--r--net/wireless/ocb.c3
-rw-r--r--net/wireless/pmsr.c3
-rw-r--r--net/xdp/xsk.c370
-rw-r--r--net/xdp/xsk_buff_pool.c7
-rw-r--r--net/xdp/xsk_diag.c3
-rw-r--r--net/xdp/xsk_queue.h95
-rw-r--r--net/xfrm/xfrm_device.c13
-rw-r--r--net/xfrm/xfrm_sysctl.c8
384 files changed, 18787 insertions, 15654 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b90781b9ece6..2a7f1b15714a 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -354,6 +354,26 @@ out:
return 0;
}
+static int vlan_hwtstamp_get(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg)
+{
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+
+ return generic_hwtstamp_get_lower(real_dev, cfg);
+}
+
+static int vlan_hwtstamp_set(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+
+ if (!net_eq(dev_net(dev), dev_net(real_dev)))
+ return -EOPNOTSUPP;
+
+ return generic_hwtstamp_set_lower(real_dev, cfg, extack);
+}
+
static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
@@ -365,14 +385,9 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
ifrr.ifr_ifru = ifr->ifr_ifru;
switch (cmd) {
- case SIOCSHWTSTAMP:
- if (!net_eq(dev_net(dev), dev_net(real_dev)))
- break;
- fallthrough;
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- case SIOCGHWTSTAMP:
if (netif_device_present(real_dev) && ops->ndo_eth_ioctl)
err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
break;
@@ -1081,6 +1096,8 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_fix_features = vlan_dev_fix_features,
.ndo_get_iflink = vlan_dev_get_iflink,
.ndo_fill_forward_path = vlan_dev_fill_forward_path,
+ .ndo_hwtstamp_get = vlan_hwtstamp_get,
+ .ndo_hwtstamp_set = vlan_hwtstamp_set,
};
static void vlan_dev_free(struct net_device *dev)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 00b684616e8d..c4015f30f9fa 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1019,7 +1019,7 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
}
}
- err = csocket->ops->connect(csocket,
+ err = READ_ONCE(csocket->ops)->connect(csocket,
(struct sockaddr *)&sin_server,
sizeof(struct sockaddr_in), 0);
if (err < 0) {
@@ -1060,7 +1060,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
return err;
}
- err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
+ err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sun_server,
sizeof(struct sockaddr_un) - 1, 0);
if (err < 0) {
pr_err("%s (%d): problem connecting socket: %s: %d\n",
diff --git a/net/Kconfig b/net/Kconfig
index 2fb25b534df5..d532ec33f1fe 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -52,6 +52,11 @@ config NET_INGRESS
config NET_EGRESS
bool
+config NET_XGRESS
+ select NET_INGRESS
+ select NET_EGRESS
+ bool
+
config NET_REDIRECT
bool
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index d3a9843a043d..fdb666607f10 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -10,7 +10,7 @@ menuconfig HAMRADIO
If you want to connect your Linux box to an amateur radio, answer Y
here. You want to read <https://www.tapr.org/>
and more specifically about AX.25 on Linux
- <http://www.linux-ax25.org/>.
+ <https://linux-ax25.in-berlin.de>.
Note that the answer to this question won't directly affect the
kernel: saying N will just cause the configurator to skip all
@@ -61,7 +61,7 @@ config AX25_DAMA_SLAVE
configuration. Linux cannot yet act as a DAMA server. This option
only compiles DAMA slave support into the kernel. It still needs to
be enabled at runtime. For more about DAMA see
- <http://www.linux-ax25.org>. If unsure, say Y.
+ <https://linux-ax25.in-berlin.de>. If unsure, say Y.
# placeholder until implemented
config AX25_DAMA_MASTER
@@ -87,9 +87,9 @@ config NETROM
A comprehensive listing of all the software for Linux amateur radio
users as well as information about how to configure an AX.25 port is
contained in the Linux Ham Wiki, available from
- <http://www.linux-ax25.org>. You also might want to check out the
- file <file:Documentation/networking/ax25.rst>. More information about
- digital amateur radio in general is on the WWW at
+ <https://linux-ax25.in-berlin.de>. You also might want to check out
+ the file <file:Documentation/networking/ax25.rst>. More information
+ about digital amateur radio in general is on the WWW at
<https://www.tapr.org/>.
To compile this driver as a module, choose M here: the
@@ -106,9 +106,9 @@ config ROSE
A comprehensive listing of all the software for Linux amateur radio
users as well as information about how to configure an AX.25 port is
contained in the Linux Ham Wiki, available from
- <http://www.linux-ax25.org>. You also might want to check out the
- file <file:Documentation/networking/ax25.rst>. More information about
- digital amateur radio in general is on the WWW at
+ <https://linux-ax25.in-berlin.de>. You also might want to check out
+ the file <file:Documentation/networking/ax25.rst>. More information
+ about digital amateur radio in general is on the WWW at
<https://www.tapr.org/>.
To compile this driver as a module, choose M here: the
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index 2154d004d3dc..db66e11e7fe8 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -159,7 +159,8 @@ int ax25_register_dev_sysctl(ax25_dev *ax25_dev)
table[k].data = &ax25_dev->values[k];
snprintf(path, sizeof(path), "net/ax25/%s", ax25_dev->dev->name);
- ax25_dev->sysheader = register_net_sysctl(&init_net, path, table);
+ ax25_dev->sysheader = register_net_sysctl_sz(&init_net, path, table,
+ ARRAY_SIZE(ax25_param_table));
if (!ax25_dev->sysheader) {
kfree(table);
return -ENOMEM;
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 828fb393ee94..74b49c35ddc1 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -2516,6 +2516,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
},
.gw = {
.init_sel_class = batadv_iv_init_sel_class,
+ .sel_class_max = BATADV_TQ_MAX_VALUE,
.get_best_gw_node = batadv_iv_gw_get_best_gw_node,
.is_eligible = batadv_iv_gw_is_eligible,
.dump = batadv_iv_gw_dump,
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 54e41fc709c3..ac11f1f08db0 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
+#include <linux/limits.h>
#include <linux/list.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
@@ -34,7 +35,6 @@
#include "bat_v_elp.h"
#include "bat_v_ogm.h"
#include "gateway_client.h"
-#include "gateway_common.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
@@ -512,25 +512,6 @@ static void batadv_v_init_sel_class(struct batadv_priv *bat_priv)
atomic_set(&bat_priv->gw.sel_class, 50);
}
-static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv,
- char *buff, size_t count)
-{
- u32 old_class, class;
-
- if (!batadv_parse_throughput(bat_priv->soft_iface, buff,
- "B.A.T.M.A.N. V GW selection class",
- &class))
- return -EINVAL;
-
- old_class = atomic_read(&bat_priv->gw.sel_class);
- atomic_set(&bat_priv->gw.sel_class, class);
-
- if (old_class != class)
- batadv_gw_reselect(bat_priv);
-
- return count;
-}
-
/**
* batadv_v_gw_throughput_get() - retrieve the GW-bandwidth for a given GW
* @gw_node: the GW to retrieve the metric for
@@ -818,7 +799,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
},
.gw = {
.init_sel_class = batadv_v_init_sel_class,
- .store_sel_class = batadv_v_store_sel_class,
+ .sel_class_max = U32_MAX,
.get_best_gw_node = batadv_v_gw_get_best_gw_node,
.is_eligible = batadv_v_gw_is_eligible,
.dump = batadv_v_gw_dump,
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index acff565849ae..1d704574e6bf 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -505,7 +505,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_elp_packet *elp_packet;
struct batadv_hard_iface *primary_if;
- struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ struct ethhdr *ethhdr;
bool res;
int ret = NET_RX_DROP;
@@ -513,6 +513,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
if (!res)
goto free_skb;
+ ethhdr = eth_hdr(skb);
if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
goto free_skb;
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index e710e9afe78f..e503ee0d896b 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -123,8 +123,10 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- if (hard_iface->if_status != BATADV_IF_ACTIVE)
+ if (hard_iface->if_status != BATADV_IF_ACTIVE) {
+ kfree_skb(skb);
return;
+ }
batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX);
batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES,
@@ -985,7 +987,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_ogm2_packet *ogm_packet;
- struct ethhdr *ethhdr = eth_hdr(skb);
+ struct ethhdr *ethhdr;
int ogm_offset;
u8 *packet_pos;
int ret = NET_RX_DROP;
@@ -999,6 +1001,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
goto free_skb;
+ ethhdr = eth_hdr(skb);
if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
goto free_skb;
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 6a964a773f57..2dd36ef03c84 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -9,124 +9,15 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
-#include <linux/errno.h>
-#include <linux/kstrtox.h>
-#include <linux/limits.h>
-#include <linux/math64.h>
-#include <linux/netdevice.h>
#include <linux/stddef.h>
-#include <linux/string.h>
+#include <linux/types.h>
#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
#include "gateway_client.h"
-#include "log.h"
#include "tvlv.h"
/**
- * batadv_parse_throughput() - parse supplied string buffer to extract
- * throughput information
- * @net_dev: the soft interface net device
- * @buff: string buffer to parse
- * @description: text shown when throughput string cannot be parsed
- * @throughput: pointer holding the returned throughput information
- *
- * Return: false on parse error and true otherwise.
- */
-bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
- const char *description, u32 *throughput)
-{
- enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT;
- u64 lthroughput;
- char *tmp_ptr;
- int ret;
-
- if (strlen(buff) > 4) {
- tmp_ptr = buff + strlen(buff) - 4;
-
- if (strncasecmp(tmp_ptr, "mbit", 4) == 0)
- bw_unit_type = BATADV_BW_UNIT_MBIT;
-
- if (strncasecmp(tmp_ptr, "kbit", 4) == 0 ||
- bw_unit_type == BATADV_BW_UNIT_MBIT)
- *tmp_ptr = '\0';
- }
-
- ret = kstrtou64(buff, 10, &lthroughput);
- if (ret) {
- batadv_err(net_dev,
- "Invalid throughput speed for %s: %s\n",
- description, buff);
- return false;
- }
-
- switch (bw_unit_type) {
- case BATADV_BW_UNIT_MBIT:
- /* prevent overflow */
- if (U64_MAX / 10 < lthroughput) {
- batadv_err(net_dev,
- "Throughput speed for %s too large: %s\n",
- description, buff);
- return false;
- }
-
- lthroughput *= 10;
- break;
- case BATADV_BW_UNIT_KBIT:
- default:
- lthroughput = div_u64(lthroughput, 100);
- break;
- }
-
- if (lthroughput > U32_MAX) {
- batadv_err(net_dev,
- "Throughput speed for %s too large: %s\n",
- description, buff);
- return false;
- }
-
- *throughput = lthroughput;
-
- return true;
-}
-
-/**
- * batadv_parse_gw_bandwidth() - parse supplied string buffer to extract
- * download and upload bandwidth information
- * @net_dev: the soft interface net device
- * @buff: string buffer to parse
- * @down: pointer holding the returned download bandwidth information
- * @up: pointer holding the returned upload bandwidth information
- *
- * Return: false on parse error and true otherwise.
- */
-static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff,
- u32 *down, u32 *up)
-{
- char *slash_ptr;
- bool ret;
-
- slash_ptr = strchr(buff, '/');
- if (slash_ptr)
- *slash_ptr = 0;
-
- ret = batadv_parse_throughput(net_dev, buff, "download gateway speed",
- down);
- if (!ret)
- return false;
-
- /* we also got some upload info */
- if (slash_ptr) {
- ret = batadv_parse_throughput(net_dev, slash_ptr + 1,
- "upload gateway speed", up);
- if (!ret)
- return false;
- }
-
- return true;
-}
-
-/**
* batadv_gw_tvlv_container_update() - update the gw tvlv container after
* gateway setting change
* @bat_priv: the bat priv with all the soft interface information
@@ -156,57 +47,6 @@ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv)
}
/**
- * batadv_gw_bandwidth_set() - Parse and set download/upload gateway bandwidth
- * from supplied string buffer
- * @net_dev: netdev struct of the soft interface
- * @buff: the buffer containing the user data
- * @count: number of bytes in the buffer
- *
- * Return: 'count' on success or a negative error code in case of failure
- */
-ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
- size_t count)
-{
- struct batadv_priv *bat_priv = netdev_priv(net_dev);
- u32 down_curr;
- u32 up_curr;
- u32 down_new = 0;
- u32 up_new = 0;
- bool ret;
-
- down_curr = (unsigned int)atomic_read(&bat_priv->gw.bandwidth_down);
- up_curr = (unsigned int)atomic_read(&bat_priv->gw.bandwidth_up);
-
- ret = batadv_parse_gw_bandwidth(net_dev, buff, &down_new, &up_new);
- if (!ret)
- return -EINVAL;
-
- if (!down_new)
- down_new = 1;
-
- if (!up_new)
- up_new = down_new / 5;
-
- if (!up_new)
- up_new = 1;
-
- if (down_curr == down_new && up_curr == up_new)
- return count;
-
- batadv_gw_reselect(bat_priv);
- batadv_info(net_dev,
- "Changing gateway bandwidth from: '%u.%u/%u.%u MBit' to: '%u.%u/%u.%u MBit'\n",
- down_curr / 10, down_curr % 10, up_curr / 10, up_curr % 10,
- down_new / 10, down_new % 10, up_new / 10, up_new % 10);
-
- atomic_set(&bat_priv->gw.bandwidth_down, down_new);
- atomic_set(&bat_priv->gw.bandwidth_up, up_new);
- batadv_gw_tvlv_container_update(bat_priv);
-
- return count;
-}
-
-/**
* batadv_gw_tvlv_ogm_handler_v1() - process incoming gateway tvlv container
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 87c37f907261..5d097d6a1dd9 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -9,9 +9,6 @@
#include "main.h"
-#include <linux/netdevice.h>
-#include <linux/types.h>
-
/**
* enum batadv_bandwidth_units - bandwidth unit types
*/
@@ -27,12 +24,8 @@ enum batadv_bandwidth_units {
#define BATADV_GW_MODE_CLIENT_NAME "client"
#define BATADV_GW_MODE_SERVER_NAME "server"
-ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
- size_t count);
void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv);
void batadv_gw_init(struct batadv_priv *bat_priv);
void batadv_gw_free(struct batadv_priv *bat_priv);
-bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
- const char *description, u32 *throughput);
#endif /* _NET_BATMAN_ADV_GATEWAY_COMMON_H_ */
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 41c1ad33d009..96a412beab2d 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/gfp.h>
@@ -630,7 +631,19 @@ out:
*/
void batadv_update_min_mtu(struct net_device *soft_iface)
{
- soft_iface->mtu = batadv_hardif_min_mtu(soft_iface);
+ struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+ int limit_mtu;
+ int mtu;
+
+ mtu = batadv_hardif_min_mtu(soft_iface);
+
+ if (bat_priv->mtu_set_by_user)
+ limit_mtu = bat_priv->mtu_set_by_user;
+ else
+ limit_mtu = ETH_DATA_LEN;
+
+ mtu = min(mtu, limit_mtu);
+ dev_set_mtu(soft_iface, mtu);
/* Check if the local translate table should be cleaned up to match a
* new (and smaller) MTU.
@@ -699,9 +712,14 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
struct batadv_priv *bat_priv;
__be16 ethertype = htons(ETH_P_BATMAN);
int max_header_len = batadv_max_header_len();
+ unsigned int required_mtu;
+ unsigned int hardif_mtu;
int ret;
- if (hard_iface->net_dev->mtu < ETH_MIN_MTU + max_header_len)
+ hardif_mtu = READ_ONCE(hard_iface->net_dev->mtu);
+ required_mtu = READ_ONCE(soft_iface->mtu) + max_header_len;
+
+ if (hardif_mtu < ETH_MIN_MTU + max_header_len)
return -EINVAL;
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
@@ -734,18 +752,18 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->net_dev->name);
if (atomic_read(&bat_priv->fragmentation) &&
- hard_iface->net_dev->mtu < ETH_DATA_LEN + max_header_len)
+ hardif_mtu < required_mtu)
batadv_info(hard_iface->soft_iface,
"The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %i would solve the problem.\n",
- hard_iface->net_dev->name, hard_iface->net_dev->mtu,
- ETH_DATA_LEN + max_header_len);
+ hard_iface->net_dev->name, hardif_mtu,
+ required_mtu);
if (!atomic_read(&bat_priv->fragmentation) &&
- hard_iface->net_dev->mtu < ETH_DATA_LEN + max_header_len)
+ hardif_mtu < required_mtu)
batadv_info(hard_iface->soft_iface,
"The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. If you experience problems getting traffic through try increasing the MTU to %i.\n",
- hard_iface->net_dev->name, hard_iface->net_dev->mtu,
- ETH_DATA_LEN + max_header_len);
+ hard_iface->net_dev->name, hardif_mtu,
+ required_mtu);
if (batadv_hardif_is_iface_up(hard_iface))
batadv_hardif_activate_interface(hard_iface);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 156ed39eded1..10007c5894a1 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2023.1"
+#define BATADV_SOURCE_VERSION "2023.3"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index ad5714f737be..0c64d81a7761 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -377,7 +377,7 @@ nla_put_failure:
*
* Return: 0 on success, < 0 on error
*/
-int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv)
+static int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv)
{
struct sk_buff *msg;
int ret;
@@ -495,7 +495,10 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED];
atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr));
+
+ rtnl_lock();
batadv_update_min_mtu(bat_priv->soft_iface);
+ rtnl_unlock();
}
if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) {
@@ -548,15 +551,12 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
* algorithm in use implements the GW API
*/
- u32 sel_class_max = 0xffffffffu;
+ u32 sel_class_max = bat_priv->algo_ops->gw.sel_class_max;
u32 sel_class;
attr = info->attrs[BATADV_ATTR_GW_SEL_CLASS];
sel_class = nla_get_u32(attr);
- if (!bat_priv->algo_ops->gw.store_sel_class)
- sel_class_max = BATADV_TQ_MAX_VALUE;
-
if (sel_class >= 1 && sel_class <= sel_class_max) {
atomic_set(&bat_priv->gw.sel_class, sel_class);
batadv_gw_reselect(bat_priv);
@@ -858,8 +858,8 @@ nla_put_failure:
*
* Return: 0 on success, < 0 on error
*/
-int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
- struct batadv_hard_iface *hard_iface)
+static int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
+ struct batadv_hard_iface *hard_iface)
{
struct sk_buff *msg;
int ret;
@@ -1073,8 +1073,8 @@ nla_put_failure:
*
* Return: 0 on success, < 0 on error
*/
-int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
- struct batadv_softif_vlan *vlan)
+static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
+ struct batadv_softif_vlan *vlan)
{
struct sk_buff *msg;
int ret;
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 48102cc7490c..876d2806a67d 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -21,12 +21,6 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
u8 result, u32 test_time, u64 total_bytes,
u32 cookie);
-int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv);
-int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
- struct batadv_hard_iface *hard_iface);
-int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
- struct batadv_softif_vlan *vlan);
-
extern struct genl_family batadv_netlink_family;
#endif /* _NET_BATMAN_ADV_NETLINK_H_ */
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 5f387786e9a7..afd15b3879f1 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -27,10 +27,6 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
struct batadv_hard_iface *iface);
int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
-int batadv_recv_tt_query(struct sk_buff *skb,
- struct batadv_hard_iface *recv_if);
-int batadv_recv_roam_adv(struct sk_buff *skb,
- struct batadv_hard_iface *recv_if);
int batadv_recv_unicast_tvlv(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index d3fdf82282af..1bf1232a4f75 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -153,11 +153,14 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+
/* check ranges */
- if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
+ if (new_mtu < ETH_MIN_MTU || new_mtu > batadv_hardif_min_mtu(dev))
return -EINVAL;
dev->mtu = new_mtu;
+ bat_priv->mtu_set_by_user = new_mtu;
return 0;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 36ca31252a73..b95c36765d04 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -774,7 +774,6 @@ check_roaming:
if (roamed_back) {
batadv_tt_global_free(bat_priv, tt_global,
"Roaming canceled");
- tt_global = NULL;
} else {
/* The global entry has to be marked as ROAMING and
* has to be kept for consistency purpose
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ca9449ec9836..17d5ea1d8e84 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1547,6 +1547,12 @@ struct batadv_priv {
struct net_device *soft_iface;
/**
+ * @mtu_set_by_user: MTU was set once by user
+ * protected by rtnl_lock
+ */
+ int mtu_set_by_user;
+
+ /**
* @bat_counters: mesh internal traffic statistic counters (see
* batadv_counters)
*/
@@ -2191,11 +2197,10 @@ struct batadv_algo_gw_ops {
void (*init_sel_class)(struct batadv_priv *bat_priv);
/**
- * @store_sel_class: parse and stores a new GW selection class
- * (optional)
+ * @sel_class_max: maximum allowed GW selection class
*/
- ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
- size_t count);
+ u32 sel_class_max;
+
/**
* @get_best_gw_node: select the best GW from the list of available
* nodes (optional)
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 1c3c7ff5c3c6..336a76165454 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -140,6 +140,35 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto,
return err;
}
+struct sock *bt_sock_alloc(struct net *net, struct socket *sock,
+ struct proto *prot, int proto, gfp_t prio, int kern)
+{
+ struct sock *sk;
+
+ sk = sk_alloc(net, PF_BLUETOOTH, prio, prot, kern);
+ if (!sk)
+ return NULL;
+
+ sock_init_data(sock, sk);
+ INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
+
+ sock_reset_flag(sk, SOCK_ZAPPED);
+
+ sk->sk_protocol = proto;
+ sk->sk_state = BT_OPEN;
+
+ /* Init peer information so it can be properly monitored */
+ if (!kern) {
+ spin_lock(&sk->sk_peer_lock);
+ sk->sk_peer_pid = get_pid(task_tgid(current));
+ sk->sk_peer_cred = get_current_cred();
+ spin_unlock(&sk->sk_peer_lock);
+ }
+
+ return sk;
+}
+EXPORT_SYMBOL(bt_sock_alloc);
+
void bt_sock_link(struct bt_sock_list *l, struct sock *sk)
{
write_lock(&l->lock);
@@ -158,6 +187,9 @@ EXPORT_SYMBOL(bt_sock_unlink);
void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
{
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
BT_DBG("parent %p, sk %p", parent, sk);
sock_hold(sk);
@@ -170,6 +202,19 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
bt_sk(sk)->parent = parent;
+ /* Copy credentials from parent since for incoming connections the
+ * socket is allocated by the kernel.
+ */
+ spin_lock(&sk->sk_peer_lock);
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
+ sk->sk_peer_pid = get_pid(parent->sk_peer_pid);
+ sk->sk_peer_cred = get_cred(parent->sk_peer_cred);
+ spin_unlock(&sk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
+
if (bh)
bh_unlock_sock(sk);
else
@@ -288,8 +333,12 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
bt_sk(sk)->skb_msg_name(skb, msg->msg_name,
&msg->msg_namelen);
- if (bt_sk(sk)->skb_put_cmsg)
- bt_sk(sk)->skb_put_cmsg(skb, msg, sk);
+ if (test_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags)) {
+ u8 pkt_status = hci_skb_pkt_status(skb);
+
+ put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_STATUS,
+ sizeof(pkt_status), &pkt_status);
+ }
}
skb_free_datagram(sk, skb);
diff --git a/net/bluetooth/amp.h b/net/bluetooth/amp.h
index 832764dfbfb3..97c87abd129f 100644
--- a/net/bluetooth/amp.h
+++ b/net/bluetooth/amp.h
@@ -28,7 +28,6 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr,
int phylink_gen_key(struct hci_conn *hcon, u8 *data, u8 *len, u8 *type);
-void amp_read_loc_info(struct hci_dev *hdev, struct amp_mgr *mgr);
void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle);
void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr);
void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 57d509d77cb4..00d47bcf4d7d 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -205,21 +205,13 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, kern);
+ sk = bt_sock_alloc(net, sock, &bnep_proto, protocol, GFP_ATOMIC, kern);
if (!sk)
return -ENOMEM;
- sock_init_data(sock, sk);
-
sock->ops = &bnep_sock_ops;
-
sock->state = SS_UNCONNECTED;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = protocol;
- sk->sk_state = BT_OPEN;
-
bt_sock_link(&bnep_sk_list, sk);
return 0;
}
diff --git a/net/bluetooth/coredump.c b/net/bluetooth/coredump.c
index d2d2624ec708..ec97a4bab1c9 100644
--- a/net/bluetooth/coredump.c
+++ b/net/bluetooth/coredump.c
@@ -100,8 +100,7 @@ void hci_devcd_reset(struct hci_dev *hdev)
/* Call with hci_dev_lock only. */
static void hci_devcd_free(struct hci_dev *hdev)
{
- if (hdev->dump.head)
- vfree(hdev->dump.head);
+ vfree(hdev->dump.head);
hci_devcd_reset(hdev);
}
diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c
index 8a85f6cdfbc1..9214189279e8 100644
--- a/net/bluetooth/eir.c
+++ b/net/bluetooth/eir.c
@@ -33,7 +33,7 @@ u8 eir_append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
size_t complete_len;
/* no space left for name (+ NULL + type + len) */
- if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3)
+ if ((max_adv_len(hdev) - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3)
return ad_len;
/* use complete name if present and fits */
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 76222565e2df..9d5057cef30a 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -178,57 +178,6 @@ static void hci_conn_cleanup(struct hci_conn *conn)
hci_conn_put(conn);
}
-static void le_scan_cleanup(struct work_struct *work)
-{
- struct hci_conn *conn = container_of(work, struct hci_conn,
- le_scan_cleanup);
- struct hci_dev *hdev = conn->hdev;
- struct hci_conn *c = NULL;
-
- BT_DBG("%s hcon %p", hdev->name, conn);
-
- hci_dev_lock(hdev);
-
- /* Check that the hci_conn is still around */
- rcu_read_lock();
- list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) {
- if (c == conn)
- break;
- }
- rcu_read_unlock();
-
- if (c == conn) {
- hci_connect_le_scan_cleanup(conn, 0x00);
- hci_conn_cleanup(conn);
- }
-
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
- hci_conn_put(conn);
-}
-
-static void hci_connect_le_scan_remove(struct hci_conn *conn)
-{
- BT_DBG("%s hcon %p", conn->hdev->name, conn);
-
- /* We can't call hci_conn_del/hci_conn_cleanup here since that
- * could deadlock with another hci_conn_del() call that's holding
- * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work).
- * Instead, grab temporary extra references to the hci_dev and
- * hci_conn and perform the necessary cleanup in a separate work
- * callback.
- */
-
- hci_dev_hold(conn->hdev);
- hci_conn_get(conn);
-
- /* Even though we hold a reference to the hdev, many other
- * things might get cleaned up meanwhile, including the hdev's
- * own workqueue, so we can't use that for scheduling.
- */
- schedule_work(&conn->le_scan_cleanup);
-}
-
static void hci_acl_create_connection(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
@@ -679,13 +628,6 @@ static void hci_conn_timeout(struct work_struct *work)
if (refcnt > 0)
return;
- /* LE connections in scanning state need special handling */
- if (conn->state == BT_CONNECT && conn->type == LE_LINK &&
- test_bit(HCI_CONN_SCANNING, &conn->flags)) {
- hci_connect_le_scan_remove(conn);
- return;
- }
-
hci_abort_conn(conn, hci_proto_disconn_ind(conn));
}
@@ -791,7 +733,9 @@ struct iso_list_data {
u16 sync_handle;
};
int count;
- struct iso_cig_params pdu;
+ bool big_term;
+ bool pa_sync_term;
+ bool big_sync_term;
};
static void bis_list(struct hci_conn *conn, void *data)
@@ -809,17 +753,6 @@ static void bis_list(struct hci_conn *conn, void *data)
d->count++;
}
-static void find_bis(struct hci_conn *conn, void *data)
-{
- struct iso_list_data *d = data;
-
- /* Ignore unicast */
- if (bacmp(&conn->dst, BDADDR_ANY))
- return;
-
- d->count++;
-}
-
static int terminate_big_sync(struct hci_dev *hdev, void *data)
{
struct iso_list_data *d = data;
@@ -828,11 +761,8 @@ static int terminate_big_sync(struct hci_dev *hdev, void *data)
hci_remove_ext_adv_instance_sync(hdev, d->bis, NULL);
- /* Check if ISO connection is a BIS and terminate BIG if there are
- * no other connections using it.
- */
- hci_conn_hash_list_state(hdev, find_bis, ISO_LINK, BT_CONNECTED, d);
- if (d->count)
+ /* Only terminate BIG if it has been created */
+ if (!d->big_term)
return 0;
return hci_le_terminate_big_sync(hdev, d->big,
@@ -844,19 +774,21 @@ static void terminate_big_destroy(struct hci_dev *hdev, void *data, int err)
kfree(data);
}
-static int hci_le_terminate_big(struct hci_dev *hdev, u8 big, u8 bis)
+static int hci_le_terminate_big(struct hci_dev *hdev, struct hci_conn *conn)
{
struct iso_list_data *d;
int ret;
- bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", big, bis);
+ bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", conn->iso_qos.bcast.big,
+ conn->iso_qos.bcast.bis);
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
return -ENOMEM;
- d->big = big;
- d->bis = bis;
+ d->big = conn->iso_qos.bcast.big;
+ d->bis = conn->iso_qos.bcast.bis;
+ d->big_term = test_and_clear_bit(HCI_CONN_BIG_CREATED, &conn->flags);
ret = hci_cmd_sync_queue(hdev, terminate_big_sync, d,
terminate_big_destroy);
@@ -873,31 +805,30 @@ static int big_terminate_sync(struct hci_dev *hdev, void *data)
bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", d->big,
d->sync_handle);
- /* Check if ISO connection is a BIS and terminate BIG if there are
- * no other connections using it.
- */
- hci_conn_hash_list_state(hdev, find_bis, ISO_LINK, BT_CONNECTED, d);
- if (d->count)
- return 0;
+ if (d->big_sync_term)
+ hci_le_big_terminate_sync(hdev, d->big);
- hci_le_big_terminate_sync(hdev, d->big);
+ if (d->pa_sync_term)
+ return hci_le_pa_terminate_sync(hdev, d->sync_handle);
- return hci_le_pa_terminate_sync(hdev, d->sync_handle);
+ return 0;
}
-static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
+static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *conn)
{
struct iso_list_data *d;
int ret;
- bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, sync_handle);
+ bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, conn->sync_handle);
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
return -ENOMEM;
d->big = big;
- d->sync_handle = sync_handle;
+ d->sync_handle = conn->sync_handle;
+ d->pa_sync_term = test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags);
+ d->big_sync_term = test_and_clear_bit(HCI_CONN_BIG_SYNC, &conn->flags);
ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d,
terminate_big_destroy);
@@ -916,6 +847,7 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
static void bis_cleanup(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
+ struct hci_conn *bis;
bt_dev_dbg(hdev, "conn %p", conn);
@@ -923,17 +855,29 @@ static void bis_cleanup(struct hci_conn *conn)
if (!test_and_clear_bit(HCI_CONN_PER_ADV, &conn->flags))
return;
- hci_le_terminate_big(hdev, conn->iso_qos.bcast.big,
- conn->iso_qos.bcast.bis);
+ /* Check if ISO connection is a BIS and terminate advertising
+ * set and BIG if there are no other connections using it.
+ */
+ bis = hci_conn_hash_lookup_big(hdev, conn->iso_qos.bcast.big);
+ if (bis)
+ return;
+
+ hci_le_terminate_big(hdev, conn);
} else {
+ bis = hci_conn_hash_lookup_big_any_dst(hdev,
+ conn->iso_qos.bcast.big);
+
+ if (bis)
+ return;
+
hci_le_big_terminate(hdev, conn->iso_qos.bcast.big,
- conn->sync_handle);
+ conn);
}
}
static int remove_cig_sync(struct hci_dev *hdev, void *data)
{
- u8 handle = PTR_ERR(data);
+ u8 handle = PTR_UINT(data);
return hci_le_remove_cig_sync(hdev, handle);
}
@@ -942,7 +886,8 @@ static int hci_le_remove_cig(struct hci_dev *hdev, u8 handle)
{
bt_dev_dbg(hdev, "handle 0x%2.2x", handle);
- return hci_cmd_sync_queue(hdev, remove_cig_sync, ERR_PTR(handle), NULL);
+ return hci_cmd_sync_queue(hdev, remove_cig_sync, UINT_PTR(handle),
+ NULL);
}
static void find_cis(struct hci_conn *conn, void *data)
@@ -983,6 +928,25 @@ static void cis_cleanup(struct hci_conn *conn)
hci_le_remove_cig(hdev, conn->iso_qos.ucast.cig);
}
+static u16 hci_conn_hash_alloc_unset(struct hci_dev *hdev)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+ u16 handle = HCI_CONN_HANDLE_MAX + 1;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ /* Find the first unused handle */
+ if (handle == 0xffff || c->handle != handle)
+ break;
+ handle++;
+ }
+ rcu_read_unlock();
+
+ return handle;
+}
+
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
u8 role)
{
@@ -996,7 +960,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
bacpy(&conn->dst, dst);
bacpy(&conn->src, &hdev->bdaddr);
- conn->handle = HCI_CONN_HANDLE_UNSET;
+ conn->handle = hci_conn_hash_alloc_unset(hdev);
conn->hdev = hdev;
conn->type = type;
conn->role = role;
@@ -1059,7 +1023,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept);
INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle);
INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout);
- INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup);
atomic_set(&conn->refcnt, 0);
@@ -1081,6 +1044,29 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
return conn;
}
+static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason)
+{
+ if (!reason)
+ reason = HCI_ERROR_REMOTE_USER_TERM;
+
+ /* Due to race, SCO/ISO conn might be not established yet at this point,
+ * and nothing else will clean it up. In other cases it is done via HCI
+ * events.
+ */
+ switch (conn->type) {
+ case SCO_LINK:
+ case ESCO_LINK:
+ if (HCI_CONN_HANDLE_UNSET(conn->handle))
+ hci_conn_failed(conn, reason);
+ break;
+ case ISO_LINK:
+ if (conn->state != BT_CONNECTED &&
+ !test_bit(HCI_CONN_CREATE_CIS, &conn->flags))
+ hci_conn_failed(conn, reason);
+ break;
+ }
+}
+
static void hci_conn_unlink(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
@@ -1103,14 +1089,7 @@ static void hci_conn_unlink(struct hci_conn *conn)
if (!test_bit(HCI_UP, &hdev->flags))
continue;
- /* Due to race, SCO connection might be not established
- * yet at this point. Delete it now, otherwise it is
- * possible for it to be stuck and can't be deleted.
- */
- if ((child->type == SCO_LINK ||
- child->type == ESCO_LINK) &&
- child->handle == HCI_CONN_HANDLE_UNSET)
- hci_conn_del(child);
+ hci_conn_cleanup_child(child, conn->abort_reason);
}
return;
@@ -1273,9 +1252,41 @@ void hci_conn_failed(struct hci_conn *conn, u8 status)
hci_conn_del(conn);
}
+/* This function requires the caller holds hdev->lock */
+u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ bt_dev_dbg(hdev, "hcon %p handle 0x%4.4x", conn, handle);
+
+ if (conn->handle == handle)
+ return 0;
+
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ handle, HCI_CONN_HANDLE_MAX);
+ return HCI_ERROR_INVALID_PARAMETERS;
+ }
+
+ /* If abort_reason has been sent it means the connection is being
+ * aborted and the handle shall not be changed.
+ */
+ if (conn->abort_reason)
+ return conn->abort_reason;
+
+ conn->handle = handle;
+
+ return 0;
+}
+
static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
{
- struct hci_conn *conn = data;
+ struct hci_conn *conn;
+ u16 handle = PTR_UINT(data);
+
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn)
+ return;
bt_dev_dbg(hdev, "err %d", err);
@@ -1300,10 +1311,18 @@ done:
static int hci_connect_le_sync(struct hci_dev *hdev, void *data)
{
- struct hci_conn *conn = data;
+ struct hci_conn *conn;
+ u16 handle = PTR_UINT(data);
+
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn)
+ return 0;
bt_dev_dbg(hdev, "conn %p", conn);
+ clear_bit(HCI_CONN_SCANNING, &conn->flags);
+ conn->state = BT_CONNECT;
+
return hci_le_create_conn_sync(hdev, conn);
}
@@ -1373,10 +1392,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
conn->sec_level = BT_SECURITY_LOW;
conn->conn_timeout = conn_timeout;
- conn->state = BT_CONNECT;
- clear_bit(HCI_CONN_SCANNING, &conn->flags);
-
- err = hci_cmd_sync_queue(hdev, hci_connect_le_sync, conn,
+ err = hci_cmd_sync_queue(hdev, hci_connect_le_sync,
+ UINT_PTR(conn->handle),
create_le_conn_complete);
if (err) {
hci_conn_del(conn);
@@ -1440,25 +1457,23 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev,
static int qos_set_big(struct hci_dev *hdev, struct bt_iso_qos *qos)
{
- struct iso_list_data data;
+ struct hci_conn *conn;
+ u8 big;
/* Allocate a BIG if not set */
if (qos->bcast.big == BT_ISO_QOS_BIG_UNSET) {
- for (data.big = 0x00; data.big < 0xef; data.big++) {
- data.count = 0;
- data.bis = 0xff;
+ for (big = 0x00; big < 0xef; big++) {
- hci_conn_hash_list_state(hdev, bis_list, ISO_LINK,
- BT_BOUND, &data);
- if (!data.count)
+ conn = hci_conn_hash_lookup_big(hdev, big);
+ if (!conn)
break;
}
- if (data.big == 0xef)
+ if (big == 0xef)
return -EADDRNOTAVAIL;
/* Update BIG */
- qos->bcast.big = data.big;
+ qos->bcast.big = big;
}
return 0;
@@ -1466,28 +1481,27 @@ static int qos_set_big(struct hci_dev *hdev, struct bt_iso_qos *qos)
static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos)
{
- struct iso_list_data data;
+ struct hci_conn *conn;
+ u8 bis;
/* Allocate BIS if not set */
if (qos->bcast.bis == BT_ISO_QOS_BIS_UNSET) {
/* Find an unused adv set to advertise BIS, skip instance 0x00
* since it is reserved as general purpose set.
*/
- for (data.bis = 0x01; data.bis < hdev->le_num_of_adv_sets;
- data.bis++) {
- data.count = 0;
+ for (bis = 0x01; bis < hdev->le_num_of_adv_sets;
+ bis++) {
- hci_conn_hash_list_state(hdev, bis_list, ISO_LINK,
- BT_BOUND, &data);
- if (!data.count)
+ conn = hci_conn_hash_lookup_bis(hdev, BDADDR_ANY, bis);
+ if (!conn)
break;
}
- if (data.bis == hdev->le_num_of_adv_sets)
+ if (bis == hdev->le_num_of_adv_sets)
return -EADDRNOTAVAIL;
/* Update BIS */
- qos->bcast.bis = data.bis;
+ qos->bcast.bis = bis;
}
return 0;
@@ -1495,10 +1509,10 @@ static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos)
/* This function requires the caller holds hdev->lock */
static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
- struct bt_iso_qos *qos)
+ struct bt_iso_qos *qos, __u8 base_len,
+ __u8 *base)
{
struct hci_conn *conn;
- struct iso_list_data data;
int err;
/* Let's make sure that le is enabled.*/
@@ -1516,24 +1530,26 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
if (err)
return ERR_PTR(err);
- data.big = qos->bcast.big;
- data.bis = qos->bcast.bis;
- data.count = 0;
-
- /* Check if there is already a matching BIG/BIS */
- hci_conn_hash_list_state(hdev, bis_list, ISO_LINK, BT_BOUND, &data);
- if (data.count)
+ /* Check if the LE Create BIG command has already been sent */
+ conn = hci_conn_hash_lookup_per_adv_bis(hdev, dst, qos->bcast.big,
+ qos->bcast.big);
+ if (conn)
return ERR_PTR(-EADDRINUSE);
- conn = hci_conn_hash_lookup_bis(hdev, dst, qos->bcast.big, qos->bcast.bis);
- if (conn)
+ /* Check BIS settings against other bound BISes, since all
+ * BISes in a BIG must have the same value for all parameters
+ */
+ conn = hci_conn_hash_lookup_big(hdev, qos->bcast.big);
+
+ if (conn && (memcmp(qos, &conn->iso_qos, sizeof(*qos)) ||
+ base_len != conn->le_per_adv_data_len ||
+ memcmp(conn->le_per_adv_data, base, base_len)))
return ERR_PTR(-EADDRINUSE);
conn = hci_conn_add(hdev, ISO_LINK, dst, HCI_ROLE_MASTER);
if (!conn)
return ERR_PTR(-ENOMEM);
- set_bit(HCI_CONN_PER_ADV, &conn->flags);
conn->state = BT_CONNECT;
hci_conn_hold(conn);
@@ -1707,52 +1723,25 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
return sco;
}
-static void cis_add(struct iso_list_data *d, struct bt_iso_qos *qos)
-{
- struct hci_cis_params *cis = &d->pdu.cis[d->pdu.cp.num_cis];
-
- cis->cis_id = qos->ucast.cis;
- cis->c_sdu = cpu_to_le16(qos->ucast.out.sdu);
- cis->p_sdu = cpu_to_le16(qos->ucast.in.sdu);
- cis->c_phy = qos->ucast.out.phy ? qos->ucast.out.phy : qos->ucast.in.phy;
- cis->p_phy = qos->ucast.in.phy ? qos->ucast.in.phy : qos->ucast.out.phy;
- cis->c_rtn = qos->ucast.out.rtn;
- cis->p_rtn = qos->ucast.in.rtn;
-
- d->pdu.cp.num_cis++;
-}
-
-static void cis_list(struct hci_conn *conn, void *data)
-{
- struct iso_list_data *d = data;
-
- /* Skip if broadcast/ANY address */
- if (!bacmp(&conn->dst, BDADDR_ANY))
- return;
-
- if (d->cig != conn->iso_qos.ucast.cig || d->cis == BT_ISO_QOS_CIS_UNSET ||
- d->cis != conn->iso_qos.ucast.cis)
- return;
-
- d->count++;
-
- if (d->pdu.cp.cig_id == BT_ISO_QOS_CIG_UNSET ||
- d->count >= ARRAY_SIZE(d->pdu.cis))
- return;
-
- cis_add(d, &conn->iso_qos);
-}
-
static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos)
{
struct hci_dev *hdev = conn->hdev;
struct hci_cp_le_create_big cp;
+ struct iso_list_data data;
memset(&cp, 0, sizeof(cp));
+ data.big = qos->bcast.big;
+ data.bis = qos->bcast.bis;
+ data.count = 0;
+
+ /* Create a BIS for each bound connection */
+ hci_conn_hash_list_state(hdev, bis_list, ISO_LINK,
+ BT_BOUND, &data);
+
cp.handle = qos->bcast.big;
cp.adv_handle = qos->bcast.bis;
- cp.num_bis = 0x01;
+ cp.num_bis = data.count;
hci_cpu_to_le24(qos->bcast.out.interval, cp.bis.sdu_interval);
cp.bis.sdu = cpu_to_le16(qos->bcast.out.sdu);
cp.bis.latency = cpu_to_le16(qos->bcast.out.latency);
@@ -1766,25 +1755,62 @@ static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos)
return hci_send_cmd(hdev, HCI_OP_LE_CREATE_BIG, sizeof(cp), &cp);
}
-static void set_cig_params_complete(struct hci_dev *hdev, void *data, int err)
+static int set_cig_params_sync(struct hci_dev *hdev, void *data)
{
- struct iso_cig_params *pdu = data;
+ u8 cig_id = PTR_UINT(data);
+ struct hci_conn *conn;
+ struct bt_iso_qos *qos;
+ struct iso_cig_params pdu;
+ u8 cis_id;
- bt_dev_dbg(hdev, "");
+ conn = hci_conn_hash_lookup_cig(hdev, cig_id);
+ if (!conn)
+ return 0;
- if (err)
- bt_dev_err(hdev, "Unable to set CIG parameters: %d", err);
+ memset(&pdu, 0, sizeof(pdu));
- kfree(pdu);
-}
+ qos = &conn->iso_qos;
+ pdu.cp.cig_id = cig_id;
+ hci_cpu_to_le24(qos->ucast.out.interval, pdu.cp.c_interval);
+ hci_cpu_to_le24(qos->ucast.in.interval, pdu.cp.p_interval);
+ pdu.cp.sca = qos->ucast.sca;
+ pdu.cp.packing = qos->ucast.packing;
+ pdu.cp.framing = qos->ucast.framing;
+ pdu.cp.c_latency = cpu_to_le16(qos->ucast.out.latency);
+ pdu.cp.p_latency = cpu_to_le16(qos->ucast.in.latency);
+
+ /* Reprogram all CIS(s) with the same CIG, valid range are:
+ * num_cis: 0x00 to 0x1F
+ * cis_id: 0x00 to 0xEF
+ */
+ for (cis_id = 0x00; cis_id < 0xf0 &&
+ pdu.cp.num_cis < ARRAY_SIZE(pdu.cis); cis_id++) {
+ struct hci_cis_params *cis;
-static int set_cig_params_sync(struct hci_dev *hdev, void *data)
-{
- struct iso_cig_params *pdu = data;
- u32 plen;
+ conn = hci_conn_hash_lookup_cis(hdev, NULL, 0, cig_id, cis_id);
+ if (!conn)
+ continue;
+
+ qos = &conn->iso_qos;
+
+ cis = &pdu.cis[pdu.cp.num_cis++];
+ cis->cis_id = cis_id;
+ cis->c_sdu = cpu_to_le16(conn->iso_qos.ucast.out.sdu);
+ cis->p_sdu = cpu_to_le16(conn->iso_qos.ucast.in.sdu);
+ cis->c_phy = qos->ucast.out.phy ? qos->ucast.out.phy :
+ qos->ucast.in.phy;
+ cis->p_phy = qos->ucast.in.phy ? qos->ucast.in.phy :
+ qos->ucast.out.phy;
+ cis->c_rtn = qos->ucast.out.rtn;
+ cis->p_rtn = qos->ucast.in.rtn;
+ }
- plen = sizeof(pdu->cp) + pdu->cp.num_cis * sizeof(pdu->cis[0]);
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS, plen, pdu,
+ if (!pdu.cp.num_cis)
+ return 0;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS,
+ sizeof(pdu.cp) +
+ pdu.cp.num_cis * sizeof(pdu.cis[0]), &pdu,
HCI_CMD_TIMEOUT);
}
@@ -1792,7 +1818,6 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
{
struct hci_dev *hdev = conn->hdev;
struct iso_list_data data;
- struct iso_cig_params *pdu;
memset(&data, 0, sizeof(data));
@@ -1819,59 +1844,32 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
qos->ucast.cig = data.cig;
}
- data.pdu.cp.cig_id = qos->ucast.cig;
- hci_cpu_to_le24(qos->ucast.out.interval, data.pdu.cp.c_interval);
- hci_cpu_to_le24(qos->ucast.in.interval, data.pdu.cp.p_interval);
- data.pdu.cp.sca = qos->ucast.sca;
- data.pdu.cp.packing = qos->ucast.packing;
- data.pdu.cp.framing = qos->ucast.framing;
- data.pdu.cp.c_latency = cpu_to_le16(qos->ucast.out.latency);
- data.pdu.cp.p_latency = cpu_to_le16(qos->ucast.in.latency);
-
if (qos->ucast.cis != BT_ISO_QOS_CIS_UNSET) {
- data.count = 0;
- data.cig = qos->ucast.cig;
- data.cis = qos->ucast.cis;
-
- hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, BT_BOUND,
- &data);
- if (data.count)
+ if (hci_conn_hash_lookup_cis(hdev, NULL, 0, qos->ucast.cig,
+ qos->ucast.cis))
return false;
-
- cis_add(&data, qos);
+ goto done;
}
- /* Reprogram all CIS(s) with the same CIG */
- for (data.cig = qos->ucast.cig, data.cis = 0x00; data.cis < 0x11;
+ /* Allocate first available CIS if not set */
+ for (data.cig = qos->ucast.cig, data.cis = 0x00; data.cis < 0xf0;
data.cis++) {
- data.count = 0;
-
- hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, BT_BOUND,
- &data);
- if (data.count)
- continue;
-
- /* Allocate a CIS if not set */
- if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET) {
+ if (!hci_conn_hash_lookup_cis(hdev, NULL, 0, data.cig,
+ data.cis)) {
/* Update CIS */
qos->ucast.cis = data.cis;
- cis_add(&data, qos);
+ break;
}
}
- if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET || !data.pdu.cp.num_cis)
+ if (qos->ucast.cis == BT_ISO_QOS_CIS_UNSET)
return false;
- pdu = kmemdup(&data.pdu, sizeof(*pdu), GFP_KERNEL);
- if (!pdu)
+done:
+ if (hci_cmd_sync_queue(hdev, set_cig_params_sync,
+ UINT_PTR(qos->ucast.cig), NULL) < 0)
return false;
- if (hci_cmd_sync_queue(hdev, set_cig_params_sync, pdu,
- set_cig_params_complete) < 0) {
- kfree(pdu);
- return false;
- }
-
return true;
}
@@ -1888,6 +1886,8 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-ENOMEM);
cis->cleanup = cis_cleanup;
cis->dst_type = dst_type;
+ cis->iso_qos.ucast.cig = BT_ISO_QOS_CIG_UNSET;
+ cis->iso_qos.ucast.cis = BT_ISO_QOS_CIS_UNSET;
}
if (cis->state == BT_CONNECTED)
@@ -1931,6 +1931,8 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-EINVAL);
}
+ hci_conn_hold(cis);
+
cis->iso_qos = *qos;
cis->state = BT_BOUND;
@@ -1969,59 +1971,47 @@ bool hci_iso_setup_path(struct hci_conn *conn)
return true;
}
-static int hci_create_cis_sync(struct hci_dev *hdev, void *data)
+int hci_conn_check_create_cis(struct hci_conn *conn)
{
- return hci_le_create_cis_sync(hdev, data);
-}
+ if (conn->type != ISO_LINK || !bacmp(&conn->dst, BDADDR_ANY))
+ return -EINVAL;
-int hci_le_create_cis(struct hci_conn *conn)
-{
- struct hci_conn *cis;
- struct hci_link *link, *t;
- struct hci_dev *hdev = conn->hdev;
- int err;
+ if (!conn->parent || conn->parent->state != BT_CONNECTED ||
+ conn->state != BT_CONNECT || HCI_CONN_HANDLE_UNSET(conn->handle))
+ return 1;
- bt_dev_dbg(hdev, "hcon %p", conn);
+ return 0;
+}
- switch (conn->type) {
- case LE_LINK:
- if (conn->state != BT_CONNECTED || list_empty(&conn->link_list))
- return -EINVAL;
+static int hci_create_cis_sync(struct hci_dev *hdev, void *data)
+{
+ return hci_le_create_cis_sync(hdev);
+}
- cis = NULL;
+int hci_le_create_cis_pending(struct hci_dev *hdev)
+{
+ struct hci_conn *conn;
+ bool pending = false;
- /* hci_conn_link uses list_add_tail_rcu so the list is in
- * the same order as the connections are requested.
- */
- list_for_each_entry_safe(link, t, &conn->link_list, list) {
- if (link->conn->state == BT_BOUND) {
- err = hci_le_create_cis(link->conn);
- if (err)
- return err;
+ rcu_read_lock();
- cis = link->conn;
- }
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) {
+ rcu_read_unlock();
+ return -EBUSY;
}
- return cis ? 0 : -EINVAL;
- case ISO_LINK:
- cis = conn;
- break;
- default:
- return -EINVAL;
+ if (!hci_conn_check_create_cis(conn))
+ pending = true;
}
- if (cis->state == BT_CONNECT)
+ rcu_read_unlock();
+
+ if (!pending)
return 0;
/* Queue Create CIS */
- err = hci_cmd_sync_queue(hdev, hci_create_cis_sync, cis, NULL);
- if (err)
- return err;
-
- cis->state = BT_CONNECT;
-
- return 0;
+ return hci_cmd_sync_queue(hdev, hci_create_cis_sync, NULL, NULL);
}
static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn,
@@ -2051,16 +2041,6 @@ static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn,
qos->latency = conn->le_conn_latency;
}
-static void hci_bind_bis(struct hci_conn *conn,
- struct bt_iso_qos *qos)
-{
- /* Update LINK PHYs according to QoS preference */
- conn->le_tx_phy = qos->bcast.out.phy;
- conn->le_tx_phy = qos->bcast.out.phy;
- conn->iso_qos = *qos;
- conn->state = BT_BOUND;
-}
-
static int create_big_sync(struct hci_dev *hdev, void *data)
{
struct hci_conn *conn = data;
@@ -2140,7 +2120,8 @@ int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type,
return hci_cmd_sync_queue(hdev, create_pa_sync, cp, create_pa_complete);
}
-int hci_le_big_create_sync(struct hci_dev *hdev, struct bt_iso_qos *qos,
+int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
+ struct bt_iso_qos *qos,
__u16 sync_handle, __u8 num_bis, __u8 bis[])
{
struct _packed {
@@ -2156,6 +2137,9 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct bt_iso_qos *qos,
if (err)
return err;
+ if (hcon)
+ hcon->iso_qos.bcast.big = qos->bcast.big;
+
memset(&pdu, 0, sizeof(pdu));
pdu.cp.handle = qos->bcast.big;
pdu.cp.sync_handle = cpu_to_le16(sync_handle);
@@ -2183,27 +2167,80 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err)
}
}
-struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
- __u8 dst_type, struct bt_iso_qos *qos,
- __u8 base_len, __u8 *base)
+struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
+ struct bt_iso_qos *qos,
+ __u8 base_len, __u8 *base)
{
struct hci_conn *conn;
- int err;
+ __u8 eir[HCI_MAX_PER_AD_LENGTH];
+
+ if (base_len && base)
+ base_len = eir_append_service_data(eir, 0, 0x1851,
+ base, base_len);
/* We need hci_conn object using the BDADDR_ANY as dst */
- conn = hci_add_bis(hdev, dst, qos);
+ conn = hci_add_bis(hdev, dst, qos, base_len, eir);
if (IS_ERR(conn))
return conn;
- hci_bind_bis(conn, qos);
+ /* Update LINK PHYs according to QoS preference */
+ conn->le_tx_phy = qos->bcast.out.phy;
+ conn->le_tx_phy = qos->bcast.out.phy;
/* Add Basic Announcement into Peridic Adv Data if BASE is set */
if (base_len && base) {
- base_len = eir_append_service_data(conn->le_per_adv_data, 0,
- 0x1851, base, base_len);
+ memcpy(conn->le_per_adv_data, eir, sizeof(eir));
conn->le_per_adv_data_len = base_len;
}
+ hci_iso_qos_setup(hdev, conn, &qos->bcast.out,
+ conn->le_tx_phy ? conn->le_tx_phy :
+ hdev->le_tx_def_phys);
+
+ conn->iso_qos = *qos;
+ conn->state = BT_BOUND;
+
+ return conn;
+}
+
+static void bis_mark_per_adv(struct hci_conn *conn, void *data)
+{
+ struct iso_list_data *d = data;
+
+ /* Skip if not broadcast/ANY address */
+ if (bacmp(&conn->dst, BDADDR_ANY))
+ return;
+
+ if (d->big != conn->iso_qos.bcast.big ||
+ d->bis == BT_ISO_QOS_BIS_UNSET ||
+ d->bis != conn->iso_qos.bcast.bis)
+ return;
+
+ set_bit(HCI_CONN_PER_ADV, &conn->flags);
+}
+
+struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
+ __u8 dst_type, struct bt_iso_qos *qos,
+ __u8 base_len, __u8 *base)
+{
+ struct hci_conn *conn;
+ int err;
+ struct iso_list_data data;
+
+ conn = hci_bind_bis(hdev, dst, qos, base_len, base);
+ if (IS_ERR(conn))
+ return conn;
+
+ data.big = qos->bcast.big;
+ data.bis = qos->bcast.bis;
+
+ /* Set HCI_CONN_PER_ADV for all bound connections, to mark that
+ * the start periodic advertising and create BIG commands have
+ * been queued
+ */
+ hci_conn_hash_list_state(hdev, bis_mark_per_adv, ISO_LINK,
+ BT_BOUND, &data);
+
/* Queue start periodic advertising and create BIG */
err = hci_cmd_sync_queue(hdev, create_big_sync, conn,
create_big_complete);
@@ -2212,10 +2249,6 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(err);
}
- hci_iso_qos_setup(hdev, conn, &qos->bcast.out,
- conn->le_tx_phy ? conn->le_tx_phy :
- hdev->le_tx_def_phys);
-
return conn;
}
@@ -2257,11 +2290,12 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-ENOLINK);
}
- /* If LE is already connected and CIS handle is already set proceed to
- * Create CIS immediately.
- */
- if (le->state == BT_CONNECTED && cis->handle != HCI_CONN_HANDLE_UNSET)
- hci_le_create_cis(cis);
+ /* Link takes the refcount */
+ hci_conn_drop(cis);
+
+ cis->state = BT_CONNECT;
+
+ hci_le_create_cis_pending(hdev);
return cis;
}
@@ -2848,81 +2882,49 @@ u32 hci_conn_get_phy(struct hci_conn *conn)
return phys;
}
-int hci_abort_conn(struct hci_conn *conn, u8 reason)
+static int abort_conn_sync(struct hci_dev *hdev, void *data)
{
- int r = 0;
+ struct hci_conn *conn;
+ u16 handle = PTR_UINT(data);
- if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
+ conn = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!conn)
return 0;
- switch (conn->state) {
- case BT_CONNECTED:
- case BT_CONFIG:
- if (conn->type == AMP_LINK) {
- struct hci_cp_disconn_phy_link cp;
+ return hci_abort_conn_sync(hdev, conn, conn->abort_reason);
+}
- cp.phy_handle = HCI_PHY_HANDLE(conn->handle);
- cp.reason = reason;
- r = hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK,
- sizeof(cp), &cp);
- } else {
- struct hci_cp_disconnect dc;
+int hci_abort_conn(struct hci_conn *conn, u8 reason)
+{
+ struct hci_dev *hdev = conn->hdev;
- dc.handle = cpu_to_le16(conn->handle);
- dc.reason = reason;
- r = hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT,
- sizeof(dc), &dc);
- }
+ /* If abort_reason has already been set it means the connection is
+ * already being aborted so don't attempt to overwrite it.
+ */
+ if (conn->abort_reason)
+ return 0;
- conn->state = BT_DISCONN;
+ bt_dev_dbg(hdev, "handle 0x%2.2x reason 0x%2.2x", conn->handle, reason);
- break;
- case BT_CONNECT:
- if (conn->type == LE_LINK) {
- if (test_bit(HCI_CONN_SCANNING, &conn->flags))
- break;
- r = hci_send_cmd(conn->hdev,
- HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
- } else if (conn->type == ACL_LINK) {
- if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2)
- break;
- r = hci_send_cmd(conn->hdev,
- HCI_OP_CREATE_CONN_CANCEL,
- 6, &conn->dst);
- }
- break;
- case BT_CONNECT2:
- if (conn->type == ACL_LINK) {
- struct hci_cp_reject_conn_req rej;
-
- bacpy(&rej.bdaddr, &conn->dst);
- rej.reason = reason;
-
- r = hci_send_cmd(conn->hdev,
- HCI_OP_REJECT_CONN_REQ,
- sizeof(rej), &rej);
- } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
- struct hci_cp_reject_sync_conn_req rej;
-
- bacpy(&rej.bdaddr, &conn->dst);
-
- /* SCO rejection has its own limited set of
- * allowed error values (0x0D-0x0F) which isn't
- * compatible with most values passed to this
- * function. To be safe hard-code one of the
- * values that's suitable for SCO.
- */
- rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES;
+ conn->abort_reason = reason;
- r = hci_send_cmd(conn->hdev,
- HCI_OP_REJECT_SYNC_CONN_REQ,
- sizeof(rej), &rej);
+ /* If the connection is pending check the command opcode since that
+ * might be blocking on hci_cmd_sync_work while waiting its respective
+ * event so we need to hci_cmd_sync_cancel to cancel it.
+ *
+ * hci_connect_le serializes the connection attempts so only one
+ * connection can be in BT_CONNECT at time.
+ */
+ if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) {
+ switch (hci_skb_event(hdev->sent_cmd)) {
+ case HCI_EV_LE_CONN_COMPLETE:
+ case HCI_EV_LE_ENHANCED_CONN_COMPLETE:
+ case HCI_EVT_LE_CIS_ESTABLISHED:
+ hci_cmd_sync_cancel(hdev, -ECANCELED);
+ break;
}
- break;
- default:
- conn->state = BT_CLOSED;
- break;
}
- return r;
+ return hci_cmd_sync_queue(hdev, abort_conn_sync, UINT_PTR(conn->handle),
+ NULL);
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 1ec83985f1ab..a5992f1b3c9b 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1074,9 +1074,9 @@ void hci_uuids_clear(struct hci_dev *hdev)
void hci_link_keys_clear(struct hci_dev *hdev)
{
- struct link_key *key;
+ struct link_key *key, *tmp;
- list_for_each_entry(key, &hdev->link_keys, list) {
+ list_for_each_entry_safe(key, tmp, &hdev->link_keys, list) {
list_del_rcu(&key->list);
kfree_rcu(key, rcu);
}
@@ -1084,9 +1084,9 @@ void hci_link_keys_clear(struct hci_dev *hdev)
void hci_smp_ltks_clear(struct hci_dev *hdev)
{
- struct smp_ltk *k;
+ struct smp_ltk *k, *tmp;
- list_for_each_entry(k, &hdev->long_term_keys, list) {
+ list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) {
list_del_rcu(&k->list);
kfree_rcu(k, rcu);
}
@@ -1094,9 +1094,9 @@ void hci_smp_ltks_clear(struct hci_dev *hdev)
void hci_smp_irks_clear(struct hci_dev *hdev)
{
- struct smp_irk *k;
+ struct smp_irk *k, *tmp;
- list_for_each_entry(k, &hdev->identity_resolving_keys, list) {
+ list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) {
list_del_rcu(&k->list);
kfree_rcu(k, rcu);
}
@@ -1104,9 +1104,9 @@ void hci_smp_irks_clear(struct hci_dev *hdev)
void hci_blocked_keys_clear(struct hci_dev *hdev)
{
- struct blocked_key *b;
+ struct blocked_key *b, *tmp;
- list_for_each_entry(b, &hdev->blocked_keys, list) {
+ list_for_each_entry_safe(b, tmp, &hdev->blocked_keys, list) {
list_del_rcu(&b->list);
kfree_rcu(b, rcu);
}
@@ -1949,15 +1949,15 @@ int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
switch (hci_get_adv_monitor_offload_ext(hdev)) {
case HCI_ADV_MONITOR_EXT_NONE:
- bt_dev_dbg(hdev, "%s add monitor %d status %d", hdev->name,
+ bt_dev_dbg(hdev, "add monitor %d status %d",
monitor->handle, status);
/* Message was not forwarded to controller - not an error */
break;
case HCI_ADV_MONITOR_EXT_MSFT:
status = msft_add_monitor_pattern(hdev, monitor);
- bt_dev_dbg(hdev, "%s add monitor %d msft status %d", hdev->name,
- monitor->handle, status);
+ bt_dev_dbg(hdev, "add monitor %d msft status %d",
+ handle, status);
break;
}
@@ -1976,15 +1976,15 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
switch (hci_get_adv_monitor_offload_ext(hdev)) {
case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */
- bt_dev_dbg(hdev, "%s remove monitor %d status %d", hdev->name,
+ bt_dev_dbg(hdev, "remove monitor %d status %d",
monitor->handle, status);
goto free_monitor;
case HCI_ADV_MONITOR_EXT_MSFT:
handle = monitor->handle;
status = msft_remove_monitor(hdev, monitor);
- bt_dev_dbg(hdev, "%s remove monitor %d msft status %d",
- hdev->name, handle, status);
+ bt_dev_dbg(hdev, "remove monitor %d msft status %d",
+ handle, status);
break;
}
@@ -2436,6 +2436,9 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
return NOTIFY_DONE;
+ /* To avoid a potential race with hci_unregister_dev. */
+ hci_dev_hold(hdev);
+
if (action == PM_SUSPEND_PREPARE)
ret = hci_suspend_dev(hdev);
else if (action == PM_POST_SUSPEND)
@@ -2445,6 +2448,7 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d",
action, ret);
+ hci_dev_put(hdev);
return NOTIFY_DONE;
}
@@ -3891,7 +3895,7 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
if (conn) {
/* Send to upper protocol */
- bt_cb(skb)->sco.pkt_status = flags & 0x03;
+ hci_skb_pkt_status(skb) = flags & 0x03;
sco_recv_scodata(conn, skb);
return;
} else {
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index ec0df2f9188e..6b7741f6e95b 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -22,6 +22,7 @@
*/
#include <linux/debugfs.h>
+#include <linux/kstrtox.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -1152,7 +1153,7 @@ static ssize_t force_no_mitm_write(struct file *file,
return -EFAULT;
buf[buf_size] = '\0';
- if (strtobool(buf, &enable))
+ if (kstrtobool(buf, &enable))
return -EINVAL;
if (enable == hci_dev_test_flag(hdev, HCI_FORCE_NO_MITM))
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 31ca320ce38d..35f251041eeb 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1639,7 +1639,7 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data,
hci_dev_set_flag(hdev, HCI_LE_ADV);
- if (adv)
+ if (adv && !adv->periodic)
adv->enabled = true;
conn = hci_lookup_le_connect(hdev);
@@ -1747,7 +1747,7 @@ static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr,
{
struct discovery_state *d = &hdev->discovery;
- if (len > HCI_MAX_AD_LENGTH)
+ if (len > max_adv_len(hdev))
return;
bacpy(&d->last_adv_addr, bdaddr);
@@ -3173,19 +3173,15 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
* As the connection handle is set here for the first time, it indicates
* whether the connection is already set up.
*/
- if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ if (!HCI_CONN_HANDLE_UNSET(conn->handle)) {
bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
goto unlock;
}
if (!status) {
- conn->handle = __le16_to_cpu(ev->handle);
- if (conn->handle > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
- conn->handle, HCI_CONN_HANDLE_MAX);
- status = HCI_ERROR_INVALID_PARAMETERS;
+ status = hci_conn_set_handle(conn, __le16_to_cpu(ev->handle));
+ if (status)
goto done;
- }
if (conn->type == ACL_LINK) {
conn->state = BT_CONFIG;
@@ -3803,6 +3799,22 @@ static u8 hci_cc_le_read_buffer_size_v2(struct hci_dev *hdev, void *data,
return rp->status;
}
+static void hci_unbound_cis_failed(struct hci_dev *hdev, u8 cig, u8 status)
+{
+ struct hci_conn *conn, *tmp;
+
+ lockdep_assert_held(&hdev->lock);
+
+ list_for_each_entry_safe(conn, tmp, &hdev->conn_hash.list, list) {
+ if (conn->type != ISO_LINK || !bacmp(&conn->dst, BDADDR_ANY) ||
+ conn->state == BT_OPEN || conn->iso_qos.ucast.cig != cig)
+ continue;
+
+ if (HCI_CONN_HANDLE_UNSET(conn->handle))
+ hci_conn_failed(conn, status);
+ }
+}
+
static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -3810,6 +3822,7 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
struct hci_cp_le_set_cig_params *cp;
struct hci_conn *conn;
u8 status = rp->status;
+ bool pending = false;
int i;
bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
@@ -3823,12 +3836,15 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
+ /* BLUETOOTH CORE SPECIFICATION Version 5.4 | Vol 4, Part E page 2554
+ *
+ * If the Status return parameter is non-zero, then the state of the CIG
+ * and its CIS configurations shall not be changed by the command. If
+ * the CIG did not already exist, it shall not be created.
+ */
if (status) {
- while ((conn = hci_conn_hash_lookup_cig(hdev, rp->cig_id))) {
- conn->state = BT_CLOSED;
- hci_connect_cfm(conn, status);
- hci_conn_del(conn);
- }
+ /* Keep current configuration, fail only the unbound CIS */
+ hci_unbound_cis_failed(hdev, rp->cig_id, status);
goto unlock;
}
@@ -3848,17 +3864,17 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
if (conn->state != BT_BOUND && conn->state != BT_CONNECT)
continue;
- conn->handle = __le16_to_cpu(rp->handle[i]);
-
- bt_dev_dbg(hdev, "%p handle 0x%4.4x parent %p", conn,
- conn->handle, conn->parent);
+ if (hci_conn_set_handle(conn, __le16_to_cpu(rp->handle[i])))
+ continue;
- /* Create CIS if LE is already connected */
- if (conn->parent && conn->parent->state == BT_CONNECTED)
- hci_le_create_cis(conn);
+ if (conn->state == BT_CONNECT)
+ pending = true;
}
unlock:
+ if (pending)
+ hci_le_create_cis_pending(hdev);
+
hci_dev_unlock(hdev);
return rp->status;
@@ -3938,24 +3954,47 @@ static u8 hci_cc_le_set_per_adv_enable(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
struct hci_ev_status *rp = data;
- __u8 *sent;
+ struct hci_cp_le_set_per_adv_enable *cp;
+ struct adv_info *adv = NULL, *n;
+ u8 per_adv_cnt = 0;
bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
if (rp->status)
return rp->status;
- sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE);
- if (!sent)
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_PER_ADV_ENABLE);
+ if (!cp)
return rp->status;
hci_dev_lock(hdev);
- if (*sent)
+ adv = hci_find_adv_instance(hdev, cp->handle);
+
+ if (cp->enable) {
hci_dev_set_flag(hdev, HCI_LE_PER_ADV);
- else
+
+ if (adv)
+ adv->enabled = true;
+ } else {
+ /* If just one instance was disabled check if there are
+ * any other instance enabled before clearing HCI_LE_PER_ADV.
+ * The current periodic adv instance will be marked as
+ * disabled once extended advertising is also disabled.
+ */
+ list_for_each_entry_safe(adv, n, &hdev->adv_instances,
+ list) {
+ if (adv->periodic && adv->enabled)
+ per_adv_cnt++;
+ }
+
+ if (per_adv_cnt > 1)
+ goto unlock;
+
hci_dev_clear_flag(hdev, HCI_LE_PER_ADV);
+ }
+unlock:
hci_dev_unlock(hdev);
return rp->status;
@@ -4224,6 +4263,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data,
static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
{
struct hci_cp_le_create_cis *cp;
+ bool pending = false;
int i;
bt_dev_dbg(hdev, "status 0x%2.2x", status);
@@ -4246,12 +4286,18 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status)
conn = hci_conn_hash_lookup_handle(hdev, handle);
if (conn) {
+ if (test_and_clear_bit(HCI_CONN_CREATE_CIS,
+ &conn->flags))
+ pending = true;
conn->state = BT_CLOSED;
hci_connect_cfm(conn, status);
hci_conn_del(conn);
}
}
+ if (pending)
+ hci_le_create_cis_pending(hdev);
+
hci_dev_unlock(hdev);
}
@@ -4999,18 +5045,15 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
* As the connection handle is set here for the first time, it indicates
* whether the connection is already set up.
*/
- if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ if (!HCI_CONN_HANDLE_UNSET(conn->handle)) {
bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete event for existing connection");
goto unlock;
}
switch (status) {
case 0x00:
- conn->handle = __le16_to_cpu(ev->handle);
- if (conn->handle > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
- conn->handle, HCI_CONN_HANDLE_MAX);
- status = HCI_ERROR_INVALID_PARAMETERS;
+ status = hci_conn_set_handle(conn, __le16_to_cpu(ev->handle));
+ if (status) {
conn->state = BT_CLOSED;
break;
}
@@ -5863,7 +5906,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
* As the connection handle is set here for the first time, it indicates
* whether the connection is already set up.
*/
- if (conn->handle != HCI_CONN_HANDLE_UNSET) {
+ if (!HCI_CONN_HANDLE_UNSET(conn->handle)) {
bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for existing connection");
goto unlock;
}
@@ -6216,8 +6259,9 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
return;
}
- if (!ext_adv && len > HCI_MAX_AD_LENGTH) {
- bt_dev_err_ratelimited(hdev, "legacy adv larger than 31 bytes");
+ if (len > max_adv_len(hdev)) {
+ bt_dev_err_ratelimited(hdev,
+ "adv larger than maximum supported");
return;
}
@@ -6282,7 +6326,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
*/
conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved,
type);
- if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) {
+ if (!ext_adv && conn && type == LE_ADV_IND &&
+ len <= max_adv_len(hdev)) {
/* Store report for later inclusion by
* mgmt_device_connected
*/
@@ -6423,7 +6468,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data,
info->length + 1))
break;
- if (info->length <= HCI_MAX_AD_LENGTH) {
+ if (info->length <= max_adv_len(hdev)) {
rssi = info->data[info->length];
process_adv_report(hdev, info->type, &info->bdaddr,
info->bdaddr_type, NULL, 0, rssi,
@@ -6536,19 +6581,56 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
struct hci_ev_le_pa_sync_established *ev = data;
int mask = hdev->link_mode;
__u8 flags = 0;
+ struct hci_conn *bis;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
- if (ev->status)
- return;
-
hci_dev_lock(hdev);
hci_dev_clear_flag(hdev, HCI_PA_SYNC);
mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ISO_LINK, &flags);
- if (!(mask & HCI_LM_ACCEPT))
+ if (!(mask & HCI_LM_ACCEPT)) {
hci_le_pa_term_sync(hdev, ev->handle);
+ goto unlock;
+ }
+
+ if (!(flags & HCI_PROTO_DEFER))
+ goto unlock;
+
+ /* Add connection to indicate the PA sync event */
+ bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY,
+ HCI_ROLE_SLAVE);
+
+ if (!bis)
+ goto unlock;
+
+ if (ev->status)
+ set_bit(HCI_CONN_PA_SYNC_FAILED, &bis->flags);
+ else
+ set_bit(HCI_CONN_PA_SYNC, &bis->flags);
+
+ /* Notify connection to iso layer */
+ hci_connect_cfm(bis, ev->status);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_ev_le_per_adv_report *ev = data;
+ int mask = hdev->link_mode;
+ __u8 flags = 0;
+
+ bt_dev_dbg(hdev, "sync_handle 0x%4.4x", le16_to_cpu(ev->sync_handle));
+
+ hci_dev_lock(hdev);
+
+ mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags);
+ if (!(mask & HCI_LM_ACCEPT))
+ hci_le_pa_term_sync(hdev, ev->sync_handle);
hci_dev_unlock(hdev);
}
@@ -6790,6 +6872,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
struct hci_evt_le_cis_established *ev = data;
struct hci_conn *conn;
struct bt_iso_qos *qos;
+ bool pending = false;
u16 handle = __le16_to_cpu(ev->handle);
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6813,6 +6896,8 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
qos = &conn->iso_qos;
+ pending = test_and_clear_bit(HCI_CONN_CREATE_CIS, &conn->flags);
+
/* Convert ISO Interval (1.25 ms slots) to SDU Interval (us) */
qos->ucast.in.interval = le16_to_cpu(ev->interval) * 1250;
qos->ucast.out.interval = qos->ucast.in.interval;
@@ -6854,10 +6939,14 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
+ conn->state = BT_CLOSED;
hci_connect_cfm(conn, ev->status);
hci_conn_del(conn);
unlock:
+ if (pending)
+ hci_le_create_cis_pending(hdev);
+
hci_dev_unlock(hdev);
}
@@ -6936,6 +7025,7 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_evt_le_create_big_complete *ev = data;
struct hci_conn *conn;
+ __u8 i = 0;
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
@@ -6944,33 +7034,46 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
return;
hci_dev_lock(hdev);
+ rcu_read_lock();
- conn = hci_conn_hash_lookup_big(hdev, ev->handle);
- if (!conn)
- goto unlock;
+ /* Connect all BISes that are bound to the BIG */
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ if (bacmp(&conn->dst, BDADDR_ANY) ||
+ conn->type != ISO_LINK ||
+ conn->iso_qos.bcast.big != ev->handle)
+ continue;
- if (conn->type != ISO_LINK) {
- bt_dev_err(hdev,
- "Invalid connection link type handle 0x%2.2x",
- ev->handle);
- goto unlock;
- }
+ if (hci_conn_set_handle(conn,
+ __le16_to_cpu(ev->bis_handle[i++])))
+ continue;
- if (ev->num_bis)
- conn->handle = __le16_to_cpu(ev->bis_handle[0]);
+ if (!ev->status) {
+ conn->state = BT_CONNECTED;
+ set_bit(HCI_CONN_BIG_CREATED, &conn->flags);
+ rcu_read_unlock();
+ hci_debugfs_create_conn(conn);
+ hci_conn_add_sysfs(conn);
+ hci_iso_setup_path(conn);
+ rcu_read_lock();
+ continue;
+ }
- if (!ev->status) {
- conn->state = BT_CONNECTED;
- hci_debugfs_create_conn(conn);
- hci_conn_add_sysfs(conn);
- hci_iso_setup_path(conn);
- goto unlock;
+ hci_connect_cfm(conn, ev->status);
+ rcu_read_unlock();
+ hci_conn_del(conn);
+ rcu_read_lock();
}
- hci_connect_cfm(conn, ev->status);
- hci_conn_del(conn);
+ if (!ev->status && !i)
+ /* If no BISes have been connected for the BIG,
+ * terminate. This is in case all bound connections
+ * have been closed before the BIG creation
+ * has completed.
+ */
+ hci_le_terminate_big_sync(hdev, ev->handle,
+ HCI_ERROR_LOCAL_HOST_TERM);
-unlock:
+ rcu_read_unlock();
hci_dev_unlock(hdev);
}
@@ -6979,6 +7082,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
{
struct hci_evt_le_big_sync_estabilished *ev = data;
struct hci_conn *bis;
+ struct hci_conn *pa_sync;
int i;
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -6987,11 +7091,17 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
flex_array_size(ev, bis, ev->num_bis)))
return;
- if (ev->status)
- return;
-
hci_dev_lock(hdev);
+ if (!ev->status) {
+ pa_sync = hci_conn_hash_lookup_pa_sync(hdev, ev->handle);
+ if (pa_sync)
+ /* Also mark the BIG sync established event on the
+ * associated PA sync hcon
+ */
+ set_bit(HCI_CONN_BIG_SYNC, &pa_sync->flags);
+ }
+
for (i = 0; i < ev->num_bis; i++) {
u16 handle = le16_to_cpu(ev->bis[i]);
__le32 interval;
@@ -7005,6 +7115,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis->handle = handle;
}
+ if (ev->status != 0x42)
+ /* Mark PA sync as established */
+ set_bit(HCI_CONN_PA_SYNC, &bis->flags);
+
bis->iso_qos.bcast.big = ev->handle;
memset(&interval, 0, sizeof(interval));
memcpy(&interval, ev->latency, sizeof(ev->latency));
@@ -7013,9 +7127,25 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis->iso_qos.bcast.in.latency = le16_to_cpu(ev->interval) * 125 / 100;
bis->iso_qos.bcast.in.sdu = le16_to_cpu(ev->max_pdu);
- hci_iso_setup_path(bis);
+ if (!ev->status) {
+ set_bit(HCI_CONN_BIG_SYNC, &bis->flags);
+ hci_iso_setup_path(bis);
+ }
}
+ /* In case BIG sync failed, notify each failed connection to
+ * the user after all hci connections have been added
+ */
+ if (ev->status)
+ for (i = 0; i < ev->num_bis; i++) {
+ u16 handle = le16_to_cpu(ev->bis[i]);
+
+ bis = hci_conn_hash_lookup_handle(hdev, handle);
+
+ set_bit(HCI_CONN_BIG_SYNC_FAILED, &bis->flags);
+ hci_connect_cfm(bis, ev->status);
+ }
+
hci_dev_unlock(hdev);
}
@@ -7101,6 +7231,11 @@ static const struct hci_le_ev {
HCI_LE_EV(HCI_EV_LE_PA_SYNC_ESTABLISHED,
hci_le_pa_sync_estabilished_evt,
sizeof(struct hci_ev_le_pa_sync_established)),
+ /* [0x0f = HCI_EV_LE_PER_ADV_REPORT] */
+ HCI_LE_EV_VL(HCI_EV_LE_PER_ADV_REPORT,
+ hci_le_per_adv_report_evt,
+ sizeof(struct hci_ev_le_per_adv_report),
+ HCI_MAX_EVENT_SIZE),
/* [0x12 = HCI_EV_LE_EXT_ADV_SET_TERM] */
HCI_LE_EV(HCI_EV_LE_EXT_ADV_SET_TERM, hci_le_ext_adv_term_evt,
sizeof(struct hci_evt_le_ext_adv_set_term)),
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index f7e006a36382..6e023b0104b0 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -629,27 +629,6 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
}
}
-/* Returns true if an le connection is in the scanning state */
-static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev)
-{
- struct hci_conn_hash *h = &hdev->conn_hash;
- struct hci_conn *c;
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type == LE_LINK && c->state == BT_CONNECT &&
- test_bit(HCI_CONN_SCANNING, &c->flags)) {
- rcu_read_unlock();
- return true;
- }
- }
-
- rcu_read_unlock();
-
- return false;
-}
-
static void set_random_addr(struct hci_request *req, bdaddr_t *rpa);
static int hci_update_random_address(struct hci_request *req,
bool require_privacy, bool use_rpa,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1d249d839819..5e4f718073b7 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -264,6 +264,53 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
kfree_skb(skb_copy);
}
+static void hci_sock_copy_creds(struct sock *sk, struct sk_buff *skb)
+{
+ struct scm_creds *creds;
+
+ if (!sk || WARN_ON(!skb))
+ return;
+
+ creds = &bt_cb(skb)->creds;
+
+ /* Check if peer credentials is set */
+ if (!sk->sk_peer_pid) {
+ /* Check if parent peer credentials is set */
+ if (bt_sk(sk)->parent && bt_sk(sk)->parent->sk_peer_pid)
+ sk = bt_sk(sk)->parent;
+ else
+ return;
+ }
+
+ /* Check if scm_creds already set */
+ if (creds->pid == pid_vnr(sk->sk_peer_pid))
+ return;
+
+ memset(creds, 0, sizeof(*creds));
+
+ creds->pid = pid_vnr(sk->sk_peer_pid);
+ if (sk->sk_peer_cred) {
+ creds->uid = sk->sk_peer_cred->uid;
+ creds->gid = sk->sk_peer_cred->gid;
+ }
+}
+
+static struct sk_buff *hci_skb_clone(struct sk_buff *skb)
+{
+ struct sk_buff *nskb;
+
+ if (!skb)
+ return NULL;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return NULL;
+
+ hci_sock_copy_creds(skb->sk, nskb);
+
+ return nskb;
+}
+
/* Send frame to sockets with specific channel */
static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
int flag, struct sock *skip_sk)
@@ -289,7 +336,7 @@ static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
if (hci_pi(sk)->channel != channel)
continue;
- nskb = skb_clone(skb, GFP_ATOMIC);
+ nskb = hci_skb_clone(skb);
if (!nskb)
continue;
@@ -356,6 +403,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
if (!skb_copy)
return;
+ hci_sock_copy_creds(skb->sk, skb_copy);
+
/* Put header before the data */
hdr = skb_push(skb_copy, HCI_MON_HDR_SIZE);
hdr->opcode = opcode;
@@ -531,10 +580,12 @@ static struct sk_buff *create_monitor_ctrl_open(struct sock *sk)
return NULL;
}
- skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC);
+ skb = bt_skb_alloc(14 + TASK_COMM_LEN, GFP_ATOMIC);
if (!skb)
return NULL;
+ hci_sock_copy_creds(sk, skb);
+
flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 0x1 : 0x0;
put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
@@ -580,6 +631,8 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk)
if (!skb)
return NULL;
+ hci_sock_copy_creds(sk, skb);
+
put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
__net_timestamp(skb);
@@ -606,6 +659,8 @@ static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index,
if (!skb)
return NULL;
+ hci_sock_copy_creds(sk, skb);
+
put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
put_unaligned_le16(opcode, skb_put(skb, 2));
@@ -638,6 +693,8 @@ send_monitor_note(struct sock *sk, const char *fmt, ...)
if (!skb)
return;
+ hci_sock_copy_creds(sk, skb);
+
va_start(args, fmt);
vsprintf(skb_put(skb, len), fmt, args);
*(u8 *)skb_put(skb, 1) = 0;
@@ -1494,6 +1551,7 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg,
static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
+ struct scm_cookie scm;
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
@@ -1538,11 +1596,16 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
break;
}
+ memset(&scm, 0, sizeof(scm));
+ scm.creds = bt_cb(skb)->creds;
+
skb_free_datagram(sk, skb);
if (flags & MSG_TRUNC)
copied = skblen;
+ scm_recv(sock, msg, &scm, flags);
+
return err ? : copied;
}
@@ -2143,18 +2206,12 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &hci_sock_ops;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, kern);
+ sk = bt_sock_alloc(net, sock, &hci_sk_proto, protocol, GFP_ATOMIC,
+ kern);
if (!sk)
return -ENOMEM;
- sock_init_data(sock, sk);
-
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = protocol;
-
sock->state = SS_UNCONNECTED;
- sk->sk_state = BT_OPEN;
sk->sk_destruct = hci_sock_destruct;
bt_sock_link(&hci_sk_list, sk);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 4d1e32bb6a9c..9b93653c6197 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -3,6 +3,7 @@
* BlueZ - Bluetooth protocol stack for Linux
*
* Copyright (C) 2021 Intel Corporation
+ * Copyright 2023 NXP
*/
#include <linux/property.h>
@@ -1319,9 +1320,11 @@ int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance)
static int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
{
struct hci_cp_le_set_per_adv_enable cp;
+ struct adv_info *adv = NULL;
/* If periodic advertising already disabled there is nothing to do. */
- if (!hci_dev_test_flag(hdev, HCI_LE_PER_ADV))
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv || !adv->periodic || !adv->enabled)
return 0;
memset(&cp, 0, sizeof(cp));
@@ -1386,9 +1389,11 @@ static int hci_set_per_adv_data_sync(struct hci_dev *hdev, u8 instance)
static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
{
struct hci_cp_le_set_per_adv_enable cp;
+ struct adv_info *adv = NULL;
/* If periodic advertising already enabled there is nothing to do. */
- if (hci_dev_test_flag(hdev, HCI_LE_PER_ADV))
+ adv = hci_find_adv_instance(hdev, instance);
+ if (adv && adv->periodic && adv->enabled)
return 0;
memset(&cp, 0, sizeof(cp));
@@ -1458,22 +1463,19 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
sync_interval);
if (IS_ERR(adv))
return PTR_ERR(adv);
+ adv->pending = false;
added = true;
}
}
- /* Only start advertising if instance 0 or if a dedicated instance has
- * been added.
- */
- if (!adv || added) {
- err = hci_start_ext_adv_sync(hdev, instance);
- if (err < 0)
- goto fail;
+ /* Start advertising */
+ err = hci_start_ext_adv_sync(hdev, instance);
+ if (err < 0)
+ goto fail;
- err = hci_adv_bcast_annoucement(hdev, adv);
- if (err < 0)
- goto fail;
- }
+ err = hci_adv_bcast_annoucement(hdev, adv);
+ if (err < 0)
+ goto fail;
err = hci_set_per_adv_params_sync(hdev, instance, min_interval,
max_interval);
@@ -2670,27 +2672,6 @@ done:
return filter_policy;
}
-/* Returns true if an le connection is in the scanning state */
-static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev)
-{
- struct hci_conn_hash *h = &hdev->conn_hash;
- struct hci_conn *c;
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type == LE_LINK && c->state == BT_CONNECT &&
- test_bit(HCI_CONN_SCANNING, &c->flags)) {
- rcu_read_unlock();
- return true;
- }
- }
-
- rcu_read_unlock();
-
- return false;
-}
-
static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
u16 interval, u16 window,
u8 own_addr_type, u8 filter_policy)
@@ -4133,10 +4114,13 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev)
}
if (bis_capable(hdev)) {
+ events[1] |= 0x20; /* LE PA Report */
+ events[1] |= 0x40; /* LE PA Sync Established */
events[3] |= 0x04; /* LE Create BIG Complete */
events[3] |= 0x08; /* LE Terminate BIG Complete */
events[3] |= 0x10; /* LE BIG Sync Established */
events[3] |= 0x20; /* LE BIG Sync Loss */
+ events[4] |= 0x02; /* LE BIG Info Advertising Report */
}
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EVENT_MASK,
@@ -4684,7 +4668,10 @@ static const struct {
"advertised, but not supported."),
HCI_QUIRK_BROKEN(SET_RPA_TIMEOUT,
"HCI LE Set Random Private Address Timeout command is "
- "advertised, but not supported.")
+ "advertised, but not supported."),
+ HCI_QUIRK_BROKEN(LE_CODED,
+ "HCI LE Coded PHY feature bit is set, "
+ "but its usage is not supported.")
};
/* This function handles hdev setup stage:
@@ -5269,26 +5256,64 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
}
static int hci_le_connect_cancel_sync(struct hci_dev *hdev,
- struct hci_conn *conn)
+ struct hci_conn *conn, u8 reason)
{
+ /* Return reason if scanning since the connection shall probably be
+ * cleanup directly.
+ */
if (test_bit(HCI_CONN_SCANNING, &conn->flags))
- return 0;
+ return reason;
- if (test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
+ if (conn->role == HCI_ROLE_SLAVE ||
+ test_and_set_bit(HCI_CONN_CANCEL, &conn->flags))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_CREATE_CONN_CANCEL,
0, NULL, HCI_CMD_TIMEOUT);
}
-static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn)
+static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn,
+ u8 reason)
{
if (conn->type == LE_LINK)
- return hci_le_connect_cancel_sync(hdev, conn);
+ return hci_le_connect_cancel_sync(hdev, conn, reason);
+
+ if (conn->type == ISO_LINK) {
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 1857:
+ *
+ * If this command is issued for a CIS on the Central and the
+ * CIS is successfully terminated before being established,
+ * then an HCI_LE_CIS_Established event shall also be sent for
+ * this CIS with the Status Operation Cancelled by Host (0x44).
+ */
+ if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags))
+ return hci_disconnect_sync(hdev, conn, reason);
+
+ /* CIS with no Create CIS sent have nothing to cancel */
+ if (bacmp(&conn->dst, BDADDR_ANY))
+ return HCI_ERROR_LOCAL_HOST_TERM;
+
+ /* There is no way to cancel a BIS without terminating the BIG
+ * which is done later on connection cleanup.
+ */
+ return 0;
+ }
if (hdev->hci_ver < BLUETOOTH_VER_1_2)
return 0;
+ /* Wait for HCI_EV_CONN_COMPLETE, not HCI_EV_CMD_STATUS, when the
+ * reason is anything but HCI_ERROR_REMOTE_POWER_OFF. This reason is
+ * used when suspending or powering off, where we don't want to wait
+ * for the peer's response.
+ */
+ if (reason != HCI_ERROR_REMOTE_POWER_OFF)
+ return __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN_CANCEL,
+ 6, &conn->dst,
+ HCI_EV_CONN_COMPLETE,
+ HCI_CMD_TIMEOUT, NULL);
+
return __hci_cmd_sync_status(hdev, HCI_OP_CREATE_CONN_CANCEL,
6, &conn->dst, HCI_CMD_TIMEOUT);
}
@@ -5312,11 +5337,27 @@ static int hci_reject_sco_sync(struct hci_dev *hdev, struct hci_conn *conn,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
+static int hci_le_reject_cis_sync(struct hci_dev *hdev, struct hci_conn *conn,
+ u8 reason)
+{
+ struct hci_cp_le_reject_cis cp;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.handle = cpu_to_le16(conn->handle);
+ cp.reason = reason;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_REJECT_CIS,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
u8 reason)
{
struct hci_cp_reject_conn_req cp;
+ if (conn->type == ISO_LINK)
+ return hci_le_reject_cis_sync(hdev, conn, reason);
+
if (conn->type == SCO_LINK || conn->type == ESCO_LINK)
return hci_reject_sco_sync(hdev, conn, reason);
@@ -5330,43 +5371,94 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
{
- int err;
+ int err = 0;
+ u16 handle = conn->handle;
+ struct hci_conn *c;
switch (conn->state) {
case BT_CONNECTED:
case BT_CONFIG:
- return hci_disconnect_sync(hdev, conn, reason);
+ err = hci_disconnect_sync(hdev, conn, reason);
+ break;
case BT_CONNECT:
- err = hci_connect_cancel_sync(hdev, conn);
- /* Cleanup hci_conn object if it cannot be cancelled as it
- * likelly means the controller and host stack are out of sync.
- */
- if (err) {
- hci_dev_lock(hdev);
- hci_conn_failed(conn, err);
- hci_dev_unlock(hdev);
- }
- return err;
+ err = hci_connect_cancel_sync(hdev, conn, reason);
+ break;
case BT_CONNECT2:
- return hci_reject_conn_sync(hdev, conn, reason);
+ err = hci_reject_conn_sync(hdev, conn, reason);
+ break;
+ case BT_OPEN:
+ hci_dev_lock(hdev);
+
+ /* Cleanup bis or pa sync connections */
+ if (test_and_clear_bit(HCI_CONN_BIG_SYNC_FAILED, &conn->flags) ||
+ test_and_clear_bit(HCI_CONN_PA_SYNC_FAILED, &conn->flags)) {
+ hci_conn_failed(conn, reason);
+ } else if (test_bit(HCI_CONN_PA_SYNC, &conn->flags) ||
+ test_bit(HCI_CONN_BIG_SYNC, &conn->flags)) {
+ conn->state = BT_CLOSED;
+ hci_disconn_cfm(conn, reason);
+ hci_conn_del(conn);
+ }
+
+ hci_dev_unlock(hdev);
+ return 0;
+ case BT_BOUND:
+ hci_dev_lock(hdev);
+ hci_conn_failed(conn, reason);
+ hci_dev_unlock(hdev);
+ return 0;
default:
+ hci_dev_lock(hdev);
conn->state = BT_CLOSED;
- break;
+ hci_disconn_cfm(conn, reason);
+ hci_conn_del(conn);
+ hci_dev_unlock(hdev);
+ return 0;
}
- return 0;
+ hci_dev_lock(hdev);
+
+ /* Check if the connection hasn't been cleanup while waiting
+ * commands to complete.
+ */
+ c = hci_conn_hash_lookup_handle(hdev, handle);
+ if (!c || c != conn) {
+ err = 0;
+ goto unlock;
+ }
+
+ /* Cleanup hci_conn object if it cannot be cancelled as it
+ * likelly means the controller and host stack are out of sync
+ * or in case of LE it was still scanning so it can be cleanup
+ * safely.
+ */
+ hci_conn_failed(conn, reason);
+
+unlock:
+ hci_dev_unlock(hdev);
+ return err;
}
static int hci_disconnect_all_sync(struct hci_dev *hdev, u8 reason)
{
- struct hci_conn *conn, *tmp;
- int err;
+ struct list_head *head = &hdev->conn_hash.list;
+ struct hci_conn *conn;
- list_for_each_entry_safe(conn, tmp, &hdev->conn_hash.list, list) {
- err = hci_abort_conn_sync(hdev, conn, reason);
- if (err)
- return err;
+ rcu_read_lock();
+ while ((conn = list_first_or_null_rcu(head, struct hci_conn, list))) {
+ /* Make sure the connection is not freed while unlocking */
+ conn = hci_conn_get(conn);
+ rcu_read_unlock();
+ /* Disregard possible errors since hci_conn_del shall have been
+ * called even in case of errors had occurred since it would
+ * then cause hci_conn_failed to be called which calls
+ * hci_conn_del internally.
+ */
+ hci_abort_conn_sync(hdev, conn, reason);
+ hci_conn_put(conn);
+ rcu_read_lock();
}
+ rcu_read_unlock();
return 0;
}
@@ -6253,63 +6345,99 @@ int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn)
done:
if (err == -ETIMEDOUT)
- hci_le_connect_cancel_sync(hdev, conn);
+ hci_le_connect_cancel_sync(hdev, conn, 0x00);
/* Re-enable advertising after the connection attempt is finished. */
hci_resume_advertising_sync(hdev);
return err;
}
-int hci_le_create_cis_sync(struct hci_dev *hdev, struct hci_conn *conn)
+int hci_le_create_cis_sync(struct hci_dev *hdev)
{
struct {
struct hci_cp_le_create_cis cp;
struct hci_cis cis[0x1f];
} cmd;
- u8 cig;
- struct hci_conn *hcon = conn;
+ struct hci_conn *conn;
+ u8 cig = BT_ISO_QOS_CIG_UNSET;
+
+ /* The spec allows only one pending LE Create CIS command at a time. If
+ * the command is pending now, don't do anything. We check for pending
+ * connections after each CIS Established event.
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2566:
+ *
+ * If the Host issues this command before all the
+ * HCI_LE_CIS_Established events from the previous use of the
+ * command have been generated, the Controller shall return the
+ * error code Command Disallowed (0x0C).
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2567:
+ *
+ * When the Controller receives the HCI_LE_Create_CIS command, the
+ * Controller sends the HCI_Command_Status event to the Host. An
+ * HCI_LE_CIS_Established event will be generated for each CIS when it
+ * is established or if it is disconnected or considered lost before
+ * being established; until all the events are generated, the command
+ * remains pending.
+ */
memset(&cmd, 0, sizeof(cmd));
- cmd.cis[0].acl_handle = cpu_to_le16(conn->parent->handle);
- cmd.cis[0].cis_handle = cpu_to_le16(conn->handle);
- cmd.cp.num_cis++;
- cig = conn->iso_qos.ucast.cig;
hci_dev_lock(hdev);
rcu_read_lock();
+ /* Wait until previous Create CIS has completed */
list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis];
+ if (test_bit(HCI_CONN_CREATE_CIS, &conn->flags))
+ goto done;
+ }
- if (conn == hcon || conn->type != ISO_LINK ||
- conn->state == BT_CONNECTED ||
- conn->iso_qos.ucast.cig != cig)
+ /* Find CIG with all CIS ready */
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ struct hci_conn *link;
+
+ if (hci_conn_check_create_cis(conn))
continue;
- /* Check if all CIS(s) belonging to a CIG are ready */
- if (!conn->parent || conn->parent->state != BT_CONNECTED ||
- conn->state != BT_CONNECT) {
- cmd.cp.num_cis = 0;
- break;
+ cig = conn->iso_qos.ucast.cig;
+
+ list_for_each_entry_rcu(link, &hdev->conn_hash.list, list) {
+ if (hci_conn_check_create_cis(link) > 0 &&
+ link->iso_qos.ucast.cig == cig &&
+ link->state != BT_CONNECTED) {
+ cig = BT_ISO_QOS_CIG_UNSET;
+ break;
+ }
}
- /* Group all CIS with state BT_CONNECT since the spec don't
- * allow to send them individually:
- *
- * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
- * page 2566:
- *
- * If the Host issues this command before all the
- * HCI_LE_CIS_Established events from the previous use of the
- * command have been generated, the Controller shall return the
- * error code Command Disallowed (0x0C).
- */
+ if (cig != BT_ISO_QOS_CIG_UNSET)
+ break;
+ }
+
+ if (cig == BT_ISO_QOS_CIG_UNSET)
+ goto done;
+
+ list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
+ struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis];
+
+ if (hci_conn_check_create_cis(conn) ||
+ conn->iso_qos.ucast.cig != cig)
+ continue;
+
+ set_bit(HCI_CONN_CREATE_CIS, &conn->flags);
cis->acl_handle = cpu_to_le16(conn->parent->handle);
cis->cis_handle = cpu_to_le16(conn->handle);
cmd.cp.num_cis++;
+
+ if (cmd.cp.num_cis >= ARRAY_SIZE(cmd.cis))
+ break;
}
+done:
rcu_read_unlock();
hci_dev_unlock(hdev);
@@ -6433,7 +6561,7 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
static int _update_adv_data_sync(struct hci_dev *hdev, void *data)
{
- u8 instance = PTR_ERR(data);
+ u8 instance = PTR_UINT(data);
return hci_update_adv_data_sync(hdev, instance);
}
@@ -6441,5 +6569,5 @@ static int _update_adv_data_sync(struct hci_dev *hdev, void *data)
int hci_update_adv_data(struct hci_dev *hdev, u8 instance)
{
return hci_cmd_sync_queue(hdev, _update_adv_data_sync,
- ERR_PTR(instance), NULL);
+ UINT_PTR(instance), NULL);
}
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 369ed92dac99..c93aaeb3a3fa 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -256,21 +256,13 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, kern);
+ sk = bt_sock_alloc(net, sock, &hidp_proto, protocol, GFP_ATOMIC, kern);
if (!sk)
return -ENOMEM;
- sock_init_data(sock, sk);
-
sock->ops = &hidp_sock_ops;
-
sock->state = SS_UNCONNECTED;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = protocol;
- sk->sk_state = BT_OPEN;
-
bt_sock_link(&hidp_sk_list, sk);
return 0;
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 505d62247268..16da946f5881 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -48,6 +48,12 @@ static void iso_sock_kill(struct sock *sk);
#define EIR_SERVICE_DATA_LENGTH 4
#define BASE_MAX_LENGTH (HCI_MAX_PER_AD_LENGTH - EIR_SERVICE_DATA_LENGTH)
+/* iso_pinfo flags values */
+enum {
+ BT_SK_BIG_SYNC,
+ BT_SK_PA_SYNC,
+};
+
struct iso_pinfo {
struct bt_sock bt;
bdaddr_t src;
@@ -58,7 +64,7 @@ struct iso_pinfo {
__u8 bc_num_bis;
__u8 bc_bis[ISO_MAX_NUM_BIS];
__u16 sync_handle;
- __u32 flags;
+ unsigned long flags;
struct bt_iso_qos qos;
bool qos_user_set;
__u8 base_len;
@@ -70,6 +76,8 @@ static struct bt_iso_qos default_qos;
static bool check_ucast_qos(struct bt_iso_qos *qos);
static bool check_bcast_qos(struct bt_iso_qos *qos);
+static bool iso_match_sid(struct sock *sk, void *data);
+static void iso_sock_disconn(struct sock *sk);
/* ---- ISO timers ---- */
#define ISO_CONN_TIMEOUT (HZ * 40)
@@ -287,13 +295,24 @@ static int iso_connect_bis(struct sock *sk)
goto unlock;
}
- hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst,
- le_addr_type(iso_pi(sk)->dst_type),
- &iso_pi(sk)->qos, iso_pi(sk)->base_len,
- iso_pi(sk)->base);
- if (IS_ERR(hcon)) {
- err = PTR_ERR(hcon);
- goto unlock;
+ /* Just bind if DEFER_SETUP has been set */
+ if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst,
+ &iso_pi(sk)->qos, iso_pi(sk)->base_len,
+ iso_pi(sk)->base);
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto unlock;
+ }
+ } else {
+ hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst,
+ le_addr_type(iso_pi(sk)->dst_type),
+ &iso_pi(sk)->qos, iso_pi(sk)->base_len,
+ iso_pi(sk)->base);
+ if (IS_ERR(hcon)) {
+ err = PTR_ERR(hcon);
+ goto unlock;
+ }
}
conn = iso_conn_add(hcon);
@@ -317,6 +336,9 @@ static int iso_connect_bis(struct sock *sk)
if (hcon->state == BT_CONNECTED) {
iso_sock_clear_timer(sk);
sk->sk_state = BT_CONNECTED;
+ } else if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ iso_sock_clear_timer(sk);
+ sk->sk_state = BT_CONNECT;
} else {
sk->sk_state = BT_CONNECT;
iso_sock_set_timer(sk, sk->sk_sndtimeo);
@@ -579,6 +601,15 @@ static void iso_sock_cleanup_listen(struct sock *parent)
iso_sock_kill(sk);
}
+ /* If listening socket stands for a PA sync connection,
+ * properly disconnect the hcon and socket.
+ */
+ if (iso_pi(parent)->conn && iso_pi(parent)->conn->hcon &&
+ test_bit(HCI_CONN_PA_SYNC, &iso_pi(parent)->conn->hcon->flags)) {
+ iso_sock_disconn(parent);
+ return;
+ }
+
parent->sk_state = BT_CLOSED;
sock_set_flag(parent, SOCK_ZAPPED);
}
@@ -600,16 +631,14 @@ static void iso_sock_kill(struct sock *sk)
sock_put(sk);
}
-static void iso_conn_defer_reject(struct hci_conn *conn)
+static void iso_sock_disconn(struct sock *sk)
{
- struct hci_cp_le_reject_cis cp;
-
- BT_DBG("conn %p", conn);
-
- memset(&cp, 0, sizeof(cp));
- cp.handle = cpu_to_le16(conn->handle);
- cp.reason = HCI_ERROR_REJ_BAD_ADDR;
- hci_send_cmd(conn->hdev, HCI_OP_LE_REJECT_CIS, sizeof(cp), &cp);
+ sk->sk_state = BT_DISCONN;
+ iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT);
+ iso_conn_lock(iso_pi(sk)->conn);
+ hci_conn_drop(iso_pi(sk)->conn->hcon);
+ iso_pi(sk)->conn->hcon = NULL;
+ iso_conn_unlock(iso_pi(sk)->conn);
}
static void __iso_sock_close(struct sock *sk)
@@ -621,37 +650,22 @@ static void __iso_sock_close(struct sock *sk)
iso_sock_cleanup_listen(sk);
break;
+ case BT_CONNECT:
case BT_CONNECTED:
case BT_CONFIG:
- if (iso_pi(sk)->conn->hcon) {
- sk->sk_state = BT_DISCONN;
- iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT);
- iso_conn_lock(iso_pi(sk)->conn);
- hci_conn_drop(iso_pi(sk)->conn->hcon);
- iso_pi(sk)->conn->hcon = NULL;
- iso_conn_unlock(iso_pi(sk)->conn);
- } else {
+ if (iso_pi(sk)->conn->hcon)
+ iso_sock_disconn(sk);
+ else
iso_chan_del(sk, ECONNRESET);
- }
break;
case BT_CONNECT2:
- if (iso_pi(sk)->conn->hcon)
- iso_conn_defer_reject(iso_pi(sk)->conn->hcon);
- iso_chan_del(sk, ECONNRESET);
- break;
- case BT_CONNECT:
- /* In case of DEFER_SETUP the hcon would be bound to CIG which
- * needs to be removed so just call hci_conn_del so the cleanup
- * callback do what is needed.
- */
- if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) &&
- iso_pi(sk)->conn->hcon) {
- hci_conn_del(iso_pi(sk)->conn->hcon);
- iso_pi(sk)->conn->hcon = NULL;
- }
-
- iso_chan_del(sk, ECONNRESET);
+ if (iso_pi(sk)->conn->hcon &&
+ (test_bit(HCI_CONN_PA_SYNC, &iso_pi(sk)->conn->hcon->flags) ||
+ test_bit(HCI_CONN_PA_SYNC_FAILED, &iso_pi(sk)->conn->hcon->flags)))
+ iso_sock_disconn(sk);
+ else
+ iso_chan_del(sk, ECONNRESET);
break;
case BT_DISCONN:
iso_chan_del(sk, ECONNRESET);
@@ -724,21 +738,13 @@ static struct sock *iso_sock_alloc(struct net *net, struct socket *sock,
{
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &iso_proto, kern);
+ sk = bt_sock_alloc(net, sock, &iso_proto, proto, prio, kern);
if (!sk)
return NULL;
- sock_init_data(sock, sk);
- INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
sk->sk_destruct = iso_sock_destruct;
sk->sk_sndtimeo = ISO_CONN_TIMEOUT;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = proto;
- sk->sk_state = BT_OPEN;
-
/* Set address type as public as default src address is BDADDR_ANY */
iso_pi(sk)->src_type = BDADDR_LE_PUBLIC;
@@ -1154,6 +1160,29 @@ static void iso_conn_defer_accept(struct hci_conn *conn)
hci_send_cmd(hdev, HCI_OP_LE_ACCEPT_CIS, sizeof(cp), &cp);
}
+static void iso_conn_big_sync(struct sock *sk)
+{
+ int err;
+ struct hci_dev *hdev;
+
+ hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
+ iso_pi(sk)->src_type);
+
+ if (!hdev)
+ return;
+
+ if (!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) {
+ err = hci_le_big_create_sync(hdev, iso_pi(sk)->conn->hcon,
+ &iso_pi(sk)->qos,
+ iso_pi(sk)->sync_handle,
+ iso_pi(sk)->bc_num_bis,
+ iso_pi(sk)->bc_bis);
+ if (err)
+ bt_dev_err(hdev, "hci_le_big_create_sync: %d",
+ err);
+ }
+}
+
static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
@@ -1166,8 +1195,15 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
switch (sk->sk_state) {
case BT_CONNECT2:
- iso_conn_defer_accept(pi->conn->hcon);
- sk->sk_state = BT_CONFIG;
+ if (pi->conn->hcon &&
+ test_bit(HCI_CONN_PA_SYNC, &pi->conn->hcon->flags)) {
+ iso_conn_big_sync(sk);
+ sk->sk_state = BT_LISTEN;
+ set_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags);
+ } else {
+ iso_conn_defer_accept(pi->conn->hcon);
+ sk->sk_state = BT_CONFIG;
+ }
release_sock(sk);
return 0;
case BT_CONNECT:
@@ -1202,6 +1238,12 @@ static bool check_io_qos(struct bt_iso_io_qos *qos)
static bool check_ucast_qos(struct bt_iso_qos *qos)
{
+ if (qos->ucast.cig > 0xef && qos->ucast.cig != BT_ISO_QOS_CIG_UNSET)
+ return false;
+
+ if (qos->ucast.cis > 0xef && qos->ucast.cis != BT_ISO_QOS_CIS_UNSET)
+ return false;
+
if (qos->ucast.sca > 0x07)
return false;
@@ -1291,6 +1333,18 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
break;
+ case BT_PKT_STATUS:
+ if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+ err = -EFAULT;
+ break;
+ }
+
+ if (opt)
+ set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
+ else
+ clear_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
+ break;
+
case BT_ISO_QOS:
if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
sk->sk_state != BT_CONNECT2) {
@@ -1376,6 +1430,12 @@ static int iso_sock_getsockopt(struct socket *sock, int level, int optname,
break;
+ case BT_PKT_STATUS:
+ if (put_user(test_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags),
+ (int __user *)optval))
+ err = -EFAULT;
+ break;
+
case BT_ISO_QOS:
qos = iso_sock_get_qos(sk);
@@ -1386,7 +1446,8 @@ static int iso_sock_getsockopt(struct socket *sock, int level, int optname,
break;
case BT_ISO_BASE:
- if (sk->sk_state == BT_CONNECTED) {
+ if (sk->sk_state == BT_CONNECTED &&
+ !bacmp(&iso_pi(sk)->dst, BDADDR_ANY)) {
base_len = iso_pi(sk)->conn->hcon->le_per_adv_data_len;
base = iso_pi(sk)->conn->hcon->le_per_adv_data;
} else {
@@ -1466,7 +1527,7 @@ static int iso_sock_release(struct socket *sock)
iso_sock_close(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ if (sock_flag(sk, SOCK_LINGER) && READ_ONCE(sk->sk_lingertime) &&
!(current->flags & PF_EXITING)) {
lock_sock(sk);
err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
@@ -1504,11 +1565,17 @@ static bool iso_match_big(struct sock *sk, void *data)
return ev->handle == iso_pi(sk)->qos.bcast.big;
}
+static bool iso_match_pa_sync_flag(struct sock *sk, void *data)
+{
+ return test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags);
+}
+
static void iso_conn_ready(struct iso_conn *conn)
{
- struct sock *parent;
+ struct sock *parent = NULL;
struct sock *sk = conn->sk;
- struct hci_ev_le_big_sync_estabilished *ev;
+ struct hci_ev_le_big_sync_estabilished *ev = NULL;
+ struct hci_ev_le_pa_sync_established *ev2 = NULL;
struct hci_conn *hcon;
BT_DBG("conn %p", conn);
@@ -1520,15 +1587,32 @@ static void iso_conn_ready(struct iso_conn *conn)
if (!hcon)
return;
- ev = hci_recv_event_data(hcon->hdev,
- HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
- if (ev)
+ if (test_bit(HCI_CONN_BIG_SYNC, &hcon->flags) ||
+ test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) {
+ ev = hci_recv_event_data(hcon->hdev,
+ HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
+
+ /* Get reference to PA sync parent socket, if it exists */
parent = iso_get_sock_listen(&hcon->src,
&hcon->dst,
- iso_match_big, ev);
- else
+ iso_match_pa_sync_flag, NULL);
+ if (!parent && ev)
+ parent = iso_get_sock_listen(&hcon->src,
+ &hcon->dst,
+ iso_match_big, ev);
+ } else if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags) ||
+ test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) {
+ ev2 = hci_recv_event_data(hcon->hdev,
+ HCI_EV_LE_PA_SYNC_ESTABLISHED);
+ if (ev2)
+ parent = iso_get_sock_listen(&hcon->src,
+ &hcon->dst,
+ iso_match_sid, ev2);
+ }
+
+ if (!parent)
parent = iso_get_sock_listen(&hcon->src,
- BDADDR_ANY, NULL, NULL);
+ BDADDR_ANY, NULL, NULL);
if (!parent)
return;
@@ -1545,11 +1629,17 @@ static void iso_conn_ready(struct iso_conn *conn)
iso_sock_init(sk, parent);
bacpy(&iso_pi(sk)->src, &hcon->src);
- iso_pi(sk)->src_type = hcon->src_type;
+
+ /* Convert from HCI to three-value type */
+ if (hcon->src_type == ADDR_LE_DEV_PUBLIC)
+ iso_pi(sk)->src_type = BDADDR_LE_PUBLIC;
+ else
+ iso_pi(sk)->src_type = BDADDR_LE_RANDOM;
/* If hcon has no destination address (BDADDR_ANY) it means it
- * was created by HCI_EV_LE_BIG_SYNC_ESTABILISHED so we need to
- * initialize using the parent socket destination address.
+ * was created by HCI_EV_LE_BIG_SYNC_ESTABILISHED or
+ * HCI_EV_LE_PA_SYNC_ESTABLISHED so we need to initialize using
+ * the parent socket destination address.
*/
if (!bacmp(&hcon->dst, BDADDR_ANY)) {
bacpy(&hcon->dst, &iso_pi(parent)->dst);
@@ -1557,12 +1647,29 @@ static void iso_conn_ready(struct iso_conn *conn)
hcon->sync_handle = iso_pi(parent)->sync_handle;
}
+ if (ev2 && !ev2->status) {
+ iso_pi(sk)->sync_handle = iso_pi(parent)->sync_handle;
+ iso_pi(sk)->qos = iso_pi(parent)->qos;
+ iso_pi(sk)->bc_num_bis = iso_pi(parent)->bc_num_bis;
+ memcpy(iso_pi(sk)->bc_bis, iso_pi(parent)->bc_bis, ISO_MAX_NUM_BIS);
+ }
+
bacpy(&iso_pi(sk)->dst, &hcon->dst);
iso_pi(sk)->dst_type = hcon->dst_type;
+ iso_pi(sk)->sync_handle = iso_pi(parent)->sync_handle;
+ memcpy(iso_pi(sk)->base, iso_pi(parent)->base, iso_pi(parent)->base_len);
+ iso_pi(sk)->base_len = iso_pi(parent)->base_len;
hci_conn_hold(hcon);
iso_chan_add(conn, sk, parent);
+ if ((ev && ((struct hci_evt_le_big_sync_estabilished *)ev)->status) ||
+ (ev2 && ev2->status)) {
+ /* Trigger error signal on child socket */
+ sk->sk_err = ECONNREFUSED;
+ sk->sk_error_report(sk);
+ }
+
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
sk->sk_state = BT_CONNECT2;
else
@@ -1589,12 +1696,20 @@ static bool iso_match_sync_handle(struct sock *sk, void *data)
return le16_to_cpu(ev->sync_handle) == iso_pi(sk)->sync_handle;
}
+static bool iso_match_sync_handle_pa_report(struct sock *sk, void *data)
+{
+ struct hci_ev_le_per_adv_report *ev = data;
+
+ return le16_to_cpu(ev->sync_handle) == iso_pi(sk)->sync_handle;
+}
+
/* ----- ISO interface with lower layer (HCI) ----- */
int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
{
struct hci_ev_le_pa_sync_established *ev1;
struct hci_evt_le_big_info_adv_report *ev2;
+ struct hci_ev_le_per_adv_report *ev3;
struct sock *sk;
int lm = 0;
@@ -1610,12 +1725,15 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
* 2. HCI_EVT_LE_BIG_INFO_ADV_REPORT: When connect_ind is triggered by a
* a BIG Info it attempts to check if there any listening socket with
* the same sync_handle and if it does then attempt to create a sync.
+ * 3. HCI_EV_LE_PER_ADV_REPORT: When a PA report is received, it is stored
+ * in iso_pi(sk)->base so it can be passed up to user, in the case of a
+ * broadcast sink.
*/
ev1 = hci_recv_event_data(hdev, HCI_EV_LE_PA_SYNC_ESTABLISHED);
if (ev1) {
sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sid,
ev1);
- if (sk)
+ if (sk && !ev1->status)
iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle);
goto done;
@@ -1623,25 +1741,43 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
ev2 = hci_recv_event_data(hdev, HCI_EVT_LE_BIG_INFO_ADV_REPORT);
if (ev2) {
+ /* Try to get PA sync listening socket, if it exists */
sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
- iso_match_sync_handle, ev2);
+ iso_match_pa_sync_flag, NULL);
+ if (!sk)
+ sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
+ iso_match_sync_handle, ev2);
if (sk) {
int err;
if (ev2->num_bis < iso_pi(sk)->bc_num_bis)
iso_pi(sk)->bc_num_bis = ev2->num_bis;
- err = hci_le_big_create_sync(hdev,
- &iso_pi(sk)->qos,
- iso_pi(sk)->sync_handle,
- iso_pi(sk)->bc_num_bis,
- iso_pi(sk)->bc_bis);
- if (err) {
- bt_dev_err(hdev, "hci_le_big_create_sync: %d",
- err);
- sk = NULL;
+ if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) &&
+ !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) {
+ err = hci_le_big_create_sync(hdev, NULL,
+ &iso_pi(sk)->qos,
+ iso_pi(sk)->sync_handle,
+ iso_pi(sk)->bc_num_bis,
+ iso_pi(sk)->bc_bis);
+ if (err) {
+ bt_dev_err(hdev, "hci_le_big_create_sync: %d",
+ err);
+ sk = NULL;
+ }
}
}
+ }
+
+ ev3 = hci_recv_event_data(hdev, HCI_EV_LE_PER_ADV_REPORT);
+ if (ev3) {
+ sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
+ iso_match_sync_handle_pa_report, ev3);
+
+ if (sk) {
+ memcpy(iso_pi(sk)->base, ev3->data, ev3->length);
+ iso_pi(sk)->base_len = ev3->length;
+ }
} else {
sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL);
}
@@ -1676,13 +1812,19 @@ static void iso_connect_cfm(struct hci_conn *hcon, __u8 status)
}
/* Create CIS if pending */
- hci_le_create_cis(hcon);
+ hci_le_create_cis_pending(hcon->hdev);
return;
}
BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
- if (!status) {
+ /* Similar to the success case, if HCI_CONN_BIG_SYNC_FAILED or
+ * HCI_CONN_PA_SYNC_FAILED is set, queue the failed connection
+ * into the accept queue of the listening socket and wake up
+ * userspace, to inform the user about the event.
+ */
+ if (!status || test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags) ||
+ test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) {
struct iso_conn *conn;
conn = iso_conn_add(hcon);
@@ -1757,6 +1899,7 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
if (len == skb->len) {
/* Complete frame received */
+ hci_skb_pkt_status(skb) = flags & 0x03;
iso_recv_frame(conn, skb);
return;
}
@@ -1778,6 +1921,7 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
if (!conn->rx_skb)
goto drop;
+ hci_skb_pkt_status(conn->rx_skb) = flags & 0x03;
skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
skb->len);
conn->rx_len = len - skb->len;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 947ca580bb9a..3bdfc3f1e73d 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -178,21 +178,6 @@ done:
return err;
}
-static void l2cap_sock_init_pid(struct sock *sk)
-{
- struct l2cap_chan *chan = l2cap_pi(sk)->chan;
-
- /* Only L2CAP_MODE_EXT_FLOWCTL ever need to access the PID in order to
- * group the channels being requested.
- */
- if (chan->mode != L2CAP_MODE_EXT_FLOWCTL)
- return;
-
- spin_lock(&sk->sk_peer_lock);
- sk->sk_peer_pid = get_pid(task_tgid(current));
- spin_unlock(&sk->sk_peer_lock);
-}
-
static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags)
{
@@ -268,8 +253,6 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
chan->mode != L2CAP_MODE_EXT_FLOWCTL)
chan->mode = L2CAP_MODE_LE_FLOWCTL;
- l2cap_sock_init_pid(sk);
-
err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid),
&la.l2_bdaddr, la.l2_bdaddr_type);
if (err)
@@ -325,8 +308,6 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
goto done;
}
- l2cap_sock_init_pid(sk);
-
sk->sk_max_ack_backlog = backlog;
sk->sk_ack_backlog = 0;
@@ -1858,21 +1839,13 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
struct sock *sk;
struct l2cap_chan *chan;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, kern);
+ sk = bt_sock_alloc(net, sock, &l2cap_proto, proto, prio, kern);
if (!sk)
return NULL;
- sock_init_data(sock, sk);
- INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
sk->sk_destruct = l2cap_sock_destruct;
sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = proto;
- sk->sk_state = BT_OPEN;
-
chan = l2cap_chan_create();
if (!chan) {
sk_free(sk);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d4498037fadc..ba2e00646e8e 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -944,6 +944,12 @@ static u32 get_current_settings(struct hci_dev *hdev)
if (cis_peripheral_capable(hdev))
settings |= MGMT_SETTING_CIS_PERIPHERAL;
+ if (bis_capable(hdev))
+ settings |= MGMT_SETTING_ISO_BROADCASTER;
+
+ if (sync_recv_capable(hdev))
+ settings |= MGMT_SETTING_ISO_SYNC_RECEIVER;
+
return settings;
}
@@ -3580,18 +3586,6 @@ unlock:
return err;
}
-static int abort_conn_sync(struct hci_dev *hdev, void *data)
-{
- struct hci_conn *conn;
- u16 handle = PTR_ERR(data);
-
- conn = hci_conn_hash_lookup_handle(hdev, handle);
- if (!conn)
- return 0;
-
- return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM);
-}
-
static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
@@ -3642,8 +3636,7 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
le_addr_type(addr->type));
if (conn->conn_reason == CONN_REASON_PAIR_DEVICE)
- hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle),
- NULL);
+ hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
unlock:
hci_dev_unlock(hdev);
@@ -5388,9 +5381,9 @@ static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count,
for (i = 0; i < pattern_count; i++) {
offset = patterns[i].offset;
length = patterns[i].length;
- if (offset >= HCI_MAX_AD_LENGTH ||
- length > HCI_MAX_AD_LENGTH ||
- (offset + length) > HCI_MAX_AD_LENGTH)
+ if (offset >= HCI_MAX_EXT_AD_LENGTH ||
+ length > HCI_MAX_EXT_AD_LENGTH ||
+ (offset + length) > HCI_MAX_EXT_AD_LENGTH)
return MGMT_STATUS_INVALID_PARAMS;
p = kmalloc(sizeof(*p), GFP_KERNEL);
@@ -8435,8 +8428,8 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
supported_flags = get_supported_adv_flags(hdev);
rp->supported_flags = cpu_to_le32(supported_flags);
- rp->max_adv_data_len = HCI_MAX_AD_LENGTH;
- rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH;
+ rp->max_adv_data_len = max_adv_len(hdev);
+ rp->max_scan_rsp_len = max_adv_len(hdev);
rp->max_instances = hdev->le_num_of_adv_sets;
rp->num_instances = hdev->adv_instance_cnt;
@@ -8472,7 +8465,7 @@ static u8 calculate_name_len(struct hci_dev *hdev)
static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags,
bool is_adv_data)
{
- u8 max_len = HCI_MAX_AD_LENGTH;
+ u8 max_len = max_adv_len(hdev);
if (is_adv_data) {
if (adv_flags & (MGMT_ADV_FLAG_DISCOV |
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index bf5cee48916c..abbafa6194ca 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -91,6 +91,33 @@ struct msft_ev_le_monitor_device {
struct msft_monitor_advertisement_handle_data {
__u8 msft_handle;
__u16 mgmt_handle;
+ __s8 rssi_high;
+ __s8 rssi_low;
+ __u8 rssi_low_interval;
+ __u8 rssi_sampling_period;
+ __u8 cond_type;
+ struct list_head list;
+};
+
+enum monitor_addr_filter_state {
+ AF_STATE_IDLE,
+ AF_STATE_ADDING,
+ AF_STATE_ADDED,
+ AF_STATE_REMOVING,
+};
+
+#define MSFT_MONITOR_ADVERTISEMENT_TYPE_ADDR 0x04
+struct msft_monitor_addr_filter_data {
+ __u8 msft_handle;
+ __u8 pattern_handle; /* address filters pertain to */
+ __u16 mgmt_handle;
+ int state;
+ __s8 rssi_high;
+ __s8 rssi_low;
+ __u8 rssi_low_interval;
+ __u8 rssi_sampling_period;
+ __u8 addr_type;
+ bdaddr_t bdaddr;
struct list_head list;
};
@@ -99,9 +126,12 @@ struct msft_data {
__u8 evt_prefix_len;
__u8 *evt_prefix;
struct list_head handle_map;
+ struct list_head address_filters;
__u8 resuming;
__u8 suspending;
__u8 filter_enabled;
+ /* To synchronize add/remove address filter and monitor device event.*/
+ struct mutex filter_lock;
};
bool msft_monitor_supported(struct hci_dev *hdev)
@@ -180,6 +210,24 @@ static struct msft_monitor_advertisement_handle_data *msft_find_handle_data
return NULL;
}
+/* This function requires the caller holds msft->filter_lock */
+static struct msft_monitor_addr_filter_data *msft_find_address_data
+ (struct hci_dev *hdev, u8 addr_type, bdaddr_t *addr,
+ u8 pattern_handle)
+{
+ struct msft_monitor_addr_filter_data *entry;
+ struct msft_data *msft = hdev->msft_data;
+
+ list_for_each_entry(entry, &msft->address_filters, list) {
+ if (entry->pattern_handle == pattern_handle &&
+ addr_type == entry->addr_type &&
+ !bacmp(addr, &entry->bdaddr))
+ return entry;
+ }
+
+ return NULL;
+}
+
/* This function requires the caller holds hdev->lock */
static int msft_monitor_device_del(struct hci_dev *hdev, __u16 mgmt_handle,
bdaddr_t *bdaddr, __u8 addr_type,
@@ -240,6 +288,7 @@ static int msft_le_monitor_advertisement_cb(struct hci_dev *hdev, u16 opcode,
handle_data->mgmt_handle = monitor->handle;
handle_data->msft_handle = rp->handle;
+ handle_data->cond_type = MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN;
INIT_LIST_HEAD(&handle_data->list);
list_add(&handle_data->list, &msft->handle_map);
@@ -254,6 +303,70 @@ unlock:
return status;
}
+/* This function requires the caller holds hci_req_sync_lock */
+static void msft_remove_addr_filters_sync(struct hci_dev *hdev, u8 handle)
+{
+ struct msft_monitor_addr_filter_data *address_filter, *n;
+ struct msft_cp_le_cancel_monitor_advertisement cp;
+ struct msft_data *msft = hdev->msft_data;
+ struct list_head head;
+ struct sk_buff *skb;
+
+ INIT_LIST_HEAD(&head);
+
+ /* Cancel all corresponding address monitors */
+ mutex_lock(&msft->filter_lock);
+
+ list_for_each_entry_safe(address_filter, n, &msft->address_filters,
+ list) {
+ if (address_filter->pattern_handle != handle)
+ continue;
+
+ list_del(&address_filter->list);
+
+ /* Keep the address filter and let
+ * msft_add_address_filter_sync() remove and free the address
+ * filter.
+ */
+ if (address_filter->state == AF_STATE_ADDING) {
+ address_filter->state = AF_STATE_REMOVING;
+ continue;
+ }
+
+ /* Keep the address filter and let
+ * msft_cancel_address_filter_sync() remove and free the address
+ * filter
+ */
+ if (address_filter->state == AF_STATE_REMOVING)
+ continue;
+
+ list_add_tail(&address_filter->list, &head);
+ }
+
+ mutex_unlock(&msft->filter_lock);
+
+ list_for_each_entry_safe(address_filter, n, &head, list) {
+ list_del(&address_filter->list);
+
+ cp.sub_opcode = MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT;
+ cp.handle = address_filter->msft_handle;
+
+ skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
+ HCI_CMD_TIMEOUT);
+ if (IS_ERR_OR_NULL(skb)) {
+ kfree(address_filter);
+ continue;
+ }
+
+ kfree_skb(skb);
+
+ bt_dev_dbg(hdev, "MSFT: Canceled device %pMR address filter",
+ &address_filter->bdaddr);
+
+ kfree(address_filter);
+ }
+}
+
static int msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
u16 opcode,
struct adv_monitor *monitor,
@@ -263,6 +376,7 @@ static int msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
struct msft_monitor_advertisement_handle_data *handle_data;
struct msft_data *msft = hdev->msft_data;
int status = 0;
+ u8 msft_handle;
rp = (struct msft_rp_le_cancel_monitor_advertisement *)skb->data;
if (skb->len < sizeof(*rp)) {
@@ -293,11 +407,17 @@ static int msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
NULL, 0, false);
}
+ msft_handle = handle_data->msft_handle;
+
list_del(&handle_data->list);
kfree(handle_data);
- }
- hci_dev_unlock(hdev);
+ hci_dev_unlock(hdev);
+
+ msft_remove_addr_filters_sync(hdev, msft_handle);
+ } else {
+ hci_dev_unlock(hdev);
+ }
done:
return status;
@@ -394,12 +514,14 @@ static int msft_add_monitor_sync(struct hci_dev *hdev,
{
struct msft_cp_le_monitor_advertisement *cp;
struct msft_le_monitor_advertisement_pattern_data *pattern_data;
+ struct msft_monitor_advertisement_handle_data *handle_data;
struct msft_le_monitor_advertisement_pattern *pattern;
struct adv_pattern *entry;
size_t total_size = sizeof(*cp) + sizeof(*pattern_data);
ptrdiff_t offset = 0;
u8 pattern_count = 0;
struct sk_buff *skb;
+ int err;
if (!msft_monitor_pattern_valid(monitor))
return -EINVAL;
@@ -436,16 +558,31 @@ static int msft_add_monitor_sync(struct hci_dev *hdev,
skb = __hci_cmd_sync(hdev, hdev->msft_opcode, total_size, cp,
HCI_CMD_TIMEOUT);
- kfree(cp);
if (IS_ERR_OR_NULL(skb)) {
- if (!skb)
- return -EIO;
- return PTR_ERR(skb);
+ err = PTR_ERR(skb);
+ goto out_free;
}
- return msft_le_monitor_advertisement_cb(hdev, hdev->msft_opcode,
- monitor, skb);
+ err = msft_le_monitor_advertisement_cb(hdev, hdev->msft_opcode,
+ monitor, skb);
+ if (err)
+ goto out_free;
+
+ handle_data = msft_find_handle_data(hdev, monitor->handle, true);
+ if (!handle_data) {
+ err = -ENODATA;
+ goto out_free;
+ }
+
+ handle_data->rssi_high = cp->rssi_high;
+ handle_data->rssi_low = cp->rssi_low;
+ handle_data->rssi_low_interval = cp->rssi_low_interval;
+ handle_data->rssi_sampling_period = cp->rssi_sampling_period;
+
+out_free:
+ kfree(cp);
+ return err;
}
/* This function requires the caller holds hci_req_sync_lock */
@@ -538,6 +675,7 @@ void msft_do_close(struct hci_dev *hdev)
{
struct msft_data *msft = hdev->msft_data;
struct msft_monitor_advertisement_handle_data *handle_data, *tmp;
+ struct msft_monitor_addr_filter_data *address_filter, *n;
struct adv_monitor *monitor;
if (!msft)
@@ -559,6 +697,14 @@ void msft_do_close(struct hci_dev *hdev)
kfree(handle_data);
}
+ mutex_lock(&msft->filter_lock);
+ list_for_each_entry_safe(address_filter, n, &msft->address_filters,
+ list) {
+ list_del(&address_filter->list);
+ kfree(address_filter);
+ }
+ mutex_unlock(&msft->filter_lock);
+
hci_dev_lock(hdev);
/* Clear any devices that are being monitored and notify device lost */
@@ -568,6 +714,49 @@ void msft_do_close(struct hci_dev *hdev)
hci_dev_unlock(hdev);
}
+static int msft_cancel_address_filter_sync(struct hci_dev *hdev, void *data)
+{
+ struct msft_monitor_addr_filter_data *address_filter = data;
+ struct msft_cp_le_cancel_monitor_advertisement cp;
+ struct msft_data *msft = hdev->msft_data;
+ struct sk_buff *skb;
+ int err = 0;
+
+ if (!msft) {
+ bt_dev_err(hdev, "MSFT: msft data is freed");
+ return -EINVAL;
+ }
+
+ /* The address filter has been removed by hci dev close */
+ if (!test_bit(HCI_UP, &hdev->flags))
+ return 0;
+
+ mutex_lock(&msft->filter_lock);
+ list_del(&address_filter->list);
+ mutex_unlock(&msft->filter_lock);
+
+ cp.sub_opcode = MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT;
+ cp.handle = address_filter->msft_handle;
+
+ skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
+ HCI_CMD_TIMEOUT);
+ if (IS_ERR_OR_NULL(skb)) {
+ bt_dev_err(hdev, "MSFT: Failed to cancel address (%pMR) filter",
+ &address_filter->bdaddr);
+ err = -EIO;
+ goto done;
+ }
+ kfree_skb(skb);
+
+ bt_dev_dbg(hdev, "MSFT: Canceled device %pMR address filter",
+ &address_filter->bdaddr);
+
+done:
+ kfree(address_filter);
+
+ return err;
+}
+
void msft_register(struct hci_dev *hdev)
{
struct msft_data *msft = NULL;
@@ -581,7 +770,9 @@ void msft_register(struct hci_dev *hdev)
}
INIT_LIST_HEAD(&msft->handle_map);
+ INIT_LIST_HEAD(&msft->address_filters);
hdev->msft_data = msft;
+ mutex_init(&msft->filter_lock);
}
void msft_unregister(struct hci_dev *hdev)
@@ -596,6 +787,7 @@ void msft_unregister(struct hci_dev *hdev)
hdev->msft_data = NULL;
kfree(msft->evt_prefix);
+ mutex_destroy(&msft->filter_lock);
kfree(msft);
}
@@ -645,11 +837,149 @@ static void *msft_skb_pull(struct hci_dev *hdev, struct sk_buff *skb,
return data;
}
+static int msft_add_address_filter_sync(struct hci_dev *hdev, void *data)
+{
+ struct msft_monitor_addr_filter_data *address_filter = data;
+ struct msft_rp_le_monitor_advertisement *rp;
+ struct msft_cp_le_monitor_advertisement *cp;
+ struct msft_data *msft = hdev->msft_data;
+ struct sk_buff *skb = NULL;
+ bool remove = false;
+ size_t size;
+
+ if (!msft) {
+ bt_dev_err(hdev, "MSFT: msft data is freed");
+ return -EINVAL;
+ }
+
+ /* The address filter has been removed by hci dev close */
+ if (!test_bit(HCI_UP, &hdev->flags))
+ return -ENODEV;
+
+ /* We are safe to use the address filter from now on.
+ * msft_monitor_device_evt() wouldn't delete this filter because it's
+ * not been added by now.
+ * And all other functions that requiring hci_req_sync_lock wouldn't
+ * touch this filter before this func completes because it's protected
+ * by hci_req_sync_lock.
+ */
+
+ if (address_filter->state == AF_STATE_REMOVING) {
+ mutex_lock(&msft->filter_lock);
+ list_del(&address_filter->list);
+ mutex_unlock(&msft->filter_lock);
+ kfree(address_filter);
+ return 0;
+ }
+
+ size = sizeof(*cp) +
+ sizeof(address_filter->addr_type) +
+ sizeof(address_filter->bdaddr);
+ cp = kzalloc(size, GFP_KERNEL);
+ if (!cp) {
+ bt_dev_err(hdev, "MSFT: Alloc cmd param err");
+ remove = true;
+ goto done;
+ }
+ cp->sub_opcode = MSFT_OP_LE_MONITOR_ADVERTISEMENT;
+ cp->rssi_high = address_filter->rssi_high;
+ cp->rssi_low = address_filter->rssi_low;
+ cp->rssi_low_interval = address_filter->rssi_low_interval;
+ cp->rssi_sampling_period = address_filter->rssi_sampling_period;
+ cp->cond_type = MSFT_MONITOR_ADVERTISEMENT_TYPE_ADDR;
+ cp->data[0] = address_filter->addr_type;
+ memcpy(&cp->data[1], &address_filter->bdaddr,
+ sizeof(address_filter->bdaddr));
+
+ skb = __hci_cmd_sync(hdev, hdev->msft_opcode, size, cp,
+ HCI_CMD_TIMEOUT);
+ if (IS_ERR_OR_NULL(skb)) {
+ bt_dev_err(hdev, "Failed to enable address %pMR filter",
+ &address_filter->bdaddr);
+ skb = NULL;
+ remove = true;
+ goto done;
+ }
+
+ rp = skb_pull_data(skb, sizeof(*rp));
+ if (!rp || rp->sub_opcode != MSFT_OP_LE_MONITOR_ADVERTISEMENT ||
+ rp->status)
+ remove = true;
+
+done:
+ mutex_lock(&msft->filter_lock);
+
+ if (remove) {
+ bt_dev_warn(hdev, "MSFT: Remove address (%pMR) filter",
+ &address_filter->bdaddr);
+ list_del(&address_filter->list);
+ kfree(address_filter);
+ } else {
+ address_filter->state = AF_STATE_ADDED;
+ address_filter->msft_handle = rp->handle;
+ bt_dev_dbg(hdev, "MSFT: Address %pMR filter enabled",
+ &address_filter->bdaddr);
+ }
+ mutex_unlock(&msft->filter_lock);
+
+ kfree_skb(skb);
+
+ return 0;
+}
+
+/* This function requires the caller holds msft->filter_lock */
+static struct msft_monitor_addr_filter_data *msft_add_address_filter
+ (struct hci_dev *hdev, u8 addr_type, bdaddr_t *bdaddr,
+ struct msft_monitor_advertisement_handle_data *handle_data)
+{
+ struct msft_monitor_addr_filter_data *address_filter = NULL;
+ struct msft_data *msft = hdev->msft_data;
+ int err;
+
+ address_filter = kzalloc(sizeof(*address_filter), GFP_KERNEL);
+ if (!address_filter)
+ return NULL;
+
+ address_filter->state = AF_STATE_ADDING;
+ address_filter->msft_handle = 0xff;
+ address_filter->pattern_handle = handle_data->msft_handle;
+ address_filter->mgmt_handle = handle_data->mgmt_handle;
+ address_filter->rssi_high = handle_data->rssi_high;
+ address_filter->rssi_low = handle_data->rssi_low;
+ address_filter->rssi_low_interval = handle_data->rssi_low_interval;
+ address_filter->rssi_sampling_period = handle_data->rssi_sampling_period;
+ address_filter->addr_type = addr_type;
+ bacpy(&address_filter->bdaddr, bdaddr);
+
+ /* With the above AF_STATE_ADDING, duplicated address filter can be
+ * avoided when receiving monitor device event (found/lost) frequently
+ * for the same device.
+ */
+ list_add_tail(&address_filter->list, &msft->address_filters);
+
+ err = hci_cmd_sync_queue(hdev, msft_add_address_filter_sync,
+ address_filter, NULL);
+ if (err < 0) {
+ bt_dev_err(hdev, "MSFT: Add address %pMR filter err", bdaddr);
+ list_del(&address_filter->list);
+ kfree(address_filter);
+ return NULL;
+ }
+
+ bt_dev_dbg(hdev, "MSFT: Add device %pMR address filter",
+ &address_filter->bdaddr);
+
+ return address_filter;
+}
+
/* This function requires the caller holds hdev->lock */
static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
+ struct msft_monitor_addr_filter_data *n, *address_filter = NULL;
struct msft_ev_le_monitor_device *ev;
struct msft_monitor_advertisement_handle_data *handle_data;
+ struct msft_data *msft = hdev->msft_data;
+ u16 mgmt_handle = 0xffff;
u8 addr_type;
ev = msft_skb_pull(hdev, skb, MSFT_EV_LE_MONITOR_DEVICE, sizeof(*ev));
@@ -662,9 +992,53 @@ static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
ev->monitor_state, &ev->bdaddr);
handle_data = msft_find_handle_data(hdev, ev->monitor_handle, false);
- if (!handle_data)
+
+ if (!test_bit(HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER, &hdev->quirks)) {
+ if (!handle_data)
+ return;
+ mgmt_handle = handle_data->mgmt_handle;
+ goto report_state;
+ }
+
+ if (handle_data) {
+ /* Don't report any device found/lost event from pattern
+ * monitors. Pattern monitor always has its address filters for
+ * tracking devices.
+ */
+
+ address_filter = msft_find_address_data(hdev, ev->addr_type,
+ &ev->bdaddr,
+ handle_data->msft_handle);
+ if (address_filter)
+ return;
+
+ if (ev->monitor_state && handle_data->cond_type ==
+ MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN)
+ msft_add_address_filter(hdev, ev->addr_type,
+ &ev->bdaddr, handle_data);
+
return;
+ }
+ /* This device event is not from pattern monitor.
+ * Report it if there is a corresponding address_filter for it.
+ */
+ list_for_each_entry(n, &msft->address_filters, list) {
+ if (n->state == AF_STATE_ADDED &&
+ n->msft_handle == ev->monitor_handle) {
+ mgmt_handle = n->mgmt_handle;
+ address_filter = n;
+ break;
+ }
+ }
+
+ if (!address_filter) {
+ bt_dev_warn(hdev, "MSFT: Unexpected device event %pMR, %u, %u",
+ &ev->bdaddr, ev->monitor_handle, ev->monitor_state);
+ return;
+ }
+
+report_state:
switch (ev->addr_type) {
case ADDR_LE_DEV_PUBLIC:
addr_type = BDADDR_LE_PUBLIC;
@@ -681,12 +1055,18 @@ static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
return;
}
- if (ev->monitor_state)
- msft_device_found(hdev, &ev->bdaddr, addr_type,
- handle_data->mgmt_handle);
- else
- msft_device_lost(hdev, &ev->bdaddr, addr_type,
- handle_data->mgmt_handle);
+ if (ev->monitor_state) {
+ msft_device_found(hdev, &ev->bdaddr, addr_type, mgmt_handle);
+ } else {
+ if (address_filter && address_filter->state == AF_STATE_ADDED) {
+ address_filter->state = AF_STATE_REMOVING;
+ hci_cmd_sync_queue(hdev,
+ msft_cancel_address_filter_sync,
+ address_filter,
+ NULL);
+ }
+ msft_device_lost(hdev, &ev->bdaddr, addr_type, mgmt_handle);
+ }
}
void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
@@ -724,7 +1104,9 @@ void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb)
switch (*evt) {
case MSFT_EV_LE_MONITOR_DEVICE:
+ mutex_lock(&msft->filter_lock);
msft_monitor_device_evt(hdev, skb);
+ mutex_unlock(&msft->filter_lock);
break;
default:
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 4397e14ff560..b54e8a530f55 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -268,18 +268,16 @@ static struct proto rfcomm_proto = {
.obj_size = sizeof(struct rfcomm_pinfo)
};
-static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern)
+static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock,
+ int proto, gfp_t prio, int kern)
{
struct rfcomm_dlc *d;
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, kern);
+ sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern);
if (!sk)
return NULL;
- sock_init_data(sock, sk);
- INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
d = rfcomm_dlc_alloc(prio);
if (!d) {
sk_free(sk);
@@ -298,11 +296,6 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int
sk->sk_sndbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10;
sk->sk_rcvbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = proto;
- sk->sk_state = BT_OPEN;
-
bt_sock_link(&rfcomm_sk_list, sk);
BT_DBG("sk %p", sk);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 5697df9d4394..94ec913dfb76 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -771,7 +771,7 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp)
{
- struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+ struct rfcomm_dev *dev = tty->driver_data;
BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc,
dev->port.count);
@@ -779,17 +779,18 @@ static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp)
tty_port_close(&dev->port, tty, filp);
}
-static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
+static ssize_t rfcomm_tty_write(struct tty_struct *tty, const u8 *buf,
+ size_t count)
{
- struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+ struct rfcomm_dev *dev = tty->driver_data;
struct rfcomm_dlc *dlc = dev->dlc;
struct sk_buff *skb;
- int sent = 0, size;
+ size_t sent = 0, size;
- BT_DBG("tty %p count %d", tty, count);
+ BT_DBG("tty %p count %zu", tty, count);
while (count) {
- size = min_t(uint, count, dlc->mtu);
+ size = min_t(size_t, count, dlc->mtu);
skb = rfcomm_wmalloc(dev, size + RFCOMM_SKB_RESERVE, GFP_ATOMIC);
if (!skb)
@@ -810,7 +811,7 @@ static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, in
static unsigned int rfcomm_tty_write_room(struct tty_struct *tty)
{
- struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+ struct rfcomm_dev *dev = tty->driver_data;
int room = 0;
if (dev && dev->dlc)
@@ -864,7 +865,7 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty,
u8 baud, data_bits, stop_bits, parity, x_on, x_off;
u16 changes = 0;
- struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+ struct rfcomm_dev *dev = tty->driver_data;
BT_DBG("tty %p termios %p", tty, old);
@@ -996,7 +997,7 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty,
static void rfcomm_tty_throttle(struct tty_struct *tty)
{
- struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+ struct rfcomm_dev *dev = tty->driver_data;
BT_DBG("tty %p dev %p", tty, dev);
@@ -1005,7 +1006,7 @@ static void rfcomm_tty_throttle(struct tty_struct *tty)
static void rfcomm_tty_unthrottle(struct tty_struct *tty)
{
- struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+ struct rfcomm_dev *dev = tty->driver_data;
BT_DBG("tty %p dev %p", tty, dev);
@@ -1014,7 +1015,7 @@ static void rfcomm_tty_unthrottle(struct tty_struct *tty)
static unsigned int rfcomm_tty_chars_in_buffer(struct tty_struct *tty)
{
- struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+ struct rfcomm_dev *dev = tty->driver_data;
BT_DBG("tty %p dev %p", tty, dev);
@@ -1029,7 +1030,7 @@ static unsigned int rfcomm_tty_chars_in_buffer(struct tty_struct *tty)
static void rfcomm_tty_flush_buffer(struct tty_struct *tty)
{
- struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+ struct rfcomm_dev *dev = tty->driver_data;
BT_DBG("tty %p dev %p", tty, dev);
@@ -1052,7 +1053,7 @@ static void rfcomm_tty_wait_until_sent(struct tty_struct *tty, int timeout)
static void rfcomm_tty_hangup(struct tty_struct *tty)
{
- struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+ struct rfcomm_dev *dev = tty->driver_data;
BT_DBG("tty %p dev %p", tty, dev);
@@ -1061,7 +1062,7 @@ static void rfcomm_tty_hangup(struct tty_struct *tty)
static int rfcomm_tty_tiocmget(struct tty_struct *tty)
{
- struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+ struct rfcomm_dev *dev = tty->driver_data;
BT_DBG("tty %p dev %p", tty, dev);
@@ -1070,7 +1071,7 @@ static int rfcomm_tty_tiocmget(struct tty_struct *tty)
static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear)
{
- struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
+ struct rfcomm_dev *dev = tty->driver_data;
struct rfcomm_dlc *dlc = dev->dlc;
u8 v24_sig;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 7762604ddfc0..c736186aba26 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -68,7 +68,6 @@ struct sco_pinfo {
bdaddr_t dst;
__u32 flags;
__u16 setting;
- __u8 cmsg_mask;
struct bt_codec codec;
struct sco_conn *conn;
};
@@ -471,15 +470,6 @@ static void sco_sock_close(struct sock *sk)
release_sock(sk);
}
-static void sco_skb_put_cmsg(struct sk_buff *skb, struct msghdr *msg,
- struct sock *sk)
-{
- if (sco_pi(sk)->cmsg_mask & SCO_CMSG_PKT_STATUS)
- put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_STATUS,
- sizeof(bt_cb(skb)->sco.pkt_status),
- &bt_cb(skb)->sco.pkt_status);
-}
-
static void sco_sock_init(struct sock *sk, struct sock *parent)
{
BT_DBG("sk %p", sk);
@@ -488,8 +478,6 @@ static void sco_sock_init(struct sock *sk, struct sock *parent)
sk->sk_type = parent->sk_type;
bt_sk(sk)->flags = bt_sk(parent)->flags;
security_sk_clone(parent, sk);
- } else {
- bt_sk(sk)->skb_put_cmsg = sco_skb_put_cmsg;
}
}
@@ -504,21 +492,13 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
{
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, kern);
+ sk = bt_sock_alloc(net, sock, &sco_proto, proto, prio, kern);
if (!sk)
return NULL;
- sock_init_data(sock, sk);
- INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
-
sk->sk_destruct = sco_sock_destruct;
sk->sk_sndtimeo = SCO_CONN_TIMEOUT;
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- sk->sk_protocol = proto;
- sk->sk_state = BT_OPEN;
-
sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
sco_pi(sk)->codec.id = BT_CODEC_CVSD;
sco_pi(sk)->codec.cid = 0xffff;
@@ -915,9 +895,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
}
if (opt)
- sco_pi(sk)->cmsg_mask |= SCO_CMSG_PKT_STATUS;
+ set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
else
- sco_pi(sk)->cmsg_mask &= SCO_CMSG_PKT_STATUS;
+ clear_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
break;
case BT_CODEC:
@@ -1048,7 +1028,6 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
int len, err = 0;
struct bt_voice voice;
u32 phys;
- int pkt_status;
int buf_len;
struct codec_list *c;
u8 num_codecs, i, __user *ptr;
@@ -1102,9 +1081,8 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
break;
case BT_PKT_STATUS:
- pkt_status = (sco_pi(sk)->cmsg_mask & SCO_CMSG_PKT_STATUS);
-
- if (put_user(pkt_status, (int __user *)optval))
+ if (put_user(test_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags),
+ (int __user *)optval))
err = -EFAULT;
break;
@@ -1267,7 +1245,7 @@ static int sco_sock_release(struct socket *sock)
sco_sock_close(sk);
- if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime &&
+ if (sock_flag(sk, SOCK_LINGER) && READ_ONCE(sk->sk_lingertime) &&
!(current->flags & PF_EXITING)) {
lock_sock(sk);
err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2321bd2f9964..0841f8d82419 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -15,11 +15,12 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <net/net_namespace.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>
#include <linux/netfilter.h>
+#include <net/netdev_rx_queue.h>
#include <net/xdp.h>
#include <net/netfilter/nf_bpf_link.h>
@@ -542,6 +543,7 @@ struct bpf_fentry_test_t {
int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
{
+ asm volatile ("");
return (long)arg;
}
@@ -555,12 +557,23 @@ __bpf_kfunc u32 bpf_fentry_test9(u32 *a)
return *a;
}
+void noinline bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
+{
+}
+
__bpf_kfunc int bpf_modify_return_test(int a, int *b)
{
*b += 1;
return a + *b;
}
+__bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d,
+ void *e, char f, int g)
+{
+ *b += 1;
+ return a + *b + c + d + (long)e + f + g;
+}
+
int noinline bpf_fentry_shadow_test(int a)
{
return a + 1;
@@ -596,6 +609,7 @@ __diag_pop();
BTF_SET8_START(bpf_test_modify_return_ids)
BTF_ID_FLAGS(func, bpf_modify_return_test)
+BTF_ID_FLAGS(func, bpf_modify_return_test2)
BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
BTF_SET8_END(bpf_test_modify_return_ids)
@@ -663,7 +677,11 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
case BPF_MODIFY_RETURN:
ret = bpf_modify_return_test(1, &b);
if (b != 2)
- side_effect = 1;
+ side_effect++;
+ b = 2;
+ ret += bpf_modify_return_test2(1, &b, 3, 4, (void *)5, 6, 7);
+ if (b != 2)
+ side_effect++;
break;
default:
goto out;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 4f5098d33a46..a6e94ceb7c9a 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -234,6 +234,14 @@ static int br_switchdev_blocking_event(struct notifier_block *nb,
br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb,
b->blocking_nb);
break;
+ case SWITCHDEV_BRPORT_REPLAY:
+ brport_info = ptr;
+ b = &brport_info->brport;
+
+ err = br_switchdev_port_replay(p, b->dev, b->ctx, b->atomic_nb,
+ b->blocking_nb, extack);
+ err = notifier_from_errno(err);
+ break;
}
out:
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6116eba1bd89..7431f89e897b 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -124,7 +124,7 @@ static int deliver_clone(const struct net_bridge_port *prev,
skb = skb_clone(skb, GFP_ATOMIC);
if (!skb) {
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
return -ENOMEM;
}
@@ -154,6 +154,7 @@ void br_forward(const struct net_bridge_port *to,
backup_port = rcu_dereference(to->backup_port);
if (unlikely(!backup_port))
goto out;
+ BR_INPUT_SKB_CB(skb)->backup_nhid = READ_ONCE(to->backup_nhid);
to = backup_port;
}
@@ -267,7 +268,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
skb = skb_copy(skb, GFP_ATOMIC);
if (!skb) {
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
return;
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index c34a0b0901b0..c729528b5e85 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -181,12 +181,12 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if ((mdst && mdst->host_joined) ||
br_multicast_is_router(brmctx, skb)) {
local_rcv = true;
- br->dev->stats.multicast++;
+ DEV_STATS_INC(br->dev, multicast);
}
mcast_hit = true;
} else {
local_rcv = true;
- br->dev->stats.multicast++;
+ DEV_STATS_INC(br->dev, multicast);
}
break;
case BR_PKT_UNICAST:
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 1a801fab9543..15186247b59a 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -1135,7 +1135,8 @@ static int br_netfilter_sysctl_init_net(struct net *net)
br_netfilter_sysctl_default(brnet);
- brnet->ctl_hdr = register_net_sysctl(net, "net/bridge", table);
+ brnet->ctl_hdr = register_net_sysctl_sz(net, "net/bridge", table,
+ ARRAY_SIZE(brnf_table));
if (!brnet->ctl_hdr) {
if (!net_eq(net, &init_net))
kfree(table);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 05c5863d2e20..10f0d33d8ccf 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -211,6 +211,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_IN_OPEN */
+ nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT */
+ nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_CNT */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_BACKUP_NHID */
+ 0;
}
@@ -319,6 +320,10 @@ static int br_port_fill_attrs(struct sk_buff *skb,
backup_p->dev->ifindex);
rcu_read_unlock();
+ if (p->backup_nhid &&
+ nla_put_u32(skb, IFLA_BRPORT_BACKUP_NHID, p->backup_nhid))
+ return -EMSGSIZE;
+
return 0;
}
@@ -895,6 +900,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
[IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
[IFLA_BRPORT_NEIGH_VLAN_SUPPRESS] = NLA_POLICY_MAX(NLA_U8, 1),
+ [IFLA_BRPORT_BACKUP_NHID] = { .type = NLA_U32 },
};
/* Change the state of the port and notify spanning tree */
@@ -1065,6 +1071,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
return err;
}
+ if (tb[IFLA_BRPORT_BACKUP_NHID]) {
+ u32 backup_nhid = nla_get_u32(tb[IFLA_BRPORT_BACKUP_NHID]);
+
+ WRITE_ONCE(p->backup_nhid, backup_nhid);
+ }
+
return 0;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a63b32c1638e..a1f4acfa6994 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -387,6 +387,7 @@ struct net_bridge_port {
struct net_bridge_vlan_group __rcu *vlgrp;
#endif
struct net_bridge_port __rcu *backup_port;
+ u32 backup_nhid;
/* STP */
u8 priority;
@@ -605,6 +606,8 @@ struct br_input_skb_cb {
*/
unsigned long fwd_hwdoms;
#endif
+
+ u32 backup_nhid;
};
#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
@@ -971,7 +974,6 @@ int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router);
int br_multicast_toggle(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack);
int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val);
-int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx,
unsigned long val);
#if IS_ENABLED(CONFIG_IPV6)
@@ -2115,6 +2117,12 @@ void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
struct notifier_block *atomic_nb,
struct notifier_block *blocking_nb);
+int br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack);
+
bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb);
void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb);
@@ -2165,6 +2173,16 @@ br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
{
}
+static inline int
+br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
{
return false;
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index ba95c4d74a60..ee84e783e1df 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -727,6 +727,8 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
err = br_switchdev_mdb_replay_one(nb, dev,
SWITCHDEV_OBJ_PORT_MDB(obj),
action, ctx, extack);
+ if (err == -EOPNOTSUPP)
+ err = 0;
if (err)
goto out_free_mdb;
}
@@ -759,8 +761,10 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
err = br_switchdev_mdb_replay(br_dev, dev, ctx, true, blocking_nb,
extack);
- if (err && err != -EOPNOTSUPP)
+ if (err) {
+ /* -EOPNOTSUPP not propagated from MDB replay. */
return err;
+ }
err = br_switchdev_fdb_replay(br_dev, ctx, true, atomic_nb);
if (err && err != -EOPNOTSUPP)
@@ -825,3 +829,12 @@ void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
nbp_switchdev_del(p);
}
+
+int br_switchdev_port_replay(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ return nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack);
+}
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 6399a8a69d07..81833ca7a2c7 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -201,6 +201,21 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
if (err)
return err;
+ if (BR_INPUT_SKB_CB(skb)->backup_nhid) {
+ tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
+ tunnel_id, 0);
+ if (!tunnel_dst)
+ return -ENOMEM;
+
+ tunnel_dst->u.tun_info.mode |= IP_TUNNEL_INFO_TX |
+ IP_TUNNEL_INFO_BRIDGE;
+ tunnel_dst->u.tun_info.key.nhid =
+ BR_INPUT_SKB_CB(skb)->backup_nhid;
+ skb_dst_set(skb, &tunnel_dst->dst);
+
+ return 0;
+ }
+
tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
skb_dst_set(skb, &tunnel_dst->dst);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 757ec46fc45a..aa23479b20b2 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2115,8 +2115,7 @@ static int size_entry_mwt(const struct ebt_entry *entry, const unsigned char *ba
return ret;
offsets[0] = sizeof(struct ebt_entry); /* matches come first */
- memcpy(&offsets[1], &entry->watchers_offset,
- sizeof(offsets) - sizeof(offsets[0]));
+ memcpy(&offsets[1], &entry->offsets, sizeof(entry->offsets));
if (state->buf_kern_start) {
buf_start = state->buf_kern_start + state->buf_kern_offset;
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 99770ed28531..f02b5d3e4733 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -188,12 +188,6 @@ static bool isotp_register_rxid(struct isotp_sock *so)
return (isotp_bc_flags(so) == 0);
}
-static bool isotp_register_txecho(struct isotp_sock *so)
-{
- /* all modes but SF_BROADCAST register for tx echo skbs */
- return (isotp_bc_flags(so) != CAN_ISOTP_SF_BROADCAST);
-}
-
static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer)
{
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
@@ -1209,7 +1203,7 @@ static int isotp_release(struct socket *sock)
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && isotp_register_txecho(so)) {
+ if (so->bound) {
if (so->ifindex) {
struct net_device *dev;
@@ -1332,14 +1326,12 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id),
isotp_rcv, sk, "isotp", sk);
- if (isotp_register_txecho(so)) {
- /* no consecutive frame echo skb in flight */
- so->cfecho = 0;
+ /* no consecutive frame echo skb in flight */
+ so->cfecho = 0;
- /* register for echo skb's */
- can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id),
- isotp_rcv_echo, sk, "isotpe", sk);
- }
+ /* register for echo skb's */
+ can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id),
+ isotp_rcv_echo, sk, "isotpe", sk);
dev_put(dev);
@@ -1560,7 +1552,7 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg,
case NETDEV_UNREGISTER:
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && isotp_register_txecho(so)) {
+ if (so->bound) {
if (isotp_register_rxid(so))
can_rx_unregister(dev_net(dev), dev, so->rxid,
SINGLE_MASK(so->rxid),
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index feaec4ad6d16..b28c976f52a0 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -974,6 +974,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
struct sock_exterr_skb *serr;
struct sk_buff *skb;
char *state = "UNK";
+ u32 tsflags;
int err;
jsk = j1939_sk(sk);
@@ -981,13 +982,14 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
if (!(jsk->state & J1939_SOCK_ERRQUEUE))
return;
+ tsflags = READ_ONCE(sk->sk_tsflags);
switch (type) {
case J1939_ERRQUEUE_TX_ACK:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+ if (!(tsflags & SOF_TIMESTAMPING_TX_ACK))
return;
break;
case J1939_ERRQUEUE_TX_SCHED:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+ if (!(tsflags & SOF_TIMESTAMPING_TX_SCHED))
return;
break;
case J1939_ERRQUEUE_TX_ABORT:
@@ -997,7 +999,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
case J1939_ERRQUEUE_RX_DPO:
fallthrough;
case J1939_ERRQUEUE_RX_ABORT:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+ if (!(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
return;
break;
default:
@@ -1054,7 +1056,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
}
serr->opt_stats = true;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ if (tsflags & SOF_TIMESTAMPING_OPT_ID)
serr->ee.ee_data = session->tskey;
netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
diff --git a/net/can/raw.c b/net/can/raw.c
index e10f59375659..d50c3f3d892f 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -85,6 +85,7 @@ struct raw_sock {
int bound;
int ifindex;
struct net_device *dev;
+ netdevice_tracker dev_tracker;
struct list_head notifier;
int loopback;
int recv_own_msgs;
@@ -285,8 +286,10 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
case NETDEV_UNREGISTER:
lock_sock(sk);
/* remove current filters & unregister */
- if (ro->bound)
+ if (ro->bound) {
raw_disable_allfilters(dev_net(dev), dev, sk);
+ netdev_put(dev, &ro->dev_tracker);
+ }
if (ro->count > 1)
kfree(ro->filter);
@@ -391,10 +394,12 @@ static int raw_release(struct socket *sock)
/* remove current filters & unregister */
if (ro->bound) {
- if (ro->dev)
+ if (ro->dev) {
raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk);
- else
+ netdev_put(ro->dev, &ro->dev_tracker);
+ } else {
raw_disable_allfilters(sock_net(sk), NULL, sk);
+ }
}
if (ro->count > 1)
@@ -445,10 +450,10 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
goto out;
}
if (dev->type != ARPHRD_CAN) {
- dev_put(dev);
err = -ENODEV;
- goto out;
+ goto out_put_dev;
}
+
if (!(dev->flags & IFF_UP))
notify_enetdown = 1;
@@ -456,7 +461,9 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
/* filters set by default/setsockopt */
err = raw_enable_allfilters(sock_net(sk), dev, sk);
- dev_put(dev);
+ if (err)
+ goto out_put_dev;
+
} else {
ifindex = 0;
@@ -467,18 +474,28 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (!err) {
if (ro->bound) {
/* unregister old filters */
- if (ro->dev)
+ if (ro->dev) {
raw_disable_allfilters(dev_net(ro->dev),
ro->dev, sk);
- else
+ /* drop reference to old ro->dev */
+ netdev_put(ro->dev, &ro->dev_tracker);
+ } else {
raw_disable_allfilters(sock_net(sk), NULL, sk);
+ }
}
ro->ifindex = ifindex;
ro->bound = 1;
+ /* bind() ok -> hold a reference for new ro->dev */
ro->dev = dev;
+ if (ro->dev)
+ netdev_hold(ro->dev, &ro->dev_tracker, GFP_KERNEL);
}
- out:
+out_put_dev:
+ /* remove potential reference from dev_get_by_index() */
+ if (dev)
+ dev_put(dev);
+out:
release_sock(sk);
rtnl_unlock();
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 5eb4898cccd4..10a41cd9c523 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -969,6 +969,62 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
return true;
}
+static void ceph_msg_data_iter_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+
+ cursor->iov_iter = data->iter;
+ cursor->lastlen = 0;
+ iov_iter_truncate(&cursor->iov_iter, length);
+ cursor->resid = iov_iter_count(&cursor->iov_iter);
+}
+
+static struct page *ceph_msg_data_iter_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length)
+{
+ struct page *page;
+ ssize_t len;
+
+ if (cursor->lastlen)
+ iov_iter_revert(&cursor->iov_iter, cursor->lastlen);
+
+ len = iov_iter_get_pages2(&cursor->iov_iter, &page, PAGE_SIZE,
+ 1, page_offset);
+ BUG_ON(len < 0);
+
+ cursor->lastlen = len;
+
+ /*
+ * FIXME: The assumption is that the pages represented by the iov_iter
+ * are pinned, with the references held by the upper-level
+ * callers, or by virtue of being under writeback. Eventually,
+ * we'll get an iov_iter_get_pages2 variant that doesn't take
+ * page refs. Until then, just put the page ref.
+ */
+ VM_BUG_ON_PAGE(!PageWriteback(page) && page_count(page) < 2, page);
+ put_page(page);
+
+ *length = min_t(size_t, len, cursor->resid);
+ return page;
+}
+
+static bool ceph_msg_data_iter_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ BUG_ON(bytes > cursor->resid);
+ cursor->resid -= bytes;
+
+ if (bytes < cursor->lastlen) {
+ cursor->lastlen -= bytes;
+ } else {
+ iov_iter_advance(&cursor->iov_iter, bytes - cursor->lastlen);
+ cursor->lastlen = 0;
+ }
+
+ return cursor->resid;
+}
+
/*
* Message data is handled (sent or received) in pieces, where each
* piece resides on a single page. The network layer might not
@@ -996,6 +1052,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
case CEPH_MSG_DATA_BVECS:
ceph_msg_data_bvecs_cursor_init(cursor, length);
break;
+ case CEPH_MSG_DATA_ITER:
+ ceph_msg_data_iter_cursor_init(cursor, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
/* BUG(); */
@@ -1013,6 +1072,7 @@ void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
cursor->total_resid = length;
cursor->data = msg->data;
+ cursor->sr_resid = 0;
__ceph_msg_data_cursor_init(cursor);
}
@@ -1042,6 +1102,9 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
case CEPH_MSG_DATA_BVECS:
page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
break;
+ case CEPH_MSG_DATA_ITER:
+ page = ceph_msg_data_iter_next(cursor, page_offset, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
page = NULL;
@@ -1080,6 +1143,9 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes)
case CEPH_MSG_DATA_BVECS:
new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
break;
+ case CEPH_MSG_DATA_ITER:
+ new_piece = ceph_msg_data_iter_advance(cursor, bytes);
+ break;
case CEPH_MSG_DATA_NONE:
default:
BUG();
@@ -1879,6 +1945,18 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
}
EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+ struct iov_iter *iter)
+{
+ struct ceph_msg_data *data;
+
+ data = ceph_msg_data_add(msg);
+ data->type = CEPH_MSG_DATA_ITER;
+ data->iter = *iter;
+
+ msg->data_length += iov_iter_count(&data->iter);
+}
+
/*
* construct a new message with given type, size
* the new msg has a ref count of 1.
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index 3d57bb48a2b4..f9a50d7f0d20 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -159,9 +159,9 @@ static size_t sizeof_footer(struct ceph_connection *con)
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
- /* Initialize data cursor */
-
- ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
+ /* Initialize data cursor if it's not a sparse read */
+ if (!msg->sparse_read)
+ ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
}
/*
@@ -960,9 +960,9 @@ static void process_ack(struct ceph_connection *con)
prepare_read_tag(con);
}
-static int read_partial_message_section(struct ceph_connection *con,
- struct kvec *section,
- unsigned int sec_len, u32 *crc)
+static int read_partial_message_chunk(struct ceph_connection *con,
+ struct kvec *section,
+ unsigned int sec_len, u32 *crc)
{
int ret, left;
@@ -978,11 +978,91 @@ static int read_partial_message_section(struct ceph_connection *con,
section->iov_len += ret;
}
if (section->iov_len == sec_len)
- *crc = crc32c(0, section->iov_base, section->iov_len);
+ *crc = crc32c(*crc, section->iov_base, section->iov_len);
return 1;
}
+static inline int read_partial_message_section(struct ceph_connection *con,
+ struct kvec *section,
+ unsigned int sec_len, u32 *crc)
+{
+ *crc = 0;
+ return read_partial_message_chunk(con, section, sec_len, crc);
+}
+
+static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
+
+ if (do_bounce && unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ while (cursor->sr_resid > 0) {
+ struct page *page, *rpage;
+ size_t off, len;
+ int ret;
+
+ page = ceph_msg_data_next(cursor, &off, &len);
+ rpage = do_bounce ? con->bounce_page : page;
+
+ /* clamp to what remains in extent */
+ len = min_t(int, len, cursor->sr_resid);
+ ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len);
+ if (ret <= 0)
+ return ret;
+ *crc = ceph_crc32c_page(*crc, rpage, off, ret);
+ ceph_msg_data_advance(cursor, (size_t)ret);
+ cursor->sr_resid -= ret;
+ if (do_bounce)
+ memcpy_page(page, off, rpage, off, ret);
+ }
+ return 1;
+}
+
+static int read_sparse_msg_data(struct ceph_connection *con)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
+ u32 crc = 0;
+ int ret = 1;
+
+ if (do_datacrc)
+ crc = con->in_data_crc;
+
+ do {
+ if (con->v1.in_sr_kvec.iov_base)
+ ret = read_partial_message_chunk(con,
+ &con->v1.in_sr_kvec,
+ con->v1.in_sr_len,
+ &crc);
+ else if (cursor->sr_resid > 0)
+ ret = read_sparse_msg_extent(con, &crc);
+
+ if (ret <= 0) {
+ if (do_datacrc)
+ con->in_data_crc = crc;
+ return ret;
+ }
+
+ memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
+ ret = con->ops->sparse_read(con, cursor,
+ (char **)&con->v1.in_sr_kvec.iov_base);
+ con->v1.in_sr_len = ret;
+ } while (ret > 0);
+
+ if (do_datacrc)
+ con->in_data_crc = crc;
+
+ return ret < 0 ? ret : 1; /* must return > 0 to indicate success */
+}
+
static int read_partial_msg_data(struct ceph_connection *con)
{
struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
@@ -1173,7 +1253,9 @@ static int read_partial_message(struct ceph_connection *con)
if (!m->num_data_items)
return -EIO;
- if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
+ if (m->sparse_read)
+ ret = read_sparse_msg_data(con);
+ else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
ret = read_partial_msg_data_bounce(con);
else
ret = read_partial_msg_data(con);
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index 1df1d29dee92..d09a39ff2cf0 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -8,9 +8,9 @@
#include <linux/ceph/ceph_debug.h>
#include <crypto/aead.h>
-#include <crypto/algapi.h> /* for crypto_memneq() */
#include <crypto/hash.h>
#include <crypto/sha2.h>
+#include <crypto/utils.h>
#include <linux/bvec.h>
#include <linux/crc32c.h>
#include <linux/net.h>
@@ -52,14 +52,16 @@
#define FRAME_LATE_STATUS_COMPLETE 0xe
#define FRAME_LATE_STATUS_ABORTED_MASK 0xf
-#define IN_S_HANDLE_PREAMBLE 1
-#define IN_S_HANDLE_CONTROL 2
-#define IN_S_HANDLE_CONTROL_REMAINDER 3
-#define IN_S_PREPARE_READ_DATA 4
-#define IN_S_PREPARE_READ_DATA_CONT 5
-#define IN_S_PREPARE_READ_ENC_PAGE 6
-#define IN_S_HANDLE_EPILOGUE 7
-#define IN_S_FINISH_SKIP 8
+#define IN_S_HANDLE_PREAMBLE 1
+#define IN_S_HANDLE_CONTROL 2
+#define IN_S_HANDLE_CONTROL_REMAINDER 3
+#define IN_S_PREPARE_READ_DATA 4
+#define IN_S_PREPARE_READ_DATA_CONT 5
+#define IN_S_PREPARE_READ_ENC_PAGE 6
+#define IN_S_PREPARE_SPARSE_DATA 7
+#define IN_S_PREPARE_SPARSE_DATA_CONT 8
+#define IN_S_HANDLE_EPILOGUE 9
+#define IN_S_FINISH_SKIP 10
#define OUT_S_QUEUE_DATA 1
#define OUT_S_QUEUE_DATA_CONT 2
@@ -967,12 +969,48 @@ static void init_sgs_cursor(struct scatterlist **sg,
}
}
+/**
+ * init_sgs_pages: set up scatterlist on an array of page pointers
+ * @sg: scatterlist to populate
+ * @pages: pointer to page array
+ * @dpos: position in the array to start (bytes)
+ * @dlen: len to add to sg (bytes)
+ * @pad: pointer to pad destination (if any)
+ *
+ * Populate the scatterlist from the page array, starting at an arbitrary
+ * byte in the array and running for a specified length.
+ */
+static void init_sgs_pages(struct scatterlist **sg, struct page **pages,
+ int dpos, int dlen, u8 *pad)
+{
+ int idx = dpos >> PAGE_SHIFT;
+ int off = offset_in_page(dpos);
+ int resid = dlen;
+
+ do {
+ int len = min(resid, (int)PAGE_SIZE - off);
+
+ sg_set_page(*sg, pages[idx], len, off);
+ *sg = sg_next(*sg);
+ off = 0;
+ ++idx;
+ resid -= len;
+ } while (resid);
+
+ if (need_padding(dlen)) {
+ sg_set_buf(*sg, pad, padding_len(dlen));
+ *sg = sg_next(*sg);
+ }
+}
+
static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
u8 *front_pad, u8 *middle_pad, u8 *data_pad,
- void *epilogue, bool add_tag)
+ void *epilogue, struct page **pages, int dpos,
+ bool add_tag)
{
struct ceph_msg_data_cursor cursor;
struct scatterlist *cur_sg;
+ int dlen = data_len(msg);
int sg_cnt;
int ret;
@@ -986,9 +1024,15 @@ static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
if (middle_len(msg))
sg_cnt += calc_sg_cnt(msg->middle->vec.iov_base,
middle_len(msg));
- if (data_len(msg)) {
- ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
- sg_cnt += calc_sg_cnt_cursor(&cursor);
+ if (dlen) {
+ if (pages) {
+ sg_cnt += calc_pages_for(dpos, dlen);
+ if (need_padding(dlen))
+ sg_cnt++;
+ } else {
+ ceph_msg_data_cursor_init(&cursor, msg, dlen);
+ sg_cnt += calc_sg_cnt_cursor(&cursor);
+ }
}
ret = sg_alloc_table(sgt, sg_cnt, GFP_NOIO);
@@ -1002,9 +1046,13 @@ static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
if (middle_len(msg))
init_sgs(&cur_sg, msg->middle->vec.iov_base, middle_len(msg),
middle_pad);
- if (data_len(msg)) {
- ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
- init_sgs_cursor(&cur_sg, &cursor, data_pad);
+ if (dlen) {
+ if (pages) {
+ init_sgs_pages(&cur_sg, pages, dpos, dlen, data_pad);
+ } else {
+ ceph_msg_data_cursor_init(&cursor, msg, dlen);
+ init_sgs_cursor(&cur_sg, &cursor, data_pad);
+ }
}
WARN_ON(!sg_is_last(cur_sg));
@@ -1039,10 +1087,53 @@ static int decrypt_control_remainder(struct ceph_connection *con)
padded_len(rem_len) + CEPH_GCM_TAG_LEN);
}
+/* Process sparse read data that lives in a buffer */
+static int process_v2_sparse_read(struct ceph_connection *con,
+ struct page **pages, int spos)
+{
+ struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
+ int ret;
+
+ for (;;) {
+ char *buf = NULL;
+
+ ret = con->ops->sparse_read(con, cursor, &buf);
+ if (ret <= 0)
+ return ret;
+
+ dout("%s: sparse_read return %x buf %p\n", __func__, ret, buf);
+
+ do {
+ int idx = spos >> PAGE_SHIFT;
+ int soff = offset_in_page(spos);
+ struct page *spage = con->v2.in_enc_pages[idx];
+ int len = min_t(int, ret, PAGE_SIZE - soff);
+
+ if (buf) {
+ memcpy_from_page(buf, spage, soff, len);
+ buf += len;
+ } else {
+ struct bio_vec bv;
+
+ get_bvec_at(cursor, &bv);
+ len = min_t(int, len, bv.bv_len);
+ memcpy_page(bv.bv_page, bv.bv_offset,
+ spage, soff, len);
+ ceph_msg_data_advance(cursor, len);
+ }
+ spos += len;
+ ret -= len;
+ } while (ret);
+ }
+}
+
static int decrypt_tail(struct ceph_connection *con)
{
struct sg_table enc_sgt = {};
struct sg_table sgt = {};
+ struct page **pages = NULL;
+ bool sparse = con->in_msg->sparse_read;
+ int dpos = 0;
int tail_len;
int ret;
@@ -1053,9 +1144,14 @@ static int decrypt_tail(struct ceph_connection *con)
if (ret)
goto out;
+ if (sparse) {
+ dpos = padded_len(front_len(con->in_msg) + padded_len(middle_len(con->in_msg)));
+ pages = con->v2.in_enc_pages;
+ }
+
ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
- MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
- con->v2.in_buf, true);
+ MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
+ con->v2.in_buf, pages, dpos, true);
if (ret)
goto out;
@@ -1065,6 +1161,12 @@ static int decrypt_tail(struct ceph_connection *con)
if (ret)
goto out;
+ if (sparse && data_len(con->in_msg)) {
+ ret = process_v2_sparse_read(con, con->v2.in_enc_pages, dpos);
+ if (ret)
+ goto out;
+ }
+
WARN_ON(!con->v2.in_enc_page_cnt);
ceph_release_page_vector(con->v2.in_enc_pages,
con->v2.in_enc_page_cnt);
@@ -1588,7 +1690,7 @@ static int prepare_message_secure(struct ceph_connection *con)
encode_epilogue_secure(con, false);
ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop,
- &con->v2.out_epil, false);
+ &con->v2.out_epil, NULL, 0, false);
if (ret)
goto out;
@@ -1825,6 +1927,123 @@ static void prepare_read_data_cont(struct ceph_connection *con)
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+static int prepare_sparse_read_cont(struct ceph_connection *con)
+{
+ int ret;
+ struct bio_vec bv;
+ char *buf = NULL;
+ struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
+
+ WARN_ON(con->v2.in_state != IN_S_PREPARE_SPARSE_DATA_CONT);
+
+ if (iov_iter_is_bvec(&con->v2.in_iter)) {
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ con->in_data_crc = crc32c(con->in_data_crc,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ get_bvec_at(cursor, &bv);
+ memcpy_to_page(bv.bv_page, bv.bv_offset,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ } else {
+ con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
+ con->v2.in_bvec.bv_page,
+ con->v2.in_bvec.bv_offset,
+ con->v2.in_bvec.bv_len);
+ }
+
+ ceph_msg_data_advance(cursor, con->v2.in_bvec.bv_len);
+ cursor->sr_resid -= con->v2.in_bvec.bv_len;
+ dout("%s: advance by 0x%x sr_resid 0x%x\n", __func__,
+ con->v2.in_bvec.bv_len, cursor->sr_resid);
+ WARN_ON_ONCE(cursor->sr_resid > cursor->total_resid);
+ if (cursor->sr_resid) {
+ get_bvec_at(cursor, &bv);
+ if (bv.bv_len > cursor->sr_resid)
+ bv.bv_len = cursor->sr_resid;
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
+ set_in_bvec(con, &bv);
+ con->v2.data_len_remain -= bv.bv_len;
+ return 0;
+ }
+ } else if (iov_iter_is_kvec(&con->v2.in_iter)) {
+ /* On first call, we have no kvec so don't compute crc */
+ if (con->v2.in_kvec_cnt) {
+ WARN_ON_ONCE(con->v2.in_kvec_cnt > 1);
+ con->in_data_crc = crc32c(con->in_data_crc,
+ con->v2.in_kvecs[0].iov_base,
+ con->v2.in_kvecs[0].iov_len);
+ }
+ } else {
+ return -EIO;
+ }
+
+ /* get next extent */
+ ret = con->ops->sparse_read(con, cursor, &buf);
+ if (ret <= 0) {
+ if (ret < 0)
+ return ret;
+
+ reset_in_kvecs(con);
+ add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
+ return 0;
+ }
+
+ if (buf) {
+ /* receive into buffer */
+ reset_in_kvecs(con);
+ add_in_kvec(con, buf, ret);
+ con->v2.data_len_remain -= ret;
+ return 0;
+ }
+
+ if (ret > cursor->total_resid) {
+ pr_warn("%s: ret 0x%x total_resid 0x%zx resid 0x%zx\n",
+ __func__, ret, cursor->total_resid, cursor->resid);
+ return -EIO;
+ }
+ get_bvec_at(cursor, &bv);
+ if (bv.bv_len > cursor->sr_resid)
+ bv.bv_len = cursor->sr_resid;
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
+ set_in_bvec(con, &bv);
+ con->v2.data_len_remain -= ret;
+ return ret;
+}
+
+static int prepare_sparse_read_data(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->in_msg;
+
+ dout("%s: starting sparse read\n", __func__);
+
+ if (WARN_ON_ONCE(!con->ops->sparse_read))
+ return -EOPNOTSUPP;
+
+ if (!con_secure(con))
+ con->in_data_crc = -1;
+
+ reset_in_kvecs(con);
+ con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
+ con->v2.data_len_remain = data_len(msg);
+ return prepare_sparse_read_cont(con);
+}
+
static int prepare_read_tail_plain(struct ceph_connection *con)
{
struct ceph_msg *msg = con->in_msg;
@@ -1845,7 +2064,10 @@ static int prepare_read_tail_plain(struct ceph_connection *con)
}
if (data_len(msg)) {
- con->v2.in_state = IN_S_PREPARE_READ_DATA;
+ if (msg->sparse_read)
+ con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
+ else
+ con->v2.in_state = IN_S_PREPARE_READ_DATA;
} else {
add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
@@ -2898,6 +3120,12 @@ static int populate_in_iter(struct ceph_connection *con)
prepare_read_enc_page(con);
ret = 0;
break;
+ case IN_S_PREPARE_SPARSE_DATA:
+ ret = prepare_sparse_read_data(con);
+ break;
+ case IN_S_PREPARE_SPARSE_DATA_CONT:
+ ret = prepare_sparse_read_cont(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
ret = handle_epilogue(con);
break;
@@ -3489,6 +3717,23 @@ static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
con->v2.in_state = IN_S_FINISH_SKIP;
}
+static void revoke_at_prepare_sparse_data(struct ceph_connection *con)
+{
+ int resid; /* current piece of data */
+ int remaining;
+
+ WARN_ON(con_secure(con));
+ WARN_ON(!data_len(con->in_msg));
+ WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
+ resid = iov_iter_count(&con->v2.in_iter);
+ dout("%s con %p resid %d\n", __func__, con, resid);
+
+ remaining = CEPH_EPILOGUE_PLAIN_LEN + con->v2.data_len_remain;
+ con->v2.in_iter.count -= resid;
+ set_in_skip(con, resid + remaining);
+ con->v2.in_state = IN_S_FINISH_SKIP;
+}
+
static void revoke_at_handle_epilogue(struct ceph_connection *con)
{
int resid;
@@ -3505,6 +3750,7 @@ static void revoke_at_handle_epilogue(struct ceph_connection *con)
void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
{
switch (con->v2.in_state) {
+ case IN_S_PREPARE_SPARSE_DATA:
case IN_S_PREPARE_READ_DATA:
revoke_at_prepare_read_data(con);
break;
@@ -3514,6 +3760,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
case IN_S_PREPARE_READ_ENC_PAGE:
revoke_at_prepare_read_enc_page(con);
break;
+ case IN_S_PREPARE_SPARSE_DATA_CONT:
+ revoke_at_prepare_sparse_data(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
revoke_at_handle_epilogue(con);
break;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 658a6f2320cf..d3a759e052c8 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -171,6 +171,13 @@ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
osd_data->num_bvecs = num_bvecs;
}
+static void ceph_osd_iter_init(struct ceph_osd_data *osd_data,
+ struct iov_iter *iter)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_ITER;
+ osd_data->iter = *iter;
+}
+
static struct ceph_osd_data *
osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
{
@@ -264,6 +271,22 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
+/**
+ * osd_req_op_extent_osd_iter - Set up an operation with an iterator buffer
+ * @osd_req: The request to set up
+ * @which: Index of the operation in which to set the iter
+ * @iter: The buffer iterator
+ */
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+ unsigned int which, struct iov_iter *iter)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_iter_init(osd_data, iter);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_iter);
+
static void osd_req_op_cls_request_info_pagelist(
struct ceph_osd_request *osd_req,
unsigned int which, struct ceph_pagelist *pagelist)
@@ -346,6 +369,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
#endif /* CONFIG_BLOCK */
case CEPH_OSD_DATA_TYPE_BVECS:
return osd_data->bvec_pos.iter.bi_size;
+ case CEPH_OSD_DATA_TYPE_ITER:
+ return iov_iter_count(&osd_data->iter);
default:
WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
return 0;
@@ -376,8 +401,10 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
switch (op->op) {
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_WRITE:
case CEPH_OSD_OP_WRITEFULL:
+ kfree(op->extent.sparse_ext);
ceph_osd_data_release(&op->extent.osd_data);
break;
case CEPH_OSD_OP_CALL:
@@ -669,6 +696,7 @@ static void get_num_data_items(struct ceph_osd_request *req,
/* reply */
case CEPH_OSD_OP_STAT:
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_LIST_WATCHERS:
*num_reply_data_items += 1;
break;
@@ -738,7 +766,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
opcode != CEPH_OSD_OP_WRITEFULL && opcode != CEPH_OSD_OP_ZERO &&
- opcode != CEPH_OSD_OP_TRUNCATE);
+ opcode != CEPH_OSD_OP_TRUNCATE && opcode != CEPH_OSD_OP_SPARSE_READ);
op->extent.offset = offset;
op->extent.length = length;
@@ -951,6 +979,8 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
#endif
} else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) {
ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos);
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_ITER) {
+ ceph_msg_data_add_iter(msg, &osd_data->iter);
} else {
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
}
@@ -963,6 +993,7 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
case CEPH_OSD_OP_STAT:
break;
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_WRITE:
case CEPH_OSD_OP_WRITEFULL:
case CEPH_OSD_OP_ZERO:
@@ -1017,6 +1048,10 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
dst->copy_from.src_fadvise_flags =
cpu_to_le32(src->copy_from.src_fadvise_flags);
break;
+ case CEPH_OSD_OP_ASSERT_VER:
+ dst->assert_ver.unused = cpu_to_le64(0);
+ dst->assert_ver.ver = cpu_to_le64(src->assert_ver.ver);
+ break;
default:
pr_err("unsupported osd opcode %s\n",
ceph_osd_op_name(src->op));
@@ -1059,7 +1094,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE &&
- opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE);
+ opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE &&
+ opcode != CEPH_OSD_OP_SPARSE_READ);
req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
GFP_NOFS);
@@ -1100,15 +1136,30 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
if (flags & CEPH_OSD_FLAG_WRITE)
req->r_data_offset = off;
- if (num_ops > 1)
+ if (num_ops > 1) {
+ int num_req_ops, num_rep_ops;
+
/*
- * This is a special case for ceph_writepages_start(), but it
- * also covers ceph_uninline_data(). If more multi-op request
- * use cases emerge, we will need a separate helper.
+ * If this is a multi-op write request, assume that we'll need
+ * request ops. If it's a multi-op read then assume we'll need
+ * reply ops. Anything else and call it -EINVAL.
*/
- r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0);
- else
+ if (flags & CEPH_OSD_FLAG_WRITE) {
+ num_req_ops = num_ops;
+ num_rep_ops = 0;
+ } else if (flags & CEPH_OSD_FLAG_READ) {
+ num_req_ops = 0;
+ num_rep_ops = num_ops;
+ } else {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_req_ops,
+ num_rep_ops);
+ } else {
r = ceph_osdc_alloc_messages(req, GFP_NOFS);
+ }
if (r)
goto fail;
@@ -1120,6 +1171,18 @@ fail:
}
EXPORT_SYMBOL(ceph_osdc_new_request);
+int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
+{
+ op->extent.sparse_ext_cnt = cnt;
+ op->extent.sparse_ext = kmalloc_array(cnt,
+ sizeof(*op->extent.sparse_ext),
+ GFP_NOFS);
+ if (!op->extent.sparse_ext)
+ return -ENOMEM;
+ return 0;
+}
+EXPORT_SYMBOL(__ceph_alloc_sparse_ext_map);
+
/*
* We keep osd requests in an rbtree, sorted by ->r_tid.
*/
@@ -1177,6 +1240,7 @@ static void osd_init(struct ceph_osd *osd)
{
refcount_set(&osd->o_ref, 1);
RB_CLEAR_NODE(&osd->o_node);
+ spin_lock_init(&osd->o_requests_lock);
osd->o_requests = RB_ROOT;
osd->o_linger_requests = RB_ROOT;
osd->o_backoff_mappings = RB_ROOT;
@@ -1187,6 +1251,13 @@ static void osd_init(struct ceph_osd *osd)
mutex_init(&osd->lock);
}
+static void ceph_init_sparse_read(struct ceph_sparse_read *sr)
+{
+ kfree(sr->sr_extent);
+ memset(sr, '\0', sizeof(*sr));
+ sr->sr_state = CEPH_SPARSE_READ_HDR;
+}
+
static void osd_cleanup(struct ceph_osd *osd)
{
WARN_ON(!RB_EMPTY_NODE(&osd->o_node));
@@ -1197,6 +1268,8 @@ static void osd_cleanup(struct ceph_osd *osd)
WARN_ON(!list_empty(&osd->o_osd_lru));
WARN_ON(!list_empty(&osd->o_keepalive_item));
+ ceph_init_sparse_read(&osd->o_sparse_read);
+
if (osd->o_auth.authorizer) {
WARN_ON(osd_homeless(osd));
ceph_auth_destroy_authorizer(osd->o_auth.authorizer);
@@ -1216,6 +1289,9 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
osd_init(osd);
osd->o_osdc = osdc;
osd->o_osd = onum;
+ osd->o_sparse_op_idx = -1;
+
+ ceph_init_sparse_read(&osd->o_sparse_read);
ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
@@ -1406,7 +1482,9 @@ static void link_request(struct ceph_osd *osd, struct ceph_osd_request *req)
atomic_inc(&osd->o_osdc->num_homeless);
get_osd(osd);
+ spin_lock(&osd->o_requests_lock);
insert_request(&osd->o_requests, req);
+ spin_unlock(&osd->o_requests_lock);
req->r_osd = osd;
}
@@ -1418,7 +1496,9 @@ static void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req)
req, req->r_tid);
req->r_osd = NULL;
+ spin_lock(&osd->o_requests_lock);
erase_request(&osd->o_requests, req);
+ spin_unlock(&osd->o_requests_lock);
put_osd(osd);
if (!osd_homeless(osd))
@@ -2016,6 +2096,7 @@ static void setup_request_data(struct ceph_osd_request *req)
&op->raw_data_in);
break;
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
ceph_osdc_msg_data_add(reply_msg,
&op->extent.osd_data);
break;
@@ -2435,8 +2516,10 @@ static void finish_request(struct ceph_osd_request *req)
req->r_end_latency = ktime_get();
- if (req->r_osd)
+ if (req->r_osd) {
+ ceph_init_sparse_read(&req->r_osd->o_sparse_read);
unlink_request(req->r_osd, req);
+ }
atomic_dec(&osdc->num_requests);
/*
@@ -3795,6 +3878,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
* one (type of) reply back.
*/
WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK));
+ req->r_version = m.user_version;
req->r_result = m.result ?: data_len;
finish_request(req);
mutex_unlock(&osd->lock);
@@ -5348,6 +5432,24 @@ static void osd_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
ceph_msg_put(msg);
}
+/* How much sparse data was requested? */
+static u64 sparse_data_requested(struct ceph_osd_request *req)
+{
+ u64 len = 0;
+
+ if (req->r_flags & CEPH_OSD_FLAG_READ) {
+ int i;
+
+ for (i = 0; i < req->r_num_ops; ++i) {
+ struct ceph_osd_req_op *op = &req->r_ops[i];
+
+ if (op->op == CEPH_OSD_OP_SPARSE_READ)
+ len += op->extent.length;
+ }
+ }
+ return len;
+}
+
/*
* Lookup and return message for incoming reply. Don't try to do
* anything about a larger than preallocated data portion of the
@@ -5364,6 +5466,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
int front_len = le32_to_cpu(hdr->front_len);
int data_len = le32_to_cpu(hdr->data_len);
u64 tid = le64_to_cpu(hdr->tid);
+ u64 srlen;
down_read(&osdc->lock);
if (!osd_registered(osd)) {
@@ -5396,7 +5499,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
req->r_reply = m;
}
- if (data_len > req->r_reply->data_length) {
+ srlen = sparse_data_requested(req);
+ if (!srlen && data_len > req->r_reply->data_length) {
pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n",
__func__, osd->o_osd, req->r_tid, data_len,
req->r_reply->data_length);
@@ -5406,6 +5510,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
}
m = ceph_msg_get(req->r_reply);
+ m->sparse_read = (bool)srlen;
+
dout("get_reply tid %lld %p\n", tid, m);
out_unlock_session:
@@ -5638,9 +5744,217 @@ static int osd_check_message_signature(struct ceph_msg *msg)
return ceph_auth_check_message_signature(auth, msg);
}
+static void advance_cursor(struct ceph_msg_data_cursor *cursor, size_t len,
+ bool zero)
+{
+ while (len) {
+ struct page *page;
+ size_t poff, plen;
+
+ page = ceph_msg_data_next(cursor, &poff, &plen);
+ if (plen > len)
+ plen = len;
+ if (zero)
+ zero_user_segment(page, poff, poff + plen);
+ len -= plen;
+ ceph_msg_data_advance(cursor, plen);
+ }
+}
+
+static int prep_next_sparse_read(struct ceph_connection *con,
+ struct ceph_msg_data_cursor *cursor)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_sparse_read *sr = &o->o_sparse_read;
+ struct ceph_osd_request *req;
+ struct ceph_osd_req_op *op;
+
+ spin_lock(&o->o_requests_lock);
+ req = lookup_request(&o->o_requests, le64_to_cpu(con->in_msg->hdr.tid));
+ if (!req) {
+ spin_unlock(&o->o_requests_lock);
+ return -EBADR;
+ }
+
+ if (o->o_sparse_op_idx < 0) {
+ u64 srlen = sparse_data_requested(req);
+
+ dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n",
+ __func__, o->o_osd, srlen);
+ ceph_msg_data_cursor_init(cursor, con->in_msg, srlen);
+ } else {
+ u64 end;
+
+ op = &req->r_ops[o->o_sparse_op_idx];
+
+ WARN_ON_ONCE(op->extent.sparse_ext);
+
+ /* hand back buffer we took earlier */
+ op->extent.sparse_ext = sr->sr_extent;
+ sr->sr_extent = NULL;
+ op->extent.sparse_ext_cnt = sr->sr_count;
+ sr->sr_ext_len = 0;
+ dout("%s: [%d] completed extent array len %d cursor->resid %zd\n",
+ __func__, o->o_osd, op->extent.sparse_ext_cnt, cursor->resid);
+ /* Advance to end of data for this operation */
+ end = ceph_sparse_ext_map_end(op);
+ if (end < sr->sr_req_len)
+ advance_cursor(cursor, sr->sr_req_len - end, false);
+ }
+
+ ceph_init_sparse_read(sr);
+
+ /* find next op in this request (if any) */
+ while (++o->o_sparse_op_idx < req->r_num_ops) {
+ op = &req->r_ops[o->o_sparse_op_idx];
+ if (op->op == CEPH_OSD_OP_SPARSE_READ)
+ goto found;
+ }
+
+ /* reset for next sparse read request */
+ spin_unlock(&o->o_requests_lock);
+ o->o_sparse_op_idx = -1;
+ return 0;
+found:
+ sr->sr_req_off = op->extent.offset;
+ sr->sr_req_len = op->extent.length;
+ sr->sr_pos = sr->sr_req_off;
+ dout("%s: [%d] new sparse read op at idx %d 0x%llx~0x%llx\n", __func__,
+ o->o_osd, o->o_sparse_op_idx, sr->sr_req_off, sr->sr_req_len);
+
+ /* hand off request's sparse extent map buffer */
+ sr->sr_ext_len = op->extent.sparse_ext_cnt;
+ op->extent.sparse_ext_cnt = 0;
+ sr->sr_extent = op->extent.sparse_ext;
+ op->extent.sparse_ext = NULL;
+
+ spin_unlock(&o->o_requests_lock);
+ return 1;
+}
+
+#ifdef __BIG_ENDIAN
+static inline void convert_extent_map(struct ceph_sparse_read *sr)
+{
+ int i;
+
+ for (i = 0; i < sr->sr_count; i++) {
+ struct ceph_sparse_extent *ext = &sr->sr_extent[i];
+
+ ext->off = le64_to_cpu((__force __le64)ext->off);
+ ext->len = le64_to_cpu((__force __le64)ext->len);
+ }
+}
+#else
+static inline void convert_extent_map(struct ceph_sparse_read *sr)
+{
+}
+#endif
+
+#define MAX_EXTENTS 4096
+
+static int osd_sparse_read(struct ceph_connection *con,
+ struct ceph_msg_data_cursor *cursor,
+ char **pbuf)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_sparse_read *sr = &o->o_sparse_read;
+ u32 count = sr->sr_count;
+ u64 eoff, elen;
+ int ret;
+
+ switch (sr->sr_state) {
+ case CEPH_SPARSE_READ_HDR:
+next_op:
+ ret = prep_next_sparse_read(con, cursor);
+ if (ret <= 0)
+ return ret;
+
+ /* number of extents */
+ ret = sizeof(sr->sr_count);
+ *pbuf = (char *)&sr->sr_count;
+ sr->sr_state = CEPH_SPARSE_READ_EXTENTS;
+ break;
+ case CEPH_SPARSE_READ_EXTENTS:
+ /* Convert sr_count to host-endian */
+ count = le32_to_cpu((__force __le32)sr->sr_count);
+ sr->sr_count = count;
+ dout("[%d] got %u extents\n", o->o_osd, count);
+
+ if (count > 0) {
+ if (!sr->sr_extent || count > sr->sr_ext_len) {
+ /*
+ * Apply a hard cap to the number of extents.
+ * If we have more, assume something is wrong.
+ */
+ if (count > MAX_EXTENTS) {
+ dout("%s: OSD returned 0x%x extents in a single reply!\n",
+ __func__, count);
+ return -EREMOTEIO;
+ }
+
+ /* no extent array provided, or too short */
+ kfree(sr->sr_extent);
+ sr->sr_extent = kmalloc_array(count,
+ sizeof(*sr->sr_extent),
+ GFP_NOIO);
+ if (!sr->sr_extent)
+ return -ENOMEM;
+ sr->sr_ext_len = count;
+ }
+ ret = count * sizeof(*sr->sr_extent);
+ *pbuf = (char *)sr->sr_extent;
+ sr->sr_state = CEPH_SPARSE_READ_DATA_LEN;
+ break;
+ }
+ /* No extents? Read data len */
+ fallthrough;
+ case CEPH_SPARSE_READ_DATA_LEN:
+ convert_extent_map(sr);
+ ret = sizeof(sr->sr_datalen);
+ *pbuf = (char *)&sr->sr_datalen;
+ sr->sr_state = CEPH_SPARSE_READ_DATA;
+ break;
+ case CEPH_SPARSE_READ_DATA:
+ if (sr->sr_index >= count) {
+ sr->sr_state = CEPH_SPARSE_READ_HDR;
+ goto next_op;
+ }
+
+ eoff = sr->sr_extent[sr->sr_index].off;
+ elen = sr->sr_extent[sr->sr_index].len;
+
+ dout("[%d] ext %d off 0x%llx len 0x%llx\n",
+ o->o_osd, sr->sr_index, eoff, elen);
+
+ if (elen > INT_MAX) {
+ dout("Sparse read extent length too long (0x%llx)\n",
+ elen);
+ return -EREMOTEIO;
+ }
+
+ /* zero out anything from sr_pos to start of extent */
+ if (sr->sr_pos < eoff)
+ advance_cursor(cursor, eoff - sr->sr_pos, true);
+
+ /* Set position to end of extent */
+ sr->sr_pos = eoff + elen;
+
+ /* send back the new length and nullify the ptr */
+ cursor->sr_resid = elen;
+ ret = elen;
+ *pbuf = NULL;
+
+ /* Bump the array index */
+ ++sr->sr_index;
+ break;
+ }
+ return ret;
+}
+
static const struct ceph_connection_operations osd_con_ops = {
.get = osd_get_con,
.put = osd_put_con,
+ .sparse_read = osd_sparse_read,
.alloc_msg = osd_alloc_msg,
.dispatch = osd_dispatch,
.fault = osd_fault,
diff --git a/net/core/dev.c b/net/core/dev.c
index 69a3e544676c..85df22f05c38 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -69,7 +69,7 @@
*/
#include <linux/uaccess.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
@@ -107,6 +107,7 @@
#include <net/pkt_cls.h>
#include <net/checksum.h>
#include <net/xfrm.h>
+#include <net/tcx.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -132,6 +133,7 @@
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <trace/events/qdisc.h>
+#include <trace/events/xdp.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
@@ -150,11 +152,11 @@
#include <linux/pm_runtime.h>
#include <linux/prandom.h>
#include <linux/once_lite.h>
+#include <net/netdev_rx_queue.h>
#include "dev.h"
#include "net-sysfs.h"
-
static DEFINE_SPINLOCK(ptype_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly; /* Taps */
@@ -388,6 +390,8 @@ static void list_netdevice(struct net_device *dev)
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock(&dev_base_lock);
+ /* We reserved the ifindex, this can't fail */
+ WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
dev_base_seq_inc(net);
}
@@ -397,8 +401,12 @@ static void list_netdevice(struct net_device *dev)
*/
static void unlist_netdevice(struct net_device *dev, bool lock)
{
+ struct net *net = dev_net(dev);
+
ASSERT_RTNL();
+ xa_erase(&net->dev_by_index, dev->ifindex);
+
/* Unlink dev from the device chain */
if (lock)
write_lock(&dev_base_lock);
@@ -1072,7 +1080,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
return -EINVAL;
/* Use one page as a bit array of possible slots */
- inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
+ inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC);
if (!inuse)
return -ENOMEM;
@@ -1101,7 +1109,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
}
i = find_first_zero_bit(inuse, max_netdevices);
- free_page((unsigned long) inuse);
+ bitmap_free(inuse);
}
snprintf(buf, IFNAMSIZ, name, i);
@@ -2384,8 +2392,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
struct xps_map *map = NULL;
int pos;
- if (dev_maps)
- map = xmap_dereference(dev_maps->attr_map[tci]);
+ map = xmap_dereference(dev_maps->attr_map[tci]);
if (!map)
return false;
@@ -3882,69 +3889,201 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
EXPORT_SYMBOL(dev_loopback_xmit);
#ifdef CONFIG_NET_EGRESS
-static struct sk_buff *
-sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+static struct netdev_queue *
+netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
{
+ int qm = skb_get_queue_mapping(skb);
+
+ return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
+}
+
+static bool netdev_xmit_txqueue_skipped(void)
+{
+ return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+}
+
+void netdev_xmit_skip_txqueue(bool skip)
+{
+ __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+}
+EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
+#endif /* CONFIG_NET_EGRESS */
+
+#ifdef CONFIG_NET_XGRESS
+static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
+{
+ int ret = TC_ACT_UNSPEC;
#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
- struct tcf_result cl_res;
+ struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
+ struct tcf_result res;
if (!miniq)
- return skb;
+ return ret;
- /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
- mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
+ mini_qdisc_bstats_cpu_update(miniq, skb);
+ ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
+ /* Only tcf related quirks below. */
+ switch (ret) {
+ case TC_ACT_SHOT:
+ mini_qdisc_qstats_cpu_drop(miniq);
+ break;
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
- skb->tc_index = TC_H_MIN(cl_res.classid);
+ skb->tc_index = TC_H_MIN(res.classid);
break;
+ }
+#endif /* CONFIG_NET_CLS_ACT */
+ return ret;
+}
+
+static DEFINE_STATIC_KEY_FALSE(tcx_needed_key);
+
+void tcx_inc(void)
+{
+ static_branch_inc(&tcx_needed_key);
+}
+
+void tcx_dec(void)
+{
+ static_branch_dec(&tcx_needed_key);
+}
+
+static __always_inline enum tcx_action_base
+tcx_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
+ const bool needs_mac)
+{
+ const struct bpf_mprog_fp *fp;
+ const struct bpf_prog *prog;
+ int ret = TCX_NEXT;
+
+ if (needs_mac)
+ __skb_push(skb, skb->mac_len);
+ bpf_mprog_foreach_prog(entry, fp, prog) {
+ bpf_compute_data_pointers(skb);
+ ret = bpf_prog_run(prog, skb);
+ if (ret != TCX_NEXT)
+ break;
+ }
+ if (needs_mac)
+ __skb_pull(skb, skb->mac_len);
+ return tcx_action_code(skb, ret);
+}
+
+static __always_inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev, bool *another)
+{
+ struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
+ int sch_ret;
+
+ if (!entry)
+ return skb;
+ if (*pt_prev) {
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
+ *pt_prev = NULL;
+ }
+
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ tcx_set_ingress(skb, true);
+
+ if (static_branch_unlikely(&tcx_needed_key)) {
+ sch_ret = tcx_run(entry, skb, true);
+ if (sch_ret != TC_ACT_UNSPEC)
+ goto ingress_verdict;
+ }
+ sch_ret = tc_run(tcx_entry(entry), skb);
+ingress_verdict:
+ switch (sch_ret) {
+ case TC_ACT_REDIRECT:
+ /* skb_mac_header check was done by BPF, so we can safely
+ * push the L2 header back before redirecting to another
+ * netdev.
+ */
+ __skb_push(skb, skb->mac_len);
+ if (skb_do_redirect(skb) == -EAGAIN) {
+ __skb_pull(skb, skb->mac_len);
+ *another = true;
+ break;
+ }
+ *ret = NET_RX_SUCCESS;
+ return NULL;
case TC_ACT_SHOT:
- mini_qdisc_qstats_cpu_drop(miniq);
- *ret = NET_XMIT_DROP;
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
+ *ret = NET_RX_DROP;
return NULL;
+ /* used by tc_run */
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
- *ret = NET_XMIT_SUCCESS;
consume_skb(skb);
+ fallthrough;
+ case TC_ACT_CONSUMED:
+ *ret = NET_RX_SUCCESS;
return NULL;
+ }
+
+ return skb;
+}
+
+static __always_inline struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+{
+ struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
+ int sch_ret;
+
+ if (!entry)
+ return skb;
+
+ /* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was
+ * already set by the caller.
+ */
+ if (static_branch_unlikely(&tcx_needed_key)) {
+ sch_ret = tcx_run(entry, skb, false);
+ if (sch_ret != TC_ACT_UNSPEC)
+ goto egress_verdict;
+ }
+ sch_ret = tc_run(tcx_entry(entry), skb);
+egress_verdict:
+ switch (sch_ret) {
case TC_ACT_REDIRECT:
/* No need to push/pop skb's mac_header here on egress! */
skb_do_redirect(skb);
*ret = NET_XMIT_SUCCESS;
return NULL;
- default:
- break;
+ case TC_ACT_SHOT:
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ *ret = NET_XMIT_DROP;
+ return NULL;
+ /* used by tc_run */
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ consume_skb(skb);
+ fallthrough;
+ case TC_ACT_CONSUMED:
+ *ret = NET_XMIT_SUCCESS;
+ return NULL;
}
-#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
-
-static struct netdev_queue *
-netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
-{
- int qm = skb_get_queue_mapping(skb);
-
- return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
-}
-
-static bool netdev_xmit_txqueue_skipped(void)
+#else
+static __always_inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev, bool *another)
{
- return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+ return skb;
}
-void netdev_xmit_skip_txqueue(bool skip)
+static __always_inline struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
- __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+ return skb;
}
-EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
-#endif /* CONFIG_NET_EGRESS */
+#endif /* CONFIG_NET_XGRESS */
#ifdef CONFIG_XPS
static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
@@ -4128,9 +4267,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
skb_update_prio(skb);
qdisc_pkt_len_init(skb);
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_at_ingress = 0;
-#endif
+ tcx_set_ingress(skb, false);
#ifdef CONFIG_NET_EGRESS
if (static_branch_unlikely(&egress_needed_key)) {
if (nf_hook_egress_active()) {
@@ -5064,72 +5201,6 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif
-static inline struct sk_buff *
-sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
- struct net_device *orig_dev, bool *another)
-{
-#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
- struct tcf_result cl_res;
-
- /* If there's at least one ingress present somewhere (so
- * we get here via enabled static key), remaining devices
- * that are not configured with an ingress qdisc will bail
- * out here.
- */
- if (!miniq)
- return skb;
-
- if (*pt_prev) {
- *ret = deliver_skb(skb, *pt_prev, orig_dev);
- *pt_prev = NULL;
- }
-
- qdisc_skb_cb(skb)->pkt_len = skb->len;
- tc_skb_cb(skb)->mru = 0;
- tc_skb_cb(skb)->post_ct = false;
- skb->tc_at_ingress = 1;
- mini_qdisc_bstats_cpu_update(miniq, skb);
-
- switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
- case TC_ACT_OK:
- case TC_ACT_RECLASSIFY:
- skb->tc_index = TC_H_MIN(cl_res.classid);
- break;
- case TC_ACT_SHOT:
- mini_qdisc_qstats_cpu_drop(miniq);
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
- *ret = NET_RX_DROP;
- return NULL;
- case TC_ACT_STOLEN:
- case TC_ACT_QUEUED:
- case TC_ACT_TRAP:
- consume_skb(skb);
- *ret = NET_RX_SUCCESS;
- return NULL;
- case TC_ACT_REDIRECT:
- /* skb_mac_header check was done by cls/act_bpf, so
- * we can safely push the L2 header back before
- * redirecting to another netdev
- */
- __skb_push(skb, skb->mac_len);
- if (skb_do_redirect(skb) == -EAGAIN) {
- __skb_pull(skb, skb->mac_len);
- *another = true;
- break;
- }
- *ret = NET_RX_SUCCESS;
- return NULL;
- case TC_ACT_CONSUMED:
- *ret = NET_RX_SUCCESS;
- return NULL;
- default:
- break;
- }
-#endif /* CONFIG_NET_CLS_ACT */
- return skb;
-}
-
/**
* netdev_is_rx_handler_busy - check if receive handler is registered
* @dev: device to check
@@ -6316,12 +6387,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
* softirq mode will happen in the next round of napi_schedule().
* This should not cause hiccups/stalls to the live traffic.
*/
- list_for_each_entry(napi, &dev->napi_list, dev_list) {
- if (threaded)
- set_bit(NAPI_STATE_THREADED, &napi->state);
- else
- clear_bit(NAPI_STATE_THREADED, &napi->state);
- }
+ list_for_each_entry(napi, &dev->napi_list, dev_list)
+ assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
return err;
}
@@ -9413,6 +9480,7 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct net *net = current->nsproxy->net_ns;
struct bpf_link_primer link_primer;
+ struct netlink_ext_ack extack = {};
struct bpf_xdp_link *link;
struct net_device *dev;
int err, fd;
@@ -9440,12 +9508,13 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
goto unlock;
}
- err = dev_xdp_attach_link(dev, NULL, link);
+ err = dev_xdp_attach_link(dev, &extack, link);
rtnl_unlock();
if (err) {
link->dev = NULL;
bpf_link_cleanup(&link_primer);
+ trace_bpf_xdp_link_attach_failed(extack._msg);
goto out_put_dev;
}
@@ -9509,23 +9578,40 @@ err_out:
}
/**
- * dev_new_index - allocate an ifindex
- * @net: the applicable net namespace
+ * dev_index_reserve() - allocate an ifindex in a namespace
+ * @net: the applicable net namespace
+ * @ifindex: requested ifindex, pass %0 to get one allocated
+ *
+ * Allocate a ifindex for a new device. Caller must either use the ifindex
+ * to store the device (via list_netdevice()) or call dev_index_release()
+ * to give the index up.
*
- * Returns a suitable unique value for a new device interface
- * number. The caller must hold the rtnl semaphore or the
- * dev_base_lock to be sure it remains unique.
+ * Return: a suitable unique value for a new device interface number or -errno.
*/
-static int dev_new_index(struct net *net)
+static int dev_index_reserve(struct net *net, u32 ifindex)
{
- int ifindex = net->ifindex;
+ int err;
- for (;;) {
- if (++ifindex <= 0)
- ifindex = 1;
- if (!__dev_get_by_index(net, ifindex))
- return net->ifindex = ifindex;
+ if (ifindex > INT_MAX) {
+ DEBUG_NET_WARN_ON_ONCE(1);
+ return -EINVAL;
}
+
+ if (!ifindex)
+ err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL,
+ xa_limit_31b, &net->ifindex, GFP_KERNEL);
+ else
+ err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL);
+ if (err < 0)
+ return err;
+
+ return ifindex;
+}
+
+static void dev_index_release(struct net *net, int ifindex)
+{
+ /* Expect only unused indexes, unlist_netdevice() removes the used */
+ WARN_ON(xa_erase(&net->dev_by_index, ifindex));
}
/* Delayed registration/unregisteration */
@@ -9995,11 +10081,10 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
}
- ret = -EBUSY;
- if (!dev->ifindex)
- dev->ifindex = dev_new_index(net);
- else if (__dev_get_by_index(net, dev->ifindex))
+ ret = dev_index_reserve(net, dev->ifindex);
+ if (ret < 0)
goto err_uninit;
+ dev->ifindex = ret;
/* Transfer changeable features to wanted_features and enable
* software offloads (GSO and GRO).
@@ -10046,7 +10131,7 @@ int register_netdevice(struct net_device *dev)
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
if (ret)
- goto err_uninit;
+ goto err_ifindex_release;
ret = netdev_register_kobject(dev);
write_lock(&dev_base_lock);
@@ -10102,6 +10187,8 @@ out:
err_uninit_notify:
call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
+err_ifindex_release:
+ dev_index_release(net, dev->ifindex);
err_uninit:
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
@@ -10617,6 +10704,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
+ dev->xdp_zc_max_segs = 1;
dev->gso_max_segs = GSO_MAX_SEGS;
dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE;
@@ -10838,7 +10926,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
/* Shutdown queueing discipline. */
dev_shutdown(dev);
-
+ dev_tcx_uninstall(dev);
dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);
@@ -10978,9 +11066,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
}
/* Check that new_ifindex isn't used yet. */
- err = -EBUSY;
- if (new_ifindex && __dev_get_by_index(net, new_ifindex))
- goto out;
+ if (new_ifindex) {
+ err = dev_index_reserve(net, new_ifindex);
+ if (err < 0)
+ goto out;
+ } else {
+ /* If there is an ifindex conflict assign a new one */
+ err = dev_index_reserve(net, dev->ifindex);
+ if (err == -EBUSY)
+ err = dev_index_reserve(net, 0);
+ if (err < 0)
+ goto out;
+ new_ifindex = err;
+ }
/*
* And now a mini version of register_netdevice unregister_netdevice.
@@ -11008,13 +11106,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
rcu_barrier();
new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
- /* If there is an ifindex conflict assign a new one */
- if (!new_ifindex) {
- if (__dev_get_by_index(net, dev->ifindex))
- new_ifindex = dev_new_index(net);
- else
- new_ifindex = dev->ifindex;
- }
rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
new_ifindex);
@@ -11192,6 +11283,8 @@ static int __net_init netdev_init(struct net *net)
if (net->dev_index_head == NULL)
goto err_idx;
+ xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC1);
+
RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
return 0;
@@ -11289,6 +11382,7 @@ static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
kfree(net->dev_index_head);
+ xa_destroy(&net->dev_by_index);
if (net != &init_net)
WARN_ON_ONCE(!list_empty(&net->dev_base_head));
}
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 3730945ee294..b46aedc36939 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -5,6 +5,7 @@
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/net_tstamp.h>
+#include <linux/phylib_stubs.h>
#include <linux/wireless.h>
#include <linux/if_bridge.h>
#include <net/dsa_stubs.h>
@@ -252,14 +253,121 @@ static int dev_eth_ioctl(struct net_device *dev,
return ops->ndo_eth_ioctl(dev, ifr, cmd);
}
+/**
+ * dev_get_hwtstamp_phylib() - Get hardware timestamping settings of NIC
+ * or of attached phylib PHY
+ * @dev: Network device
+ * @cfg: Timestamping configuration structure
+ *
+ * Helper for enforcing a common policy that phylib timestamping, if available,
+ * should take precedence in front of hardware timestamping provided by the
+ * netdev.
+ *
+ * Note: phy_mii_ioctl() only handles SIOCSHWTSTAMP (not SIOCGHWTSTAMP), and
+ * there only exists a phydev->mii_ts->hwtstamp() method. So this will return
+ * -EOPNOTSUPP for phylib for now, which is still more accurate than letting
+ * the netdev handle the GET request.
+ */
+static int dev_get_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg)
+{
+ if (phy_has_hwtstamp(dev->phydev))
+ return phy_hwtstamp_get(dev->phydev, cfg);
+
+ return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg);
+}
+
static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
- return dev_eth_ioctl(dev, ifr, SIOCGHWTSTAMP);
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct kernel_hwtstamp_config kernel_cfg = {};
+ struct hwtstamp_config cfg;
+ int err;
+
+ if (!ops->ndo_hwtstamp_get)
+ return dev_eth_ioctl(dev, ifr, SIOCGHWTSTAMP); /* legacy */
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ kernel_cfg.ifr = ifr;
+ err = dev_get_hwtstamp_phylib(dev, &kernel_cfg);
+ if (err)
+ return err;
+
+ /* If the request was resolved through an unconverted driver, omit
+ * the copy_to_user(), since the implementation has already done that
+ */
+ if (!kernel_cfg.copied_to_user) {
+ hwtstamp_config_from_kernel(&cfg, &kernel_cfg);
+
+ if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * dev_set_hwtstamp_phylib() - Change hardware timestamping of NIC
+ * or of attached phylib PHY
+ * @dev: Network device
+ * @cfg: Timestamping configuration structure
+ * @extack: Netlink extended ack message structure, for error reporting
+ *
+ * Helper for enforcing a common policy that phylib timestamping, if available,
+ * should take precedence in front of hardware timestamping provided by the
+ * netdev. If the netdev driver needs to perform specific actions even for PHY
+ * timestamping to work properly (a switch port must trap the timestamped
+ * frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in
+ * dev->priv_flags.
+ */
+static int dev_set_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ bool phy_ts = phy_has_hwtstamp(dev->phydev);
+ struct kernel_hwtstamp_config old_cfg = {};
+ bool changed = false;
+ int err;
+
+ cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV;
+
+ if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) {
+ err = ops->ndo_hwtstamp_get(dev, &old_cfg);
+ if (err)
+ return err;
+ }
+
+ if (!phy_ts || (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) {
+ err = ops->ndo_hwtstamp_set(dev, cfg, extack);
+ if (err) {
+ if (extack->_msg)
+ netdev_err(dev, "%s\n", extack->_msg);
+ return err;
+ }
+ }
+
+ if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS))
+ changed = kernel_hwtstamp_config_changed(&old_cfg, cfg);
+
+ if (phy_ts) {
+ err = phy_hwtstamp_set(dev->phydev, cfg, extack);
+ if (err) {
+ if (changed)
+ ops->ndo_hwtstamp_set(dev, &old_cfg, NULL);
+ return err;
+ }
+ }
+
+ return 0;
}
static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
- struct kernel_hwtstamp_config kernel_cfg;
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct kernel_hwtstamp_config kernel_cfg = {};
struct netlink_ext_ack extack = {};
struct hwtstamp_config cfg;
int err;
@@ -268,6 +376,7 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
return -EFAULT;
hwtstamp_config_to_kernel(&kernel_cfg, &cfg);
+ kernel_cfg.ifr = ifr;
err = net_hwtstamp_validate(&kernel_cfg);
if (err)
@@ -280,8 +389,80 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
return err;
}
- return dev_eth_ioctl(dev, ifr, SIOCSHWTSTAMP);
+ if (!ops->ndo_hwtstamp_set)
+ return dev_eth_ioctl(dev, ifr, SIOCSHWTSTAMP); /* legacy */
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ err = dev_set_hwtstamp_phylib(dev, &kernel_cfg, &extack);
+ if (err)
+ return err;
+
+ /* The driver may have modified the configuration, so copy the
+ * updated version of it back to user space
+ */
+ if (!kernel_cfg.copied_to_user) {
+ hwtstamp_config_from_kernel(&cfg, &kernel_cfg);
+
+ if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int generic_hwtstamp_ioctl_lower(struct net_device *dev, int cmd,
+ struct kernel_hwtstamp_config *kernel_cfg)
+{
+ struct ifreq ifrr;
+ int err;
+
+ strscpy_pad(ifrr.ifr_name, dev->name, IFNAMSIZ);
+ ifrr.ifr_ifru = kernel_cfg->ifr->ifr_ifru;
+
+ err = dev_eth_ioctl(dev, &ifrr, cmd);
+ if (err)
+ return err;
+
+ kernel_cfg->ifr->ifr_ifru = ifrr.ifr_ifru;
+ kernel_cfg->copied_to_user = true;
+
+ return 0;
+}
+
+int generic_hwtstamp_get_lower(struct net_device *dev,
+ struct kernel_hwtstamp_config *kernel_cfg)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ if (ops->ndo_hwtstamp_get)
+ return dev_get_hwtstamp_phylib(dev, kernel_cfg);
+
+ /* Legacy path: unconverted lower driver */
+ return generic_hwtstamp_ioctl_lower(dev, SIOCGHWTSTAMP, kernel_cfg);
+}
+EXPORT_SYMBOL(generic_hwtstamp_get_lower);
+
+int generic_hwtstamp_set_lower(struct net_device *dev,
+ struct kernel_hwtstamp_config *kernel_cfg,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
+ if (ops->ndo_hwtstamp_set)
+ return dev_set_hwtstamp_phylib(dev, kernel_cfg, extack);
+
+ /* Legacy path: unconverted lower driver */
+ return generic_hwtstamp_ioctl_lower(dev, SIOCSHWTSTAMP, kernel_cfg);
}
+EXPORT_SYMBOL(generic_hwtstamp_set_lower);
static int dev_siocbond(struct net_device *dev,
struct ifreq *ifr, unsigned int cmd)
diff --git a/net/core/dst.c b/net/core/dst.c
index 79d9306ad1ee..980e2fd2f013 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -152,7 +152,7 @@ void dst_dev_put(struct dst_entry *dst)
dst->obsolete = DST_OBSOLETE_DEAD;
if (dst->ops->ifdown)
- dst->ops->ifdown(dst, dev, true);
+ dst->ops->ifdown(dst, dev);
dst->input = dst_discard;
dst->output = dst_discard_out;
dst->dev = blackhole_netdev;
diff --git a/net/core/filter.c b/net/core/filter.c
index 28a59596987a..a094694899c9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4339,13 +4339,8 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
- if (map_type == BPF_MAP_TYPE_XSKMAP) {
- /* XDP_REDIRECT is not supported AF_XDP yet. */
- if (unlikely(xdp_buff_has_frags(xdp)))
- return -EOPNOTSUPP;
-
+ if (map_type == BPF_MAP_TYPE_XSKMAP)
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
- }
return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
xdp_prog);
@@ -7350,8 +7345,8 @@ BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
return -EOPNOTSUPP;
if (unlikely(dev_net(skb->dev) != sock_net(sk)))
return -ENETUNREACH;
- if (unlikely(sk_fullsock(sk) && sk->sk_reuseport))
- return -ESOCKTNOSUPPORT;
+ if (sk_unhashed(sk))
+ return -EOPNOTSUPP;
if (sk_is_refcounted(sk) &&
unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
return -ENOENT;
@@ -9306,7 +9301,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
__u8 value_reg = si->dst_reg;
__u8 skb_reg = si->src_reg;
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
/* If the tstamp_type is read,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, read skb->tstamp as is if tstamp_type_access is true.
@@ -9340,7 +9335,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
__u8 value_reg = si->src_reg;
__u8 skb_reg = si->dst_reg;
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
/* If the tstamp_type is read,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, write skb->tstamp as is if tstamp_type_access is true.
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 85a2d0d9bd39..272f09251343 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -40,7 +40,7 @@
static void dissector_set_key(struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
{
- flow_dissector->used_keys |= (1 << key_id);
+ flow_dissector->used_keys |= (1ULL << key_id);
}
void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
@@ -205,6 +205,50 @@ static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen);
}
+static void __skb_flow_dissect_ah(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, const void *data,
+ int nhoff, int hlen)
+{
+ struct flow_dissector_key_ipsec *key_ah;
+ struct ip_auth_hdr _hdr, *hdr;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPSEC))
+ return;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ if (!hdr)
+ return;
+
+ key_ah = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPSEC,
+ target_container);
+
+ key_ah->spi = hdr->spi;
+}
+
+static void __skb_flow_dissect_esp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, const void *data,
+ int nhoff, int hlen)
+{
+ struct flow_dissector_key_ipsec *key_esp;
+ struct ip_esp_hdr _hdr, *hdr;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPSEC))
+ return;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ if (!hdr)
+ return;
+
+ key_esp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPSEC,
+ target_container);
+
+ key_esp->spi = hdr->spi;
+}
+
static void __skb_flow_dissect_l2tpv3(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, const void *data,
@@ -1402,7 +1446,7 @@ proto_again:
break;
}
- nhoff += ntohs(hdr->message_length);
+ nhoff += sizeof(struct ptp_header);
fdret = FLOW_DISSECT_RET_OUT_GOOD;
break;
}
@@ -1571,7 +1615,14 @@ ip_proto_again:
__skb_flow_dissect_l2tpv3(skb, flow_dissector, target_container,
data, nhoff, hlen);
break;
-
+ case IPPROTO_ESP:
+ __skb_flow_dissect_esp(skb, flow_dissector, target_container,
+ data, nhoff, hlen);
+ break;
+ case IPPROTO_AH:
+ __skb_flow_dissect_ah(skb, flow_dissector, target_container,
+ data, nhoff, hlen);
+ break;
default:
break;
}
@@ -1780,8 +1831,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
memset(&keys, 0, sizeof(keys));
__skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
- &keys, NULL, 0, 0, 0,
- FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+ &keys, NULL, 0, 0, 0, 0);
return __flow_hash_from_keys(&keys, &hashrnd);
}
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index acfc1f88ea79..bc5169482710 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -146,6 +146,13 @@ void flow_rule_match_tcp(const struct flow_rule *rule,
}
EXPORT_SYMBOL(flow_rule_match_tcp);
+void flow_rule_match_ipsec(const struct flow_rule *rule,
+ struct flow_match_ipsec *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPSEC, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ipsec);
+
void flow_rule_match_icmp(const struct flow_rule *rule,
struct flow_match_icmp *out)
{
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 8b6b5e72b217..4a0797f0a154 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -60,9 +60,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
ret = BPF_OK;
} else {
skb_reset_mac_header(skb);
- ret = skb_do_redirect(skb);
- if (ret == 0)
- ret = BPF_REDIRECT;
+ skb_do_redirect(skb);
+ ret = BPF_REDIRECT;
}
break;
@@ -255,7 +254,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
if (unlikely(err))
- return err;
+ return net_xmit_errno(err);
/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
return LWTUNNEL_XMIT_DONE;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ddd0f32de20e..6b76cd103195 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3779,6 +3779,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
const char *dev_name_source;
char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
char *p_name;
+ size_t neigh_vars_size;
t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
if (!t)
@@ -3790,11 +3791,13 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
t->neigh_vars[i].extra2 = p;
}
+ neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
if (dev) {
dev_name_source = dev->name;
/* Terminate the table early */
memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
+ neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
} else {
struct neigh_table *tbl = p->tbl;
dev_name_source = "default";
@@ -3841,8 +3844,9 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
p_name, dev_name_source);
- t->sysctl_header =
- register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
+ t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
+ neigh_path, t->neigh_vars,
+ neigh_vars_size);
if (!t->sysctl_header)
goto free;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 15e3f4606b5f..fccaa5bac0ed 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -23,6 +23,7 @@
#include <linux/of.h>
#include <linux/of_net.h>
#include <linux/cpu.h>
+#include <net/netdev_rx_queue.h>
#include "dev.h"
#include "net-sysfs.h"
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index a4270fafdf11..c1aea8b756b6 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -10,11 +10,11 @@
static int
netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
- u32 portid, u32 seq, int flags, u32 cmd)
+ const struct genl_info *info)
{
void *hdr;
- hdr = genlmsg_put(rsp, portid, seq, &netdev_nl_family, flags, cmd);
+ hdr = genlmsg_iput(rsp, info);
if (!hdr)
return -EMSGSIZE;
@@ -25,6 +25,14 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
return -EINVAL;
}
+ if (netdev->xdp_features & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
+ if (nla_put_u32(rsp, NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
+ netdev->xdp_zc_max_segs)) {
+ genlmsg_cancel(rsp, hdr);
+ return -EINVAL;
+ }
+ }
+
genlmsg_end(rsp, hdr);
return 0;
@@ -33,17 +41,20 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
static void
netdev_genl_dev_notify(struct net_device *netdev, int cmd)
{
+ struct genl_info info;
struct sk_buff *ntf;
if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev),
NETDEV_NLGRP_MGMT))
return;
+ genl_info_init_ntf(&info, &netdev_nl_family, cmd);
+
ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!ntf)
return;
- if (netdev_nl_dev_fill(netdev, ntf, 0, 0, 0, cmd)) {
+ if (netdev_nl_dev_fill(netdev, ntf, &info)) {
nlmsg_free(ntf);
return;
}
@@ -72,8 +83,7 @@ int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
netdev = __dev_get_by_index(genl_info_net(info), ifindex);
if (netdev)
- err = netdev_nl_dev_fill(netdev, rsp, info->snd_portid,
- info->snd_seq, 0, info->genlhdr->cmd);
+ err = netdev_nl_dev_fill(netdev, rsp, info);
else
err = -ENODEV;
@@ -93,43 +103,19 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
- int idx = 0, s_idx;
- int h, s_h;
- int err;
-
- s_h = cb->args[0];
- s_idx = cb->args[1];
+ int err = 0;
rtnl_lock();
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- struct hlist_head *head;
-
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry(netdev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- err = netdev_nl_dev_fill(netdev, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0,
- NETDEV_CMD_DEV_GET);
- if (err < 0)
- break;
-cont:
- idx++;
- }
+ for_each_netdev_dump(net, netdev, cb->args[0]) {
+ err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb));
+ if (err < 0)
+ break;
}
-
rtnl_unlock();
if (err != -EMSGSIZE)
return err;
- cb->args[1] = idx;
- cb->args[0] = h;
- cb->seq = net->dev_base_seq;
-
return skb->len;
}
diff --git a/net/core/of_net.c b/net/core/of_net.c
index 55d3fe229269..93ea425b9248 100644
--- a/net/core/of_net.c
+++ b/net/core/of_net.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/of_net.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/phy.h>
#include <linux/export.h>
#include <linux/device.h>
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index a3e12a61d456..77cb75e63aca 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -10,7 +10,7 @@
#include <linux/slab.h>
#include <linux/device.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <net/xdp.h>
#include <linux/dma-direction.h>
@@ -58,6 +58,17 @@ static const char pp_stats[][ETH_GSTRING_LEN] = {
"rx_pp_recycle_released_ref",
};
+/**
+ * page_pool_get_stats() - fetch page pool stats
+ * @pool: pool from which page was allocated
+ * @stats: struct page_pool_stats to fill in
+ *
+ * Retrieve statistics about the page_pool. This API is only available
+ * if the kernel has been configured with ``CONFIG_PAGE_POOL_STATS=y``.
+ * A pointer to a caller allocated struct page_pool_stats structure
+ * is passed to this API which is filled in. The caller can then report
+ * those stats to the user (perhaps via ethtool, debugfs, etc.).
+ */
bool page_pool_get_stats(struct page_pool *pool,
struct page_pool_stats *stats)
{
@@ -224,6 +235,10 @@ static int page_pool_init(struct page_pool *pool,
return 0;
}
+/**
+ * page_pool_create() - create a page pool.
+ * @params: parameters, see struct page_pool_params
+ */
struct page_pool *page_pool_create(const struct page_pool_params *params)
{
struct page_pool *pool;
@@ -492,7 +507,7 @@ static s32 page_pool_inflight(struct page_pool *pool)
* a regular page (that will eventually be returned to the normal
* page-allocator via put_page).
*/
-void page_pool_release_page(struct page_pool *pool, struct page *page)
+static void page_pool_return_page(struct page_pool *pool, struct page *page)
{
dma_addr_t dma;
int count;
@@ -518,13 +533,6 @@ skip_dma_unmap:
*/
count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
trace_page_pool_state_release(pool, page, count);
-}
-EXPORT_SYMBOL(page_pool_release_page);
-
-/* Return a page to the page allocator, cleaning up our state */
-static void page_pool_return_page(struct page_pool *pool, struct page *page)
-{
- page_pool_release_page(pool, page);
put_page(page);
/* An optimization would be to call __free_pages(page, pool->p.order)
@@ -579,6 +587,8 @@ static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size, bool allow_direct)
{
+ lockdep_assert_no_hardirq();
+
/* This allocator is optimized for the XDP mode that uses
* one-frame-per-page, but have fallbacks that act like the
* regular page allocator APIs.
@@ -616,9 +626,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* will be invoking put_page.
*/
recycle_stat_inc(pool, released_refcnt);
- /* Do not replace this with page_pool_return_page() */
- page_pool_release_page(pool, page);
- put_page(page);
+ page_pool_return_page(pool, page);
return NULL;
}
@@ -635,7 +643,21 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
}
EXPORT_SYMBOL(page_pool_put_defragged_page);
-/* Caller must not use data area after call, as this function overwrites it */
+/**
+ * page_pool_put_page_bulk() - release references on multiple pages
+ * @pool: pool from which pages were allocated
+ * @data: array holding page pointers
+ * @count: number of pages in @data
+ *
+ * Tries to refill a number of pages into the ptr_ring cache holding ptr_ring
+ * producer lock. If the ptr_ring is full, page_pool_put_page_bulk()
+ * will release leftover pages to the page allocator.
+ * page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx
+ * completion loop for the XDP_REDIRECT use case.
+ *
+ * Please note the caller must not use data area after running
+ * page_pool_put_page_bulk(), as this function overwrites it.
+ */
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count)
{
@@ -915,42 +937,3 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
}
}
EXPORT_SYMBOL(page_pool_update_nid);
-
-bool page_pool_return_skb_page(struct page *page, bool napi_safe)
-{
- struct napi_struct *napi;
- struct page_pool *pp;
- bool allow_direct;
-
- page = compound_head(page);
-
- /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
- * in order to preserve any existing bits, such as bit 0 for the
- * head page of compound page and bit 1 for pfmemalloc page, so
- * mask those bits for freeing side when doing below checking,
- * and page_is_pfmemalloc() is checked in __page_pool_put_page()
- * to avoid recycling the pfmemalloc page.
- */
- if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
- return false;
-
- pp = page->pp;
-
- /* Allow direct recycle if we have reasons to believe that we are
- * in the same context as the consumer would run, so there's
- * no possible race.
- */
- napi = READ_ONCE(pp->p.napi);
- allow_direct = napi_safe && napi &&
- READ_ONCE(napi->list_owner) == smp_processor_id();
-
- /* Driver set this to memory recycling info. Reset it on recycle.
- * This will *not* work for NIC using a split-page memory model.
- * The page will be returned to the pool here regardless of the
- * 'flipped' fragment being in use or not.
- */
- page_pool_put_full_page(pp, page, allow_direct);
-
- return true;
-}
-EXPORT_SYMBOL(page_pool_return_skb_page);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index aef25aa5cf1d..4a2ec33bfb51 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -61,7 +61,7 @@
#include "dev.h"
#define RTNL_MAX_TYPE 50
-#define RTNL_SLAVE_MAX_TYPE 43
+#define RTNL_SLAVE_MAX_TYPE 44
struct rtnl_link {
rtnl_doit_func doit;
@@ -1273,7 +1273,6 @@ static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
struct net_device *dev,
int vfs_num,
- struct nlattr *vfinfo,
u32 ext_filter_mask)
{
struct ifla_vf_rss_query_en vf_rss_query_en;
@@ -1343,7 +1342,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
vf_trust.setting = ivi.trusted;
vf = nla_nest_start_noflag(skb, IFLA_VF_INFO);
if (!vf)
- goto nla_put_vfinfo_failure;
+ return -EMSGSIZE;
if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
nla_put(skb, IFLA_VF_BROADCAST, sizeof(vf_broadcast), &vf_broadcast) ||
nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
@@ -1414,8 +1413,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
nla_put_vf_failure:
nla_nest_cancel(skb, vf);
-nla_put_vfinfo_failure:
- nla_nest_cancel(skb, vfinfo);
return -EMSGSIZE;
}
@@ -1441,8 +1438,10 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
return -EMSGSIZE;
for (i = 0; i < num_vfs; i++) {
- if (rtnl_fill_vfinfo(skb, dev, i, vfinfo, ext_filter_mask))
+ if (rtnl_fill_vfinfo(skb, dev, i, ext_filter_mask)) {
+ nla_nest_cancel(skb, vfinfo);
return -EMSGSIZE;
+ }
}
nla_nest_end(skb, vfinfo);
@@ -2268,13 +2267,27 @@ out_err:
return err;
}
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
- struct netlink_ext_ack *exterr)
+int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
+ struct netlink_ext_ack *exterr)
{
- return nla_parse_deprecated(tb, IFLA_MAX, head, len, ifla_policy,
+ const struct ifinfomsg *ifmp;
+ const struct nlattr *attrs;
+ size_t len;
+
+ ifmp = nla_data(nla_peer);
+ attrs = nla_data(nla_peer) + sizeof(struct ifinfomsg);
+ len = nla_len(nla_peer) - sizeof(struct ifinfomsg);
+
+ if (ifmp->ifi_index < 0) {
+ NL_SET_ERR_MSG_ATTR(exterr, nla_peer,
+ "ifindex can't be negative");
+ return -EINVAL;
+ }
+
+ return nla_parse_deprecated(tb, IFLA_MAX, attrs, len, ifla_policy,
exterr);
}
-EXPORT_SYMBOL(rtnl_nla_parse_ifla);
+EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg);
struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
{
@@ -3547,6 +3560,9 @@ replay:
if (ifm->ifi_index > 0) {
link_specified = true;
dev = __dev_get_by_index(net, ifm->ifi_index);
+ } else if (ifm->ifi_index < 0) {
+ NL_SET_ERR_MSG(extack, "ifindex can't be negative");
+ return -EINVAL;
} else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) {
link_specified = true;
dev = rtnl_dev_get(net, tb);
diff --git a/net/core/scm.c b/net/core/scm.c
index 3cd7dd377e53..880027ecf516 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -130,6 +130,7 @@ EXPORT_SYMBOL(__scm_destroy);
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
struct cmsghdr *cmsg;
int err;
@@ -153,7 +154,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
switch (cmsg->cmsg_type)
{
case SCM_RIGHTS:
- if (!sock->ops || sock->ops->family != PF_UNIX)
+ if (!ops || ops->family != PF_UNIX)
goto error;
err=scm_fp_copy(cmsg, &p->fp);
if (err<0)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a298992060e6..4eaf7ed0d1f4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -73,7 +73,7 @@
#include <net/mpls.h>
#include <net/mptcp.h>
#include <net/mctp.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <net/dropreason.h>
#include <linux/uaccess.h>
@@ -550,7 +550,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
bool *pfmemalloc)
{
bool ret_pfmemalloc = false;
- unsigned int obj_size;
+ size_t obj_size;
void *obj;
obj_size = SKB_HEAD_ALIGN(*size);
@@ -567,7 +567,13 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
goto out;
}
- *size = obj_size = kmalloc_size_roundup(obj_size);
+
+ obj_size = kmalloc_size_roundup(obj_size);
+ /* The following cast might truncate high-order bits of obj_size, this
+ * is harmless because kmalloc(obj_size >= 2^32) will fail anyway.
+ */
+ *size = (unsigned int)obj_size;
+
/*
* Try a regular allocation, when that fails and we're not entitled
* to the reserves, fail.
@@ -879,11 +885,56 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+bool napi_pp_put_page(struct page *page, bool napi_safe)
+{
+ bool allow_direct = false;
+ struct page_pool *pp;
+
+ page = compound_head(page);
+
+ /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
+ * in order to preserve any existing bits, such as bit 0 for the
+ * head page of compound page and bit 1 for pfmemalloc page, so
+ * mask those bits for freeing side when doing below checking,
+ * and page_is_pfmemalloc() is checked in __page_pool_put_page()
+ * to avoid recycling the pfmemalloc page.
+ */
+ if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
+ return false;
+
+ pp = page->pp;
+
+ /* Allow direct recycle if we have reasons to believe that we are
+ * in the same context as the consumer would run, so there's
+ * no possible race.
+ * __page_pool_put_page() makes sure we're not in hardirq context
+ * and interrupts are enabled prior to accessing the cache.
+ */
+ if (napi_safe || in_softirq()) {
+ const struct napi_struct *napi = READ_ONCE(pp->p.napi);
+
+ allow_direct = napi &&
+ READ_ONCE(napi->list_owner) == smp_processor_id();
+ }
+
+ /* Driver set this to memory recycling info. Reset it on recycle.
+ * This will *not* work for NIC using a split-page memory model.
+ * The page will be returned to the pool here regardless of the
+ * 'flipped' fragment being in use or not.
+ */
+ page_pool_put_full_page(pp, page, allow_direct);
+
+ return true;
+}
+EXPORT_SYMBOL(napi_pp_put_page);
+#endif
+
static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
{
if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
return false;
- return page_pool_return_skb_page(virt_to_page(data), napi_safe);
+ return napi_pp_put_page(virt_to_page(data), napi_safe);
}
static void skb_kfree_head(void *head, unsigned int end_offset)
@@ -3656,20 +3707,23 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
EXPORT_SYMBOL(skb_dequeue_tail);
/**
- * skb_queue_purge - empty a list
+ * skb_queue_purge_reason - empty a list
* @list: list to empty
+ * @reason: drop reason
*
* Delete all buffers on an &sk_buff list. Each buffer is removed from
* the list and one reference dropped. This function takes the list
* lock and is atomic with respect to other list locking functions.
*/
-void skb_queue_purge(struct sk_buff_head *list)
+void skb_queue_purge_reason(struct sk_buff_head *list,
+ enum skb_drop_reason reason)
{
struct sk_buff *skb;
+
while ((skb = skb_dequeue(list)) != NULL)
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
}
-EXPORT_SYMBOL(skb_queue_purge);
+EXPORT_SYMBOL(skb_queue_purge_reason);
/**
* skb_rbtree_purge - empty a skb rbtree
@@ -3697,6 +3751,27 @@ unsigned int skb_rbtree_purge(struct rb_root *root)
return sum;
}
+void skb_errqueue_purge(struct sk_buff_head *list)
+{
+ struct sk_buff *skb, *next;
+ struct sk_buff_head kill;
+ unsigned long flags;
+
+ __skb_queue_head_init(&kill);
+
+ spin_lock_irqsave(&list->lock, flags);
+ skb_queue_walk_safe(list, skb, next) {
+ if (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ZEROCOPY ||
+ SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING)
+ continue;
+ __skb_unlink(skb, list);
+ __skb_queue_tail(&kill, skb);
+ }
+ spin_unlock_irqrestore(&list->lock, flags);
+ __skb_queue_purge(&kill);
+}
+EXPORT_SYMBOL(skb_errqueue_purge);
+
/**
* skb_queue_head - queue a buffer at the list head
* @list: list to use
@@ -4354,21 +4429,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
- skb_frag_t *frag = skb_shinfo(head_skb)->frags;
unsigned int mss = skb_shinfo(head_skb)->gso_size;
unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
- struct sk_buff *frag_skb = head_skb;
unsigned int offset = doffset;
unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
unsigned int partial_segs = 0;
unsigned int headroom;
unsigned int len = head_skb->len;
+ struct sk_buff *frag_skb;
+ skb_frag_t *frag;
__be16 proto;
bool csum, sg;
- int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
- int pos;
+ int nfrags, pos;
if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
@@ -4445,6 +4519,13 @@ normal:
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
+ if (skb_orphan_frags(head_skb, GFP_ATOMIC))
+ return ERR_PTR(-ENOMEM);
+
+ nfrags = skb_shinfo(head_skb)->nr_frags;
+ frag = skb_shinfo(head_skb)->frags;
+ frag_skb = head_skb;
+
do {
struct sk_buff *nskb;
skb_frag_t *nskb_frag;
@@ -4465,6 +4546,10 @@ normal:
(skb_headlen(list_skb) == len || sg)) {
BUG_ON(skb_headlen(list_skb) > len);
+ nskb = skb_clone(list_skb, GFP_ATOMIC);
+ if (unlikely(!nskb))
+ goto err;
+
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
@@ -4483,12 +4568,8 @@ normal:
frag++;
}
- nskb = skb_clone(list_skb, GFP_ATOMIC);
list_skb = list_skb->next;
- if (unlikely(!nskb))
- goto err;
-
if (unlikely(pskb_trim(nskb, len))) {
kfree_skb(nskb);
goto err;
@@ -4564,12 +4645,16 @@ normal:
skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
SKBFL_SHARED_FRAG;
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+ if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
goto err;
while (pos < offset + len) {
if (i >= nfrags) {
+ if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
+ skb_zerocopy_clone(nskb, list_skb,
+ GFP_ATOMIC))
+ goto err;
+
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
@@ -4583,10 +4668,6 @@ normal:
i--;
frag--;
}
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb,
- GFP_ATOMIC))
- goto err;
list_skb = list_skb->next;
}
@@ -4716,23 +4797,13 @@ static const u8 skb_ext_type_len[] = {
static __always_inline unsigned int skb_ext_total_length(void)
{
- return SKB_EXT_CHUNKSIZEOF(struct skb_ext) +
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- skb_ext_type_len[SKB_EXT_BRIDGE_NF] +
-#endif
-#ifdef CONFIG_XFRM
- skb_ext_type_len[SKB_EXT_SEC_PATH] +
-#endif
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- skb_ext_type_len[TC_SKB_EXT] +
-#endif
-#if IS_ENABLED(CONFIG_MPTCP)
- skb_ext_type_len[SKB_EXT_MPTCP] +
-#endif
-#if IS_ENABLED(CONFIG_MCTP_FLOWS)
- skb_ext_type_len[SKB_EXT_MCTP] +
-#endif
- 0;
+ unsigned int l = SKB_EXT_CHUNKSIZEOF(struct skb_ext);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(skb_ext_type_len); i++)
+ l += skb_ext_type_len[i];
+
+ return l;
}
static void skb_extensions_init(void)
@@ -4750,12 +4821,23 @@ static void skb_extensions_init(void)
static void skb_extensions_init(void) {}
#endif
+/* The SKB kmem_cache slab is critical for network performance. Never
+ * merge/alias the slab with similar sized objects. This avoids fragmentation
+ * that hurts performance of kmem_cache_{alloc,free}_bulk APIs.
+ */
+#ifndef CONFIG_SLUB_TINY
+#define FLAG_SKB_NO_MERGE SLAB_NO_MERGE
+#else /* CONFIG_SLUB_TINY - simple loop in kmem_cache_alloc_bulk */
+#define FLAG_SKB_NO_MERGE 0
+#endif
+
void __init skb_init(void)
{
skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
sizeof(struct sk_buff),
0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|
+ FLAG_SKB_NO_MERGE,
offsetof(struct sk_buff, cb),
sizeof_field(struct sk_buff, cb),
NULL);
@@ -5137,7 +5219,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_info = tstype;
serr->opt_stats = opt_stats;
serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk_is_tcp(sk))
serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
@@ -5193,21 +5275,23 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
{
struct sk_buff *skb;
bool tsonly, opt_stats = false;
+ u32 tsflags;
if (!sk)
return;
- if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+ tsflags = READ_ONCE(sk->sk_tsflags);
+ if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
return;
- tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+ tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
if (!skb_may_tx_timestamp(sk, tsonly))
return;
if (tsonly) {
#ifdef CONFIG_INET
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
+ if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
sk_is_tcp(sk)) {
skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
ack_skb);
@@ -6204,7 +6288,7 @@ EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
*
* @header_len: size of linear part
* @data_len: needed length in frags
- * @max_page_order: max page order desired.
+ * @order: max page order desired.
* @errcode: pointer to error code if any
* @gfp_mask: allocation mask
*
@@ -6212,21 +6296,17 @@ EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
*/
struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
unsigned long data_len,
- int max_page_order,
+ int order,
int *errcode,
gfp_t gfp_mask)
{
- int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
unsigned long chunk;
struct sk_buff *skb;
struct page *page;
- int i;
+ int nr_frags = 0;
*errcode = -EMSGSIZE;
- /* Note this test could be relaxed, if we succeed to allocate
- * high order pages...
- */
- if (npages > MAX_SKB_FRAGS)
+ if (unlikely(data_len > MAX_SKB_FRAGS * (PAGE_SIZE << order)))
return NULL;
*errcode = -ENOBUFS;
@@ -6234,34 +6314,32 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
if (!skb)
return NULL;
- skb->truesize += npages << PAGE_SHIFT;
-
- for (i = 0; npages > 0; i++) {
- int order = max_page_order;
-
- while (order) {
- if (npages >= 1 << order) {
- page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
- __GFP_COMP |
- __GFP_NOWARN,
- order);
- if (page)
- goto fill_page;
- /* Do not retry other high order allocations */
- order = 1;
- max_page_order = 0;
- }
+ while (data_len) {
+ if (nr_frags == MAX_SKB_FRAGS - 1)
+ goto failure;
+ while (order && PAGE_ALIGN(data_len) < (PAGE_SIZE << order))
order--;
+
+ if (order) {
+ page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
+ __GFP_COMP |
+ __GFP_NOWARN,
+ order);
+ if (!page) {
+ order--;
+ continue;
+ }
+ } else {
+ page = alloc_page(gfp_mask);
+ if (!page)
+ goto failure;
}
- page = alloc_page(gfp_mask);
- if (!page)
- goto failure;
-fill_page:
chunk = min_t(unsigned long, data_len,
PAGE_SIZE << order);
- skb_fill_page_desc(skb, i, page, 0, chunk);
+ skb_fill_page_desc(skb, nr_frags, page, 0, chunk);
+ nr_frags++;
+ skb->truesize += (PAGE_SIZE << order);
data_len -= chunk;
- npages -= 1 << order;
}
return skb;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index ef1a2eb6520b..6c31eefbd777 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -612,12 +612,18 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
+ int err = 0;
+
if (!ingress) {
if (!sock_writeable(psock->sk))
return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len);
}
- return sk_psock_skb_ingress(psock, skb, off, len);
+ skb_get(skb);
+ err = sk_psock_skb_ingress(psock, skb, off, len);
+ if (err < 0)
+ kfree_skb(skb);
+ return err;
}
static void sk_psock_skb_state(struct sk_psock *psock,
@@ -685,9 +691,7 @@ static void sk_psock_backlog(struct work_struct *work)
} while (len);
skb = skb_dequeue(&psock->ingress_skb);
- if (!ingress) {
- kfree_skb(skb);
- }
+ kfree_skb(skb);
}
end:
mutex_unlock(&psock->work_mutex);
@@ -1204,13 +1208,17 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
+ const struct proto_ops *ops;
int copied;
trace_sk_data_ready(sk);
- if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
+ if (unlikely(!sock))
+ return;
+ ops = READ_ONCE(sock->ops);
+ if (!ops || !ops->read_skb)
return;
- copied = sock->ops->read_skb(sk, sk_psock_verdict_recv);
+ copied = ops->read_skb(sk, sk_psock_verdict_recv);
if (copied >= 0) {
struct sk_psock *psock;
diff --git a/net/core/sock.c b/net/core/sock.c
index c9cffb7acbea..16584e2dd648 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -765,9 +765,10 @@ bool sk_mc_loop(struct sock *sk)
return false;
if (!sk)
return true;
- switch (sk->sk_family) {
+ /* IPV6_ADDRFORM can change sk->sk_family under us. */
+ switch (READ_ONCE(sk->sk_family)) {
case AF_INET:
- return inet_sk(sk)->mc_loop;
+ return inet_test_bit(MC_LOOP, sk);
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
return inet6_sk(sk)->mc_loop;
@@ -797,7 +798,7 @@ EXPORT_SYMBOL(sock_set_reuseport);
void sock_no_linger(struct sock *sk)
{
lock_sock(sk);
- sk->sk_lingertime = 0;
+ WRITE_ONCE(sk->sk_lingertime, 0);
sock_set_flag(sk, SOCK_LINGER);
release_sock(sk);
}
@@ -893,7 +894,7 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
if (!match)
return -EINVAL;
- sk->sk_bind_phc = phc_index;
+ WRITE_ONCE(sk->sk_bind_phc, phc_index);
return 0;
}
@@ -936,7 +937,7 @@ int sock_set_timestamping(struct sock *sk, int optname,
return ret;
}
- sk->sk_tsflags = val;
+ WRITE_ONCE(sk->sk_tsflags, val);
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
@@ -1044,7 +1045,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
return -ENOMEM;
}
- sk->sk_forward_alloc += pages << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
WRITE_ONCE(sk->sk_reserved_mem,
sk->sk_reserved_mem + (pages << PAGE_SHIFT));
@@ -1230,15 +1231,15 @@ set_sndbuf:
ret = -EFAULT;
break;
}
- if (!ling.l_onoff)
+ if (!ling.l_onoff) {
sock_reset_flag(sk, SOCK_LINGER);
- else {
-#if (BITS_PER_LONG == 32)
- if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
- sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
+ } else {
+ unsigned long t_sec = ling.l_linger;
+
+ if (t_sec >= MAX_SCHEDULE_TIMEOUT / HZ)
+ WRITE_ONCE(sk->sk_lingertime, MAX_SCHEDULE_TIMEOUT);
else
-#endif
- sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
+ WRITE_ONCE(sk->sk_lingertime, t_sec * HZ);
sock_set_flag(sk, SOCK_LINGER);
}
break;
@@ -1247,17 +1248,11 @@ set_sndbuf:
break;
case SO_PASSCRED:
- if (valbool)
- set_bit(SOCK_PASSCRED, &sock->flags);
- else
- clear_bit(SOCK_PASSCRED, &sock->flags);
+ assign_bit(SOCK_PASSCRED, &sock->flags, valbool);
break;
case SO_PASSPIDFD:
- if (valbool)
- set_bit(SOCK_PASSPIDFD, &sock->flags);
- else
- clear_bit(SOCK_PASSPIDFD, &sock->flags);
+ assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool);
break;
case SO_TIMESTAMP_OLD:
@@ -1283,14 +1278,19 @@ set_sndbuf:
break;
case SO_RCVLOWAT:
+ {
+ int (*set_rcvlowat)(struct sock *sk, int val) = NULL;
+
if (val < 0)
val = INT_MAX;
- if (sock && sock->ops->set_rcvlowat)
- ret = sock->ops->set_rcvlowat(sk, val);
+ if (sock)
+ set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
+ if (set_rcvlowat)
+ ret = set_rcvlowat(sk, val);
else
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
break;
-
+ }
case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
@@ -1361,10 +1361,7 @@ set_sndbuf:
break;
case SO_PASSSEC:
- if (valbool)
- set_bit(SOCK_PASSSEC, &sock->flags);
- else
- clear_bit(SOCK_PASSSEC, &sock->flags);
+ assign_bit(SOCK_PASSSEC, &sock->flags, valbool);
break;
case SO_MARK:
if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
@@ -1388,11 +1385,16 @@ set_sndbuf:
break;
case SO_PEEK_OFF:
- if (sock->ops->set_peek_off)
- ret = sock->ops->set_peek_off(sk, val);
+ {
+ int (*set_peek_off)(struct sock *sk, int val);
+
+ set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
+ if (set_peek_off)
+ ret = set_peek_off(sk, val);
else
ret = -EOPNOTSUPP;
break;
+ }
case SO_NOFCS:
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
@@ -1691,7 +1693,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
case SO_LINGER:
lv = sizeof(v.ling);
v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
- v.ling.l_linger = sk->sk_lingertime / HZ;
+ v.ling.l_linger = READ_ONCE(sk->sk_lingertime) / HZ;
break;
case SO_BSDCOMPAT:
@@ -1717,8 +1719,8 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
case SO_TIMESTAMPING_OLD:
lv = sizeof(v.timestamping);
- v.timestamping.flags = sk->sk_tsflags;
- v.timestamping.bind_phc = sk->sk_bind_phc;
+ v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+ v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
break;
case SO_RCVTIMEO_OLD:
@@ -1823,14 +1825,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
case SO_PEERNAME:
{
- char address[128];
+ struct sockaddr_storage address;
- lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+ lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2);
if (lv < 0)
return -ENOTCONN;
if (lv < len)
return -EINVAL;
- if (copy_to_sockptr(optval, address, len))
+ if (copy_to_sockptr(optval, &address, len))
return -EFAULT;
goto lenout;
}
@@ -1867,7 +1869,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_PEEK_OFF:
- if (!sock->ops->set_peek_off)
+ if (!READ_ONCE(sock->ops)->set_peek_off)
return -EOPNOTSUPP;
v.val = READ_ONCE(sk->sk_peek_off);
@@ -2745,9 +2747,9 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
break;
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
break;
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
break;
timeo = schedule_timeout(timeo);
}
@@ -2775,7 +2777,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
goto failure;
err = -EPIPE;
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
goto failure;
if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
@@ -3137,10 +3139,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
int ret, amt = sk_mem_pages(size);
- sk->sk_forward_alloc += amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
ret = __sk_mem_raise_allocated(sk, size, amt, kind);
if (!ret)
- sk->sk_forward_alloc -= amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
return ret;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -3172,7 +3174,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
void __sk_mem_reclaim(struct sock *sk, int amount)
{
amount >>= PAGE_SHIFT;
- sk->sk_forward_alloc -= amount << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
__sk_mem_reduce_allocated(sk, amount);
}
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -3741,7 +3743,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
- mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+ mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 8f07fea39d9e..cb11750b1df5 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -18,7 +18,7 @@ struct bpf_stab {
struct bpf_map map;
struct sock **sks;
struct sk_psock_progs progs;
- raw_spinlock_t lock;
+ spinlock_t lock;
};
#define SOCK_CREATE_FLAG_MASK \
@@ -44,7 +44,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&stab->map, attr);
- raw_spin_lock_init(&stab->lock);
+ spin_lock_init(&stab->lock);
stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
sizeof(struct sock *),
@@ -411,7 +411,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock *sk;
int err = 0;
- raw_spin_lock_bh(&stab->lock);
+ spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
sk = xchg(psk, NULL);
@@ -421,7 +421,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
else
err = -EINVAL;
- raw_spin_unlock_bh(&stab->lock);
+ spin_unlock_bh(&stab->lock);
return err;
}
@@ -487,7 +487,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
psock = sk_psock(sk);
WARN_ON_ONCE(!psock);
- raw_spin_lock_bh(&stab->lock);
+ spin_lock_bh(&stab->lock);
osk = stab->sks[idx];
if (osk && flags == BPF_NOEXIST) {
ret = -EEXIST;
@@ -501,10 +501,10 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
stab->sks[idx] = sk;
if (osk)
sock_map_unref(osk, &stab->sks[idx]);
- raw_spin_unlock_bh(&stab->lock);
+ spin_unlock_bh(&stab->lock);
return 0;
out_unlock:
- raw_spin_unlock_bh(&stab->lock);
+ spin_unlock_bh(&stab->lock);
if (psock)
sk_psock_put(sk, psock);
out_free:
@@ -835,7 +835,7 @@ struct bpf_shtab_elem {
struct bpf_shtab_bucket {
struct hlist_head head;
- raw_spinlock_t lock;
+ spinlock_t lock;
};
struct bpf_shtab {
@@ -910,7 +910,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
* is okay since it's going away only after RCU grace period.
* However, we need to check whether it's still present.
*/
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
elem_probe = sock_hash_lookup_elem_raw(&bucket->head, elem->hash,
elem->key, map->key_size);
if (elem_probe && elem_probe == elem) {
@@ -918,7 +918,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
sock_map_unref(elem->sk, elem);
sock_hash_free_elem(htab, elem);
}
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
}
static long sock_hash_delete_elem(struct bpf_map *map, void *key)
@@ -932,7 +932,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
if (elem) {
hlist_del_rcu(&elem->node);
@@ -940,7 +940,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
sock_hash_free_elem(htab, elem);
ret = 0;
}
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
return ret;
}
@@ -1000,7 +1000,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
if (elem && flags == BPF_NOEXIST) {
ret = -EEXIST;
@@ -1026,10 +1026,10 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
sock_map_unref(elem->sk, elem);
sock_hash_free_elem(htab, elem);
}
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
return 0;
out_unlock:
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
sk_psock_put(sk, psock);
out_free:
sk_psock_free_link(link);
@@ -1115,7 +1115,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
for (i = 0; i < htab->buckets_num; i++) {
INIT_HLIST_HEAD(&htab->buckets[i].head);
- raw_spin_lock_init(&htab->buckets[i].lock);
+ spin_lock_init(&htab->buckets[i].lock);
}
return &htab->map;
@@ -1147,11 +1147,11 @@ static void sock_hash_free(struct bpf_map *map)
* exists, psock exists and holds a ref to socket. That
* lets us to grab a socket ref too.
*/
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
hlist_for_each_entry(elem, &bucket->head, node)
sock_hold(elem->sk);
hlist_move_list(&bucket->head, &unlink_list);
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
/* Process removed entries out of atomic context to
* block for socket lock before deleting the psock's
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 782273bb93c2..03f1edb948d7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -712,7 +712,8 @@ static __net_init int sysctl_core_net_init(struct net *net)
tmp->data += (char *)net - (char *)&init_net;
}
- net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);
+ net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl,
+ ARRAY_SIZE(netns_core_table));
if (net->core.sysctl_hdr == NULL)
goto err_reg;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 8362130bf085..a70670fe9a2d 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -14,7 +14,7 @@
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/bug.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index d76c9be5bfca..57d9c026aa3f 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -105,7 +105,6 @@ extern int sysctl_dccp_rx_ccid;
extern int sysctl_dccp_tx_ccid;
int dccp_feat_init(struct sock *sk);
-void dccp_feat_initialise_sysctls(void);
int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
u8 const *list, u8 len);
int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index fa8079303cb0..69453b936bd5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -130,7 +130,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr,
inet->inet_sport,
inet->inet_dport);
- inet->inet_id = get_random_u16();
+ atomic_set(&inet->inet_id, get_random_u16());
err = dccp_connect(sk);
rt = NULL;
@@ -247,7 +247,6 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
const u8 offset = iph->ihl << 2;
const struct dccp_hdr *dh;
struct dccp_sock *dp;
- struct inet_sock *inet;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct sock *sk;
@@ -255,12 +254,12 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
int err;
struct net *net = dev_net(skb->dev);
- /* Only need dccph_dport & dccph_sport which are the first
- * 4 bytes in dccp header.
- * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
- */
- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+ if (!pskb_may_pull(skb, offset + sizeof(*dh)))
+ return -EINVAL;
+ dh = (struct dccp_hdr *)(skb->data + offset);
+ if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
+ return -EINVAL;
+ iph = (struct iphdr *)skb->data;
dh = (struct dccp_hdr *)(skb->data + offset);
sk = __inet_lookup_established(net, &dccp_hashinfo,
@@ -361,8 +360,7 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
* --ANK (980905)
*/
- inet = inet_sk(sk);
- if (!sock_owned_by_user(sk) && inet->recverr) {
+ if (!sock_owned_by_user(sk) && inet_test_bit(RECVERR, sk)) {
sk->sk_err = err;
sk_error_report(sk);
} else { /* Only an error on timeout */
@@ -432,7 +430,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
- newinet->inet_id = get_random_u16();
+ atomic_set(&newinet->inet_id, get_random_u16());
if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
goto put_and_exit;
@@ -474,7 +472,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
.flowi4_oif = inet_iif(skb),
.daddr = iph->saddr,
.saddr = iph->daddr,
- .flowi4_tos = RT_CONN_FLAGS(sk),
+ .flowi4_tos = ip_sock_rt_tos(sk),
+ .flowi4_scope = ip_sock_rt_scope(sk),
.flowi4_proto = sk->sk_protocol,
.fl4_sport = dccp_hdr(skb)->dccph_dport,
.fl4_dport = dccp_hdr(skb)->dccph_sport,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d29d1163203d..c693a570682f 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -74,7 +74,7 @@ static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+ const struct ipv6hdr *hdr;
const struct dccp_hdr *dh;
struct dccp_sock *dp;
struct ipv6_pinfo *np;
@@ -83,12 +83,12 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
__u64 seq;
struct net *net = dev_net(skb->dev);
- /* Only need dccph_dport & dccph_sport which are the first
- * 4 bytes in dccp header.
- * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
- */
- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+ if (!pskb_may_pull(skb, offset + sizeof(*dh)))
+ return -EINVAL;
+ dh = (struct dccp_hdr *)(skb->data + offset);
+ if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
+ return -EINVAL;
+ hdr = (const struct ipv6hdr *)skb->data;
dh = (struct dccp_hdr *)(skb->data + offset);
sk = __inet6_lookup_established(net, &dccp_hashinfo,
@@ -1056,6 +1056,7 @@ static struct proto dccp_v6_prot = {
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct dccp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.rsk_prot = &dccp6_request_sock_ops,
.twsk_prot = &dccp6_timewait_sock_ops,
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
index 7e4c2a3b322b..c5d14c48def1 100644
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -13,10 +13,6 @@
struct dccp6_sock {
struct dccp_sock dccp;
- /*
- * ipv6_pinfo has to be the last member of dccp6_sock,
- * see inet6_sk_generic.
- */
struct ipv6_pinfo inet6;
};
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 4e3266e4d7c3..fcc5c9d64f46 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -315,11 +315,15 @@ EXPORT_SYMBOL_GPL(dccp_disconnect);
__poll_t dccp_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
- __poll_t mask;
struct sock *sk = sock->sk;
+ __poll_t mask;
+ u8 shutdown;
+ int state;
sock_poll_wait(file, sock, wait);
- if (sk->sk_state == DCCP_LISTEN)
+
+ state = inet_sk_state_load(sk);
+ if (state == DCCP_LISTEN)
return inet_csk_listen_poll(sk);
/* Socket is not locked. We are protected from async events
@@ -328,20 +332,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
*/
mask = 0;
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
mask = EPOLLERR;
+ shutdown = READ_ONCE(sk->sk_shutdown);
- if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
+ if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED)
mask |= EPOLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* Connected? */
- if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
+ if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
if (atomic_read(&sk->sk_rmem_alloc) > 0)
mask |= EPOLLIN | EPOLLRDNORM;
- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+ if (!(shutdown & SEND_SHUTDOWN)) {
if (sk_stream_is_writeable(sk)) {
mask |= EPOLLOUT | EPOLLWRNORM;
} else { /* send SIGIO later */
@@ -359,7 +364,6 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
}
return mask;
}
-
EXPORT_SYMBOL_GPL(dccp_poll);
int dccp_ioctl(struct sock *sk, int cmd, int *karg)
diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index ef91a76646a3..000da622116a 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y := leftover.o core.o netlink.o dev.o health.o
+obj-y := core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
+ resource.o param.o region.o health.o trap.o rate.o linecard.o
diff --git a/net/devlink/core.c b/net/devlink/core.c
index c23ebabadc52..6cec4afb01fb 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -5,9 +5,15 @@
*/
#include <net/genetlink.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/devlink.h>
#include "devl_internal.h"
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
+
DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
void *devlink_priv(struct devlink *devlink)
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index bf1d6f1bcfc7..bba4ace7d22b 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -174,7 +174,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
+static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
{
struct sk_buff *msg;
int err;
@@ -196,7 +196,7 @@ void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
-int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct sk_buff *msg;
@@ -217,17 +217,44 @@ int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
}
static int
-devlink_nl_cmd_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
+devlink_nl_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
{
return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
+ cb->nlh->nlmsg_seq, flags);
}
-const struct devlink_cmd devl_cmd_get = {
- .dump_one = devlink_nl_cmd_get_dump_one,
-};
+int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one);
+}
+
+void devlink_notify_register(struct devlink *devlink)
+{
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+ devlink_linecards_notify_register(devlink);
+ devlink_ports_notify_register(devlink);
+ devlink_trap_policers_notify_register(devlink);
+ devlink_trap_groups_notify_register(devlink);
+ devlink_traps_notify_register(devlink);
+ devlink_rates_notify_register(devlink);
+ devlink_regions_notify_register(devlink);
+ devlink_params_notify_register(devlink);
+}
+
+void devlink_notify_unregister(struct devlink *devlink)
+{
+ devlink_params_notify_unregister(devlink);
+ devlink_regions_notify_unregister(devlink);
+ devlink_rates_notify_unregister(devlink);
+ devlink_traps_notify_unregister(devlink);
+ devlink_trap_groups_notify_unregister(devlink);
+ devlink_trap_policers_notify_unregister(devlink);
+ devlink_ports_notify_unregister(devlink);
+ devlink_linecards_notify_unregister(devlink);
+ devlink_notify(devlink, DEVLINK_CMD_DEL);
+}
static void devlink_reload_failed_set(struct devlink *devlink,
bool reload_failed)
@@ -804,7 +831,7 @@ err_cancel_msg:
return err;
}
-int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_info_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct sk_buff *msg;
@@ -826,23 +853,24 @@ int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info)
}
static int
-devlink_nl_cmd_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
+devlink_nl_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
{
int err;
err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->nlh->nlmsg_seq, flags,
cb->extack);
if (err == -EOPNOTSUPP)
err = 0;
return err;
}
-const struct devlink_cmd devl_cmd_info_get = {
- .dump_one = devlink_nl_cmd_info_get_dump_one,
-};
+int devlink_nl_info_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(msg, cb, devlink_nl_info_get_dump_one);
+}
static int devlink_nl_flash_update_fill(struct sk_buff *msg,
struct devlink *devlink,
@@ -1204,8 +1232,7 @@ err_cancel_msg:
return err;
}
-int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct sk_buff *msg;
@@ -1228,23 +1255,25 @@ int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int
-devlink_nl_cmd_selftests_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_selftests_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
{
if (!devlink->ops->selftest_check)
return 0;
return devlink_nl_selftests_fill(msg, devlink,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->nlh->nlmsg_seq, flags,
cb->extack);
}
-const struct devlink_cmd devl_cmd_selftests_get = {
- .dump_one = devlink_nl_cmd_selftests_get_dump_one,
-};
+int devlink_nl_selftests_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_selftests_get_dump_one);
+}
static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
enum devlink_selftest_status test_status)
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 62921b2eb0d3..f6b5fea2e13c 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -3,6 +3,7 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
+#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>
@@ -11,6 +12,10 @@
#include <linux/xarray.h>
#include <net/devlink.h>
#include <net/net_namespace.h>
+#include <net/rtnetlink.h>
+#include <rdma/ib_verbs.h>
+
+#include "netlink_gen.h"
#define DEVLINK_REGISTERED XA_MARK_1
@@ -90,9 +95,6 @@ static inline bool devl_is_registered(struct devlink *devlink)
/* Netlink */
#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
-#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
-#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
-#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
enum devlink_multicast_groups {
DEVLINK_MCGRP_CONFIG,
@@ -114,21 +116,16 @@ struct devlink_nl_dump_state {
};
};
-struct devlink_cmd {
- int (*dump_one)(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb);
-};
-
-extern const struct genl_small_ops devlink_nl_ops[56];
+typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags);
struct devlink *
devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
-void devlink_notify_unregister(struct devlink *devlink);
-void devlink_notify_register(struct devlink *devlink);
-
-int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb);
+int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
+ devlink_nl_dump_one_func_t *dump_one);
static inline struct devlink_nl_dump_state *
devlink_dump_state(struct netlink_callback *cb)
@@ -148,31 +145,36 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
return 0;
}
-/* Commands */
-extern const struct devlink_cmd devl_cmd_get;
-extern const struct devlink_cmd devl_cmd_port_get;
-extern const struct devlink_cmd devl_cmd_sb_get;
-extern const struct devlink_cmd devl_cmd_sb_pool_get;
-extern const struct devlink_cmd devl_cmd_sb_port_pool_get;
-extern const struct devlink_cmd devl_cmd_sb_tc_pool_bind_get;
-extern const struct devlink_cmd devl_cmd_param_get;
-extern const struct devlink_cmd devl_cmd_region_get;
-extern const struct devlink_cmd devl_cmd_info_get;
-extern const struct devlink_cmd devl_cmd_health_reporter_get;
-extern const struct devlink_cmd devl_cmd_trap_get;
-extern const struct devlink_cmd devl_cmd_trap_group_get;
-extern const struct devlink_cmd devl_cmd_trap_policer_get;
-extern const struct devlink_cmd devl_cmd_rate_get;
-extern const struct devlink_cmd devl_cmd_linecard_get;
-extern const struct devlink_cmd devl_cmd_selftests_get;
+int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info);
/* Notify */
-void devlink_notify(struct devlink *devlink, enum devlink_command cmd);
+void devlink_notify_register(struct devlink *devlink);
+void devlink_notify_unregister(struct devlink *devlink);
+void devlink_ports_notify_register(struct devlink *devlink);
+void devlink_ports_notify_unregister(struct devlink *devlink);
+void devlink_params_notify_register(struct devlink *devlink);
+void devlink_params_notify_unregister(struct devlink *devlink);
+void devlink_regions_notify_register(struct devlink *devlink);
+void devlink_regions_notify_unregister(struct devlink *devlink);
+void devlink_trap_policers_notify_register(struct devlink *devlink);
+void devlink_trap_policers_notify_unregister(struct devlink *devlink);
+void devlink_trap_groups_notify_register(struct devlink *devlink);
+void devlink_trap_groups_notify_unregister(struct devlink *devlink);
+void devlink_traps_notify_register(struct devlink *devlink);
+void devlink_traps_notify_unregister(struct devlink *devlink);
+void devlink_rates_notify_register(struct devlink *devlink);
+void devlink_rates_notify_unregister(struct devlink *devlink);
+void devlink_linecards_notify_register(struct devlink *devlink);
+void devlink_linecards_notify_unregister(struct devlink *devlink);
/* Ports */
+#define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port) \
+ WARN_ON_ONCE(!(devlink_port)->initialized)
+
+struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
+ unsigned int port_index);
int devlink_port_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr);
-
struct devlink_port *
devlink_port_get_from_info(struct devlink *devlink, struct genl_info *info);
struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
@@ -199,31 +201,66 @@ int devlink_resources_validate(struct devlink *devlink,
struct devlink_resource *resource,
struct genl_info *info);
-/* Line cards */
-struct devlink_linecard;
-
-struct devlink_linecard *
-devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info);
-
/* Rates */
int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack);
-struct devlink_rate *
-devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info);
-struct devlink_rate *
-devlink_rate_node_get_from_info(struct devlink *devlink,
- struct genl_info *info);
+
+/* Linecards */
+struct devlink_linecard {
+ struct list_head list;
+ struct devlink *devlink;
+ unsigned int index;
+ const struct devlink_linecard_ops *ops;
+ void *priv;
+ enum devlink_linecard_state state;
+ struct mutex state_lock; /* Protects state */
+ const char *type;
+ struct devlink_linecard_type *types;
+ unsigned int types_count;
+ struct devlink *nested_devlink;
+};
+
/* Devlink nl cmds */
-int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info);
-int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
+int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
struct genl_info *info);
+int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb);
+int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
@@ -236,3 +273,13 @@ int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
struct genl_info *info);
+int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c
new file mode 100644
index 000000000000..431227c412e5
--- /dev/null
+++ b/net/devlink/dpipe.c
@@ -0,0 +1,917 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
+ {
+ .name = "destination mac",
+ .id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
+ .bitwidth = 48,
+ },
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
+ .name = "ethernet",
+ .id = DEVLINK_DPIPE_HEADER_ETHERNET,
+ .fields = devlink_dpipe_fields_ethernet,
+ .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
+ .global = true,
+};
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ethernet);
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
+ {
+ .name = "destination ip",
+ .id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
+ .bitwidth = 32,
+ },
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
+ .name = "ipv4",
+ .id = DEVLINK_DPIPE_HEADER_IPV4,
+ .fields = devlink_dpipe_fields_ipv4,
+ .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
+ .global = true,
+};
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv4);
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
+ {
+ .name = "destination ip",
+ .id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
+ .bitwidth = 128,
+ },
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
+ .name = "ipv6",
+ .id = DEVLINK_DPIPE_HEADER_IPV6,
+ .fields = devlink_dpipe_fields_ipv6,
+ .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
+ .global = true,
+};
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv6);
+
+int devlink_dpipe_match_put(struct sk_buff *skb,
+ struct devlink_dpipe_match *match)
+{
+ struct devlink_dpipe_header *header = match->header;
+ struct devlink_dpipe_field *field = &header->fields[match->field_id];
+ struct nlattr *match_attr;
+
+ match_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_MATCH);
+ if (!match_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, match_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, match_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_match_put);
+
+static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table,
+ struct sk_buff *skb)
+{
+ struct nlattr *matches_attr;
+
+ matches_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
+ if (!matches_attr)
+ return -EMSGSIZE;
+
+ if (table->table_ops->matches_dump(table->priv, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, matches_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, matches_attr);
+ return -EMSGSIZE;
+}
+
+int devlink_dpipe_action_put(struct sk_buff *skb,
+ struct devlink_dpipe_action *action)
+{
+ struct devlink_dpipe_header *header = action->header;
+ struct devlink_dpipe_field *field = &header->fields[action->field_id];
+ struct nlattr *action_attr;
+
+ action_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ACTION);
+ if (!action_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, action_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, action_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_action_put);
+
+static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table,
+ struct sk_buff *skb)
+{
+ struct nlattr *actions_attr;
+
+ actions_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
+ if (!actions_attr)
+ return -EMSGSIZE;
+
+ if (table->table_ops->actions_dump(table->priv, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, actions_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, actions_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_table_put(struct sk_buff *skb,
+ struct devlink_dpipe_table *table)
+{
+ struct nlattr *table_attr;
+ u64 table_size;
+
+ table_size = table->table_ops->size_get(table->priv);
+ table_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLE);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
+ table->counters_enabled))
+ goto nla_put_failure;
+
+ if (table->resource_valid) {
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
+ table->resource_id, DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
+ table->resource_units, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ }
+ if (devlink_dpipe_matches_put(table, skb))
+ goto nla_put_failure;
+
+ if (devlink_dpipe_actions_put(table, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, table_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, table_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb,
+ struct genl_info *info)
+{
+ int err;
+
+ if (*pskb) {
+ err = genlmsg_reply(*pskb, info);
+ if (err)
+ return err;
+ }
+ *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!*pskb)
+ return -ENOMEM;
+ return 0;
+}
+
+static int devlink_dpipe_tables_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct list_head *dpipe_tables,
+ const char *table_name)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_dpipe_table *table;
+ struct nlattr *tables_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ bool incomplete;
+ void *hdr;
+ int i;
+ int err;
+
+ table = list_first_entry(dpipe_tables,
+ struct devlink_dpipe_table, list);
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr) {
+ nlmsg_free(skb);
+ return -EMSGSIZE;
+ }
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+ tables_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLES);
+ if (!tables_attr)
+ goto nla_put_failure;
+
+ i = 0;
+ incomplete = false;
+ list_for_each_entry_from(table, dpipe_tables, list) {
+ if (!table_name) {
+ err = devlink_dpipe_table_put(skb, table);
+ if (err) {
+ if (!i)
+ goto err_table_put;
+ incomplete = true;
+ break;
+ }
+ } else {
+ if (!strcmp(table->name, table_name)) {
+ err = devlink_dpipe_table_put(skb, table);
+ if (err)
+ break;
+ }
+ }
+ i++;
+ }
+
+ nla_nest_end(skb, tables_attr);
+ genlmsg_end(skb, hdr);
+ if (incomplete)
+ goto start_again;
+
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+ goto send_done;
+ }
+
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_table_put:
+ nlmsg_free(skb);
+ return err;
+}
+
+int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *table_name = NULL;
+
+ if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+
+ return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0,
+ &devlink->dpipe_table_list,
+ table_name);
+}
+
+static int devlink_dpipe_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE,
+ value->value_size, value->value))
+ return -EMSGSIZE;
+ if (value->mask)
+ if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK,
+ value->value_size, value->mask))
+ return -EMSGSIZE;
+ if (value->mapping_valid)
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING,
+ value->mapping_value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_action_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (!value->action)
+ return -EINVAL;
+ if (devlink_dpipe_action_put(skb, value->action))
+ return -EMSGSIZE;
+ if (devlink_dpipe_value_put(skb, value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_action_values_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *values,
+ unsigned int values_count)
+{
+ struct nlattr *action_attr;
+ int i;
+ int err;
+
+ for (i = 0; i < values_count; i++) {
+ action_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_ACTION_VALUE);
+ if (!action_attr)
+ return -EMSGSIZE;
+ err = devlink_dpipe_action_value_put(skb, &values[i]);
+ if (err)
+ goto err_action_value_put;
+ nla_nest_end(skb, action_attr);
+ }
+ return 0;
+
+err_action_value_put:
+ nla_nest_cancel(skb, action_attr);
+ return err;
+}
+
+static int devlink_dpipe_match_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (!value->match)
+ return -EINVAL;
+ if (devlink_dpipe_match_put(skb, value->match))
+ return -EMSGSIZE;
+ if (devlink_dpipe_value_put(skb, value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_match_values_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *values,
+ unsigned int values_count)
+{
+ struct nlattr *match_attr;
+ int i;
+ int err;
+
+ for (i = 0; i < values_count; i++) {
+ match_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_MATCH_VALUE);
+ if (!match_attr)
+ return -EMSGSIZE;
+ err = devlink_dpipe_match_value_put(skb, &values[i]);
+ if (err)
+ goto err_match_value_put;
+ nla_nest_end(skb, match_attr);
+ }
+ return 0;
+
+err_match_value_put:
+ nla_nest_cancel(skb, match_attr);
+ return err;
+}
+
+static int devlink_dpipe_entry_put(struct sk_buff *skb,
+ struct devlink_dpipe_entry *entry)
+{
+ struct nlattr *entry_attr, *matches_attr, *actions_attr;
+ int err;
+
+ entry_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (entry->counter_valid)
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
+ entry->counter, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ matches_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
+ if (!matches_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_match_values_put(skb, entry->match_values,
+ entry->match_values_count);
+ if (err) {
+ nla_nest_cancel(skb, matches_attr);
+ goto err_match_values_put;
+ }
+ nla_nest_end(skb, matches_attr);
+
+ actions_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
+ if (!actions_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_action_values_put(skb, entry->action_values,
+ entry->action_values_count);
+ if (err) {
+ nla_nest_cancel(skb, actions_attr);
+ goto err_action_values_put;
+ }
+ nla_nest_end(skb, actions_attr);
+
+ nla_nest_end(skb, entry_attr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_match_values_put:
+err_action_values_put:
+ nla_nest_cancel(skb, entry_attr);
+ return err;
+}
+
+static struct devlink_dpipe_table *
+devlink_dpipe_table_find(struct list_head *dpipe_tables,
+ const char *table_name, struct devlink *devlink)
+{
+ struct devlink_dpipe_table *table;
+
+ list_for_each_entry_rcu(table, dpipe_tables, list,
+ lockdep_is_held(&devlink->lock)) {
+ if (!strcmp(table->name, table_name))
+ return table;
+ }
+ return NULL;
+}
+
+int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct devlink *devlink;
+ int err;
+
+ err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb,
+ dump_ctx->info);
+ if (err)
+ return err;
+
+ dump_ctx->hdr = genlmsg_put(dump_ctx->skb,
+ dump_ctx->info->snd_portid,
+ dump_ctx->info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI,
+ dump_ctx->cmd);
+ if (!dump_ctx->hdr)
+ goto nla_put_failure;
+
+ devlink = dump_ctx->info->user_ptr[0];
+ if (devlink_nl_put_handle(dump_ctx->skb, devlink))
+ goto nla_put_failure;
+ dump_ctx->nest = nla_nest_start_noflag(dump_ctx->skb,
+ DEVLINK_ATTR_DPIPE_ENTRIES);
+ if (!dump_ctx->nest)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ nlmsg_free(dump_ctx->skb);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare);
+
+int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
+ struct devlink_dpipe_entry *entry)
+{
+ return devlink_dpipe_entry_put(dump_ctx->skb, entry);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append);
+
+int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ nla_nest_end(dump_ctx->skb, dump_ctx->nest);
+ genlmsg_end(dump_ctx->skb, dump_ctx->hdr);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
+
+void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
+
+{
+ unsigned int value_count, value_index;
+ struct devlink_dpipe_value *value;
+
+ value = entry->action_values;
+ value_count = entry->action_values_count;
+ for (value_index = 0; value_index < value_count; value_index++) {
+ kfree(value[value_index].value);
+ kfree(value[value_index].mask);
+ }
+
+ value = entry->match_values;
+ value_count = entry->match_values_count;
+ for (value_index = 0; value_index < value_count; value_index++) {
+ kfree(value[value_index].value);
+ kfree(value[value_index].mask);
+ }
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_clear);
+
+static int devlink_dpipe_entries_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct devlink_dpipe_table *table)
+{
+ struct devlink_dpipe_dump_ctx dump_ctx;
+ struct nlmsghdr *nlh;
+ int err;
+
+ dump_ctx.skb = NULL;
+ dump_ctx.cmd = cmd;
+ dump_ctx.info = info;
+
+ err = table->table_ops->entries_dump(table->priv,
+ table->counters_enabled,
+ &dump_ctx);
+ if (err)
+ return err;
+
+send_done:
+ nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
+ if (err)
+ return err;
+ goto send_done;
+ }
+ return genlmsg_reply(dump_ctx.skb, info);
+}
+
+int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_dpipe_table *table;
+ const char *table_name;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME))
+ return -EINVAL;
+
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name, devlink);
+ if (!table)
+ return -EINVAL;
+
+ if (!table->table_ops->entries_dump)
+ return -EINVAL;
+
+ return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ 0, table);
+}
+
+static int devlink_dpipe_fields_put(struct sk_buff *skb,
+ const struct devlink_dpipe_header *header)
+{
+ struct devlink_dpipe_field *field;
+ struct nlattr *field_attr;
+ int i;
+
+ for (i = 0; i < header->fields_count; i++) {
+ field = &header->fields[i];
+ field_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_FIELD);
+ if (!field_attr)
+ return -EMSGSIZE;
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type))
+ goto nla_put_failure;
+ nla_nest_end(skb, field_attr);
+ }
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, field_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_header_put(struct sk_buff *skb,
+ struct devlink_dpipe_header *header)
+{
+ struct nlattr *fields_attr, *header_attr;
+ int err;
+
+ header_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADER);
+ if (!header_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ fields_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
+ if (!fields_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_fields_put(skb, header);
+ if (err) {
+ nla_nest_cancel(skb, fields_attr);
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, fields_attr);
+ nla_nest_end(skb, header_attr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+ nla_nest_cancel(skb, header_attr);
+ return err;
+}
+
+static int devlink_dpipe_headers_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct devlink_dpipe_headers *
+ dpipe_headers)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct nlattr *headers_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ void *hdr;
+ int i, j;
+ int err;
+
+ i = 0;
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr) {
+ nlmsg_free(skb);
+ return -EMSGSIZE;
+ }
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+ headers_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADERS);
+ if (!headers_attr)
+ goto nla_put_failure;
+
+ j = 0;
+ for (; i < dpipe_headers->headers_count; i++) {
+ err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]);
+ if (err) {
+ if (!j)
+ goto err_table_put;
+ break;
+ }
+ j++;
+ }
+ nla_nest_end(skb, headers_attr);
+ genlmsg_end(skb, hdr);
+ if (i != dpipe_headers->headers_count)
+ goto start_again;
+
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+ goto send_done;
+ }
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_table_put:
+ nlmsg_free(skb);
+ return err;
+}
+
+int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (!devlink->dpipe_headers)
+ return -EOPNOTSUPP;
+ return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET,
+ 0, devlink->dpipe_headers);
+}
+
+static int devlink_dpipe_table_counters_set(struct devlink *devlink,
+ const char *table_name,
+ bool enable)
+{
+ struct devlink_dpipe_table *table;
+
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name, devlink);
+ if (!table)
+ return -EINVAL;
+
+ if (table->counter_control_extern)
+ return -EOPNOTSUPP;
+
+ if (!(table->counters_enabled ^ enable))
+ return 0;
+
+ table->counters_enabled = enable;
+ if (table->table_ops->counters_set_update)
+ table->table_ops->counters_set_update(table->priv, enable);
+ return 0;
+}
+
+int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *table_name;
+ bool counters_enable;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME) ||
+ GENL_REQ_ATTR_CHECK(info,
+ DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED))
+ return -EINVAL;
+
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+ counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]);
+
+ return devlink_dpipe_table_counters_set(devlink, table_name,
+ counters_enable);
+}
+
+/**
+ * devl_dpipe_headers_register - register dpipe headers
+ *
+ * @devlink: devlink
+ * @dpipe_headers: dpipe header array
+ *
+ * Register the headers supported by hardware.
+ */
+void devl_dpipe_headers_register(struct devlink *devlink,
+ struct devlink_dpipe_headers *dpipe_headers)
+{
+ lockdep_assert_held(&devlink->lock);
+
+ devlink->dpipe_headers = dpipe_headers;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_headers_register);
+
+/**
+ * devl_dpipe_headers_unregister - unregister dpipe headers
+ *
+ * @devlink: devlink
+ *
+ * Unregister the headers supported by hardware.
+ */
+void devl_dpipe_headers_unregister(struct devlink *devlink)
+{
+ lockdep_assert_held(&devlink->lock);
+
+ devlink->dpipe_headers = NULL;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_headers_unregister);
+
+/**
+ * devlink_dpipe_table_counter_enabled - check if counter allocation
+ * required
+ * @devlink: devlink
+ * @table_name: tables name
+ *
+ * Used by driver to check if counter allocation is required.
+ * After counter allocation is turned on the table entries
+ * are updated to include counter statistics.
+ *
+ * After that point on the driver must respect the counter
+ * state so that each entry added to the table is added
+ * with a counter.
+ */
+bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+ bool enabled;
+
+ rcu_read_lock();
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name, devlink);
+ enabled = false;
+ if (table)
+ enabled = table->counters_enabled;
+ rcu_read_unlock();
+ return enabled;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
+
+/**
+ * devl_dpipe_table_register - register dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ * @table_ops: table ops
+ * @priv: priv
+ * @counter_control_extern: external control for counters
+ */
+int devl_dpipe_table_register(struct devlink *devlink,
+ const char *table_name,
+ struct devlink_dpipe_table_ops *table_ops,
+ void *priv, bool counter_control_extern)
+{
+ struct devlink_dpipe_table *table;
+
+ lockdep_assert_held(&devlink->lock);
+
+ if (WARN_ON(!table_ops->size_get))
+ return -EINVAL;
+
+ if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
+ devlink))
+ return -EEXIST;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ table->name = table_name;
+ table->table_ops = table_ops;
+ table->priv = priv;
+ table->counter_control_extern = counter_control_extern;
+
+ list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_table_register);
+
+/**
+ * devl_dpipe_table_unregister - unregister dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ */
+void devl_dpipe_table_unregister(struct devlink *devlink,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+
+ lockdep_assert_held(&devlink->lock);
+
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name, devlink);
+ if (!table)
+ return;
+ list_del_rcu(&table->list);
+ kfree_rcu(table, rcu);
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_table_unregister);
+
+/**
+ * devl_dpipe_table_resource_set - set the resource id
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ * @resource_id: resource id
+ * @resource_units: number of resource's units consumed per table's entry
+ */
+int devl_dpipe_table_resource_set(struct devlink *devlink,
+ const char *table_name, u64 resource_id,
+ u64 resource_units)
+{
+ struct devlink_dpipe_table *table;
+
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name, devlink);
+ if (!table)
+ return -EINVAL;
+
+ table->resource_id = resource_id;
+ table->resource_units = resource_units;
+ table->resource_valid = true;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_dpipe_table_resource_set);
diff --git a/net/devlink/health.c b/net/devlink/health.c
index 194340a8bb86..638cad8d5c65 100644
--- a/net/devlink/health.c
+++ b/net/devlink/health.c
@@ -356,8 +356,8 @@ devlink_health_reporter_get_from_info(struct devlink *devlink,
return devlink_health_reporter_get_from_attrs(devlink, info->attrs);
}
-int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_health_reporter_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
@@ -384,18 +384,29 @@ int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int
-devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
+static int devlink_nl_health_reporter_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ const struct genl_info *info = genl_info_dump(cb);
struct devlink_health_reporter *reporter;
+ unsigned long port_index_end = ULONG_MAX;
+ struct nlattr **attrs = info->attrs;
+ unsigned long port_index_start = 0;
struct devlink_port *port;
unsigned long port_index;
int idx = 0;
int err;
+ if (attrs && attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ port_index_start = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+ port_index_end = port_index_start;
+ flags |= NLM_F_DUMP_FILTERED;
+ goto per_port_dump;
+ }
+
list_for_each_entry(reporter, &devlink->reporter_list, list) {
if (idx < state->idx) {
idx++;
@@ -405,14 +416,16 @@ devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
DEVLINK_CMD_HEALTH_REPORTER_GET,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ flags);
if (err) {
state->idx = idx;
return err;
}
idx++;
}
- xa_for_each(&devlink->ports, port_index, port) {
+per_port_dump:
+ xa_for_each_range(&devlink->ports, port_index, port,
+ port_index_start, port_index_end) {
list_for_each_entry(reporter, &port->reporter_list, list) {
if (idx < state->idx) {
idx++;
@@ -422,7 +435,7 @@ devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
DEVLINK_CMD_HEALTH_REPORTER_GET,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ flags);
if (err) {
state->idx = idx;
return err;
@@ -434,9 +447,12 @@ devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
return 0;
}
-const struct devlink_cmd devl_cmd_health_reporter_get = {
- .dump_one = devlink_nl_cmd_health_reporter_get_dump_one,
-};
+int devlink_nl_health_reporter_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb,
+ devlink_nl_health_reporter_get_dump_one);
+}
int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
struct genl_info *info)
@@ -1248,7 +1264,7 @@ out:
static struct devlink_health_reporter *
devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ const struct genl_info *info = genl_info_dump(cb);
struct devlink_health_reporter *reporter;
struct nlattr **attrs = info->attrs;
struct devlink *devlink;
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
deleted file mode 100644
index 1f00f874471f..000000000000
--- a/net/devlink/leftover.c
+++ /dev/null
@@ -1,9507 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/core/devlink.c - Network physical/parent device Netlink interface
- *
- * Heavily inspired by net/wireless/
- * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
- */
-
-#include <linux/etherdevice.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/gfp.h>
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/refcount.h>
-#include <linux/workqueue.h>
-#include <linux/u64_stats_sync.h>
-#include <linux/timekeeping.h>
-#include <rdma/ib_verbs.h>
-#include <net/netlink.h>
-#include <net/genetlink.h>
-#include <net/rtnetlink.h>
-#include <net/net_namespace.h>
-#include <net/sock.h>
-#include <net/devlink.h>
-#define CREATE_TRACE_POINTS
-#include <trace/events/devlink.h>
-
-#include "devl_internal.h"
-
-struct devlink_linecard {
- struct list_head list;
- struct devlink *devlink;
- unsigned int index;
- const struct devlink_linecard_ops *ops;
- void *priv;
- enum devlink_linecard_state state;
- struct mutex state_lock; /* Protects state */
- const char *type;
- struct devlink_linecard_type *types;
- unsigned int types_count;
- struct devlink *nested_devlink;
-};
-
-/**
- * struct devlink_resource - devlink resource
- * @name: name of the resource
- * @id: id, per devlink instance
- * @size: size of the resource
- * @size_new: updated size of the resource, reload is needed
- * @size_valid: valid in case the total size of the resource is valid
- * including its children
- * @parent: parent resource
- * @size_params: size parameters
- * @list: parent list
- * @resource_list: list of child resources
- * @occ_get: occupancy getter callback
- * @occ_get_priv: occupancy getter callback priv
- */
-struct devlink_resource {
- const char *name;
- u64 id;
- u64 size;
- u64 size_new;
- bool size_valid;
- struct devlink_resource *parent;
- struct devlink_resource_size_params size_params;
- struct list_head list;
- struct list_head resource_list;
- devlink_resource_occ_get_t *occ_get;
- void *occ_get_priv;
-};
-
-static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
- {
- .name = "destination mac",
- .id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
- .bitwidth = 48,
- },
-};
-
-struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
- .name = "ethernet",
- .id = DEVLINK_DPIPE_HEADER_ETHERNET,
- .fields = devlink_dpipe_fields_ethernet,
- .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
- .global = true,
-};
-EXPORT_SYMBOL_GPL(devlink_dpipe_header_ethernet);
-
-static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
- {
- .name = "destination ip",
- .id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
- .bitwidth = 32,
- },
-};
-
-struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
- .name = "ipv4",
- .id = DEVLINK_DPIPE_HEADER_IPV4,
- .fields = devlink_dpipe_fields_ipv4,
- .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
- .global = true,
-};
-EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv4);
-
-static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
- {
- .name = "destination ip",
- .id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
- .bitwidth = 128,
- },
-};
-
-struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
- .name = "ipv6",
- .id = DEVLINK_DPIPE_HEADER_IPV6,
- .fields = devlink_dpipe_fields_ipv6,
- .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
- .global = true,
-};
-EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv6);
-
-EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
-EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
-EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
-
-#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
- (_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
-
-static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
- [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
- [DEVLINK_PORT_FN_ATTR_STATE] =
- NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
- DEVLINK_PORT_FN_STATE_ACTIVE),
- [DEVLINK_PORT_FN_ATTR_CAPS] =
- NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
-};
-
-#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \
- WARN_ON_ONCE(!(devlink_port)->registered)
-#define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \
- WARN_ON_ONCE((devlink_port)->registered)
-#define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port) \
- WARN_ON_ONCE(!(devlink_port)->initialized)
-
-static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
- unsigned int port_index)
-{
- return xa_load(&devlink->ports, port_index);
-}
-
-struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
- struct nlattr **attrs)
-{
- if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
- u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
- struct devlink_port *devlink_port;
-
- devlink_port = devlink_port_get_by_index(devlink, port_index);
- if (!devlink_port)
- return ERR_PTR(-ENODEV);
- return devlink_port;
- }
- return ERR_PTR(-EINVAL);
-}
-
-struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
- struct genl_info *info)
-{
- return devlink_port_get_from_attrs(devlink, info->attrs);
-}
-
-static inline bool
-devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
-{
- return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF;
-}
-
-static inline bool
-devlink_rate_is_node(struct devlink_rate *devlink_rate)
-{
- return devlink_rate->type == DEVLINK_RATE_TYPE_NODE;
-}
-
-static struct devlink_rate *
-devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
-{
- struct devlink_rate *devlink_rate;
- struct devlink_port *devlink_port;
-
- devlink_port = devlink_port_get_from_attrs(devlink, info->attrs);
- if (IS_ERR(devlink_port))
- return ERR_CAST(devlink_port);
- devlink_rate = devlink_port->devlink_rate;
- return devlink_rate ?: ERR_PTR(-ENODEV);
-}
-
-static struct devlink_rate *
-devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name)
-{
- static struct devlink_rate *devlink_rate;
-
- list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
- if (devlink_rate_is_node(devlink_rate) &&
- !strcmp(node_name, devlink_rate->name))
- return devlink_rate;
- }
- return ERR_PTR(-ENODEV);
-}
-
-static struct devlink_rate *
-devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
-{
- const char *rate_node_name;
- size_t len;
-
- if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME])
- return ERR_PTR(-EINVAL);
- rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]);
- len = strlen(rate_node_name);
- /* Name cannot be empty or decimal number */
- if (!len || strspn(rate_node_name, "0123456789") == len)
- return ERR_PTR(-EINVAL);
-
- return devlink_rate_node_get_by_name(devlink, rate_node_name);
-}
-
-struct devlink_rate *
-devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
-{
- return devlink_rate_node_get_from_attrs(devlink, info->attrs);
-}
-
-struct devlink_rate *
-devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
-{
- struct nlattr **attrs = info->attrs;
-
- if (attrs[DEVLINK_ATTR_PORT_INDEX])
- return devlink_rate_leaf_get_from_info(devlink, info);
- else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME])
- return devlink_rate_node_get_from_info(devlink, info);
- else
- return ERR_PTR(-EINVAL);
-}
-
-static struct devlink_linecard *
-devlink_linecard_get_by_index(struct devlink *devlink,
- unsigned int linecard_index)
-{
- struct devlink_linecard *devlink_linecard;
-
- list_for_each_entry(devlink_linecard, &devlink->linecard_list, list) {
- if (devlink_linecard->index == linecard_index)
- return devlink_linecard;
- }
- return NULL;
-}
-
-static bool devlink_linecard_index_exists(struct devlink *devlink,
- unsigned int linecard_index)
-{
- return devlink_linecard_get_by_index(devlink, linecard_index);
-}
-
-static struct devlink_linecard *
-devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
-{
- if (attrs[DEVLINK_ATTR_LINECARD_INDEX]) {
- u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]);
- struct devlink_linecard *linecard;
-
- linecard = devlink_linecard_get_by_index(devlink, linecard_index);
- if (!linecard)
- return ERR_PTR(-ENODEV);
- return linecard;
- }
- return ERR_PTR(-EINVAL);
-}
-
-struct devlink_linecard *
-devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
-{
- return devlink_linecard_get_from_attrs(devlink, info->attrs);
-}
-
-struct devlink_sb {
- struct list_head list;
- unsigned int index;
- u32 size;
- u16 ingress_pools_count;
- u16 egress_pools_count;
- u16 ingress_tc_count;
- u16 egress_tc_count;
-};
-
-static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb)
-{
- return devlink_sb->ingress_pools_count + devlink_sb->egress_pools_count;
-}
-
-static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink,
- unsigned int sb_index)
-{
- struct devlink_sb *devlink_sb;
-
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- if (devlink_sb->index == sb_index)
- return devlink_sb;
- }
- return NULL;
-}
-
-static bool devlink_sb_index_exists(struct devlink *devlink,
- unsigned int sb_index)
-{
- return devlink_sb_get_by_index(devlink, sb_index);
-}
-
-static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink,
- struct nlattr **attrs)
-{
- if (attrs[DEVLINK_ATTR_SB_INDEX]) {
- u32 sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]);
- struct devlink_sb *devlink_sb;
-
- devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
- if (!devlink_sb)
- return ERR_PTR(-ENODEV);
- return devlink_sb;
- }
- return ERR_PTR(-EINVAL);
-}
-
-static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink,
- struct genl_info *info)
-{
- return devlink_sb_get_from_attrs(devlink, info->attrs);
-}
-
-static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb,
- struct nlattr **attrs,
- u16 *p_pool_index)
-{
- u16 val;
-
- if (!attrs[DEVLINK_ATTR_SB_POOL_INDEX])
- return -EINVAL;
-
- val = nla_get_u16(attrs[DEVLINK_ATTR_SB_POOL_INDEX]);
- if (val >= devlink_sb_pool_count(devlink_sb))
- return -EINVAL;
- *p_pool_index = val;
- return 0;
-}
-
-static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb,
- struct genl_info *info,
- u16 *p_pool_index)
-{
- return devlink_sb_pool_index_get_from_attrs(devlink_sb, info->attrs,
- p_pool_index);
-}
-
-static int
-devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs,
- enum devlink_sb_pool_type *p_pool_type)
-{
- u8 val;
-
- if (!attrs[DEVLINK_ATTR_SB_POOL_TYPE])
- return -EINVAL;
-
- val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_TYPE]);
- if (val != DEVLINK_SB_POOL_TYPE_INGRESS &&
- val != DEVLINK_SB_POOL_TYPE_EGRESS)
- return -EINVAL;
- *p_pool_type = val;
- return 0;
-}
-
-static int
-devlink_sb_pool_type_get_from_info(struct genl_info *info,
- enum devlink_sb_pool_type *p_pool_type)
-{
- return devlink_sb_pool_type_get_from_attrs(info->attrs, p_pool_type);
-}
-
-static int
-devlink_sb_th_type_get_from_attrs(struct nlattr **attrs,
- enum devlink_sb_threshold_type *p_th_type)
-{
- u8 val;
-
- if (!attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE])
- return -EINVAL;
-
- val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]);
- if (val != DEVLINK_SB_THRESHOLD_TYPE_STATIC &&
- val != DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC)
- return -EINVAL;
- *p_th_type = val;
- return 0;
-}
-
-static int
-devlink_sb_th_type_get_from_info(struct genl_info *info,
- enum devlink_sb_threshold_type *p_th_type)
-{
- return devlink_sb_th_type_get_from_attrs(info->attrs, p_th_type);
-}
-
-static int
-devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
- struct nlattr **attrs,
- enum devlink_sb_pool_type pool_type,
- u16 *p_tc_index)
-{
- u16 val;
-
- if (!attrs[DEVLINK_ATTR_SB_TC_INDEX])
- return -EINVAL;
-
- val = nla_get_u16(attrs[DEVLINK_ATTR_SB_TC_INDEX]);
- if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS &&
- val >= devlink_sb->ingress_tc_count)
- return -EINVAL;
- if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS &&
- val >= devlink_sb->egress_tc_count)
- return -EINVAL;
- *p_tc_index = val;
- return 0;
-}
-
-static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
- u32 cap, bool is_enable)
-{
- caps->selector |= cap;
- if (is_enable)
- caps->value |= cap;
-}
-
-static int devlink_port_fn_roce_fill(struct devlink_port *devlink_port,
- struct nla_bitfield32 *caps,
- struct netlink_ext_ack *extack)
-{
- bool is_enable;
- int err;
-
- if (!devlink_port->ops->port_fn_roce_get)
- return 0;
-
- err = devlink_port->ops->port_fn_roce_get(devlink_port, &is_enable,
- extack);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
- }
-
- devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable);
- return 0;
-}
-
-static int devlink_port_fn_migratable_fill(struct devlink_port *devlink_port,
- struct nla_bitfield32 *caps,
- struct netlink_ext_ack *extack)
-{
- bool is_enable;
- int err;
-
- if (!devlink_port->ops->port_fn_migratable_get ||
- devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
- return 0;
-
- err = devlink_port->ops->port_fn_migratable_get(devlink_port,
- &is_enable, extack);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
- }
-
- devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_MIGRATABLE, is_enable);
- return 0;
-}
-
-static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
- struct sk_buff *msg,
- struct netlink_ext_ack *extack,
- bool *msg_updated)
-{
- struct nla_bitfield32 caps = {};
- int err;
-
- err = devlink_port_fn_roce_fill(devlink_port, &caps, extack);
- if (err)
- return err;
-
- err = devlink_port_fn_migratable_fill(devlink_port, &caps, extack);
- if (err)
- return err;
-
- if (!caps.selector)
- return 0;
- err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
- caps.selector);
- if (err)
- return err;
-
- *msg_updated = true;
- return 0;
-}
-
-static int
-devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
- struct genl_info *info,
- enum devlink_sb_pool_type pool_type,
- u16 *p_tc_index)
-{
- return devlink_sb_tc_index_get_from_attrs(devlink_sb, info->attrs,
- pool_type, p_tc_index);
-}
-
-struct devlink_region {
- struct devlink *devlink;
- struct devlink_port *port;
- struct list_head list;
- union {
- const struct devlink_region_ops *ops;
- const struct devlink_port_region_ops *port_ops;
- };
- struct mutex snapshot_lock; /* protects snapshot_list,
- * max_snapshots and cur_snapshots
- * consistency.
- */
- struct list_head snapshot_list;
- u32 max_snapshots;
- u32 cur_snapshots;
- u64 size;
-};
-
-struct devlink_snapshot {
- struct list_head list;
- struct devlink_region *region;
- u8 *data;
- u32 id;
-};
-
-static struct devlink_region *
-devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
-{
- struct devlink_region *region;
-
- list_for_each_entry(region, &devlink->region_list, list)
- if (!strcmp(region->ops->name, region_name))
- return region;
-
- return NULL;
-}
-
-static struct devlink_region *
-devlink_port_region_get_by_name(struct devlink_port *port,
- const char *region_name)
-{
- struct devlink_region *region;
-
- list_for_each_entry(region, &port->region_list, list)
- if (!strcmp(region->ops->name, region_name))
- return region;
-
- return NULL;
-}
-
-static struct devlink_snapshot *
-devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
-{
- struct devlink_snapshot *snapshot;
-
- list_for_each_entry(snapshot, &region->snapshot_list, list)
- if (snapshot->id == id)
- return snapshot;
-
- return NULL;
-}
-
-static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
-{
- struct nlattr *nested_attr;
-
- nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK);
- if (!nested_attr)
- return -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- nla_nest_end(msg, nested_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, nested_attr);
- return -EMSGSIZE;
-}
-
-int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port)
-{
- if (devlink_nl_put_handle(msg, devlink_port->devlink))
- return -EMSGSIZE;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
- return -EMSGSIZE;
- return 0;
-}
-
-size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port)
-{
- struct devlink *devlink = devlink_port->devlink;
-
- return nla_total_size(strlen(devlink->dev->bus->name) + 1) /* DEVLINK_ATTR_BUS_NAME */
- + nla_total_size(strlen(dev_name(devlink->dev)) + 1) /* DEVLINK_ATTR_DEV_NAME */
- + nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */
-}
-
-static int devlink_nl_port_attrs_put(struct sk_buff *msg,
- struct devlink_port *devlink_port)
-{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
-
- if (!devlink_port->attrs_set)
- return 0;
- if (attrs->lanes) {
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_LANES, attrs->lanes))
- return -EMSGSIZE;
- }
- if (nla_put_u8(msg, DEVLINK_ATTR_PORT_SPLITTABLE, attrs->splittable))
- return -EMSGSIZE;
- if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
- return -EMSGSIZE;
- switch (devlink_port->attrs.flavour) {
- case DEVLINK_PORT_FLAVOUR_PCI_PF:
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
- attrs->pci_pf.controller) ||
- nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_pf.pf))
- return -EMSGSIZE;
- if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_pf.external))
- return -EMSGSIZE;
- break;
- case DEVLINK_PORT_FLAVOUR_PCI_VF:
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
- attrs->pci_vf.controller) ||
- nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_vf.pf) ||
- nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, attrs->pci_vf.vf))
- return -EMSGSIZE;
- if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_vf.external))
- return -EMSGSIZE;
- break;
- case DEVLINK_PORT_FLAVOUR_PCI_SF:
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
- attrs->pci_sf.controller) ||
- nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
- attrs->pci_sf.pf) ||
- nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_SF_NUMBER,
- attrs->pci_sf.sf))
- return -EMSGSIZE;
- break;
- case DEVLINK_PORT_FLAVOUR_PHYSICAL:
- case DEVLINK_PORT_FLAVOUR_CPU:
- case DEVLINK_PORT_FLAVOUR_DSA:
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
- attrs->phys.port_number))
- return -EMSGSIZE;
- if (!attrs->split)
- return 0;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
- attrs->phys.port_number))
- return -EMSGSIZE;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
- attrs->phys.split_subport_number))
- return -EMSGSIZE;
- break;
- default:
- break;
- }
- return 0;
-}
-
-static int devlink_port_fn_hw_addr_fill(struct devlink_port *port,
- struct sk_buff *msg,
- struct netlink_ext_ack *extack,
- bool *msg_updated)
-{
- u8 hw_addr[MAX_ADDR_LEN];
- int hw_addr_len;
- int err;
-
- if (!port->ops->port_fn_hw_addr_get)
- return 0;
-
- err = port->ops->port_fn_hw_addr_get(port, hw_addr, &hw_addr_len,
- extack);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
- }
- err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr);
- if (err)
- return err;
- *msg_updated = true;
- return 0;
-}
-
-static int devlink_nl_rate_fill(struct sk_buff *msg,
- struct devlink_rate *devlink_rate,
- enum devlink_command cmd, u32 portid, u32 seq,
- int flags, struct netlink_ext_ack *extack)
-{
- struct devlink *devlink = devlink_rate->devlink;
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type))
- goto nla_put_failure;
-
- if (devlink_rate_is_leaf(devlink_rate)) {
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
- devlink_rate->devlink_port->index))
- goto nla_put_failure;
- } else if (devlink_rate_is_node(devlink_rate)) {
- if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME,
- devlink_rate->name))
- goto nla_put_failure;
- }
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
- devlink_rate->tx_share, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX,
- devlink_rate->tx_max, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY,
- devlink_rate->tx_priority))
- goto nla_put_failure;
-
- if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_WEIGHT,
- devlink_rate->tx_weight))
- goto nla_put_failure;
-
- if (devlink_rate->parent)
- if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME,
- devlink_rate->parent->name))
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static bool
-devlink_port_fn_state_valid(enum devlink_port_fn_state state)
-{
- return state == DEVLINK_PORT_FN_STATE_INACTIVE ||
- state == DEVLINK_PORT_FN_STATE_ACTIVE;
-}
-
-static bool
-devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
-{
- return opstate == DEVLINK_PORT_FN_OPSTATE_DETACHED ||
- opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
-}
-
-static int devlink_port_fn_state_fill(struct devlink_port *port,
- struct sk_buff *msg,
- struct netlink_ext_ack *extack,
- bool *msg_updated)
-{
- enum devlink_port_fn_opstate opstate;
- enum devlink_port_fn_state state;
- int err;
-
- if (!port->ops->port_fn_state_get)
- return 0;
-
- err = port->ops->port_fn_state_get(port, &state, &opstate, extack);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
- }
- if (!devlink_port_fn_state_valid(state)) {
- WARN_ON_ONCE(1);
- NL_SET_ERR_MSG(extack, "Invalid state read from driver");
- return -EINVAL;
- }
- if (!devlink_port_fn_opstate_valid(opstate)) {
- WARN_ON_ONCE(1);
- NL_SET_ERR_MSG(extack, "Invalid operational state read from driver");
- return -EINVAL;
- }
- if (nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_STATE, state) ||
- nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_OPSTATE, opstate))
- return -EMSGSIZE;
- *msg_updated = true;
- return 0;
-}
-
-static int
-devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable,
- struct netlink_ext_ack *extack)
-{
- return devlink_port->ops->port_fn_migratable_set(devlink_port, enable,
- extack);
-}
-
-static int
-devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
- struct netlink_ext_ack *extack)
-{
- return devlink_port->ops->port_fn_roce_set(devlink_port, enable,
- extack);
-}
-
-static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
- const struct nlattr *attr,
- struct netlink_ext_ack *extack)
-{
- struct nla_bitfield32 caps;
- u32 caps_value;
- int err;
-
- caps = nla_get_bitfield32(attr);
- caps_value = caps.value & caps.selector;
- if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
- err = devlink_port_fn_roce_set(devlink_port,
- caps_value & DEVLINK_PORT_FN_CAP_ROCE,
- extack);
- if (err)
- return err;
- }
- if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
- err = devlink_port_fn_mig_set(devlink_port, caps_value &
- DEVLINK_PORT_FN_CAP_MIGRATABLE,
- extack);
- if (err)
- return err;
- }
- return 0;
-}
-
-static int
-devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *function_attr;
- bool msg_updated = false;
- int err;
-
- function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION);
- if (!function_attr)
- return -EMSGSIZE;
-
- err = devlink_port_fn_hw_addr_fill(port, msg, extack, &msg_updated);
- if (err)
- goto out;
- err = devlink_port_fn_caps_fill(port, msg, extack, &msg_updated);
- if (err)
- goto out;
- err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
-out:
- if (err || !msg_updated)
- nla_nest_cancel(msg, function_attr);
- else
- nla_nest_end(msg, function_attr);
- return err;
-}
-
-static int devlink_nl_port_fill(struct sk_buff *msg,
- struct devlink_port *devlink_port,
- enum devlink_command cmd, u32 portid, u32 seq,
- int flags, struct netlink_ext_ack *extack)
-{
- struct devlink *devlink = devlink_port->devlink;
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
- goto nla_put_failure;
-
- spin_lock_bh(&devlink_port->type_lock);
- if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
- goto nla_put_failure_type_locked;
- if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
- nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE,
- devlink_port->desired_type))
- goto nla_put_failure_type_locked;
- if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
- if (devlink_port->type_eth.netdev &&
- (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
- devlink_port->type_eth.ifindex) ||
- nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
- devlink_port->type_eth.ifname)))
- goto nla_put_failure_type_locked;
- }
- if (devlink_port->type == DEVLINK_PORT_TYPE_IB) {
- struct ib_device *ibdev = devlink_port->type_ib.ibdev;
-
- if (ibdev &&
- nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME,
- ibdev->name))
- goto nla_put_failure_type_locked;
- }
- spin_unlock_bh(&devlink_port->type_lock);
- if (devlink_nl_port_attrs_put(msg, devlink_port))
- goto nla_put_failure;
- if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
- goto nla_put_failure;
- if (devlink_port->linecard &&
- nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
- devlink_port->linecard->index))
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure_type_locked:
- spin_unlock_bh(&devlink_port->type_lock);
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void devlink_port_notify(struct devlink_port *devlink_port,
- enum devlink_command cmd)
-{
- struct devlink *devlink = devlink_port->devlink;
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
-
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static void devlink_rate_notify(struct devlink_rate *devlink_rate,
- enum devlink_command cmd)
-{
- struct devlink *devlink = devlink_rate->devlink;
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
-
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_rate_fill(msg, devlink_rate, cmd, 0, 0, 0, NULL);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static int
-devlink_nl_cmd_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_rate *devlink_rate;
- int idx = 0;
- int err = 0;
-
- list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
- enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
- u32 id = NETLINK_CB(cb->skb).portid;
-
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, NULL);
- if (err) {
- state->idx = idx;
- break;
- }
- idx++;
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_rate_get = {
- .dump_one = devlink_nl_cmd_rate_get_dump_one,
-};
-
-static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_rate *devlink_rate = info->user_ptr[1];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW,
- info->snd_portid, info->snd_seq, 0,
- info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static bool
-devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
- struct devlink_rate *parent)
-{
- while (parent) {
- if (parent == devlink_rate)
- return true;
- parent = parent->parent;
- }
- return false;
-}
-
-static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW,
- info->snd_portid, info->snd_seq, 0,
- info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_port *devlink_port;
- unsigned long port_index;
- int err = 0;
-
- xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
- err = devlink_nl_port_fill(msg, devlink_port,
- DEVLINK_CMD_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, cb->extack);
- if (err) {
- state->idx = port_index;
- break;
- }
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_port_get = {
- .dump_one = devlink_nl_cmd_port_get_dump_one,
-};
-
-static int devlink_port_type_set(struct devlink_port *devlink_port,
- enum devlink_port_type port_type)
-
-{
- int err;
-
- if (!devlink_port->ops->port_type_set)
- return -EOPNOTSUPP;
-
- if (port_type == devlink_port->type)
- return 0;
-
- err = devlink_port->ops->port_type_set(devlink_port, port_type);
- if (err)
- return err;
-
- devlink_port->desired_type = port_type;
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
- return 0;
-}
-
-static int devlink_port_function_hw_addr_set(struct devlink_port *port,
- const struct nlattr *attr,
- struct netlink_ext_ack *extack)
-{
- const u8 *hw_addr;
- int hw_addr_len;
-
- hw_addr = nla_data(attr);
- hw_addr_len = nla_len(attr);
- if (hw_addr_len > MAX_ADDR_LEN) {
- NL_SET_ERR_MSG(extack, "Port function hardware address too long");
- return -EINVAL;
- }
- if (port->type == DEVLINK_PORT_TYPE_ETH) {
- if (hw_addr_len != ETH_ALEN) {
- NL_SET_ERR_MSG(extack, "Address must be 6 bytes for Ethernet device");
- return -EINVAL;
- }
- if (!is_unicast_ether_addr(hw_addr)) {
- NL_SET_ERR_MSG(extack, "Non-unicast hardware address unsupported");
- return -EINVAL;
- }
- }
-
- return port->ops->port_fn_hw_addr_set(port, hw_addr, hw_addr_len,
- extack);
-}
-
-static int devlink_port_fn_state_set(struct devlink_port *port,
- const struct nlattr *attr,
- struct netlink_ext_ack *extack)
-{
- enum devlink_port_fn_state state;
-
- state = nla_get_u8(attr);
- return port->ops->port_fn_state_set(port, state, extack);
-}
-
-static int devlink_port_function_validate(struct devlink_port *devlink_port,
- struct nlattr **tb,
- struct netlink_ext_ack *extack)
-{
- const struct devlink_port_ops *ops = devlink_port->ops;
- struct nlattr *attr;
-
- if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
- !ops->port_fn_hw_addr_set) {
- NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
- "Port doesn't support function attributes");
- return -EOPNOTSUPP;
- }
- if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
- NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
- "Function does not support state setting");
- return -EOPNOTSUPP;
- }
- attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
- if (attr) {
- struct nla_bitfield32 caps;
-
- caps = nla_get_bitfield32(attr);
- if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE &&
- !ops->port_fn_roce_set) {
- NL_SET_ERR_MSG_ATTR(extack, attr,
- "Port doesn't support RoCE function attribute");
- return -EOPNOTSUPP;
- }
- if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
- if (!ops->port_fn_migratable_set) {
- NL_SET_ERR_MSG_ATTR(extack, attr,
- "Port doesn't support migratable function attribute");
- return -EOPNOTSUPP;
- }
- if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
- NL_SET_ERR_MSG_ATTR(extack, attr,
- "migratable function attribute supported for VFs only");
- return -EOPNOTSUPP;
- }
- }
- }
- return 0;
-}
-
-static int devlink_port_function_set(struct devlink_port *port,
- const struct nlattr *attr,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
- int err;
-
- err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr,
- devlink_function_nl_policy, extack);
- if (err < 0) {
- NL_SET_ERR_MSG(extack, "Fail to parse port function attributes");
- return err;
- }
-
- err = devlink_port_function_validate(port, tb, extack);
- if (err)
- return err;
-
- attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
- if (attr) {
- err = devlink_port_function_hw_addr_set(port, attr, extack);
- if (err)
- return err;
- }
-
- attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
- if (attr) {
- err = devlink_port_fn_caps_set(port, attr, extack);
- if (err)
- return err;
- }
-
- /* Keep this as the last function attribute set, so that when
- * multiple port function attributes are set along with state,
- * Those can be applied first before activating the state.
- */
- attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
- if (attr)
- err = devlink_port_fn_state_set(port, attr, extack);
-
- if (!err)
- devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
- return err;
-}
-
-static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- int err;
-
- if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
- enum devlink_port_type port_type;
-
- port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
- err = devlink_port_type_set(devlink_port, port_type);
- if (err)
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_PORT_FUNCTION]) {
- struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
- struct netlink_ext_ack *extack = info->extack;
-
- err = devlink_port_function_set(devlink_port, attr, extack);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = info->user_ptr[0];
- u32 count;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_SPLIT_COUNT))
- return -EINVAL;
- if (!devlink_port->ops->port_split)
- return -EOPNOTSUPP;
-
- count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
-
- if (!devlink_port->attrs.splittable) {
- /* Split ports cannot be split. */
- if (devlink_port->attrs.split)
- NL_SET_ERR_MSG(info->extack, "Port cannot be split further");
- else
- NL_SET_ERR_MSG(info->extack, "Port cannot be split");
- return -EINVAL;
- }
-
- if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) {
- NL_SET_ERR_MSG(info->extack, "Invalid split count");
- return -EINVAL;
- }
-
- return devlink_port->ops->port_split(devlink, devlink_port, count,
- info->extack);
-}
-
-static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = info->user_ptr[0];
-
- if (!devlink_port->ops->port_unsplit)
- return -EOPNOTSUPP;
- return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack);
-}
-
-static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct netlink_ext_ack *extack = info->extack;
- struct devlink_port_new_attrs new_attrs = {};
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_port *devlink_port;
- struct sk_buff *msg;
- int err;
-
- if (!devlink->ops->port_new)
- return -EOPNOTSUPP;
-
- if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] ||
- !info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) {
- NL_SET_ERR_MSG(extack, "Port flavour or PCI PF are not specified");
- return -EINVAL;
- }
- new_attrs.flavour = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_FLAVOUR]);
- new_attrs.pfnum =
- nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]);
-
- if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
- /* Port index of the new port being created by driver. */
- new_attrs.port_index =
- nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
- new_attrs.port_index_valid = true;
- }
- if (info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]) {
- new_attrs.controller =
- nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]);
- new_attrs.controller_valid = true;
- }
- if (new_attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_SF &&
- info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]) {
- new_attrs.sfnum = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]);
- new_attrs.sfnum_valid = true;
- }
-
- err = devlink->ops->port_new(devlink, &new_attrs,
- extack, &devlink_port);
- if (err)
- return err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg) {
- err = -ENOMEM;
- goto err_out_port_del;
- }
- err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
- info->snd_portid, info->snd_seq, 0, NULL);
- if (WARN_ON_ONCE(err))
- goto err_out_msg_free;
- err = genlmsg_reply(msg, info);
- if (err)
- goto err_out_port_del;
- return 0;
-
-err_out_msg_free:
- nlmsg_free(msg);
-err_out_port_del:
- devlink_port->ops->port_del(devlink, devlink_port, NULL);
- return err;
-}
-
-static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
-
- if (!devlink_port->ops->port_del)
- return -EOPNOTSUPP;
-
- return devlink_port->ops->port_del(devlink, devlink_port, extack);
-}
-
-static int
-devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
- struct genl_info *info,
- struct nlattr *nla_parent)
-{
- struct devlink *devlink = devlink_rate->devlink;
- const char *parent_name = nla_data(nla_parent);
- const struct devlink_ops *ops = devlink->ops;
- size_t len = strlen(parent_name);
- struct devlink_rate *parent;
- int err = -EOPNOTSUPP;
-
- parent = devlink_rate->parent;
-
- if (parent && !len) {
- if (devlink_rate_is_leaf(devlink_rate))
- err = ops->rate_leaf_parent_set(devlink_rate, NULL,
- devlink_rate->priv, NULL,
- info->extack);
- else if (devlink_rate_is_node(devlink_rate))
- err = ops->rate_node_parent_set(devlink_rate, NULL,
- devlink_rate->priv, NULL,
- info->extack);
- if (err)
- return err;
-
- refcount_dec(&parent->refcnt);
- devlink_rate->parent = NULL;
- } else if (len) {
- parent = devlink_rate_node_get_by_name(devlink, parent_name);
- if (IS_ERR(parent))
- return -ENODEV;
-
- if (parent == devlink_rate) {
- NL_SET_ERR_MSG(info->extack, "Parent to self is not allowed");
- return -EINVAL;
- }
-
- if (devlink_rate_is_node(devlink_rate) &&
- devlink_rate_is_parent_node(devlink_rate, parent->parent)) {
- NL_SET_ERR_MSG(info->extack, "Node is already a parent of parent node.");
- return -EEXIST;
- }
-
- if (devlink_rate_is_leaf(devlink_rate))
- err = ops->rate_leaf_parent_set(devlink_rate, parent,
- devlink_rate->priv, parent->priv,
- info->extack);
- else if (devlink_rate_is_node(devlink_rate))
- err = ops->rate_node_parent_set(devlink_rate, parent,
- devlink_rate->priv, parent->priv,
- info->extack);
- if (err)
- return err;
-
- if (devlink_rate->parent)
- /* we're reassigning to other parent in this case */
- refcount_dec(&devlink_rate->parent->refcnt);
-
- refcount_inc(&parent->refcnt);
- devlink_rate->parent = parent;
- }
-
- return 0;
-}
-
-static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
- const struct devlink_ops *ops,
- struct genl_info *info)
-{
- struct nlattr *nla_parent, **attrs = info->attrs;
- int err = -EOPNOTSUPP;
- u32 priority;
- u32 weight;
- u64 rate;
-
- if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) {
- rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]);
- if (devlink_rate_is_leaf(devlink_rate))
- err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv,
- rate, info->extack);
- else if (devlink_rate_is_node(devlink_rate))
- err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv,
- rate, info->extack);
- if (err)
- return err;
- devlink_rate->tx_share = rate;
- }
-
- if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) {
- rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]);
- if (devlink_rate_is_leaf(devlink_rate))
- err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv,
- rate, info->extack);
- else if (devlink_rate_is_node(devlink_rate))
- err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv,
- rate, info->extack);
- if (err)
- return err;
- devlink_rate->tx_max = rate;
- }
-
- if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]) {
- priority = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]);
- if (devlink_rate_is_leaf(devlink_rate))
- err = ops->rate_leaf_tx_priority_set(devlink_rate, devlink_rate->priv,
- priority, info->extack);
- else if (devlink_rate_is_node(devlink_rate))
- err = ops->rate_node_tx_priority_set(devlink_rate, devlink_rate->priv,
- priority, info->extack);
-
- if (err)
- return err;
- devlink_rate->tx_priority = priority;
- }
-
- if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]) {
- weight = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]);
- if (devlink_rate_is_leaf(devlink_rate))
- err = ops->rate_leaf_tx_weight_set(devlink_rate, devlink_rate->priv,
- weight, info->extack);
- else if (devlink_rate_is_node(devlink_rate))
- err = ops->rate_node_tx_weight_set(devlink_rate, devlink_rate->priv,
- weight, info->extack);
-
- if (err)
- return err;
- devlink_rate->tx_weight = weight;
- }
-
- nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME];
- if (nla_parent) {
- err = devlink_nl_rate_parent_node_set(devlink_rate, info,
- nla_parent);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
- struct genl_info *info,
- enum devlink_rate_type type)
-{
- struct nlattr **attrs = info->attrs;
-
- if (type == DEVLINK_RATE_TYPE_LEAF) {
- if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) {
- NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the leafs");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) {
- NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the leafs");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
- !ops->rate_leaf_parent_set) {
- NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the leafs");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_leaf_tx_priority_set) {
- NL_SET_ERR_MSG_ATTR(info->extack,
- attrs[DEVLINK_ATTR_RATE_TX_PRIORITY],
- "TX priority set isn't supported for the leafs");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_leaf_tx_weight_set) {
- NL_SET_ERR_MSG_ATTR(info->extack,
- attrs[DEVLINK_ATTR_RATE_TX_WEIGHT],
- "TX weight set isn't supported for the leafs");
- return false;
- }
- } else if (type == DEVLINK_RATE_TYPE_NODE) {
- if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
- NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) {
- NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the nodes");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
- !ops->rate_node_parent_set) {
- NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the nodes");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_node_tx_priority_set) {
- NL_SET_ERR_MSG_ATTR(info->extack,
- attrs[DEVLINK_ATTR_RATE_TX_PRIORITY],
- "TX priority set isn't supported for the nodes");
- return false;
- }
- if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_node_tx_weight_set) {
- NL_SET_ERR_MSG_ATTR(info->extack,
- attrs[DEVLINK_ATTR_RATE_TX_WEIGHT],
- "TX weight set isn't supported for the nodes");
- return false;
- }
- } else {
- WARN(1, "Unknown type of rate object");
- return false;
- }
-
- return true;
-}
-
-static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_rate *devlink_rate = info->user_ptr[1];
- struct devlink *devlink = devlink_rate->devlink;
- const struct devlink_ops *ops = devlink->ops;
- int err;
-
- if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
- return -EOPNOTSUPP;
-
- err = devlink_nl_rate_set(devlink_rate, ops, info);
-
- if (!err)
- devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
- return err;
-}
-
-static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_rate *rate_node;
- const struct devlink_ops *ops;
- int err;
-
- ops = devlink->ops;
- if (!ops || !ops->rate_node_new || !ops->rate_node_del) {
- NL_SET_ERR_MSG(info->extack, "Rate nodes aren't supported");
- return -EOPNOTSUPP;
- }
-
- if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE))
- return -EOPNOTSUPP;
-
- rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs);
- if (!IS_ERR(rate_node))
- return -EEXIST;
- else if (rate_node == ERR_PTR(-EINVAL))
- return -EINVAL;
-
- rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
- if (!rate_node)
- return -ENOMEM;
-
- rate_node->devlink = devlink;
- rate_node->type = DEVLINK_RATE_TYPE_NODE;
- rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL);
- if (!rate_node->name) {
- err = -ENOMEM;
- goto err_strdup;
- }
-
- err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack);
- if (err)
- goto err_node_new;
-
- err = devlink_nl_rate_set(rate_node, ops, info);
- if (err)
- goto err_rate_set;
-
- refcount_set(&rate_node->refcnt, 1);
- list_add(&rate_node->list, &devlink->rate_list);
- devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
- return 0;
-
-err_rate_set:
- ops->rate_node_del(rate_node, rate_node->priv, info->extack);
-err_node_new:
- kfree(rate_node->name);
-err_strdup:
- kfree(rate_node);
- return err;
-}
-
-static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_rate *rate_node = info->user_ptr[1];
- struct devlink *devlink = rate_node->devlink;
- const struct devlink_ops *ops = devlink->ops;
- int err;
-
- if (refcount_read(&rate_node->refcnt) > 1) {
- NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node.");
- return -EBUSY;
- }
-
- devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
- err = ops->rate_node_del(rate_node, rate_node->priv, info->extack);
- if (rate_node->parent)
- refcount_dec(&rate_node->parent->refcnt);
- list_del(&rate_node->list);
- kfree(rate_node->name);
- kfree(rate_node);
- return err;
-}
-
-struct devlink_linecard_type {
- const char *type;
- const void *priv;
-};
-
-static int devlink_nl_linecard_fill(struct sk_buff *msg,
- struct devlink *devlink,
- struct devlink_linecard *linecard,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags,
- struct netlink_ext_ack *extack)
-{
- struct devlink_linecard_type *linecard_type;
- struct nlattr *attr;
- void *hdr;
- int i;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index))
- goto nla_put_failure;
- if (nla_put_u8(msg, DEVLINK_ATTR_LINECARD_STATE, linecard->state))
- goto nla_put_failure;
- if (linecard->type &&
- nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, linecard->type))
- goto nla_put_failure;
-
- if (linecard->types_count) {
- attr = nla_nest_start(msg,
- DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES);
- if (!attr)
- goto nla_put_failure;
- for (i = 0; i < linecard->types_count; i++) {
- linecard_type = &linecard->types[i];
- if (nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE,
- linecard_type->type)) {
- nla_nest_cancel(msg, attr);
- goto nla_put_failure;
- }
- }
- nla_nest_end(msg, attr);
- }
-
- if (linecard->nested_devlink &&
- devlink_nl_put_nested_handle(msg, linecard->nested_devlink))
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void devlink_linecard_notify(struct devlink_linecard *linecard,
- enum devlink_command cmd)
-{
- struct devlink *devlink = linecard->devlink;
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW &&
- cmd != DEVLINK_CMD_LINECARD_DEL);
-
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_linecard_fill(msg, devlink, linecard, cmd, 0, 0, 0,
- NULL);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_linecard *linecard = info->user_ptr[1];
- struct devlink *devlink = linecard->devlink;
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- mutex_lock(&linecard->state_lock);
- err = devlink_nl_linecard_fill(msg, devlink, linecard,
- DEVLINK_CMD_LINECARD_NEW,
- info->snd_portid, info->snd_seq, 0,
- info->extack);
- mutex_unlock(&linecard->state_lock);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_linecard_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_linecard *linecard;
- int idx = 0;
- int err = 0;
-
- list_for_each_entry(linecard, &devlink->linecard_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- mutex_lock(&linecard->state_lock);
- err = devlink_nl_linecard_fill(msg, devlink, linecard,
- DEVLINK_CMD_LINECARD_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI,
- cb->extack);
- mutex_unlock(&linecard->state_lock);
- if (err) {
- state->idx = idx;
- break;
- }
- idx++;
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_linecard_get = {
- .dump_one = devlink_nl_cmd_linecard_get_dump_one,
-};
-
-static struct devlink_linecard_type *
-devlink_linecard_type_lookup(struct devlink_linecard *linecard,
- const char *type)
-{
- struct devlink_linecard_type *linecard_type;
- int i;
-
- for (i = 0; i < linecard->types_count; i++) {
- linecard_type = &linecard->types[i];
- if (!strcmp(type, linecard_type->type))
- return linecard_type;
- }
- return NULL;
-}
-
-static int devlink_linecard_type_set(struct devlink_linecard *linecard,
- const char *type,
- struct netlink_ext_ack *extack)
-{
- const struct devlink_linecard_ops *ops = linecard->ops;
- struct devlink_linecard_type *linecard_type;
- int err;
-
- mutex_lock(&linecard->state_lock);
- if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
- NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
- err = -EBUSY;
- goto out;
- }
- if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
- NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
- err = -EBUSY;
- goto out;
- }
-
- linecard_type = devlink_linecard_type_lookup(linecard, type);
- if (!linecard_type) {
- NL_SET_ERR_MSG(extack, "Unsupported line card type provided");
- err = -EINVAL;
- goto out;
- }
-
- if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED &&
- linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
- NL_SET_ERR_MSG(extack, "Line card already provisioned");
- err = -EBUSY;
- /* Check if the line card is provisioned in the same
- * way the user asks. In case it is, make the operation
- * to return success.
- */
- if (ops->same_provision &&
- ops->same_provision(linecard, linecard->priv,
- linecard_type->type,
- linecard_type->priv))
- err = 0;
- goto out;
- }
-
- linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING;
- linecard->type = linecard_type->type;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
- err = ops->provision(linecard, linecard->priv, linecard_type->type,
- linecard_type->priv, extack);
- if (err) {
- /* Provisioning failed. Assume the linecard is unprovisioned
- * for future operations.
- */
- mutex_lock(&linecard->state_lock);
- linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
- linecard->type = NULL;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
- }
- return err;
-
-out:
- mutex_unlock(&linecard->state_lock);
- return err;
-}
-
-static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- mutex_lock(&linecard->state_lock);
- if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
- NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
- err = -EBUSY;
- goto out;
- }
- if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
- NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
- err = -EBUSY;
- goto out;
- }
- if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
- linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
- linecard->type = NULL;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- err = 0;
- goto out;
- }
-
- if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) {
- NL_SET_ERR_MSG(extack, "Line card is not provisioned");
- err = 0;
- goto out;
- }
- linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONING;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
- err = linecard->ops->unprovision(linecard, linecard->priv,
- extack);
- if (err) {
- /* Unprovisioning failed. Assume the linecard is unprovisioned
- * for future operations.
- */
- mutex_lock(&linecard->state_lock);
- linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
- linecard->type = NULL;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
- }
- return err;
-
-out:
- mutex_unlock(&linecard->state_lock);
- return err;
-}
-
-static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_linecard *linecard = info->user_ptr[1];
- struct netlink_ext_ack *extack = info->extack;
- int err;
-
- if (info->attrs[DEVLINK_ATTR_LINECARD_TYPE]) {
- const char *type;
-
- type = nla_data(info->attrs[DEVLINK_ATTR_LINECARD_TYPE]);
- if (strcmp(type, "")) {
- err = devlink_linecard_type_set(linecard, type, extack);
- if (err)
- return err;
- } else {
- err = devlink_linecard_type_unset(linecard, extack);
- if (err)
- return err;
- }
- }
-
- return 0;
-}
-
-static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
- struct devlink_sb *devlink_sb,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
-{
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT,
- devlink_sb->ingress_pools_count))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT,
- devlink_sb->egress_pools_count))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT,
- devlink_sb->ingress_tc_count))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT,
- devlink_sb->egress_tc_count))
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb;
- struct sk_buff *msg;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
- DEVLINK_CMD_SB_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_sb *devlink_sb;
- int idx = 0;
- int err = 0;
-
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
- DEVLINK_CMD_SB_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- state->idx = idx;
- break;
- }
- idx++;
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_sb_get = {
- .dump_one = devlink_nl_cmd_sb_get_dump_one,
-};
-
-static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
- struct devlink_sb *devlink_sb,
- u16 pool_index, enum devlink_command cmd,
- u32 portid, u32 seq, int flags)
-{
- struct devlink_sb_pool_info pool_info;
- void *hdr;
- int err;
-
- err = devlink->ops->sb_pool_get(devlink, devlink_sb->index,
- pool_index, &pool_info);
- if (err)
- return err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
- goto nla_put_failure;
- if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_info.pool_type))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, pool_info.size))
- goto nla_put_failure;
- if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,
- pool_info.threshold_type))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_CELL_SIZE,
- pool_info.cell_size))
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb;
- struct sk_buff *msg;
- u16 pool_index;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
- &pool_index);
- if (err)
- return err;
-
- if (!devlink->ops->sb_pool_get)
- return -EOPNOTSUPP;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index,
- DEVLINK_CMD_SB_POOL_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
- struct devlink *devlink,
- struct devlink_sb *devlink_sb,
- u32 portid, u32 seq)
-{
- u16 pool_count = devlink_sb_pool_count(devlink_sb);
- u16 pool_index;
- int err;
-
- for (pool_index = 0; pool_index < pool_count; pool_index++) {
- if (*p_idx < start) {
- (*p_idx)++;
- continue;
- }
- err = devlink_nl_sb_pool_fill(msg, devlink,
- devlink_sb,
- pool_index,
- DEVLINK_CMD_SB_POOL_NEW,
- portid, seq, NLM_F_MULTI);
- if (err)
- return err;
- (*p_idx)++;
- }
- return 0;
-}
-
-static int
-devlink_nl_cmd_sb_pool_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_sb *devlink_sb;
- int err = 0;
- int idx = 0;
-
- if (!devlink->ops->sb_pool_get)
- return 0;
-
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- err = __sb_pool_get_dumpit(msg, state->idx, &idx,
- devlink, devlink_sb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- state->idx = idx;
- break;
- }
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_sb_pool_get = {
- .dump_one = devlink_nl_cmd_sb_pool_get_dump_one,
-};
-
-static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
- u16 pool_index, u32 size,
- enum devlink_sb_threshold_type threshold_type,
- struct netlink_ext_ack *extack)
-
-{
- const struct devlink_ops *ops = devlink->ops;
-
- if (ops->sb_pool_set)
- return ops->sb_pool_set(devlink, sb_index, pool_index,
- size, threshold_type, extack);
- return -EOPNOTSUPP;
-}
-
-static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- enum devlink_sb_threshold_type threshold_type;
- struct devlink_sb *devlink_sb;
- u16 pool_index;
- u32 size;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
- &pool_index);
- if (err)
- return err;
-
- err = devlink_sb_th_type_get_from_info(info, &threshold_type);
- if (err)
- return err;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_POOL_SIZE))
- return -EINVAL;
-
- size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]);
- return devlink_sb_pool_set(devlink, devlink_sb->index,
- pool_index, size, threshold_type,
- info->extack);
-}
-
-static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
- struct devlink *devlink,
- struct devlink_port *devlink_port,
- struct devlink_sb *devlink_sb,
- u16 pool_index,
- enum devlink_command cmd,
- u32 portid, u32 seq, int flags)
-{
- const struct devlink_ops *ops = devlink->ops;
- u32 threshold;
- void *hdr;
- int err;
-
- err = ops->sb_port_pool_get(devlink_port, devlink_sb->index,
- pool_index, &threshold);
- if (err)
- return err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
- goto nla_put_failure;
-
- if (ops->sb_occ_port_pool_get) {
- u32 cur;
- u32 max;
-
- err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index,
- pool_index, &cur, &max);
- if (err && err != -EOPNOTSUPP)
- goto sb_occ_get_failure;
- if (!err) {
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
- goto nla_put_failure;
- }
- }
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- err = -EMSGSIZE;
-sb_occ_get_failure:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = devlink_port->devlink;
- struct devlink_sb *devlink_sb;
- struct sk_buff *msg;
- u16 pool_index;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
- &pool_index);
- if (err)
- return err;
-
- if (!devlink->ops->sb_port_pool_get)
- return -EOPNOTSUPP;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port,
- devlink_sb, pool_index,
- DEVLINK_CMD_SB_PORT_POOL_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
- struct devlink *devlink,
- struct devlink_sb *devlink_sb,
- u32 portid, u32 seq)
-{
- struct devlink_port *devlink_port;
- u16 pool_count = devlink_sb_pool_count(devlink_sb);
- unsigned long port_index;
- u16 pool_index;
- int err;
-
- xa_for_each(&devlink->ports, port_index, devlink_port) {
- for (pool_index = 0; pool_index < pool_count; pool_index++) {
- if (*p_idx < start) {
- (*p_idx)++;
- continue;
- }
- err = devlink_nl_sb_port_pool_fill(msg, devlink,
- devlink_port,
- devlink_sb,
- pool_index,
- DEVLINK_CMD_SB_PORT_POOL_NEW,
- portid, seq,
- NLM_F_MULTI);
- if (err)
- return err;
- (*p_idx)++;
- }
- }
- return 0;
-}
-
-static int
-devlink_nl_cmd_sb_port_pool_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_sb *devlink_sb;
- int idx = 0;
- int err = 0;
-
- if (!devlink->ops->sb_port_pool_get)
- return 0;
-
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- err = __sb_port_pool_get_dumpit(msg, state->idx, &idx,
- devlink, devlink_sb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- state->idx = idx;
- break;
- }
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_sb_port_pool_get = {
- .dump_one = devlink_nl_cmd_sb_port_pool_get_dump_one,
-};
-
-static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
- unsigned int sb_index, u16 pool_index,
- u32 threshold,
- struct netlink_ext_ack *extack)
-
-{
- const struct devlink_ops *ops = devlink_port->devlink->ops;
-
- if (ops->sb_port_pool_set)
- return ops->sb_port_pool_set(devlink_port, sb_index,
- pool_index, threshold, extack);
- return -EOPNOTSUPP;
-}
-
-static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_sb *devlink_sb;
- u16 pool_index;
- u32 threshold;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
- &pool_index);
- if (err)
- return err;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
- return -EINVAL;
-
- threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
- return devlink_sb_port_pool_set(devlink_port, devlink_sb->index,
- pool_index, threshold, info->extack);
-}
-
-static int
-devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink,
- struct devlink_port *devlink_port,
- struct devlink_sb *devlink_sb, u16 tc_index,
- enum devlink_sb_pool_type pool_type,
- enum devlink_command cmd,
- u32 portid, u32 seq, int flags)
-{
- const struct devlink_ops *ops = devlink->ops;
- u16 pool_index;
- u32 threshold;
- void *hdr;
- int err;
-
- err = ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index,
- tc_index, pool_type,
- &pool_index, &threshold);
- if (err)
- return err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index))
- goto nla_put_failure;
- if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type))
- goto nla_put_failure;
- if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
- goto nla_put_failure;
-
- if (ops->sb_occ_tc_port_bind_get) {
- u32 cur;
- u32 max;
-
- err = ops->sb_occ_tc_port_bind_get(devlink_port,
- devlink_sb->index,
- tc_index, pool_type,
- &cur, &max);
- if (err && err != -EOPNOTSUPP)
- return err;
- if (!err) {
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
- goto nla_put_failure;
- }
- }
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = devlink_port->devlink;
- struct devlink_sb *devlink_sb;
- struct sk_buff *msg;
- enum devlink_sb_pool_type pool_type;
- u16 tc_index;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- err = devlink_sb_pool_type_get_from_info(info, &pool_type);
- if (err)
- return err;
-
- err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
- pool_type, &tc_index);
- if (err)
- return err;
-
- if (!devlink->ops->sb_tc_pool_bind_get)
- return -EOPNOTSUPP;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port,
- devlink_sb, tc_index, pool_type,
- DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
- info->snd_portid,
- info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
- int start, int *p_idx,
- struct devlink *devlink,
- struct devlink_sb *devlink_sb,
- u32 portid, u32 seq)
-{
- struct devlink_port *devlink_port;
- unsigned long port_index;
- u16 tc_index;
- int err;
-
- xa_for_each(&devlink->ports, port_index, devlink_port) {
- for (tc_index = 0;
- tc_index < devlink_sb->ingress_tc_count; tc_index++) {
- if (*p_idx < start) {
- (*p_idx)++;
- continue;
- }
- err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
- devlink_port,
- devlink_sb,
- tc_index,
- DEVLINK_SB_POOL_TYPE_INGRESS,
- DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
- portid, seq,
- NLM_F_MULTI);
- if (err)
- return err;
- (*p_idx)++;
- }
- for (tc_index = 0;
- tc_index < devlink_sb->egress_tc_count; tc_index++) {
- if (*p_idx < start) {
- (*p_idx)++;
- continue;
- }
- err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
- devlink_port,
- devlink_sb,
- tc_index,
- DEVLINK_SB_POOL_TYPE_EGRESS,
- DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
- portid, seq,
- NLM_F_MULTI);
- if (err)
- return err;
- (*p_idx)++;
- }
- }
- return 0;
-}
-
-static int
-devlink_nl_cmd_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_sb *devlink_sb;
- int idx = 0;
- int err = 0;
-
- if (!devlink->ops->sb_tc_pool_bind_get)
- return 0;
-
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- err = __sb_tc_pool_bind_get_dumpit(msg, state->idx, &idx,
- devlink, devlink_sb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- state->idx = idx;
- break;
- }
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_sb_tc_pool_bind_get = {
- .dump_one = devlink_nl_cmd_sb_tc_pool_bind_get_dump_one,
-};
-
-static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
- unsigned int sb_index, u16 tc_index,
- enum devlink_sb_pool_type pool_type,
- u16 pool_index, u32 threshold,
- struct netlink_ext_ack *extack)
-
-{
- const struct devlink_ops *ops = devlink_port->devlink->ops;
-
- if (ops->sb_tc_pool_bind_set)
- return ops->sb_tc_pool_bind_set(devlink_port, sb_index,
- tc_index, pool_type,
- pool_index, threshold, extack);
- return -EOPNOTSUPP;
-}
-
-static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = info->user_ptr[0];
- enum devlink_sb_pool_type pool_type;
- struct devlink_sb *devlink_sb;
- u16 tc_index;
- u16 pool_index;
- u32 threshold;
- int err;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- err = devlink_sb_pool_type_get_from_info(info, &pool_type);
- if (err)
- return err;
-
- err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
- pool_type, &tc_index);
- if (err)
- return err;
-
- err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
- &pool_index);
- if (err)
- return err;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
- return -EINVAL;
-
- threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
- return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index,
- tc_index, pool_type,
- pool_index, threshold, info->extack);
-}
-
-static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- const struct devlink_ops *ops = devlink->ops;
- struct devlink_sb *devlink_sb;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- if (ops->sb_occ_snapshot)
- return ops->sb_occ_snapshot(devlink, devlink_sb->index);
- return -EOPNOTSUPP;
-}
-
-static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- const struct devlink_ops *ops = devlink->ops;
- struct devlink_sb *devlink_sb;
-
- devlink_sb = devlink_sb_get_from_info(devlink, info);
- if (IS_ERR(devlink_sb))
- return PTR_ERR(devlink_sb);
-
- if (ops->sb_occ_max_clear)
- return ops->sb_occ_max_clear(devlink, devlink_sb->index);
- return -EOPNOTSUPP;
-}
-
-int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
- struct netlink_ext_ack *extack)
-{
- struct devlink_rate *devlink_rate;
-
- list_for_each_entry(devlink_rate, &devlink->rate_list, list)
- if (devlink_rate_is_node(devlink_rate)) {
- NL_SET_ERR_MSG(extack, "Rate node(s) exists.");
- return -EBUSY;
- }
- return 0;
-}
-
-int devlink_dpipe_match_put(struct sk_buff *skb,
- struct devlink_dpipe_match *match)
-{
- struct devlink_dpipe_header *header = match->header;
- struct devlink_dpipe_field *field = &header->fields[match->field_id];
- struct nlattr *match_attr;
-
- match_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_MATCH);
- if (!match_attr)
- return -EMSGSIZE;
-
- if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
- nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
- goto nla_put_failure;
-
- nla_nest_end(skb, match_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, match_attr);
- return -EMSGSIZE;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_match_put);
-
-static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table,
- struct sk_buff *skb)
-{
- struct nlattr *matches_attr;
-
- matches_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
- if (!matches_attr)
- return -EMSGSIZE;
-
- if (table->table_ops->matches_dump(table->priv, skb))
- goto nla_put_failure;
-
- nla_nest_end(skb, matches_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, matches_attr);
- return -EMSGSIZE;
-}
-
-int devlink_dpipe_action_put(struct sk_buff *skb,
- struct devlink_dpipe_action *action)
-{
- struct devlink_dpipe_header *header = action->header;
- struct devlink_dpipe_field *field = &header->fields[action->field_id];
- struct nlattr *action_attr;
-
- action_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ACTION);
- if (!action_attr)
- return -EMSGSIZE;
-
- if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
- nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
- goto nla_put_failure;
-
- nla_nest_end(skb, action_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, action_attr);
- return -EMSGSIZE;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_action_put);
-
-static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table,
- struct sk_buff *skb)
-{
- struct nlattr *actions_attr;
-
- actions_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
- if (!actions_attr)
- return -EMSGSIZE;
-
- if (table->table_ops->actions_dump(table->priv, skb))
- goto nla_put_failure;
-
- nla_nest_end(skb, actions_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, actions_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_dpipe_table_put(struct sk_buff *skb,
- struct devlink_dpipe_table *table)
-{
- struct nlattr *table_attr;
- u64 table_size;
-
- table_size = table->table_ops->size_get(table->priv);
- table_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLE);
- if (!table_attr)
- return -EMSGSIZE;
-
- if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size,
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
- table->counters_enabled))
- goto nla_put_failure;
-
- if (table->resource_valid) {
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID,
- table->resource_id, DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS,
- table->resource_units, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- }
- if (devlink_dpipe_matches_put(table, skb))
- goto nla_put_failure;
-
- if (devlink_dpipe_actions_put(table, skb))
- goto nla_put_failure;
-
- nla_nest_end(skb, table_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, table_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb,
- struct genl_info *info)
-{
- int err;
-
- if (*pskb) {
- err = genlmsg_reply(*pskb, info);
- if (err)
- return err;
- }
- *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!*pskb)
- return -ENOMEM;
- return 0;
-}
-
-static int devlink_dpipe_tables_fill(struct genl_info *info,
- enum devlink_command cmd, int flags,
- struct list_head *dpipe_tables,
- const char *table_name)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_dpipe_table *table;
- struct nlattr *tables_attr;
- struct sk_buff *skb = NULL;
- struct nlmsghdr *nlh;
- bool incomplete;
- void *hdr;
- int i;
- int err;
-
- table = list_first_entry(dpipe_tables,
- struct devlink_dpipe_table, list);
-start_again:
- err = devlink_dpipe_send_and_alloc_skb(&skb, info);
- if (err)
- return err;
-
- hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
- &devlink_nl_family, NLM_F_MULTI, cmd);
- if (!hdr) {
- nlmsg_free(skb);
- return -EMSGSIZE;
- }
-
- if (devlink_nl_put_handle(skb, devlink))
- goto nla_put_failure;
- tables_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLES);
- if (!tables_attr)
- goto nla_put_failure;
-
- i = 0;
- incomplete = false;
- list_for_each_entry_from(table, dpipe_tables, list) {
- if (!table_name) {
- err = devlink_dpipe_table_put(skb, table);
- if (err) {
- if (!i)
- goto err_table_put;
- incomplete = true;
- break;
- }
- } else {
- if (!strcmp(table->name, table_name)) {
- err = devlink_dpipe_table_put(skb, table);
- if (err)
- break;
- }
- }
- i++;
- }
-
- nla_nest_end(skb, tables_attr);
- genlmsg_end(skb, hdr);
- if (incomplete)
- goto start_again;
-
-send_done:
- nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
- NLMSG_DONE, 0, flags | NLM_F_MULTI);
- if (!nlh) {
- err = devlink_dpipe_send_and_alloc_skb(&skb, info);
- if (err)
- return err;
- goto send_done;
- }
-
- return genlmsg_reply(skb, info);
-
-nla_put_failure:
- err = -EMSGSIZE;
-err_table_put:
- nlmsg_free(skb);
- return err;
-}
-
-static int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- const char *table_name = NULL;
-
- if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
- table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
-
- return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0,
- &devlink->dpipe_table_list,
- table_name);
-}
-
-static int devlink_dpipe_value_put(struct sk_buff *skb,
- struct devlink_dpipe_value *value)
-{
- if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE,
- value->value_size, value->value))
- return -EMSGSIZE;
- if (value->mask)
- if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK,
- value->value_size, value->mask))
- return -EMSGSIZE;
- if (value->mapping_valid)
- if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING,
- value->mapping_value))
- return -EMSGSIZE;
- return 0;
-}
-
-static int devlink_dpipe_action_value_put(struct sk_buff *skb,
- struct devlink_dpipe_value *value)
-{
- if (!value->action)
- return -EINVAL;
- if (devlink_dpipe_action_put(skb, value->action))
- return -EMSGSIZE;
- if (devlink_dpipe_value_put(skb, value))
- return -EMSGSIZE;
- return 0;
-}
-
-static int devlink_dpipe_action_values_put(struct sk_buff *skb,
- struct devlink_dpipe_value *values,
- unsigned int values_count)
-{
- struct nlattr *action_attr;
- int i;
- int err;
-
- for (i = 0; i < values_count; i++) {
- action_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_ACTION_VALUE);
- if (!action_attr)
- return -EMSGSIZE;
- err = devlink_dpipe_action_value_put(skb, &values[i]);
- if (err)
- goto err_action_value_put;
- nla_nest_end(skb, action_attr);
- }
- return 0;
-
-err_action_value_put:
- nla_nest_cancel(skb, action_attr);
- return err;
-}
-
-static int devlink_dpipe_match_value_put(struct sk_buff *skb,
- struct devlink_dpipe_value *value)
-{
- if (!value->match)
- return -EINVAL;
- if (devlink_dpipe_match_put(skb, value->match))
- return -EMSGSIZE;
- if (devlink_dpipe_value_put(skb, value))
- return -EMSGSIZE;
- return 0;
-}
-
-static int devlink_dpipe_match_values_put(struct sk_buff *skb,
- struct devlink_dpipe_value *values,
- unsigned int values_count)
-{
- struct nlattr *match_attr;
- int i;
- int err;
-
- for (i = 0; i < values_count; i++) {
- match_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_MATCH_VALUE);
- if (!match_attr)
- return -EMSGSIZE;
- err = devlink_dpipe_match_value_put(skb, &values[i]);
- if (err)
- goto err_match_value_put;
- nla_nest_end(skb, match_attr);
- }
- return 0;
-
-err_match_value_put:
- nla_nest_cancel(skb, match_attr);
- return err;
-}
-
-static int devlink_dpipe_entry_put(struct sk_buff *skb,
- struct devlink_dpipe_entry *entry)
-{
- struct nlattr *entry_attr, *matches_attr, *actions_attr;
- int err;
-
- entry_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ENTRY);
- if (!entry_attr)
- return -EMSGSIZE;
-
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (entry->counter_valid)
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
- entry->counter, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- matches_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
- if (!matches_attr)
- goto nla_put_failure;
-
- err = devlink_dpipe_match_values_put(skb, entry->match_values,
- entry->match_values_count);
- if (err) {
- nla_nest_cancel(skb, matches_attr);
- goto err_match_values_put;
- }
- nla_nest_end(skb, matches_attr);
-
- actions_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
- if (!actions_attr)
- goto nla_put_failure;
-
- err = devlink_dpipe_action_values_put(skb, entry->action_values,
- entry->action_values_count);
- if (err) {
- nla_nest_cancel(skb, actions_attr);
- goto err_action_values_put;
- }
- nla_nest_end(skb, actions_attr);
-
- nla_nest_end(skb, entry_attr);
- return 0;
-
-nla_put_failure:
- err = -EMSGSIZE;
-err_match_values_put:
-err_action_values_put:
- nla_nest_cancel(skb, entry_attr);
- return err;
-}
-
-static struct devlink_dpipe_table *
-devlink_dpipe_table_find(struct list_head *dpipe_tables,
- const char *table_name, struct devlink *devlink)
-{
- struct devlink_dpipe_table *table;
- list_for_each_entry_rcu(table, dpipe_tables, list,
- lockdep_is_held(&devlink->lock)) {
- if (!strcmp(table->name, table_name))
- return table;
- }
- return NULL;
-}
-
-int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
-{
- struct devlink *devlink;
- int err;
-
- err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb,
- dump_ctx->info);
- if (err)
- return err;
-
- dump_ctx->hdr = genlmsg_put(dump_ctx->skb,
- dump_ctx->info->snd_portid,
- dump_ctx->info->snd_seq,
- &devlink_nl_family, NLM_F_MULTI,
- dump_ctx->cmd);
- if (!dump_ctx->hdr)
- goto nla_put_failure;
-
- devlink = dump_ctx->info->user_ptr[0];
- if (devlink_nl_put_handle(dump_ctx->skb, devlink))
- goto nla_put_failure;
- dump_ctx->nest = nla_nest_start_noflag(dump_ctx->skb,
- DEVLINK_ATTR_DPIPE_ENTRIES);
- if (!dump_ctx->nest)
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- nlmsg_free(dump_ctx->skb);
- return -EMSGSIZE;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare);
-
-int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
- struct devlink_dpipe_entry *entry)
-{
- return devlink_dpipe_entry_put(dump_ctx->skb, entry);
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append);
-
-int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
-{
- nla_nest_end(dump_ctx->skb, dump_ctx->nest);
- genlmsg_end(dump_ctx->skb, dump_ctx->hdr);
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
-
-void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
-
-{
- unsigned int value_count, value_index;
- struct devlink_dpipe_value *value;
-
- value = entry->action_values;
- value_count = entry->action_values_count;
- for (value_index = 0; value_index < value_count; value_index++) {
- kfree(value[value_index].value);
- kfree(value[value_index].mask);
- }
-
- value = entry->match_values;
- value_count = entry->match_values_count;
- for (value_index = 0; value_index < value_count; value_index++) {
- kfree(value[value_index].value);
- kfree(value[value_index].mask);
- }
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_entry_clear);
-
-static int devlink_dpipe_entries_fill(struct genl_info *info,
- enum devlink_command cmd, int flags,
- struct devlink_dpipe_table *table)
-{
- struct devlink_dpipe_dump_ctx dump_ctx;
- struct nlmsghdr *nlh;
- int err;
-
- dump_ctx.skb = NULL;
- dump_ctx.cmd = cmd;
- dump_ctx.info = info;
-
- err = table->table_ops->entries_dump(table->priv,
- table->counters_enabled,
- &dump_ctx);
- if (err)
- return err;
-
-send_done:
- nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
- NLMSG_DONE, 0, flags | NLM_F_MULTI);
- if (!nlh) {
- err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
- if (err)
- return err;
- goto send_done;
- }
- return genlmsg_reply(dump_ctx.skb, info);
-}
-
-static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_dpipe_table *table;
- const char *table_name;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME))
- return -EINVAL;
-
- table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
- table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name, devlink);
- if (!table)
- return -EINVAL;
-
- if (!table->table_ops->entries_dump)
- return -EINVAL;
-
- return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET,
- 0, table);
-}
-
-static int devlink_dpipe_fields_put(struct sk_buff *skb,
- const struct devlink_dpipe_header *header)
-{
- struct devlink_dpipe_field *field;
- struct nlattr *field_attr;
- int i;
-
- for (i = 0; i < header->fields_count; i++) {
- field = &header->fields[i];
- field_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_FIELD);
- if (!field_attr)
- return -EMSGSIZE;
- if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type))
- goto nla_put_failure;
- nla_nest_end(skb, field_attr);
- }
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, field_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_dpipe_header_put(struct sk_buff *skb,
- struct devlink_dpipe_header *header)
-{
- struct nlattr *fields_attr, *header_attr;
- int err;
-
- header_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADER);
- if (!header_attr)
- return -EMSGSIZE;
-
- if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) ||
- nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
- nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
- goto nla_put_failure;
-
- fields_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
- if (!fields_attr)
- goto nla_put_failure;
-
- err = devlink_dpipe_fields_put(skb, header);
- if (err) {
- nla_nest_cancel(skb, fields_attr);
- goto nla_put_failure;
- }
- nla_nest_end(skb, fields_attr);
- nla_nest_end(skb, header_attr);
- return 0;
-
-nla_put_failure:
- err = -EMSGSIZE;
- nla_nest_cancel(skb, header_attr);
- return err;
-}
-
-static int devlink_dpipe_headers_fill(struct genl_info *info,
- enum devlink_command cmd, int flags,
- struct devlink_dpipe_headers *
- dpipe_headers)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct nlattr *headers_attr;
- struct sk_buff *skb = NULL;
- struct nlmsghdr *nlh;
- void *hdr;
- int i, j;
- int err;
-
- i = 0;
-start_again:
- err = devlink_dpipe_send_and_alloc_skb(&skb, info);
- if (err)
- return err;
-
- hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
- &devlink_nl_family, NLM_F_MULTI, cmd);
- if (!hdr) {
- nlmsg_free(skb);
- return -EMSGSIZE;
- }
-
- if (devlink_nl_put_handle(skb, devlink))
- goto nla_put_failure;
- headers_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADERS);
- if (!headers_attr)
- goto nla_put_failure;
-
- j = 0;
- for (; i < dpipe_headers->headers_count; i++) {
- err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]);
- if (err) {
- if (!j)
- goto err_table_put;
- break;
- }
- j++;
- }
- nla_nest_end(skb, headers_attr);
- genlmsg_end(skb, hdr);
- if (i != dpipe_headers->headers_count)
- goto start_again;
-
-send_done:
- nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
- NLMSG_DONE, 0, flags | NLM_F_MULTI);
- if (!nlh) {
- err = devlink_dpipe_send_and_alloc_skb(&skb, info);
- if (err)
- return err;
- goto send_done;
- }
- return genlmsg_reply(skb, info);
-
-nla_put_failure:
- err = -EMSGSIZE;
-err_table_put:
- nlmsg_free(skb);
- return err;
-}
-
-static int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
-
- if (!devlink->dpipe_headers)
- return -EOPNOTSUPP;
- return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET,
- 0, devlink->dpipe_headers);
-}
-
-static int devlink_dpipe_table_counters_set(struct devlink *devlink,
- const char *table_name,
- bool enable)
-{
- struct devlink_dpipe_table *table;
-
- table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name, devlink);
- if (!table)
- return -EINVAL;
-
- if (table->counter_control_extern)
- return -EOPNOTSUPP;
-
- if (!(table->counters_enabled ^ enable))
- return 0;
-
- table->counters_enabled = enable;
- if (table->table_ops->counters_set_update)
- table->table_ops->counters_set_update(table->priv, enable);
- return 0;
-}
-
-static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- const char *table_name;
- bool counters_enable;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_DPIPE_TABLE_NAME) ||
- GENL_REQ_ATTR_CHECK(info,
- DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED))
- return -EINVAL;
-
- table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
- counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]);
-
- return devlink_dpipe_table_counters_set(devlink, table_name,
- counters_enable);
-}
-
-static struct devlink_resource *
-devlink_resource_find(struct devlink *devlink,
- struct devlink_resource *resource, u64 resource_id)
-{
- struct list_head *resource_list;
-
- if (resource)
- resource_list = &resource->resource_list;
- else
- resource_list = &devlink->resource_list;
-
- list_for_each_entry(resource, resource_list, list) {
- struct devlink_resource *child_resource;
-
- if (resource->id == resource_id)
- return resource;
-
- child_resource = devlink_resource_find(devlink, resource,
- resource_id);
- if (child_resource)
- return child_resource;
- }
- return NULL;
-}
-
-static void
-devlink_resource_validate_children(struct devlink_resource *resource)
-{
- struct devlink_resource *child_resource;
- bool size_valid = true;
- u64 parts_size = 0;
-
- if (list_empty(&resource->resource_list))
- goto out;
-
- list_for_each_entry(child_resource, &resource->resource_list, list)
- parts_size += child_resource->size_new;
-
- if (parts_size > resource->size_new)
- size_valid = false;
-out:
- resource->size_valid = size_valid;
-}
-
-static int
-devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
- struct netlink_ext_ack *extack)
-{
- u64 reminder;
- int err = 0;
-
- if (size > resource->size_params.size_max) {
- NL_SET_ERR_MSG(extack, "Size larger than maximum");
- err = -EINVAL;
- }
-
- if (size < resource->size_params.size_min) {
- NL_SET_ERR_MSG(extack, "Size smaller than minimum");
- err = -EINVAL;
- }
-
- div64_u64_rem(size, resource->size_params.size_granularity, &reminder);
- if (reminder) {
- NL_SET_ERR_MSG(extack, "Wrong granularity");
- err = -EINVAL;
- }
-
- return err;
-}
-
-static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_resource *resource;
- u64 resource_id;
- u64 size;
- int err;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_ID) ||
- GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_SIZE))
- return -EINVAL;
- resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]);
-
- resource = devlink_resource_find(devlink, NULL, resource_id);
- if (!resource)
- return -EINVAL;
-
- size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
- err = devlink_resource_validate_size(resource, size, info->extack);
- if (err)
- return err;
-
- resource->size_new = size;
- devlink_resource_validate_children(resource);
- if (resource->parent)
- devlink_resource_validate_children(resource->parent);
- return 0;
-}
-
-static int
-devlink_resource_size_params_put(struct devlink_resource *resource,
- struct sk_buff *skb)
-{
- struct devlink_resource_size_params *size_params;
-
- size_params = &resource->size_params;
- if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
- size_params->size_granularity, DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
- size_params->size_max, DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
- size_params->size_min, DEVLINK_ATTR_PAD) ||
- nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit))
- return -EMSGSIZE;
- return 0;
-}
-
-static int devlink_resource_occ_put(struct devlink_resource *resource,
- struct sk_buff *skb)
-{
- if (!resource->occ_get)
- return 0;
- return nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
- resource->occ_get(resource->occ_get_priv),
- DEVLINK_ATTR_PAD);
-}
-
-static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
- struct devlink_resource *resource)
-{
- struct devlink_resource *child_resource;
- struct nlattr *child_resource_attr;
- struct nlattr *resource_attr;
-
- resource_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE);
- if (!resource_attr)
- return -EMSGSIZE;
-
- if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size,
- DEVLINK_ATTR_PAD) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (resource->size != resource->size_new &&
- nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
- resource->size_new, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (devlink_resource_occ_put(resource, skb))
- goto nla_put_failure;
- if (devlink_resource_size_params_put(resource, skb))
- goto nla_put_failure;
- if (list_empty(&resource->resource_list))
- goto out;
-
- if (nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_SIZE_VALID,
- resource->size_valid))
- goto nla_put_failure;
-
- child_resource_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_RESOURCE_LIST);
- if (!child_resource_attr)
- goto nla_put_failure;
-
- list_for_each_entry(child_resource, &resource->resource_list, list) {
- if (devlink_resource_put(devlink, skb, child_resource))
- goto resource_put_failure;
- }
-
- nla_nest_end(skb, child_resource_attr);
-out:
- nla_nest_end(skb, resource_attr);
- return 0;
-
-resource_put_failure:
- nla_nest_cancel(skb, child_resource_attr);
-nla_put_failure:
- nla_nest_cancel(skb, resource_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_resource_fill(struct genl_info *info,
- enum devlink_command cmd, int flags)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_resource *resource;
- struct nlattr *resources_attr;
- struct sk_buff *skb = NULL;
- struct nlmsghdr *nlh;
- bool incomplete;
- void *hdr;
- int i;
- int err;
-
- resource = list_first_entry(&devlink->resource_list,
- struct devlink_resource, list);
-start_again:
- err = devlink_dpipe_send_and_alloc_skb(&skb, info);
- if (err)
- return err;
-
- hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
- &devlink_nl_family, NLM_F_MULTI, cmd);
- if (!hdr) {
- nlmsg_free(skb);
- return -EMSGSIZE;
- }
-
- if (devlink_nl_put_handle(skb, devlink))
- goto nla_put_failure;
-
- resources_attr = nla_nest_start_noflag(skb,
- DEVLINK_ATTR_RESOURCE_LIST);
- if (!resources_attr)
- goto nla_put_failure;
-
- incomplete = false;
- i = 0;
- list_for_each_entry_from(resource, &devlink->resource_list, list) {
- err = devlink_resource_put(devlink, skb, resource);
- if (err) {
- if (!i)
- goto err_resource_put;
- incomplete = true;
- break;
- }
- i++;
- }
- nla_nest_end(skb, resources_attr);
- genlmsg_end(skb, hdr);
- if (incomplete)
- goto start_again;
-send_done:
- nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
- NLMSG_DONE, 0, flags | NLM_F_MULTI);
- if (!nlh) {
- err = devlink_dpipe_send_and_alloc_skb(&skb, info);
- if (err)
- return err;
- goto send_done;
- }
- return genlmsg_reply(skb, info);
-
-nla_put_failure:
- err = -EMSGSIZE;
-err_resource_put:
- nlmsg_free(skb);
- return err;
-}
-
-static int devlink_nl_cmd_resource_dump(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
-
- if (list_empty(&devlink->resource_list))
- return -EOPNOTSUPP;
-
- return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
-}
-
-int devlink_resources_validate(struct devlink *devlink,
- struct devlink_resource *resource,
- struct genl_info *info)
-{
- struct list_head *resource_list;
- int err = 0;
-
- if (resource)
- resource_list = &resource->resource_list;
- else
- resource_list = &devlink->resource_list;
-
- list_for_each_entry(resource, resource_list, list) {
- if (!resource->size_valid)
- return -EINVAL;
- err = devlink_resources_validate(devlink, resource, info);
- if (err)
- return err;
- }
- return err;
-}
-
-static const struct devlink_param devlink_param_generic[] = {
- {
- .id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
- .name = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME,
- .type = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
- .name = DEVLINK_PARAM_GENERIC_MAX_MACS_NAME,
- .type = DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
- .name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
- .type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
- .name = DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME,
- .type = DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
- .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME,
- .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
- .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME,
- .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
- .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
- .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
- .name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME,
- .type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
- .name = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME,
- .type = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
- .name = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME,
- .type = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE,
- },
- {
- .id = DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
- .name = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME,
- .type = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE,
- },
-};
-
-static int devlink_param_generic_verify(const struct devlink_param *param)
-{
- /* verify it match generic parameter by id and name */
- if (param->id > DEVLINK_PARAM_GENERIC_ID_MAX)
- return -EINVAL;
- if (strcmp(param->name, devlink_param_generic[param->id].name))
- return -ENOENT;
-
- WARN_ON(param->type != devlink_param_generic[param->id].type);
-
- return 0;
-}
-
-static int devlink_param_driver_verify(const struct devlink_param *param)
-{
- int i;
-
- if (param->id <= DEVLINK_PARAM_GENERIC_ID_MAX)
- return -EINVAL;
- /* verify no such name in generic params */
- for (i = 0; i <= DEVLINK_PARAM_GENERIC_ID_MAX; i++)
- if (!strcmp(param->name, devlink_param_generic[i].name))
- return -EEXIST;
-
- return 0;
-}
-
-static struct devlink_param_item *
-devlink_param_find_by_name(struct xarray *params, const char *param_name)
-{
- struct devlink_param_item *param_item;
- unsigned long param_id;
-
- xa_for_each(params, param_id, param_item) {
- if (!strcmp(param_item->param->name, param_name))
- return param_item;
- }
- return NULL;
-}
-
-static struct devlink_param_item *
-devlink_param_find_by_id(struct xarray *params, u32 param_id)
-{
- return xa_load(params, param_id);
-}
-
-static bool
-devlink_param_cmode_is_supported(const struct devlink_param *param,
- enum devlink_param_cmode cmode)
-{
- return test_bit(cmode, &param->supported_cmodes);
-}
-
-static int devlink_param_get(struct devlink *devlink,
- const struct devlink_param *param,
- struct devlink_param_gset_ctx *ctx)
-{
- if (!param->get || devlink->reload_failed)
- return -EOPNOTSUPP;
- return param->get(devlink, param->id, ctx);
-}
-
-static int devlink_param_set(struct devlink *devlink,
- const struct devlink_param *param,
- struct devlink_param_gset_ctx *ctx)
-{
- if (!param->set || devlink->reload_failed)
- return -EOPNOTSUPP;
- return param->set(devlink, param->id, ctx);
-}
-
-static int
-devlink_param_type_to_nla_type(enum devlink_param_type param_type)
-{
- switch (param_type) {
- case DEVLINK_PARAM_TYPE_U8:
- return NLA_U8;
- case DEVLINK_PARAM_TYPE_U16:
- return NLA_U16;
- case DEVLINK_PARAM_TYPE_U32:
- return NLA_U32;
- case DEVLINK_PARAM_TYPE_STRING:
- return NLA_STRING;
- case DEVLINK_PARAM_TYPE_BOOL:
- return NLA_FLAG;
- default:
- return -EINVAL;
- }
-}
-
-static int
-devlink_nl_param_value_fill_one(struct sk_buff *msg,
- enum devlink_param_type type,
- enum devlink_param_cmode cmode,
- union devlink_param_value val)
-{
- struct nlattr *param_value_attr;
-
- param_value_attr = nla_nest_start_noflag(msg,
- DEVLINK_ATTR_PARAM_VALUE);
- if (!param_value_attr)
- goto nla_put_failure;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_CMODE, cmode))
- goto value_nest_cancel;
-
- switch (type) {
- case DEVLINK_PARAM_TYPE_U8:
- if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu8))
- goto value_nest_cancel;
- break;
- case DEVLINK_PARAM_TYPE_U16:
- if (nla_put_u16(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu16))
- goto value_nest_cancel;
- break;
- case DEVLINK_PARAM_TYPE_U32:
- if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32))
- goto value_nest_cancel;
- break;
- case DEVLINK_PARAM_TYPE_STRING:
- if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA,
- val.vstr))
- goto value_nest_cancel;
- break;
- case DEVLINK_PARAM_TYPE_BOOL:
- if (val.vbool &&
- nla_put_flag(msg, DEVLINK_ATTR_PARAM_VALUE_DATA))
- goto value_nest_cancel;
- break;
- }
-
- nla_nest_end(msg, param_value_attr);
- return 0;
-
-value_nest_cancel:
- nla_nest_cancel(msg, param_value_attr);
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
- unsigned int port_index,
- struct devlink_param_item *param_item,
- enum devlink_command cmd,
- u32 portid, u32 seq, int flags)
-{
- union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
- bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {};
- const struct devlink_param *param = param_item->param;
- struct devlink_param_gset_ctx ctx;
- struct nlattr *param_values_list;
- struct nlattr *param_attr;
- int nla_type;
- void *hdr;
- int err;
- int i;
-
- /* Get value from driver part to driverinit configuration mode */
- for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
- if (!devlink_param_cmode_is_supported(param, i))
- continue;
- if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) {
- if (param_item->driverinit_value_new_valid)
- param_value[i] = param_item->driverinit_value_new;
- else if (param_item->driverinit_value_valid)
- param_value[i] = param_item->driverinit_value;
- else
- return -EOPNOTSUPP;
- } else {
- ctx.cmode = i;
- err = devlink_param_get(devlink, param, &ctx);
- if (err)
- return err;
- param_value[i] = ctx.val;
- }
- param_value_set[i] = true;
- }
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto genlmsg_cancel;
-
- if (cmd == DEVLINK_CMD_PORT_PARAM_GET ||
- cmd == DEVLINK_CMD_PORT_PARAM_NEW ||
- cmd == DEVLINK_CMD_PORT_PARAM_DEL)
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index))
- goto genlmsg_cancel;
-
- param_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PARAM);
- if (!param_attr)
- goto genlmsg_cancel;
- if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name))
- goto param_nest_cancel;
- if (param->generic && nla_put_flag(msg, DEVLINK_ATTR_PARAM_GENERIC))
- goto param_nest_cancel;
-
- nla_type = devlink_param_type_to_nla_type(param->type);
- if (nla_type < 0)
- goto param_nest_cancel;
- if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type))
- goto param_nest_cancel;
-
- param_values_list = nla_nest_start_noflag(msg,
- DEVLINK_ATTR_PARAM_VALUES_LIST);
- if (!param_values_list)
- goto param_nest_cancel;
-
- for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
- if (!param_value_set[i])
- continue;
- err = devlink_nl_param_value_fill_one(msg, param->type,
- i, param_value[i]);
- if (err)
- goto values_list_nest_cancel;
- }
-
- nla_nest_end(msg, param_values_list);
- nla_nest_end(msg, param_attr);
- genlmsg_end(msg, hdr);
- return 0;
-
-values_list_nest_cancel:
- nla_nest_end(msg, param_values_list);
-param_nest_cancel:
- nla_nest_cancel(msg, param_attr);
-genlmsg_cancel:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void devlink_param_notify(struct devlink *devlink,
- unsigned int port_index,
- struct devlink_param_item *param_item,
- enum devlink_command cmd)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL &&
- cmd != DEVLINK_CMD_PORT_PARAM_NEW &&
- cmd != DEVLINK_CMD_PORT_PARAM_DEL);
-
- /* devlink_notify_register() / devlink_notify_unregister()
- * will replay the notifications if the params are added/removed
- * outside of the lifetime of the instance.
- */
- if (!devl_is_registered(devlink))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
- err = devlink_nl_param_fill(msg, devlink, port_index, param_item, cmd,
- 0, 0, 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static int
-devlink_nl_cmd_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_param_item *param_item;
- unsigned long param_id;
- int err = 0;
-
- xa_for_each_start(&devlink->params, param_id, param_item, state->idx) {
- err = devlink_nl_param_fill(msg, devlink, 0, param_item,
- DEVLINK_CMD_PARAM_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- state->idx = param_id;
- break;
- }
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_param_get = {
- .dump_one = devlink_nl_cmd_param_get_dump_one,
-};
-
-static int
-devlink_param_type_get_from_info(struct genl_info *info,
- enum devlink_param_type *param_type)
-{
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_TYPE))
- return -EINVAL;
-
- switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) {
- case NLA_U8:
- *param_type = DEVLINK_PARAM_TYPE_U8;
- break;
- case NLA_U16:
- *param_type = DEVLINK_PARAM_TYPE_U16;
- break;
- case NLA_U32:
- *param_type = DEVLINK_PARAM_TYPE_U32;
- break;
- case NLA_STRING:
- *param_type = DEVLINK_PARAM_TYPE_STRING;
- break;
- case NLA_FLAG:
- *param_type = DEVLINK_PARAM_TYPE_BOOL;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int
-devlink_param_value_get_from_info(const struct devlink_param *param,
- struct genl_info *info,
- union devlink_param_value *value)
-{
- struct nlattr *param_data;
- int len;
-
- param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA];
-
- if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data)
- return -EINVAL;
-
- switch (param->type) {
- case DEVLINK_PARAM_TYPE_U8:
- if (nla_len(param_data) != sizeof(u8))
- return -EINVAL;
- value->vu8 = nla_get_u8(param_data);
- break;
- case DEVLINK_PARAM_TYPE_U16:
- if (nla_len(param_data) != sizeof(u16))
- return -EINVAL;
- value->vu16 = nla_get_u16(param_data);
- break;
- case DEVLINK_PARAM_TYPE_U32:
- if (nla_len(param_data) != sizeof(u32))
- return -EINVAL;
- value->vu32 = nla_get_u32(param_data);
- break;
- case DEVLINK_PARAM_TYPE_STRING:
- len = strnlen(nla_data(param_data), nla_len(param_data));
- if (len == nla_len(param_data) ||
- len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
- return -EINVAL;
- strcpy(value->vstr, nla_data(param_data));
- break;
- case DEVLINK_PARAM_TYPE_BOOL:
- if (param_data && nla_len(param_data))
- return -EINVAL;
- value->vbool = nla_get_flag(param_data);
- break;
- }
- return 0;
-}
-
-static struct devlink_param_item *
-devlink_param_get_from_info(struct xarray *params, struct genl_info *info)
-{
- char *param_name;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_NAME))
- return NULL;
-
- param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
- return devlink_param_find_by_name(params, param_name);
-}
-
-static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_param_item *param_item;
- struct sk_buff *msg;
- int err;
-
- param_item = devlink_param_get_from_info(&devlink->params, info);
- if (!param_item)
- return -EINVAL;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_param_fill(msg, devlink, 0, param_item,
- DEVLINK_CMD_PARAM_GET,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
- unsigned int port_index,
- struct xarray *params,
- struct genl_info *info,
- enum devlink_command cmd)
-{
- enum devlink_param_type param_type;
- struct devlink_param_gset_ctx ctx;
- enum devlink_param_cmode cmode;
- struct devlink_param_item *param_item;
- const struct devlink_param *param;
- union devlink_param_value value;
- int err = 0;
-
- param_item = devlink_param_get_from_info(params, info);
- if (!param_item)
- return -EINVAL;
- param = param_item->param;
- err = devlink_param_type_get_from_info(info, &param_type);
- if (err)
- return err;
- if (param_type != param->type)
- return -EINVAL;
- err = devlink_param_value_get_from_info(param, info, &value);
- if (err)
- return err;
- if (param->validate) {
- err = param->validate(devlink, param->id, value, info->extack);
- if (err)
- return err;
- }
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_VALUE_CMODE))
- return -EINVAL;
- cmode = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]);
- if (!devlink_param_cmode_is_supported(param, cmode))
- return -EOPNOTSUPP;
-
- if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) {
- param_item->driverinit_value_new = value;
- param_item->driverinit_value_new_valid = true;
- } else {
- if (!param->set)
- return -EOPNOTSUPP;
- ctx.val = value;
- ctx.cmode = cmode;
- err = devlink_param_set(devlink, param, &ctx);
- if (err)
- return err;
- }
-
- devlink_param_notify(devlink, port_index, param_item, cmd);
- return 0;
-}
-
-static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
-
- return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->params,
- info, DEVLINK_CMD_PARAM_NEW);
-}
-
-static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- NL_SET_ERR_MSG(cb->extack, "Port params are not supported");
- return msg->len;
-}
-
-static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- NL_SET_ERR_MSG(info->extack, "Port params are not supported");
- return -EINVAL;
-}
-
-static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- NL_SET_ERR_MSG(info->extack, "Port params are not supported");
- return -EINVAL;
-}
-
-static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
- struct devlink *devlink,
- struct devlink_snapshot *snapshot)
-{
- struct nlattr *snap_attr;
- int err;
-
- snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
- if (!snap_attr)
- return -EINVAL;
-
- err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id);
- if (err)
- goto nla_put_failure;
-
- nla_nest_end(msg, snap_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, snap_attr);
- return err;
-}
-
-static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg,
- struct devlink *devlink,
- struct devlink_region *region)
-{
- struct devlink_snapshot *snapshot;
- struct nlattr *snapshots_attr;
- int err;
-
- snapshots_attr = nla_nest_start_noflag(msg,
- DEVLINK_ATTR_REGION_SNAPSHOTS);
- if (!snapshots_attr)
- return -EINVAL;
-
- list_for_each_entry(snapshot, &region->snapshot_list, list) {
- err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot);
- if (err)
- goto nla_put_failure;
- }
-
- nla_nest_end(msg, snapshots_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, snapshots_attr);
- return err;
-}
-
-static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags,
- struct devlink_region *region)
-{
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- err = devlink_nl_put_handle(msg, devlink);
- if (err)
- goto nla_put_failure;
-
- if (region->port) {
- err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
- region->port->index);
- if (err)
- goto nla_put_failure;
- }
-
- err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->ops->name);
- if (err)
- goto nla_put_failure;
-
- err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
- region->size,
- DEVLINK_ATTR_PAD);
- if (err)
- goto nla_put_failure;
-
- err = nla_put_u32(msg, DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,
- region->max_snapshots);
- if (err)
- goto nla_put_failure;
-
- err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
- if (err)
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static struct sk_buff *
-devlink_nl_region_notify_build(struct devlink_region *region,
- struct devlink_snapshot *snapshot,
- enum devlink_command cmd, u32 portid, u32 seq)
-{
- struct devlink *devlink = region->devlink;
- struct sk_buff *msg;
- void *hdr;
- int err;
-
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return ERR_PTR(-ENOMEM);
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, 0, cmd);
- if (!hdr) {
- err = -EMSGSIZE;
- goto out_free_msg;
- }
-
- err = devlink_nl_put_handle(msg, devlink);
- if (err)
- goto out_cancel_msg;
-
- if (region->port) {
- err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
- region->port->index);
- if (err)
- goto out_cancel_msg;
- }
-
- err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME,
- region->ops->name);
- if (err)
- goto out_cancel_msg;
-
- if (snapshot) {
- err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID,
- snapshot->id);
- if (err)
- goto out_cancel_msg;
- } else {
- err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
- region->size, DEVLINK_ATTR_PAD);
- if (err)
- goto out_cancel_msg;
- }
- genlmsg_end(msg, hdr);
-
- return msg;
-
-out_cancel_msg:
- genlmsg_cancel(msg, hdr);
-out_free_msg:
- nlmsg_free(msg);
- return ERR_PTR(err);
-}
-
-static void devlink_nl_region_notify(struct devlink_region *region,
- struct devlink_snapshot *snapshot,
- enum devlink_command cmd)
-{
- struct devlink *devlink = region->devlink;
- struct sk_buff *msg;
-
- WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0);
- if (IS_ERR(msg))
- return;
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-/**
- * __devlink_snapshot_id_increment - Increment number of snapshots using an id
- * @devlink: devlink instance
- * @id: the snapshot id
- *
- * Track when a new snapshot begins using an id. Load the count for the
- * given id from the snapshot xarray, increment it, and store it back.
- *
- * Called when a new snapshot is created with the given id.
- *
- * The id *must* have been previously allocated by
- * devlink_region_snapshot_id_get().
- *
- * Returns 0 on success, or an error on failure.
- */
-static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id)
-{
- unsigned long count;
- void *p;
- int err;
-
- xa_lock(&devlink->snapshot_ids);
- p = xa_load(&devlink->snapshot_ids, id);
- if (WARN_ON(!p)) {
- err = -EINVAL;
- goto unlock;
- }
-
- if (WARN_ON(!xa_is_value(p))) {
- err = -EINVAL;
- goto unlock;
- }
-
- count = xa_to_value(p);
- count++;
-
- err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
- GFP_ATOMIC));
-unlock:
- xa_unlock(&devlink->snapshot_ids);
- return err;
-}
-
-/**
- * __devlink_snapshot_id_decrement - Decrease number of snapshots using an id
- * @devlink: devlink instance
- * @id: the snapshot id
- *
- * Track when a snapshot is deleted and stops using an id. Load the count
- * for the given id from the snapshot xarray, decrement it, and store it
- * back.
- *
- * If the count reaches zero, erase this id from the xarray, freeing it
- * up for future re-use by devlink_region_snapshot_id_get().
- *
- * Called when a snapshot using the given id is deleted, and when the
- * initial allocator of the id is finished using it.
- */
-static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id)
-{
- unsigned long count;
- void *p;
-
- xa_lock(&devlink->snapshot_ids);
- p = xa_load(&devlink->snapshot_ids, id);
- if (WARN_ON(!p))
- goto unlock;
-
- if (WARN_ON(!xa_is_value(p)))
- goto unlock;
-
- count = xa_to_value(p);
-
- if (count > 1) {
- count--;
- __xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
- GFP_ATOMIC);
- } else {
- /* If this was the last user, we can erase this id */
- __xa_erase(&devlink->snapshot_ids, id);
- }
-unlock:
- xa_unlock(&devlink->snapshot_ids);
-}
-
-/**
- * __devlink_snapshot_id_insert - Insert a specific snapshot ID
- * @devlink: devlink instance
- * @id: the snapshot id
- *
- * Mark the given snapshot id as used by inserting a zero value into the
- * snapshot xarray.
- *
- * This must be called while holding the devlink instance lock. Unlike
- * devlink_snapshot_id_get, the initial reference count is zero, not one.
- * It is expected that the id will immediately be used before
- * releasing the devlink instance lock.
- *
- * Returns zero on success, or an error code if the snapshot id could not
- * be inserted.
- */
-static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id)
-{
- int err;
-
- xa_lock(&devlink->snapshot_ids);
- if (xa_load(&devlink->snapshot_ids, id)) {
- xa_unlock(&devlink->snapshot_ids);
- return -EEXIST;
- }
- err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(0),
- GFP_ATOMIC));
- xa_unlock(&devlink->snapshot_ids);
- return err;
-}
-
-/**
- * __devlink_region_snapshot_id_get - get snapshot ID
- * @devlink: devlink instance
- * @id: storage to return snapshot id
- *
- * Allocates a new snapshot id. Returns zero on success, or a negative
- * error on failure. Must be called while holding the devlink instance
- * lock.
- *
- * Snapshot IDs are tracked using an xarray which stores the number of
- * users of the snapshot id.
- *
- * Note that the caller of this function counts as a 'user', in order to
- * avoid race conditions. The caller must release its hold on the
- * snapshot by using devlink_region_snapshot_id_put.
- */
-static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
-{
- return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1),
- xa_limit_32b, GFP_KERNEL);
-}
-
-/**
- * __devlink_region_snapshot_create - create a new snapshot
- * This will add a new snapshot of a region. The snapshot
- * will be stored on the region struct and can be accessed
- * from devlink. This is useful for future analyses of snapshots.
- * Multiple snapshots can be created on a region.
- * The @snapshot_id should be obtained using the getter function.
- *
- * Must be called only while holding the region snapshot lock.
- *
- * @region: devlink region of the snapshot
- * @data: snapshot data
- * @snapshot_id: snapshot id to be created
- */
-static int
-__devlink_region_snapshot_create(struct devlink_region *region,
- u8 *data, u32 snapshot_id)
-{
- struct devlink *devlink = region->devlink;
- struct devlink_snapshot *snapshot;
- int err;
-
- lockdep_assert_held(&region->snapshot_lock);
-
- /* check if region can hold one more snapshot */
- if (region->cur_snapshots == region->max_snapshots)
- return -ENOSPC;
-
- if (devlink_region_snapshot_get_by_id(region, snapshot_id))
- return -EEXIST;
-
- snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL);
- if (!snapshot)
- return -ENOMEM;
-
- err = __devlink_snapshot_id_increment(devlink, snapshot_id);
- if (err)
- goto err_snapshot_id_increment;
-
- snapshot->id = snapshot_id;
- snapshot->region = region;
- snapshot->data = data;
-
- list_add_tail(&snapshot->list, &region->snapshot_list);
-
- region->cur_snapshots++;
-
- devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_NEW);
- return 0;
-
-err_snapshot_id_increment:
- kfree(snapshot);
- return err;
-}
-
-static void devlink_region_snapshot_del(struct devlink_region *region,
- struct devlink_snapshot *snapshot)
-{
- struct devlink *devlink = region->devlink;
-
- lockdep_assert_held(&region->snapshot_lock);
-
- devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
- region->cur_snapshots--;
- list_del(&snapshot->list);
- region->ops->destructor(snapshot->data);
- __devlink_snapshot_id_decrement(devlink, snapshot->id);
- kfree(snapshot);
-}
-
-static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_port *port = NULL;
- struct devlink_region *region;
- const char *region_name;
- struct sk_buff *msg;
- unsigned int index;
- int err;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME))
- return -EINVAL;
-
- if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
- index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
- port = devlink_port_get_by_index(devlink, index);
- if (!port)
- return -ENODEV;
- }
-
- region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
- if (port)
- region = devlink_port_region_get_by_name(port, region_name);
- else
- region = devlink_region_get_by_name(devlink, region_name);
-
- if (!region)
- return -EINVAL;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_region_fill(msg, devlink, DEVLINK_CMD_REGION_GET,
- info->snd_portid, info->snd_seq, 0,
- region);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_region_get_port_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb,
- struct devlink_port *port,
- int *idx,
- int start)
-{
- struct devlink_region *region;
- int err = 0;
-
- list_for_each_entry(region, &port->region_list, list) {
- if (*idx < start) {
- (*idx)++;
- continue;
- }
- err = devlink_nl_region_fill(msg, port->devlink,
- DEVLINK_CMD_REGION_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, region);
- if (err)
- goto out;
- (*idx)++;
- }
-
-out:
- return err;
-}
-
-static int
-devlink_nl_cmd_region_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_region *region;
- struct devlink_port *port;
- unsigned long port_index;
- int idx = 0;
- int err;
-
- list_for_each_entry(region, &devlink->region_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_region_fill(msg, devlink,
- DEVLINK_CMD_REGION_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, region);
- if (err) {
- state->idx = idx;
- return err;
- }
- idx++;
- }
-
- xa_for_each(&devlink->ports, port_index, port) {
- err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, &idx,
- state->idx);
- if (err) {
- state->idx = idx;
- return err;
- }
- }
-
- return 0;
-}
-
-const struct devlink_cmd devl_cmd_region_get = {
- .dump_one = devlink_nl_cmd_region_get_dump_one,
-};
-
-static int devlink_nl_cmd_region_del(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_snapshot *snapshot;
- struct devlink_port *port = NULL;
- struct devlink_region *region;
- const char *region_name;
- unsigned int index;
- u32 snapshot_id;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME) ||
- GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_SNAPSHOT_ID))
- return -EINVAL;
-
- region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
- snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
-
- if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
- index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
- port = devlink_port_get_by_index(devlink, index);
- if (!port)
- return -ENODEV;
- }
-
- if (port)
- region = devlink_port_region_get_by_name(port, region_name);
- else
- region = devlink_region_get_by_name(devlink, region_name);
-
- if (!region)
- return -EINVAL;
-
- mutex_lock(&region->snapshot_lock);
- snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
- if (!snapshot) {
- mutex_unlock(&region->snapshot_lock);
- return -EINVAL;
- }
-
- devlink_region_snapshot_del(region, snapshot);
- mutex_unlock(&region->snapshot_lock);
- return 0;
-}
-
-static int
-devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_snapshot *snapshot;
- struct devlink_port *port = NULL;
- struct nlattr *snapshot_id_attr;
- struct devlink_region *region;
- const char *region_name;
- unsigned int index;
- u32 snapshot_id;
- u8 *data;
- int err;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) {
- NL_SET_ERR_MSG(info->extack, "No region name provided");
- return -EINVAL;
- }
-
- region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
-
- if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
- index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
- port = devlink_port_get_by_index(devlink, index);
- if (!port)
- return -ENODEV;
- }
-
- if (port)
- region = devlink_port_region_get_by_name(port, region_name);
- else
- region = devlink_region_get_by_name(devlink, region_name);
-
- if (!region) {
- NL_SET_ERR_MSG(info->extack, "The requested region does not exist");
- return -EINVAL;
- }
-
- if (!region->ops->snapshot) {
- NL_SET_ERR_MSG(info->extack, "The requested region does not support taking an immediate snapshot");
- return -EOPNOTSUPP;
- }
-
- mutex_lock(&region->snapshot_lock);
-
- if (region->cur_snapshots == region->max_snapshots) {
- NL_SET_ERR_MSG(info->extack, "The region has reached the maximum number of stored snapshots");
- err = -ENOSPC;
- goto unlock;
- }
-
- snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
- if (snapshot_id_attr) {
- snapshot_id = nla_get_u32(snapshot_id_attr);
-
- if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
- NL_SET_ERR_MSG(info->extack, "The requested snapshot id is already in use");
- err = -EEXIST;
- goto unlock;
- }
-
- err = __devlink_snapshot_id_insert(devlink, snapshot_id);
- if (err)
- goto unlock;
- } else {
- err = __devlink_region_snapshot_id_get(devlink, &snapshot_id);
- if (err) {
- NL_SET_ERR_MSG(info->extack, "Failed to allocate a new snapshot id");
- goto unlock;
- }
- }
-
- if (port)
- err = region->port_ops->snapshot(port, region->port_ops,
- info->extack, &data);
- else
- err = region->ops->snapshot(devlink, region->ops,
- info->extack, &data);
- if (err)
- goto err_snapshot_capture;
-
- err = __devlink_region_snapshot_create(region, data, snapshot_id);
- if (err)
- goto err_snapshot_create;
-
- if (!snapshot_id_attr) {
- struct sk_buff *msg;
-
- snapshot = devlink_region_snapshot_get_by_id(region,
- snapshot_id);
- if (WARN_ON(!snapshot)) {
- err = -EINVAL;
- goto unlock;
- }
-
- msg = devlink_nl_region_notify_build(region, snapshot,
- DEVLINK_CMD_REGION_NEW,
- info->snd_portid,
- info->snd_seq);
- err = PTR_ERR_OR_ZERO(msg);
- if (err)
- goto err_notify;
-
- err = genlmsg_reply(msg, info);
- if (err)
- goto err_notify;
- }
-
- mutex_unlock(&region->snapshot_lock);
- return 0;
-
-err_snapshot_create:
- region->ops->destructor(data);
-err_snapshot_capture:
- __devlink_snapshot_id_decrement(devlink, snapshot_id);
- mutex_unlock(&region->snapshot_lock);
- return err;
-
-err_notify:
- devlink_region_snapshot_del(region, snapshot);
-unlock:
- mutex_unlock(&region->snapshot_lock);
- return err;
-}
-
-static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
- u8 *chunk, u32 chunk_size,
- u64 addr)
-{
- struct nlattr *chunk_attr;
- int err;
-
- chunk_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_CHUNK);
- if (!chunk_attr)
- return -EINVAL;
-
- err = nla_put(msg, DEVLINK_ATTR_REGION_CHUNK_DATA, chunk_size, chunk);
- if (err)
- goto nla_put_failure;
-
- err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr,
- DEVLINK_ATTR_PAD);
- if (err)
- goto nla_put_failure;
-
- nla_nest_end(msg, chunk_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, chunk_attr);
- return err;
-}
-
-#define DEVLINK_REGION_READ_CHUNK_SIZE 256
-
-typedef int devlink_chunk_fill_t(void *cb_priv, u8 *chunk, u32 chunk_size,
- u64 curr_offset,
- struct netlink_ext_ack *extack);
-
-static int
-devlink_nl_region_read_fill(struct sk_buff *skb, devlink_chunk_fill_t *cb,
- void *cb_priv, u64 start_offset, u64 end_offset,
- u64 *new_offset, struct netlink_ext_ack *extack)
-{
- u64 curr_offset = start_offset;
- int err = 0;
- u8 *data;
-
- /* Allocate and re-use a single buffer */
- data = kmalloc(DEVLINK_REGION_READ_CHUNK_SIZE, GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- *new_offset = start_offset;
-
- while (curr_offset < end_offset) {
- u32 data_size;
-
- data_size = min_t(u32, end_offset - curr_offset,
- DEVLINK_REGION_READ_CHUNK_SIZE);
-
- err = cb(cb_priv, data, data_size, curr_offset, extack);
- if (err)
- break;
-
- err = devlink_nl_cmd_region_read_chunk_fill(skb, data, data_size, curr_offset);
- if (err)
- break;
-
- curr_offset += data_size;
- }
- *new_offset = curr_offset;
-
- kfree(data);
-
- return err;
-}
-
-static int
-devlink_region_snapshot_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
- u64 curr_offset,
- struct netlink_ext_ack __always_unused *extack)
-{
- struct devlink_snapshot *snapshot = cb_priv;
-
- memcpy(chunk, &snapshot->data[curr_offset], chunk_size);
-
- return 0;
-}
-
-static int
-devlink_region_port_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
- u64 curr_offset, struct netlink_ext_ack *extack)
-{
- struct devlink_region *region = cb_priv;
-
- return region->port_ops->read(region->port, region->port_ops, extack,
- curr_offset, chunk_size, chunk);
-}
-
-static int
-devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
- u64 curr_offset, struct netlink_ext_ack *extack)
-{
- struct devlink_region *region = cb_priv;
-
- return region->ops->read(region->devlink, region->ops, extack,
- curr_offset, chunk_size, chunk);
-}
-
-static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
- struct netlink_callback *cb)
-{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct nlattr *chunks_attr, *region_attr, *snapshot_attr;
- u64 ret_offset, start_offset, end_offset = U64_MAX;
- struct nlattr **attrs = info->attrs;
- struct devlink_port *port = NULL;
- devlink_chunk_fill_t *region_cb;
- struct devlink_region *region;
- const char *region_name;
- struct devlink *devlink;
- unsigned int index;
- void *region_cb_priv;
- void *hdr;
- int err;
-
- start_offset = state->start_offset;
-
- devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
- if (IS_ERR(devlink))
- return PTR_ERR(devlink);
-
- if (!attrs[DEVLINK_ATTR_REGION_NAME]) {
- NL_SET_ERR_MSG(cb->extack, "No region name provided");
- err = -EINVAL;
- goto out_unlock;
- }
-
- if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
- index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
- port = devlink_port_get_by_index(devlink, index);
- if (!port) {
- err = -ENODEV;
- goto out_unlock;
- }
- }
-
- region_attr = attrs[DEVLINK_ATTR_REGION_NAME];
- region_name = nla_data(region_attr);
-
- if (port)
- region = devlink_port_region_get_by_name(port, region_name);
- else
- region = devlink_region_get_by_name(devlink, region_name);
-
- if (!region) {
- NL_SET_ERR_MSG_ATTR(cb->extack, region_attr, "Requested region does not exist");
- err = -EINVAL;
- goto out_unlock;
- }
-
- snapshot_attr = attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
- if (!snapshot_attr) {
- if (!nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
- NL_SET_ERR_MSG(cb->extack, "No snapshot id provided");
- err = -EINVAL;
- goto out_unlock;
- }
-
- if (!region->ops->read) {
- NL_SET_ERR_MSG(cb->extack, "Requested region does not support direct read");
- err = -EOPNOTSUPP;
- goto out_unlock;
- }
-
- if (port)
- region_cb = &devlink_region_port_direct_fill;
- else
- region_cb = &devlink_region_direct_fill;
- region_cb_priv = region;
- } else {
- struct devlink_snapshot *snapshot;
- u32 snapshot_id;
-
- if (nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
- NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Direct region read does not use snapshot");
- err = -EINVAL;
- goto out_unlock;
- }
-
- snapshot_id = nla_get_u32(snapshot_attr);
- snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
- if (!snapshot) {
- NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist");
- err = -EINVAL;
- goto out_unlock;
- }
- region_cb = &devlink_region_snapshot_fill;
- region_cb_priv = snapshot;
- }
-
- if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
- attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
- if (!start_offset)
- start_offset =
- nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
-
- end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
- end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
- }
-
- if (end_offset > region->size)
- end_offset = region->size;
-
- /* return 0 if there is no further data to read */
- if (start_offset == end_offset) {
- err = 0;
- goto out_unlock;
- }
-
- hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
- DEVLINK_CMD_REGION_READ);
- if (!hdr) {
- err = -EMSGSIZE;
- goto out_unlock;
- }
-
- err = devlink_nl_put_handle(skb, devlink);
- if (err)
- goto nla_put_failure;
-
- if (region->port) {
- err = nla_put_u32(skb, DEVLINK_ATTR_PORT_INDEX,
- region->port->index);
- if (err)
- goto nla_put_failure;
- }
-
- err = nla_put_string(skb, DEVLINK_ATTR_REGION_NAME, region_name);
- if (err)
- goto nla_put_failure;
-
- chunks_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_REGION_CHUNKS);
- if (!chunks_attr) {
- err = -EMSGSIZE;
- goto nla_put_failure;
- }
-
- err = devlink_nl_region_read_fill(skb, region_cb, region_cb_priv,
- start_offset, end_offset, &ret_offset,
- cb->extack);
-
- if (err && err != -EMSGSIZE)
- goto nla_put_failure;
-
- /* Check if there was any progress done to prevent infinite loop */
- if (ret_offset == start_offset) {
- err = -EINVAL;
- goto nla_put_failure;
- }
-
- state->start_offset = ret_offset;
-
- nla_nest_end(skb, chunks_attr);
- genlmsg_end(skb, hdr);
- devl_unlock(devlink);
- devlink_put(devlink);
- return skb->len;
-
-nla_put_failure:
- genlmsg_cancel(skb, hdr);
-out_unlock:
- devl_unlock(devlink);
- devlink_put(devlink);
- return err;
-}
-
-struct devlink_stats {
- u64_stats_t rx_bytes;
- u64_stats_t rx_packets;
- struct u64_stats_sync syncp;
-};
-
-/**
- * struct devlink_trap_policer_item - Packet trap policer attributes.
- * @policer: Immutable packet trap policer attributes.
- * @rate: Rate in packets / sec.
- * @burst: Burst size in packets.
- * @list: trap_policer_list member.
- *
- * Describes packet trap policer attributes. Created by devlink during trap
- * policer registration.
- */
-struct devlink_trap_policer_item {
- const struct devlink_trap_policer *policer;
- u64 rate;
- u64 burst;
- struct list_head list;
-};
-
-/**
- * struct devlink_trap_group_item - Packet trap group attributes.
- * @group: Immutable packet trap group attributes.
- * @policer_item: Associated policer item. Can be NULL.
- * @list: trap_group_list member.
- * @stats: Trap group statistics.
- *
- * Describes packet trap group attributes. Created by devlink during trap
- * group registration.
- */
-struct devlink_trap_group_item {
- const struct devlink_trap_group *group;
- struct devlink_trap_policer_item *policer_item;
- struct list_head list;
- struct devlink_stats __percpu *stats;
-};
-
-/**
- * struct devlink_trap_item - Packet trap attributes.
- * @trap: Immutable packet trap attributes.
- * @group_item: Associated group item.
- * @list: trap_list member.
- * @action: Trap action.
- * @stats: Trap statistics.
- * @priv: Driver private information.
- *
- * Describes both mutable and immutable packet trap attributes. Created by
- * devlink during trap registration and used for all trap related operations.
- */
-struct devlink_trap_item {
- const struct devlink_trap *trap;
- struct devlink_trap_group_item *group_item;
- struct list_head list;
- enum devlink_trap_action action;
- struct devlink_stats __percpu *stats;
- void *priv;
-};
-
-static struct devlink_trap_policer_item *
-devlink_trap_policer_item_lookup(struct devlink *devlink, u32 id)
-{
- struct devlink_trap_policer_item *policer_item;
-
- list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
- if (policer_item->policer->id == id)
- return policer_item;
- }
-
- return NULL;
-}
-
-static struct devlink_trap_item *
-devlink_trap_item_lookup(struct devlink *devlink, const char *name)
-{
- struct devlink_trap_item *trap_item;
-
- list_for_each_entry(trap_item, &devlink->trap_list, list) {
- if (!strcmp(trap_item->trap->name, name))
- return trap_item;
- }
-
- return NULL;
-}
-
-static struct devlink_trap_item *
-devlink_trap_item_get_from_info(struct devlink *devlink,
- struct genl_info *info)
-{
- struct nlattr *attr;
-
- if (!info->attrs[DEVLINK_ATTR_TRAP_NAME])
- return NULL;
- attr = info->attrs[DEVLINK_ATTR_TRAP_NAME];
-
- return devlink_trap_item_lookup(devlink, nla_data(attr));
-}
-
-static int
-devlink_trap_action_get_from_info(struct genl_info *info,
- enum devlink_trap_action *p_trap_action)
-{
- u8 val;
-
- val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]);
- switch (val) {
- case DEVLINK_TRAP_ACTION_DROP:
- case DEVLINK_TRAP_ACTION_TRAP:
- case DEVLINK_TRAP_ACTION_MIRROR:
- *p_trap_action = val;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int devlink_trap_metadata_put(struct sk_buff *msg,
- const struct devlink_trap *trap)
-{
- struct nlattr *attr;
-
- attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA);
- if (!attr)
- return -EMSGSIZE;
-
- if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) &&
- nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT))
- goto nla_put_failure;
- if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE) &&
- nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE))
- goto nla_put_failure;
-
- nla_nest_end(msg, attr);
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, attr);
- return -EMSGSIZE;
-}
-
-static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
- struct devlink_stats *stats)
-{
- int i;
-
- memset(stats, 0, sizeof(*stats));
- for_each_possible_cpu(i) {
- struct devlink_stats *cpu_stats;
- u64 rx_packets, rx_bytes;
- unsigned int start;
-
- cpu_stats = per_cpu_ptr(trap_stats, i);
- do {
- start = u64_stats_fetch_begin(&cpu_stats->syncp);
- rx_packets = u64_stats_read(&cpu_stats->rx_packets);
- rx_bytes = u64_stats_read(&cpu_stats->rx_bytes);
- } while (u64_stats_fetch_retry(&cpu_stats->syncp, start));
-
- u64_stats_add(&stats->rx_packets, rx_packets);
- u64_stats_add(&stats->rx_bytes, rx_bytes);
- }
-}
-
-static int
-devlink_trap_group_stats_put(struct sk_buff *msg,
- struct devlink_stats __percpu *trap_stats)
-{
- struct devlink_stats stats;
- struct nlattr *attr;
-
- devlink_trap_stats_read(trap_stats, &stats);
-
- attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
- if (!attr)
- return -EMSGSIZE;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
- u64_stats_read(&stats.rx_packets),
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
- u64_stats_read(&stats.rx_bytes),
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- nla_nest_end(msg, attr);
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, attr);
- return -EMSGSIZE;
-}
-
-static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
- const struct devlink_trap_item *trap_item)
-{
- struct devlink_stats stats;
- struct nlattr *attr;
- u64 drops = 0;
- int err;
-
- if (devlink->ops->trap_drop_counter_get) {
- err = devlink->ops->trap_drop_counter_get(devlink,
- trap_item->trap,
- &drops);
- if (err)
- return err;
- }
-
- devlink_trap_stats_read(trap_item->stats, &stats);
-
- attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
- if (!attr)
- return -EMSGSIZE;
-
- if (devlink->ops->trap_drop_counter_get &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
- u64_stats_read(&stats.rx_packets),
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
- u64_stats_read(&stats.rx_bytes),
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- nla_nest_end(msg, attr);
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, attr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
- const struct devlink_trap_item *trap_item,
- enum devlink_command cmd, u32 portid, u32 seq,
- int flags)
-{
- struct devlink_trap_group_item *group_item = trap_item->group_item;
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
- group_item->group->name))
- goto nla_put_failure;
-
- if (nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name))
- goto nla_put_failure;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type))
- goto nla_put_failure;
-
- if (trap_item->trap->generic &&
- nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
- goto nla_put_failure;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action))
- goto nla_put_failure;
-
- err = devlink_trap_metadata_put(msg, trap_item->trap);
- if (err)
- goto nla_put_failure;
-
- err = devlink_trap_stats_put(msg, devlink, trap_item);
- if (err)
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
-
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_trap_item *trap_item;
- struct sk_buff *msg;
- int err;
-
- if (list_empty(&devlink->trap_list))
- return -EOPNOTSUPP;
-
- trap_item = devlink_trap_item_get_from_info(devlink, info);
- if (!trap_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap");
- return -ENOENT;
- }
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_trap_fill(msg, devlink, trap_item,
- DEVLINK_CMD_TRAP_NEW, info->snd_portid,
- info->snd_seq, 0);
- if (err)
- goto err_trap_fill;
-
- return genlmsg_reply(msg, info);
-
-err_trap_fill:
- nlmsg_free(msg);
- return err;
-}
-
-static int
-devlink_nl_cmd_trap_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_trap_item *trap_item;
- int idx = 0;
- int err = 0;
-
- list_for_each_entry(trap_item, &devlink->trap_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_trap_fill(msg, devlink, trap_item,
- DEVLINK_CMD_TRAP_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- state->idx = idx;
- break;
- }
- idx++;
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_trap_get = {
- .dump_one = devlink_nl_cmd_trap_get_dump_one,
-};
-
-static int __devlink_trap_action_set(struct devlink *devlink,
- struct devlink_trap_item *trap_item,
- enum devlink_trap_action trap_action,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- if (trap_item->action != trap_action &&
- trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
- NL_SET_ERR_MSG(extack, "Cannot change action of non-drop traps. Skipping");
- return 0;
- }
-
- err = devlink->ops->trap_action_set(devlink, trap_item->trap,
- trap_action, extack);
- if (err)
- return err;
-
- trap_item->action = trap_action;
-
- return 0;
-}
-
-static int devlink_trap_action_set(struct devlink *devlink,
- struct devlink_trap_item *trap_item,
- struct genl_info *info)
-{
- enum devlink_trap_action trap_action;
- int err;
-
- if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
- return 0;
-
- err = devlink_trap_action_get_from_info(info, &trap_action);
- if (err) {
- NL_SET_ERR_MSG(info->extack, "Invalid trap action");
- return -EINVAL;
- }
-
- return __devlink_trap_action_set(devlink, trap_item, trap_action,
- info->extack);
-}
-
-static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_trap_item *trap_item;
-
- if (list_empty(&devlink->trap_list))
- return -EOPNOTSUPP;
-
- trap_item = devlink_trap_item_get_from_info(devlink, info);
- if (!trap_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap");
- return -ENOENT;
- }
-
- return devlink_trap_action_set(devlink, trap_item, info);
-}
-
-static struct devlink_trap_group_item *
-devlink_trap_group_item_lookup(struct devlink *devlink, const char *name)
-{
- struct devlink_trap_group_item *group_item;
-
- list_for_each_entry(group_item, &devlink->trap_group_list, list) {
- if (!strcmp(group_item->group->name, name))
- return group_item;
- }
-
- return NULL;
-}
-
-static struct devlink_trap_group_item *
-devlink_trap_group_item_lookup_by_id(struct devlink *devlink, u16 id)
-{
- struct devlink_trap_group_item *group_item;
-
- list_for_each_entry(group_item, &devlink->trap_group_list, list) {
- if (group_item->group->id == id)
- return group_item;
- }
-
- return NULL;
-}
-
-static struct devlink_trap_group_item *
-devlink_trap_group_item_get_from_info(struct devlink *devlink,
- struct genl_info *info)
-{
- char *name;
-
- if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME])
- return NULL;
- name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]);
-
- return devlink_trap_group_item_lookup(devlink, name);
-}
-
-static int
-devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
- const struct devlink_trap_group_item *group_item,
- enum devlink_command cmd, u32 portid, u32 seq,
- int flags)
-{
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
- group_item->group->name))
- goto nla_put_failure;
-
- if (group_item->group->generic &&
- nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
- goto nla_put_failure;
-
- if (group_item->policer_item &&
- nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
- group_item->policer_item->policer->id))
- goto nla_put_failure;
-
- err = devlink_trap_group_stats_put(msg, group_item->stats);
- if (err)
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
-
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_trap_group_item *group_item;
- struct sk_buff *msg;
- int err;
-
- if (list_empty(&devlink->trap_group_list))
- return -EOPNOTSUPP;
-
- group_item = devlink_trap_group_item_get_from_info(devlink, info);
- if (!group_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap group");
- return -ENOENT;
- }
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_trap_group_fill(msg, devlink, group_item,
- DEVLINK_CMD_TRAP_GROUP_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err)
- goto err_trap_group_fill;
-
- return genlmsg_reply(msg, info);
-
-err_trap_group_fill:
- nlmsg_free(msg);
- return err;
-}
-
-static int
-devlink_nl_cmd_trap_group_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_trap_group_item *group_item;
- int idx = 0;
- int err = 0;
-
-
- list_for_each_entry(group_item, &devlink->trap_group_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_trap_group_fill(msg, devlink, group_item,
- DEVLINK_CMD_TRAP_GROUP_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- state->idx = idx;
- break;
- }
- idx++;
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_trap_group_get = {
- .dump_one = devlink_nl_cmd_trap_group_get_dump_one,
-};
-
-static int
-__devlink_trap_group_action_set(struct devlink *devlink,
- struct devlink_trap_group_item *group_item,
- enum devlink_trap_action trap_action,
- struct netlink_ext_ack *extack)
-{
- const char *group_name = group_item->group->name;
- struct devlink_trap_item *trap_item;
- int err;
-
- if (devlink->ops->trap_group_action_set) {
- err = devlink->ops->trap_group_action_set(devlink, group_item->group,
- trap_action, extack);
- if (err)
- return err;
-
- list_for_each_entry(trap_item, &devlink->trap_list, list) {
- if (strcmp(trap_item->group_item->group->name, group_name))
- continue;
- if (trap_item->action != trap_action &&
- trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP)
- continue;
- trap_item->action = trap_action;
- }
-
- return 0;
- }
-
- list_for_each_entry(trap_item, &devlink->trap_list, list) {
- if (strcmp(trap_item->group_item->group->name, group_name))
- continue;
- err = __devlink_trap_action_set(devlink, trap_item,
- trap_action, extack);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int
-devlink_trap_group_action_set(struct devlink *devlink,
- struct devlink_trap_group_item *group_item,
- struct genl_info *info, bool *p_modified)
-{
- enum devlink_trap_action trap_action;
- int err;
-
- if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
- return 0;
-
- err = devlink_trap_action_get_from_info(info, &trap_action);
- if (err) {
- NL_SET_ERR_MSG(info->extack, "Invalid trap action");
- return -EINVAL;
- }
-
- err = __devlink_trap_group_action_set(devlink, group_item, trap_action,
- info->extack);
- if (err)
- return err;
-
- *p_modified = true;
-
- return 0;
-}
-
-static int devlink_trap_group_set(struct devlink *devlink,
- struct devlink_trap_group_item *group_item,
- struct genl_info *info)
-{
- struct devlink_trap_policer_item *policer_item;
- struct netlink_ext_ack *extack = info->extack;
- const struct devlink_trap_policer *policer;
- struct nlattr **attrs = info->attrs;
- u32 policer_id;
- int err;
-
- if (!attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
- return 0;
-
- if (!devlink->ops->trap_group_set)
- return -EOPNOTSUPP;
-
- policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
- policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
- if (policer_id && !policer_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
- return -ENOENT;
- }
- policer = policer_item ? policer_item->policer : NULL;
-
- err = devlink->ops->trap_group_set(devlink, group_item->group, policer,
- extack);
- if (err)
- return err;
-
- group_item->policer_item = policer_item;
-
- return 0;
-}
-
-static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_trap_group_item *group_item;
- bool modified = false;
- int err;
-
- if (list_empty(&devlink->trap_group_list))
- return -EOPNOTSUPP;
-
- group_item = devlink_trap_group_item_get_from_info(devlink, info);
- if (!group_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap group");
- return -ENOENT;
- }
-
- err = devlink_trap_group_action_set(devlink, group_item, info,
- &modified);
- if (err)
- return err;
-
- err = devlink_trap_group_set(devlink, group_item, info);
- if (err)
- goto err_trap_group_set;
-
- return 0;
-
-err_trap_group_set:
- if (modified)
- NL_SET_ERR_MSG(extack, "Trap group set failed, but some changes were committed already");
- return err;
-}
-
-static struct devlink_trap_policer_item *
-devlink_trap_policer_item_get_from_info(struct devlink *devlink,
- struct genl_info *info)
-{
- u32 id;
-
- if (!info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
- return NULL;
- id = nla_get_u32(info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
-
- return devlink_trap_policer_item_lookup(devlink, id);
-}
-
-static int
-devlink_trap_policer_stats_put(struct sk_buff *msg, struct devlink *devlink,
- const struct devlink_trap_policer *policer)
-{
- struct nlattr *attr;
- u64 drops;
- int err;
-
- if (!devlink->ops->trap_policer_counter_get)
- return 0;
-
- err = devlink->ops->trap_policer_counter_get(devlink, policer, &drops);
- if (err)
- return err;
-
- attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
- if (!attr)
- return -EMSGSIZE;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
- DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- nla_nest_end(msg, attr);
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, attr);
- return -EMSGSIZE;
-}
-
-static int
-devlink_nl_trap_policer_fill(struct sk_buff *msg, struct devlink *devlink,
- const struct devlink_trap_policer_item *policer_item,
- enum devlink_command cmd, u32 portid, u32 seq,
- int flags)
-{
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
- policer_item->policer->id))
- goto nla_put_failure;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_RATE,
- policer_item->rate, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_BURST,
- policer_item->burst, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
- err = devlink_trap_policer_stats_put(msg, devlink,
- policer_item->policer);
- if (err)
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
-
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_trap_policer_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_trap_policer_item *policer_item;
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- if (list_empty(&devlink->trap_policer_list))
- return -EOPNOTSUPP;
-
- policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
- if (!policer_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
- return -ENOENT;
- }
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
- DEVLINK_CMD_TRAP_POLICER_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err)
- goto err_trap_policer_fill;
-
- return genlmsg_reply(msg, info);
-
-err_trap_policer_fill:
- nlmsg_free(msg);
- return err;
-}
-
-static int
-devlink_nl_cmd_trap_policer_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_trap_policer_item *policer_item;
- int idx = 0;
- int err = 0;
-
- list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
- DEVLINK_CMD_TRAP_POLICER_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- state->idx = idx;
- break;
- }
- idx++;
- }
-
- return err;
-}
-
-const struct devlink_cmd devl_cmd_trap_policer_get = {
- .dump_one = devlink_nl_cmd_trap_policer_get_dump_one,
-};
-
-static int
-devlink_trap_policer_set(struct devlink *devlink,
- struct devlink_trap_policer_item *policer_item,
- struct genl_info *info)
-{
- struct netlink_ext_ack *extack = info->extack;
- struct nlattr **attrs = info->attrs;
- u64 rate, burst;
- int err;
-
- rate = policer_item->rate;
- burst = policer_item->burst;
-
- if (attrs[DEVLINK_ATTR_TRAP_POLICER_RATE])
- rate = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_RATE]);
-
- if (attrs[DEVLINK_ATTR_TRAP_POLICER_BURST])
- burst = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_BURST]);
-
- if (rate < policer_item->policer->min_rate) {
- NL_SET_ERR_MSG(extack, "Policer rate lower than limit");
- return -EINVAL;
- }
-
- if (rate > policer_item->policer->max_rate) {
- NL_SET_ERR_MSG(extack, "Policer rate higher than limit");
- return -EINVAL;
- }
-
- if (burst < policer_item->policer->min_burst) {
- NL_SET_ERR_MSG(extack, "Policer burst size lower than limit");
- return -EINVAL;
- }
-
- if (burst > policer_item->policer->max_burst) {
- NL_SET_ERR_MSG(extack, "Policer burst size higher than limit");
- return -EINVAL;
- }
-
- err = devlink->ops->trap_policer_set(devlink, policer_item->policer,
- rate, burst, info->extack);
- if (err)
- return err;
-
- policer_item->rate = rate;
- policer_item->burst = burst;
-
- return 0;
-}
-
-static int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_trap_policer_item *policer_item;
- struct netlink_ext_ack *extack = info->extack;
- struct devlink *devlink = info->user_ptr[0];
-
- if (list_empty(&devlink->trap_policer_list))
- return -EOPNOTSUPP;
-
- if (!devlink->ops->trap_policer_set)
- return -EOPNOTSUPP;
-
- policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
- if (!policer_item) {
- NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
- return -ENOENT;
- }
-
- return devlink_trap_policer_set(devlink, policer_item, info);
-}
-
-const struct genl_small_ops devlink_nl_ops[56] = {
- {
- .cmd = DEVLINK_CMD_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_PORT_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_PORT_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_RATE_GET,
- .doit = devlink_nl_cmd_rate_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_RATE_SET,
- .doit = devlink_nl_cmd_rate_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
- },
- {
- .cmd = DEVLINK_CMD_RATE_NEW,
- .doit = devlink_nl_cmd_rate_new_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_RATE_DEL,
- .doit = devlink_nl_cmd_rate_del_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_RATE_NODE,
- },
- {
- .cmd = DEVLINK_CMD_PORT_SPLIT,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_split_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_PORT_UNSPLIT,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_unsplit_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_PORT_NEW,
- .doit = devlink_nl_cmd_port_new_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_PORT_DEL,
- .doit = devlink_nl_cmd_port_del_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_LINECARD_GET,
- .doit = devlink_nl_cmd_linecard_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_LINECARD_SET,
- .doit = devlink_nl_cmd_linecard_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
- },
- {
- .cmd = DEVLINK_CMD_SB_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_SB_POOL_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_pool_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_SB_POOL_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_pool_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_port_pool_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_port_pool_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_occ_snapshot_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_sb_occ_max_clear_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_ESWITCH_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_eswitch_get_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_ESWITCH_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_eswitch_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_dpipe_table_get,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_dpipe_entries_get,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_dpipe_headers_get,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_dpipe_table_counters_set,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_RESOURCE_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_resource_set,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_RESOURCE_DUMP,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_resource_dump,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_RELOAD,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_reload,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_PARAM_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_param_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_PARAM_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_param_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_PORT_PARAM_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_param_get_doit,
- .dumpit = devlink_nl_cmd_port_param_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_PORT_PARAM_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_port_param_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
- },
- {
- .cmd = DEVLINK_CMD_REGION_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_region_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_REGION_NEW,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_region_new,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_REGION_DEL,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_region_del,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_REGION_READ,
- .validate = GENL_DONT_VALIDATE_STRICT |
- GENL_DONT_VALIDATE_DUMP_STRICT,
- .dumpit = devlink_nl_cmd_region_read_dumpit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_INFO_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_info_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_set_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_recover_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_diagnose_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
- .validate = GENL_DONT_VALIDATE_STRICT |
- GENL_DONT_VALIDATE_DUMP_STRICT,
- .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_health_reporter_test_doit,
- .flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
- },
- {
- .cmd = DEVLINK_CMD_FLASH_UPDATE,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = devlink_nl_cmd_flash_update,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_TRAP_GET,
- .doit = devlink_nl_cmd_trap_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_TRAP_SET,
- .doit = devlink_nl_cmd_trap_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_TRAP_GROUP_GET,
- .doit = devlink_nl_cmd_trap_group_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_TRAP_GROUP_SET,
- .doit = devlink_nl_cmd_trap_group_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_TRAP_POLICER_GET,
- .doit = devlink_nl_cmd_trap_policer_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_TRAP_POLICER_SET,
- .doit = devlink_nl_cmd_trap_policer_set_doit,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = DEVLINK_CMD_SELFTESTS_GET,
- .doit = devlink_nl_cmd_selftests_get_doit,
- .dumpit = devlink_nl_instance_iter_dumpit,
- /* can be retrieved by unprivileged users */
- },
- {
- .cmd = DEVLINK_CMD_SELFTESTS_RUN,
- .doit = devlink_nl_cmd_selftests_run,
- .flags = GENL_ADMIN_PERM,
- },
- /* -- No new ops here! Use split ops going forward! -- */
-};
-
-static void
-devlink_trap_policer_notify(struct devlink *devlink,
- const struct devlink_trap_policer_item *policer_item,
- enum devlink_command cmd);
-static void
-devlink_trap_group_notify(struct devlink *devlink,
- const struct devlink_trap_group_item *group_item,
- enum devlink_command cmd);
-static void devlink_trap_notify(struct devlink *devlink,
- const struct devlink_trap_item *trap_item,
- enum devlink_command cmd);
-
-void devlink_notify_register(struct devlink *devlink)
-{
- struct devlink_trap_policer_item *policer_item;
- struct devlink_trap_group_item *group_item;
- struct devlink_param_item *param_item;
- struct devlink_trap_item *trap_item;
- struct devlink_port *devlink_port;
- struct devlink_linecard *linecard;
- struct devlink_rate *rate_node;
- struct devlink_region *region;
- unsigned long port_index;
- unsigned long param_id;
-
- devlink_notify(devlink, DEVLINK_CMD_NEW);
- list_for_each_entry(linecard, &devlink->linecard_list, list)
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-
- xa_for_each(&devlink->ports, port_index, devlink_port)
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
-
- list_for_each_entry(policer_item, &devlink->trap_policer_list, list)
- devlink_trap_policer_notify(devlink, policer_item,
- DEVLINK_CMD_TRAP_POLICER_NEW);
-
- list_for_each_entry(group_item, &devlink->trap_group_list, list)
- devlink_trap_group_notify(devlink, group_item,
- DEVLINK_CMD_TRAP_GROUP_NEW);
-
- list_for_each_entry(trap_item, &devlink->trap_list, list)
- devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
-
- list_for_each_entry(rate_node, &devlink->rate_list, list)
- devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
-
- list_for_each_entry(region, &devlink->region_list, list)
- devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
-
- xa_for_each(&devlink->params, param_id, param_item)
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_NEW);
-}
-
-void devlink_notify_unregister(struct devlink *devlink)
-{
- struct devlink_trap_policer_item *policer_item;
- struct devlink_trap_group_item *group_item;
- struct devlink_param_item *param_item;
- struct devlink_trap_item *trap_item;
- struct devlink_port *devlink_port;
- struct devlink_rate *rate_node;
- struct devlink_region *region;
- unsigned long port_index;
- unsigned long param_id;
-
- xa_for_each(&devlink->params, param_id, param_item)
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_DEL);
-
- list_for_each_entry_reverse(region, &devlink->region_list, list)
- devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
-
- list_for_each_entry_reverse(rate_node, &devlink->rate_list, list)
- devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
-
- list_for_each_entry_reverse(trap_item, &devlink->trap_list, list)
- devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
-
- list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list)
- devlink_trap_group_notify(devlink, group_item,
- DEVLINK_CMD_TRAP_GROUP_DEL);
- list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list,
- list)
- devlink_trap_policer_notify(devlink, policer_item,
- DEVLINK_CMD_TRAP_POLICER_DEL);
-
- xa_for_each(&devlink->ports, port_index, devlink_port)
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
- devlink_notify(devlink, DEVLINK_CMD_DEL);
-}
-
-static void devlink_port_type_warn(struct work_struct *work)
-{
- struct devlink_port *port = container_of(to_delayed_work(work),
- struct devlink_port,
- type_warn_dw);
- dev_warn(port->devlink->dev, "Type was not set for devlink port.");
-}
-
-static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
-{
- /* Ignore CPU and DSA flavours. */
- return devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
- devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA &&
- devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_UNUSED;
-}
-
-#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 3600)
-
-static void devlink_port_type_warn_schedule(struct devlink_port *devlink_port)
-{
- if (!devlink_port_type_should_warn(devlink_port))
- return;
- /* Schedule a work to WARN in case driver does not set port
- * type within timeout.
- */
- schedule_delayed_work(&devlink_port->type_warn_dw,
- DEVLINK_PORT_TYPE_WARN_TIMEOUT);
-}
-
-static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
-{
- if (!devlink_port_type_should_warn(devlink_port))
- return;
- cancel_delayed_work_sync(&devlink_port->type_warn_dw);
-}
-
-/**
- * devlink_port_init() - Init devlink port
- *
- * @devlink: devlink
- * @devlink_port: devlink port
- *
- * Initialize essential stuff that is needed for functions
- * that may be called before devlink port registration.
- * Call to this function is optional and not needed
- * in case the driver does not use such functions.
- */
-void devlink_port_init(struct devlink *devlink,
- struct devlink_port *devlink_port)
-{
- if (devlink_port->initialized)
- return;
- devlink_port->devlink = devlink;
- INIT_LIST_HEAD(&devlink_port->region_list);
- devlink_port->initialized = true;
-}
-EXPORT_SYMBOL_GPL(devlink_port_init);
-
-/**
- * devlink_port_fini() - Deinitialize devlink port
- *
- * @devlink_port: devlink port
- *
- * Deinitialize essential stuff that is in use for functions
- * that may be called after devlink port unregistration.
- * Call to this function is optional and not needed
- * in case the driver does not use such functions.
- */
-void devlink_port_fini(struct devlink_port *devlink_port)
-{
- WARN_ON(!list_empty(&devlink_port->region_list));
-}
-EXPORT_SYMBOL_GPL(devlink_port_fini);
-
-static const struct devlink_port_ops devlink_port_dummy_ops = {};
-
-/**
- * devl_port_register_with_ops() - Register devlink port
- *
- * @devlink: devlink
- * @devlink_port: devlink port
- * @port_index: driver-specific numerical identifier of the port
- * @ops: port ops
- *
- * Register devlink port with provided port index. User can use
- * any indexing, even hw-related one. devlink_port structure
- * is convenient to be embedded inside user driver private structure.
- * Note that the caller should take care of zeroing the devlink_port
- * structure.
- */
-int devl_port_register_with_ops(struct devlink *devlink,
- struct devlink_port *devlink_port,
- unsigned int port_index,
- const struct devlink_port_ops *ops)
-{
- int err;
-
- devl_assert_locked(devlink);
-
- ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
- devlink_port_init(devlink, devlink_port);
- devlink_port->registered = true;
- devlink_port->index = port_index;
- devlink_port->ops = ops ? ops : &devlink_port_dummy_ops;
- spin_lock_init(&devlink_port->type_lock);
- INIT_LIST_HEAD(&devlink_port->reporter_list);
- err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL);
- if (err)
- return err;
-
- INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
- devlink_port_type_warn_schedule(devlink_port);
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_port_register_with_ops);
-
-/**
- * devlink_port_register_with_ops - Register devlink port
- *
- * @devlink: devlink
- * @devlink_port: devlink port
- * @port_index: driver-specific numerical identifier of the port
- * @ops: port ops
- *
- * Register devlink port with provided port index. User can use
- * any indexing, even hw-related one. devlink_port structure
- * is convenient to be embedded inside user driver private structure.
- * Note that the caller should take care of zeroing the devlink_port
- * structure.
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-int devlink_port_register_with_ops(struct devlink *devlink,
- struct devlink_port *devlink_port,
- unsigned int port_index,
- const struct devlink_port_ops *ops)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_port_register_with_ops(devlink, devlink_port,
- port_index, ops);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_port_register_with_ops);
-
-/**
- * devl_port_unregister() - Unregister devlink port
- *
- * @devlink_port: devlink port
- */
-void devl_port_unregister(struct devlink_port *devlink_port)
-{
- lockdep_assert_held(&devlink_port->devlink->lock);
- WARN_ON(devlink_port->type != DEVLINK_PORT_TYPE_NOTSET);
-
- devlink_port_type_warn_cancel(devlink_port);
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
- xa_erase(&devlink_port->devlink->ports, devlink_port->index);
- WARN_ON(!list_empty(&devlink_port->reporter_list));
- devlink_port->registered = false;
-}
-EXPORT_SYMBOL_GPL(devl_port_unregister);
-
-/**
- * devlink_port_unregister - Unregister devlink port
- *
- * @devlink_port: devlink port
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_port_unregister(struct devlink_port *devlink_port)
-{
- struct devlink *devlink = devlink_port->devlink;
-
- devl_lock(devlink);
- devl_port_unregister(devlink_port);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_port_unregister);
-
-static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port,
- struct net_device *netdev)
-{
- const struct net_device_ops *ops = netdev->netdev_ops;
-
- /* If driver registers devlink port, it should set devlink port
- * attributes accordingly so the compat functions are called
- * and the original ops are not used.
- */
- if (ops->ndo_get_phys_port_name) {
- /* Some drivers use the same set of ndos for netdevs
- * that have devlink_port registered and also for
- * those who don't. Make sure that ndo_get_phys_port_name
- * returns -EOPNOTSUPP here in case it is defined.
- * Warn if not.
- */
- char name[IFNAMSIZ];
- int err;
-
- err = ops->ndo_get_phys_port_name(netdev, name, sizeof(name));
- WARN_ON(err != -EOPNOTSUPP);
- }
- if (ops->ndo_get_port_parent_id) {
- /* Some drivers use the same set of ndos for netdevs
- * that have devlink_port registered and also for
- * those who don't. Make sure that ndo_get_port_parent_id
- * returns -EOPNOTSUPP here in case it is defined.
- * Warn if not.
- */
- struct netdev_phys_item_id ppid;
- int err;
-
- err = ops->ndo_get_port_parent_id(netdev, &ppid);
- WARN_ON(err != -EOPNOTSUPP);
- }
-}
-
-static void __devlink_port_type_set(struct devlink_port *devlink_port,
- enum devlink_port_type type,
- void *type_dev)
-{
- struct net_device *netdev = type_dev;
-
- ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
-
- if (type == DEVLINK_PORT_TYPE_NOTSET) {
- devlink_port_type_warn_schedule(devlink_port);
- } else {
- devlink_port_type_warn_cancel(devlink_port);
- if (type == DEVLINK_PORT_TYPE_ETH && netdev)
- devlink_port_type_netdev_checks(devlink_port, netdev);
- }
-
- spin_lock_bh(&devlink_port->type_lock);
- devlink_port->type = type;
- switch (type) {
- case DEVLINK_PORT_TYPE_ETH:
- devlink_port->type_eth.netdev = netdev;
- if (netdev) {
- ASSERT_RTNL();
- devlink_port->type_eth.ifindex = netdev->ifindex;
- BUILD_BUG_ON(sizeof(devlink_port->type_eth.ifname) !=
- sizeof(netdev->name));
- strcpy(devlink_port->type_eth.ifname, netdev->name);
- }
- break;
- case DEVLINK_PORT_TYPE_IB:
- devlink_port->type_ib.ibdev = type_dev;
- break;
- default:
- break;
- }
- spin_unlock_bh(&devlink_port->type_lock);
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
-}
-
-/**
- * devlink_port_type_eth_set - Set port type to Ethernet
- *
- * @devlink_port: devlink port
- *
- * If driver is calling this, most likely it is doing something wrong.
- */
-void devlink_port_type_eth_set(struct devlink_port *devlink_port)
-{
- dev_warn(devlink_port->devlink->dev,
- "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n",
- devlink_port->index);
- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, NULL);
-}
-EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
-
-/**
- * devlink_port_type_ib_set - Set port type to InfiniBand
- *
- * @devlink_port: devlink port
- * @ibdev: related IB device
- */
-void devlink_port_type_ib_set(struct devlink_port *devlink_port,
- struct ib_device *ibdev)
-{
- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev);
-}
-EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
-
-/**
- * devlink_port_type_clear - Clear port type
- *
- * @devlink_port: devlink port
- *
- * If driver is calling this for clearing Ethernet type, most likely
- * it is doing something wrong.
- */
-void devlink_port_type_clear(struct devlink_port *devlink_port)
-{
- if (devlink_port->type == DEVLINK_PORT_TYPE_ETH)
- dev_warn(devlink_port->devlink->dev,
- "devlink port type for port %d cleared without a software interface reference, device type not supported by the kernel?\n",
- devlink_port->index);
- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL);
-}
-EXPORT_SYMBOL_GPL(devlink_port_type_clear);
-
-int devlink_port_netdevice_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
-{
- struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
- struct devlink_port *devlink_port = netdev->devlink_port;
- struct devlink *devlink;
-
- if (!devlink_port)
- return NOTIFY_OK;
- devlink = devlink_port->devlink;
-
- switch (event) {
- case NETDEV_POST_INIT:
- /* Set the type but not netdev pointer. It is going to be set
- * later on by NETDEV_REGISTER event. Happens once during
- * netdevice register
- */
- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH,
- NULL);
- break;
- case NETDEV_REGISTER:
- case NETDEV_CHANGENAME:
- if (devlink_net(devlink) != dev_net(netdev))
- return NOTIFY_OK;
- /* Set the netdev on top of previously set type. Note this
- * event happens also during net namespace change so here
- * we take into account netdev pointer appearing in this
- * namespace.
- */
- __devlink_port_type_set(devlink_port, devlink_port->type,
- netdev);
- break;
- case NETDEV_UNREGISTER:
- if (devlink_net(devlink) != dev_net(netdev))
- return NOTIFY_OK;
- /* Clear netdev pointer, but not the type. This event happens
- * also during net namespace change so we need to clear
- * pointer to netdev that is going to another net namespace.
- */
- __devlink_port_type_set(devlink_port, devlink_port->type,
- NULL);
- break;
- case NETDEV_PRE_UNINIT:
- /* Clear the type and the netdev pointer. Happens one during
- * netdevice unregister.
- */
- __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET,
- NULL);
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
- enum devlink_port_flavour flavour)
-{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
-
- devlink_port->attrs_set = true;
- attrs->flavour = flavour;
- if (attrs->switch_id.id_len) {
- devlink_port->switch_port = true;
- if (WARN_ON(attrs->switch_id.id_len > MAX_PHYS_ITEM_ID_LEN))
- attrs->switch_id.id_len = MAX_PHYS_ITEM_ID_LEN;
- } else {
- devlink_port->switch_port = false;
- }
- return 0;
-}
-
-/**
- * devlink_port_attrs_set - Set port attributes
- *
- * @devlink_port: devlink port
- * @attrs: devlink port attrs
- */
-void devlink_port_attrs_set(struct devlink_port *devlink_port,
- struct devlink_port_attrs *attrs)
-{
- int ret;
-
- ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
- devlink_port->attrs = *attrs;
- ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
- if (ret)
- return;
- WARN_ON(attrs->splittable && attrs->split);
-}
-EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
-
-/**
- * devlink_port_attrs_pci_pf_set - Set PCI PF port attributes
- *
- * @devlink_port: devlink port
- * @controller: associated controller number for the devlink port instance
- * @pf: associated PF for the devlink port instance
- * @external: indicates if the port is for an external controller
- */
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
- u16 pf, bool external)
-{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
- int ret;
-
- ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
- ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_PF);
- if (ret)
- return;
- attrs->pci_pf.controller = controller;
- attrs->pci_pf.pf = pf;
- attrs->pci_pf.external = external;
-}
-EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
-
-/**
- * devlink_port_attrs_pci_vf_set - Set PCI VF port attributes
- *
- * @devlink_port: devlink port
- * @controller: associated controller number for the devlink port instance
- * @pf: associated PF for the devlink port instance
- * @vf: associated VF of a PF for the devlink port instance
- * @external: indicates if the port is for an external controller
- */
-void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
- u16 pf, u16 vf, bool external)
-{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
- int ret;
-
- ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
- ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_VF);
- if (ret)
- return;
- attrs->pci_vf.controller = controller;
- attrs->pci_vf.pf = pf;
- attrs->pci_vf.vf = vf;
- attrs->pci_vf.external = external;
-}
-EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
-
-/**
- * devlink_port_attrs_pci_sf_set - Set PCI SF port attributes
- *
- * @devlink_port: devlink port
- * @controller: associated controller number for the devlink port instance
- * @pf: associated PF for the devlink port instance
- * @sf: associated SF of a PF for the devlink port instance
- * @external: indicates if the port is for an external controller
- */
-void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller,
- u16 pf, u32 sf, bool external)
-{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
- int ret;
-
- ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
- ret = __devlink_port_attrs_set(devlink_port,
- DEVLINK_PORT_FLAVOUR_PCI_SF);
- if (ret)
- return;
- attrs->pci_sf.controller = controller;
- attrs->pci_sf.pf = pf;
- attrs->pci_sf.sf = sf;
- attrs->pci_sf.external = external;
-}
-EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
-
-/**
- * devl_rate_node_create - create devlink rate node
- * @devlink: devlink instance
- * @priv: driver private data
- * @node_name: name of the resulting node
- * @parent: parent devlink_rate struct
- *
- * Create devlink rate object of type node
- */
-struct devlink_rate *
-devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
- struct devlink_rate *parent)
-{
- struct devlink_rate *rate_node;
-
- rate_node = devlink_rate_node_get_by_name(devlink, node_name);
- if (!IS_ERR(rate_node))
- return ERR_PTR(-EEXIST);
-
- rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
- if (!rate_node)
- return ERR_PTR(-ENOMEM);
-
- if (parent) {
- rate_node->parent = parent;
- refcount_inc(&rate_node->parent->refcnt);
- }
-
- rate_node->type = DEVLINK_RATE_TYPE_NODE;
- rate_node->devlink = devlink;
- rate_node->priv = priv;
-
- rate_node->name = kstrdup(node_name, GFP_KERNEL);
- if (!rate_node->name) {
- kfree(rate_node);
- return ERR_PTR(-ENOMEM);
- }
-
- refcount_set(&rate_node->refcnt, 1);
- list_add(&rate_node->list, &devlink->rate_list);
- devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
- return rate_node;
-}
-EXPORT_SYMBOL_GPL(devl_rate_node_create);
-
-/**
- * devl_rate_leaf_create - create devlink rate leaf
- * @devlink_port: devlink port object to create rate object on
- * @priv: driver private data
- * @parent: parent devlink_rate struct
- *
- * Create devlink rate object of type leaf on provided @devlink_port.
- */
-int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
- struct devlink_rate *parent)
-{
- struct devlink *devlink = devlink_port->devlink;
- struct devlink_rate *devlink_rate;
-
- devl_assert_locked(devlink_port->devlink);
-
- if (WARN_ON(devlink_port->devlink_rate))
- return -EBUSY;
-
- devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
- if (!devlink_rate)
- return -ENOMEM;
-
- if (parent) {
- devlink_rate->parent = parent;
- refcount_inc(&devlink_rate->parent->refcnt);
- }
-
- devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
- devlink_rate->devlink = devlink;
- devlink_rate->devlink_port = devlink_port;
- devlink_rate->priv = priv;
- list_add_tail(&devlink_rate->list, &devlink->rate_list);
- devlink_port->devlink_rate = devlink_rate;
- devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_rate_leaf_create);
-
-/**
- * devl_rate_leaf_destroy - destroy devlink rate leaf
- *
- * @devlink_port: devlink port linked to the rate object
- *
- * Destroy the devlink rate object of type leaf on provided @devlink_port.
- */
-void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
-{
- struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
-
- devl_assert_locked(devlink_port->devlink);
- if (!devlink_rate)
- return;
-
- devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
- if (devlink_rate->parent)
- refcount_dec(&devlink_rate->parent->refcnt);
- list_del(&devlink_rate->list);
- devlink_port->devlink_rate = NULL;
- kfree(devlink_rate);
-}
-EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
-
-/**
- * devl_rate_nodes_destroy - destroy all devlink rate nodes on device
- * @devlink: devlink instance
- *
- * Unset parent for all rate objects and destroy all rate nodes
- * on specified device.
- */
-void devl_rate_nodes_destroy(struct devlink *devlink)
-{
- static struct devlink_rate *devlink_rate, *tmp;
- const struct devlink_ops *ops = devlink->ops;
-
- devl_assert_locked(devlink);
-
- list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
- if (!devlink_rate->parent)
- continue;
-
- refcount_dec(&devlink_rate->parent->refcnt);
- if (devlink_rate_is_leaf(devlink_rate))
- ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv,
- NULL, NULL);
- else if (devlink_rate_is_node(devlink_rate))
- ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv,
- NULL, NULL);
- }
- list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) {
- if (devlink_rate_is_node(devlink_rate)) {
- ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL);
- list_del(&devlink_rate->list);
- kfree(devlink_rate->name);
- kfree(devlink_rate);
- }
- }
-}
-EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);
-
-/**
- * devlink_port_linecard_set - Link port with a linecard
- *
- * @devlink_port: devlink port
- * @linecard: devlink linecard
- */
-void devlink_port_linecard_set(struct devlink_port *devlink_port,
- struct devlink_linecard *linecard)
-{
- ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
-
- devlink_port->linecard = linecard;
-}
-EXPORT_SYMBOL_GPL(devlink_port_linecard_set);
-
-static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
- char *name, size_t len)
-{
- struct devlink_port_attrs *attrs = &devlink_port->attrs;
- int n = 0;
-
- if (!devlink_port->attrs_set)
- return -EOPNOTSUPP;
-
- switch (attrs->flavour) {
- case DEVLINK_PORT_FLAVOUR_PHYSICAL:
- if (devlink_port->linecard)
- n = snprintf(name, len, "l%u",
- devlink_port->linecard->index);
- if (n < len)
- n += snprintf(name + n, len - n, "p%u",
- attrs->phys.port_number);
- if (n < len && attrs->split)
- n += snprintf(name + n, len - n, "s%u",
- attrs->phys.split_subport_number);
- break;
- case DEVLINK_PORT_FLAVOUR_CPU:
- case DEVLINK_PORT_FLAVOUR_DSA:
- case DEVLINK_PORT_FLAVOUR_UNUSED:
- /* As CPU and DSA ports do not have a netdevice associated
- * case should not ever happen.
- */
- WARN_ON(1);
- return -EINVAL;
- case DEVLINK_PORT_FLAVOUR_PCI_PF:
- if (attrs->pci_pf.external) {
- n = snprintf(name, len, "c%u", attrs->pci_pf.controller);
- if (n >= len)
- return -EINVAL;
- len -= n;
- name += n;
- }
- n = snprintf(name, len, "pf%u", attrs->pci_pf.pf);
- break;
- case DEVLINK_PORT_FLAVOUR_PCI_VF:
- if (attrs->pci_vf.external) {
- n = snprintf(name, len, "c%u", attrs->pci_vf.controller);
- if (n >= len)
- return -EINVAL;
- len -= n;
- name += n;
- }
- n = snprintf(name, len, "pf%uvf%u",
- attrs->pci_vf.pf, attrs->pci_vf.vf);
- break;
- case DEVLINK_PORT_FLAVOUR_PCI_SF:
- if (attrs->pci_sf.external) {
- n = snprintf(name, len, "c%u", attrs->pci_sf.controller);
- if (n >= len)
- return -EINVAL;
- len -= n;
- name += n;
- }
- n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
- attrs->pci_sf.sf);
- break;
- case DEVLINK_PORT_FLAVOUR_VIRTUAL:
- return -EOPNOTSUPP;
- }
-
- if (n >= len)
- return -EINVAL;
-
- return 0;
-}
-
-static int devlink_linecard_types_init(struct devlink_linecard *linecard)
-{
- struct devlink_linecard_type *linecard_type;
- unsigned int count;
- int i;
-
- count = linecard->ops->types_count(linecard, linecard->priv);
- linecard->types = kmalloc_array(count, sizeof(*linecard_type),
- GFP_KERNEL);
- if (!linecard->types)
- return -ENOMEM;
- linecard->types_count = count;
-
- for (i = 0; i < count; i++) {
- linecard_type = &linecard->types[i];
- linecard->ops->types_get(linecard, linecard->priv, i,
- &linecard_type->type,
- &linecard_type->priv);
- }
- return 0;
-}
-
-static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
-{
- kfree(linecard->types);
-}
-
-/**
- * devl_linecard_create - Create devlink linecard
- *
- * @devlink: devlink
- * @linecard_index: driver-specific numerical identifier of the linecard
- * @ops: linecards ops
- * @priv: user priv pointer
- *
- * Create devlink linecard instance with provided linecard index.
- * Caller can use any indexing, even hw-related one.
- *
- * Return: Line card structure or an ERR_PTR() encoded error code.
- */
-struct devlink_linecard *
-devl_linecard_create(struct devlink *devlink, unsigned int linecard_index,
- const struct devlink_linecard_ops *ops, void *priv)
-{
- struct devlink_linecard *linecard;
- int err;
-
- if (WARN_ON(!ops || !ops->provision || !ops->unprovision ||
- !ops->types_count || !ops->types_get))
- return ERR_PTR(-EINVAL);
-
- if (devlink_linecard_index_exists(devlink, linecard_index))
- return ERR_PTR(-EEXIST);
-
- linecard = kzalloc(sizeof(*linecard), GFP_KERNEL);
- if (!linecard)
- return ERR_PTR(-ENOMEM);
-
- linecard->devlink = devlink;
- linecard->index = linecard_index;
- linecard->ops = ops;
- linecard->priv = priv;
- linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
- mutex_init(&linecard->state_lock);
-
- err = devlink_linecard_types_init(linecard);
- if (err) {
- mutex_destroy(&linecard->state_lock);
- kfree(linecard);
- return ERR_PTR(err);
- }
-
- list_add_tail(&linecard->list, &devlink->linecard_list);
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- return linecard;
-}
-EXPORT_SYMBOL_GPL(devl_linecard_create);
-
-/**
- * devl_linecard_destroy - Destroy devlink linecard
- *
- * @linecard: devlink linecard
- */
-void devl_linecard_destroy(struct devlink_linecard *linecard)
-{
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
- list_del(&linecard->list);
- devlink_linecard_types_fini(linecard);
- mutex_destroy(&linecard->state_lock);
- kfree(linecard);
-}
-EXPORT_SYMBOL_GPL(devl_linecard_destroy);
-
-/**
- * devlink_linecard_provision_set - Set provisioning on linecard
- *
- * @linecard: devlink linecard
- * @type: linecard type
- *
- * This is either called directly from the provision() op call or
- * as a result of the provision() op call asynchronously.
- */
-void devlink_linecard_provision_set(struct devlink_linecard *linecard,
- const char *type)
-{
- mutex_lock(&linecard->state_lock);
- WARN_ON(linecard->type && strcmp(linecard->type, type));
- linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
- linecard->type = type;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
-
-/**
- * devlink_linecard_provision_clear - Clear provisioning on linecard
- *
- * @linecard: devlink linecard
- *
- * This is either called directly from the unprovision() op call or
- * as a result of the unprovision() op call asynchronously.
- */
-void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
-{
- mutex_lock(&linecard->state_lock);
- WARN_ON(linecard->nested_devlink);
- linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
- linecard->type = NULL;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
-
-/**
- * devlink_linecard_provision_fail - Fail provisioning on linecard
- *
- * @linecard: devlink linecard
- *
- * This is either called directly from the provision() op call or
- * as a result of the provision() op call asynchronously.
- */
-void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
-{
- mutex_lock(&linecard->state_lock);
- WARN_ON(linecard->nested_devlink);
- linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_provision_fail);
-
-/**
- * devlink_linecard_activate - Set linecard active
- *
- * @linecard: devlink linecard
- */
-void devlink_linecard_activate(struct devlink_linecard *linecard)
-{
- mutex_lock(&linecard->state_lock);
- WARN_ON(linecard->state != DEVLINK_LINECARD_STATE_PROVISIONED);
- linecard->state = DEVLINK_LINECARD_STATE_ACTIVE;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_activate);
-
-/**
- * devlink_linecard_deactivate - Set linecard inactive
- *
- * @linecard: devlink linecard
- */
-void devlink_linecard_deactivate(struct devlink_linecard *linecard)
-{
- mutex_lock(&linecard->state_lock);
- switch (linecard->state) {
- case DEVLINK_LINECARD_STATE_ACTIVE:
- linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- break;
- case DEVLINK_LINECARD_STATE_UNPROVISIONING:
- /* Line card is being deactivated as part
- * of unprovisioning flow.
- */
- break;
- default:
- WARN_ON(1);
- break;
- }
- mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
-
-/**
- * devlink_linecard_nested_dl_set - Attach/detach nested devlink
- * instance to linecard.
- *
- * @linecard: devlink linecard
- * @nested_devlink: devlink instance to attach or NULL to detach
- */
-void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
- struct devlink *nested_devlink)
-{
- mutex_lock(&linecard->state_lock);
- linecard->nested_devlink = nested_devlink;
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
-
-int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
- u32 size, u16 ingress_pools_count,
- u16 egress_pools_count, u16 ingress_tc_count,
- u16 egress_tc_count)
-{
- struct devlink_sb *devlink_sb;
-
- lockdep_assert_held(&devlink->lock);
-
- if (devlink_sb_index_exists(devlink, sb_index))
- return -EEXIST;
-
- devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL);
- if (!devlink_sb)
- return -ENOMEM;
- devlink_sb->index = sb_index;
- devlink_sb->size = size;
- devlink_sb->ingress_pools_count = ingress_pools_count;
- devlink_sb->egress_pools_count = egress_pools_count;
- devlink_sb->ingress_tc_count = ingress_tc_count;
- devlink_sb->egress_tc_count = egress_tc_count;
- list_add_tail(&devlink_sb->list, &devlink->sb_list);
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_sb_register);
-
-int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
- u32 size, u16 ingress_pools_count,
- u16 egress_pools_count, u16 ingress_tc_count,
- u16 egress_tc_count)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_sb_register(devlink, sb_index, size, ingress_pools_count,
- egress_pools_count, ingress_tc_count,
- egress_tc_count);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_sb_register);
-
-void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index)
-{
- struct devlink_sb *devlink_sb;
-
- lockdep_assert_held(&devlink->lock);
-
- devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
- WARN_ON(!devlink_sb);
- list_del(&devlink_sb->list);
- kfree(devlink_sb);
-}
-EXPORT_SYMBOL_GPL(devl_sb_unregister);
-
-void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
-{
- devl_lock(devlink);
- devl_sb_unregister(devlink, sb_index);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_sb_unregister);
-
-/**
- * devl_dpipe_headers_register - register dpipe headers
- *
- * @devlink: devlink
- * @dpipe_headers: dpipe header array
- *
- * Register the headers supported by hardware.
- */
-void devl_dpipe_headers_register(struct devlink *devlink,
- struct devlink_dpipe_headers *dpipe_headers)
-{
- lockdep_assert_held(&devlink->lock);
-
- devlink->dpipe_headers = dpipe_headers;
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_headers_register);
-
-/**
- * devl_dpipe_headers_unregister - unregister dpipe headers
- *
- * @devlink: devlink
- *
- * Unregister the headers supported by hardware.
- */
-void devl_dpipe_headers_unregister(struct devlink *devlink)
-{
- lockdep_assert_held(&devlink->lock);
-
- devlink->dpipe_headers = NULL;
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_headers_unregister);
-
-/**
- * devlink_dpipe_table_counter_enabled - check if counter allocation
- * required
- * @devlink: devlink
- * @table_name: tables name
- *
- * Used by driver to check if counter allocation is required.
- * After counter allocation is turned on the table entries
- * are updated to include counter statistics.
- *
- * After that point on the driver must respect the counter
- * state so that each entry added to the table is added
- * with a counter.
- */
-bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
- const char *table_name)
-{
- struct devlink_dpipe_table *table;
- bool enabled;
-
- rcu_read_lock();
- table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name, devlink);
- enabled = false;
- if (table)
- enabled = table->counters_enabled;
- rcu_read_unlock();
- return enabled;
-}
-EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
-
-/**
- * devl_dpipe_table_register - register dpipe table
- *
- * @devlink: devlink
- * @table_name: table name
- * @table_ops: table ops
- * @priv: priv
- * @counter_control_extern: external control for counters
- */
-int devl_dpipe_table_register(struct devlink *devlink,
- const char *table_name,
- struct devlink_dpipe_table_ops *table_ops,
- void *priv, bool counter_control_extern)
-{
- struct devlink_dpipe_table *table;
-
- lockdep_assert_held(&devlink->lock);
-
- if (WARN_ON(!table_ops->size_get))
- return -EINVAL;
-
- if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
- devlink))
- return -EEXIST;
-
- table = kzalloc(sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
-
- table->name = table_name;
- table->table_ops = table_ops;
- table->priv = priv;
- table->counter_control_extern = counter_control_extern;
-
- list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_table_register);
-
-/**
- * devl_dpipe_table_unregister - unregister dpipe table
- *
- * @devlink: devlink
- * @table_name: table name
- */
-void devl_dpipe_table_unregister(struct devlink *devlink,
- const char *table_name)
-{
- struct devlink_dpipe_table *table;
-
- lockdep_assert_held(&devlink->lock);
-
- table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name, devlink);
- if (!table)
- return;
- list_del_rcu(&table->list);
- kfree_rcu(table, rcu);
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_table_unregister);
-
-/**
- * devl_resource_register - devlink resource register
- *
- * @devlink: devlink
- * @resource_name: resource's name
- * @resource_size: resource's size
- * @resource_id: resource's id
- * @parent_resource_id: resource's parent id
- * @size_params: size parameters
- *
- * Generic resources should reuse the same names across drivers.
- * Please see the generic resources list at:
- * Documentation/networking/devlink/devlink-resource.rst
- */
-int devl_resource_register(struct devlink *devlink,
- const char *resource_name,
- u64 resource_size,
- u64 resource_id,
- u64 parent_resource_id,
- const struct devlink_resource_size_params *size_params)
-{
- struct devlink_resource *resource;
- struct list_head *resource_list;
- bool top_hierarchy;
-
- lockdep_assert_held(&devlink->lock);
-
- top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
-
- resource = devlink_resource_find(devlink, NULL, resource_id);
- if (resource)
- return -EINVAL;
-
- resource = kzalloc(sizeof(*resource), GFP_KERNEL);
- if (!resource)
- return -ENOMEM;
-
- if (top_hierarchy) {
- resource_list = &devlink->resource_list;
- } else {
- struct devlink_resource *parent_resource;
-
- parent_resource = devlink_resource_find(devlink, NULL,
- parent_resource_id);
- if (parent_resource) {
- resource_list = &parent_resource->resource_list;
- resource->parent = parent_resource;
- } else {
- kfree(resource);
- return -EINVAL;
- }
- }
-
- resource->name = resource_name;
- resource->size = resource_size;
- resource->size_new = resource_size;
- resource->id = resource_id;
- resource->size_valid = true;
- memcpy(&resource->size_params, size_params,
- sizeof(resource->size_params));
- INIT_LIST_HEAD(&resource->resource_list);
- list_add_tail(&resource->list, resource_list);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_resource_register);
-
-/**
- * devlink_resource_register - devlink resource register
- *
- * @devlink: devlink
- * @resource_name: resource's name
- * @resource_size: resource's size
- * @resource_id: resource's id
- * @parent_resource_id: resource's parent id
- * @size_params: size parameters
- *
- * Generic resources should reuse the same names across drivers.
- * Please see the generic resources list at:
- * Documentation/networking/devlink/devlink-resource.rst
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-int devlink_resource_register(struct devlink *devlink,
- const char *resource_name,
- u64 resource_size,
- u64 resource_id,
- u64 parent_resource_id,
- const struct devlink_resource_size_params *size_params)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_resource_register(devlink, resource_name, resource_size,
- resource_id, parent_resource_id, size_params);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_resource_register);
-
-static void devlink_resource_unregister(struct devlink *devlink,
- struct devlink_resource *resource)
-{
- struct devlink_resource *tmp, *child_resource;
-
- list_for_each_entry_safe(child_resource, tmp, &resource->resource_list,
- list) {
- devlink_resource_unregister(devlink, child_resource);
- list_del(&child_resource->list);
- kfree(child_resource);
- }
-}
-
-/**
- * devl_resources_unregister - free all resources
- *
- * @devlink: devlink
- */
-void devl_resources_unregister(struct devlink *devlink)
-{
- struct devlink_resource *tmp, *child_resource;
-
- lockdep_assert_held(&devlink->lock);
-
- list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list,
- list) {
- devlink_resource_unregister(devlink, child_resource);
- list_del(&child_resource->list);
- kfree(child_resource);
- }
-}
-EXPORT_SYMBOL_GPL(devl_resources_unregister);
-
-/**
- * devlink_resources_unregister - free all resources
- *
- * @devlink: devlink
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_resources_unregister(struct devlink *devlink)
-{
- devl_lock(devlink);
- devl_resources_unregister(devlink);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resources_unregister);
-
-/**
- * devl_resource_size_get - get and update size
- *
- * @devlink: devlink
- * @resource_id: the requested resource id
- * @p_resource_size: ptr to update
- */
-int devl_resource_size_get(struct devlink *devlink,
- u64 resource_id,
- u64 *p_resource_size)
-{
- struct devlink_resource *resource;
-
- lockdep_assert_held(&devlink->lock);
-
- resource = devlink_resource_find(devlink, NULL, resource_id);
- if (!resource)
- return -EINVAL;
- *p_resource_size = resource->size_new;
- resource->size = resource->size_new;
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_resource_size_get);
-
-/**
- * devl_dpipe_table_resource_set - set the resource id
- *
- * @devlink: devlink
- * @table_name: table name
- * @resource_id: resource id
- * @resource_units: number of resource's units consumed per table's entry
- */
-int devl_dpipe_table_resource_set(struct devlink *devlink,
- const char *table_name, u64 resource_id,
- u64 resource_units)
-{
- struct devlink_dpipe_table *table;
-
- table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name, devlink);
- if (!table)
- return -EINVAL;
-
- table->resource_id = resource_id;
- table->resource_units = resource_units;
- table->resource_valid = true;
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_dpipe_table_resource_set);
-
-/**
- * devl_resource_occ_get_register - register occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- * @occ_get: occupancy getter callback
- * @occ_get_priv: occupancy getter callback priv
- */
-void devl_resource_occ_get_register(struct devlink *devlink,
- u64 resource_id,
- devlink_resource_occ_get_t *occ_get,
- void *occ_get_priv)
-{
- struct devlink_resource *resource;
-
- lockdep_assert_held(&devlink->lock);
-
- resource = devlink_resource_find(devlink, NULL, resource_id);
- if (WARN_ON(!resource))
- return;
- WARN_ON(resource->occ_get);
-
- resource->occ_get = occ_get;
- resource->occ_get_priv = occ_get_priv;
-}
-EXPORT_SYMBOL_GPL(devl_resource_occ_get_register);
-
-/**
- * devlink_resource_occ_get_register - register occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- * @occ_get: occupancy getter callback
- * @occ_get_priv: occupancy getter callback priv
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_resource_occ_get_register(struct devlink *devlink,
- u64 resource_id,
- devlink_resource_occ_get_t *occ_get,
- void *occ_get_priv)
-{
- devl_lock(devlink);
- devl_resource_occ_get_register(devlink, resource_id,
- occ_get, occ_get_priv);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
-
-/**
- * devl_resource_occ_get_unregister - unregister occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- */
-void devl_resource_occ_get_unregister(struct devlink *devlink,
- u64 resource_id)
-{
- struct devlink_resource *resource;
-
- lockdep_assert_held(&devlink->lock);
-
- resource = devlink_resource_find(devlink, NULL, resource_id);
- if (WARN_ON(!resource))
- return;
- WARN_ON(!resource->occ_get);
-
- resource->occ_get = NULL;
- resource->occ_get_priv = NULL;
-}
-EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister);
-
-/**
- * devlink_resource_occ_get_unregister - unregister occupancy getter
- *
- * @devlink: devlink
- * @resource_id: resource id
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_resource_occ_get_unregister(struct devlink *devlink,
- u64 resource_id)
-{
- devl_lock(devlink);
- devl_resource_occ_get_unregister(devlink, resource_id);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
-
-static int devlink_param_verify(const struct devlink_param *param)
-{
- if (!param || !param->name || !param->supported_cmodes)
- return -EINVAL;
- if (param->generic)
- return devlink_param_generic_verify(param);
- else
- return devlink_param_driver_verify(param);
-}
-
-static int devlink_param_register(struct devlink *devlink,
- const struct devlink_param *param)
-{
- struct devlink_param_item *param_item;
- int err;
-
- WARN_ON(devlink_param_verify(param));
- WARN_ON(devlink_param_find_by_name(&devlink->params, param->name));
-
- if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
- WARN_ON(param->get || param->set);
- else
- WARN_ON(!param->get || !param->set);
-
- param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
- if (!param_item)
- return -ENOMEM;
-
- param_item->param = param;
-
- err = xa_insert(&devlink->params, param->id, param_item, GFP_KERNEL);
- if (err)
- goto err_xa_insert;
-
- devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
- return 0;
-
-err_xa_insert:
- kfree(param_item);
- return err;
-}
-
-static void devlink_param_unregister(struct devlink *devlink,
- const struct devlink_param *param)
-{
- struct devlink_param_item *param_item;
-
- param_item = devlink_param_find_by_id(&devlink->params, param->id);
- if (WARN_ON(!param_item))
- return;
- devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL);
- xa_erase(&devlink->params, param->id);
- kfree(param_item);
-}
-
-/**
- * devl_params_register - register configuration parameters
- *
- * @devlink: devlink
- * @params: configuration parameters array
- * @params_count: number of parameters provided
- *
- * Register the configuration parameters supported by the driver.
- */
-int devl_params_register(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
-{
- const struct devlink_param *param = params;
- int i, err;
-
- lockdep_assert_held(&devlink->lock);
-
- for (i = 0; i < params_count; i++, param++) {
- err = devlink_param_register(devlink, param);
- if (err)
- goto rollback;
- }
- return 0;
-
-rollback:
- if (!i)
- return err;
-
- for (param--; i > 0; i--, param--)
- devlink_param_unregister(devlink, param);
- return err;
-}
-EXPORT_SYMBOL_GPL(devl_params_register);
-
-int devlink_params_register(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_params_register(devlink, params, params_count);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_params_register);
-
-/**
- * devl_params_unregister - unregister configuration parameters
- * @devlink: devlink
- * @params: configuration parameters to unregister
- * @params_count: number of parameters provided
- */
-void devl_params_unregister(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
-{
- const struct devlink_param *param = params;
- int i;
-
- lockdep_assert_held(&devlink->lock);
-
- for (i = 0; i < params_count; i++, param++)
- devlink_param_unregister(devlink, param);
-}
-EXPORT_SYMBOL_GPL(devl_params_unregister);
-
-void devlink_params_unregister(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
-{
- devl_lock(devlink);
- devl_params_unregister(devlink, params, params_count);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_params_unregister);
-
-/**
- * devl_param_driverinit_value_get - get configuration parameter
- * value for driver initializing
- *
- * @devlink: devlink
- * @param_id: parameter ID
- * @val: pointer to store the value of parameter in driverinit
- * configuration mode
- *
- * This function should be used by the driver to get driverinit
- * configuration for initialization after reload command.
- *
- * Note that lockless call of this function relies on the
- * driver to maintain following basic sane behavior:
- * 1) Driver ensures a call to this function cannot race with
- * registering/unregistering the parameter with the same parameter ID.
- * 2) Driver ensures a call to this function cannot race with
- * devl_param_driverinit_value_set() call with the same parameter ID.
- * 3) Driver ensures a call to this function cannot race with
- * reload operation.
- * If the driver is not able to comply, it has to take the devlink->lock
- * while calling this.
- */
-int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
- union devlink_param_value *val)
-{
- struct devlink_param_item *param_item;
-
- if (WARN_ON(!devlink_reload_supported(devlink->ops)))
- return -EOPNOTSUPP;
-
- param_item = devlink_param_find_by_id(&devlink->params, param_id);
- if (!param_item)
- return -EINVAL;
-
- if (!param_item->driverinit_value_valid)
- return -EOPNOTSUPP;
-
- if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT)))
- return -EOPNOTSUPP;
-
- *val = param_item->driverinit_value;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devl_param_driverinit_value_get);
-
-/**
- * devl_param_driverinit_value_set - set value of configuration
- * parameter for driverinit
- * configuration mode
- *
- * @devlink: devlink
- * @param_id: parameter ID
- * @init_val: value of parameter to set for driverinit configuration mode
- *
- * This function should be used by the driver to set driverinit
- * configuration mode default value.
- */
-void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
- union devlink_param_value init_val)
-{
- struct devlink_param_item *param_item;
-
- devl_assert_locked(devlink);
-
- param_item = devlink_param_find_by_id(&devlink->params, param_id);
- if (WARN_ON(!param_item))
- return;
-
- if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT)))
- return;
-
- param_item->driverinit_value = init_val;
- param_item->driverinit_value_valid = true;
-
- devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
-}
-EXPORT_SYMBOL_GPL(devl_param_driverinit_value_set);
-
-void devlink_params_driverinit_load_new(struct devlink *devlink)
-{
- struct devlink_param_item *param_item;
- unsigned long param_id;
-
- xa_for_each(&devlink->params, param_id, param_item) {
- if (!devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT) ||
- !param_item->driverinit_value_new_valid)
- continue;
- param_item->driverinit_value = param_item->driverinit_value_new;
- param_item->driverinit_value_valid = true;
- param_item->driverinit_value_new_valid = false;
- }
-}
-
-/**
- * devl_param_value_changed - notify devlink on a parameter's value
- * change. Should be called by the driver
- * right after the change.
- *
- * @devlink: devlink
- * @param_id: parameter ID
- *
- * This function should be used by the driver to notify devlink on value
- * change, excluding driverinit configuration mode.
- * For driverinit configuration mode driver should use the function
- */
-void devl_param_value_changed(struct devlink *devlink, u32 param_id)
-{
- struct devlink_param_item *param_item;
-
- param_item = devlink_param_find_by_id(&devlink->params, param_id);
- WARN_ON(!param_item);
-
- devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
-}
-EXPORT_SYMBOL_GPL(devl_param_value_changed);
-
-/**
- * devl_region_create - create a new address region
- *
- * @devlink: devlink
- * @ops: region operations and name
- * @region_max_snapshots: Maximum supported number of snapshots for region
- * @region_size: size of region
- */
-struct devlink_region *devl_region_create(struct devlink *devlink,
- const struct devlink_region_ops *ops,
- u32 region_max_snapshots,
- u64 region_size)
-{
- struct devlink_region *region;
-
- devl_assert_locked(devlink);
-
- if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
- return ERR_PTR(-EINVAL);
-
- if (devlink_region_get_by_name(devlink, ops->name))
- return ERR_PTR(-EEXIST);
-
- region = kzalloc(sizeof(*region), GFP_KERNEL);
- if (!region)
- return ERR_PTR(-ENOMEM);
-
- region->devlink = devlink;
- region->max_snapshots = region_max_snapshots;
- region->ops = ops;
- region->size = region_size;
- INIT_LIST_HEAD(&region->snapshot_list);
- mutex_init(&region->snapshot_lock);
- list_add_tail(&region->list, &devlink->region_list);
- devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
-
- return region;
-}
-EXPORT_SYMBOL_GPL(devl_region_create);
-
-/**
- * devlink_region_create - create a new address region
- *
- * @devlink: devlink
- * @ops: region operations and name
- * @region_max_snapshots: Maximum supported number of snapshots for region
- * @region_size: size of region
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-struct devlink_region *
-devlink_region_create(struct devlink *devlink,
- const struct devlink_region_ops *ops,
- u32 region_max_snapshots, u64 region_size)
-{
- struct devlink_region *region;
-
- devl_lock(devlink);
- region = devl_region_create(devlink, ops, region_max_snapshots,
- region_size);
- devl_unlock(devlink);
- return region;
-}
-EXPORT_SYMBOL_GPL(devlink_region_create);
-
-/**
- * devlink_port_region_create - create a new address region for a port
- *
- * @port: devlink port
- * @ops: region operations and name
- * @region_max_snapshots: Maximum supported number of snapshots for region
- * @region_size: size of region
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-struct devlink_region *
-devlink_port_region_create(struct devlink_port *port,
- const struct devlink_port_region_ops *ops,
- u32 region_max_snapshots, u64 region_size)
-{
- struct devlink *devlink = port->devlink;
- struct devlink_region *region;
- int err = 0;
-
- ASSERT_DEVLINK_PORT_INITIALIZED(port);
-
- if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
- return ERR_PTR(-EINVAL);
-
- devl_lock(devlink);
-
- if (devlink_port_region_get_by_name(port, ops->name)) {
- err = -EEXIST;
- goto unlock;
- }
-
- region = kzalloc(sizeof(*region), GFP_KERNEL);
- if (!region) {
- err = -ENOMEM;
- goto unlock;
- }
-
- region->devlink = devlink;
- region->port = port;
- region->max_snapshots = region_max_snapshots;
- region->port_ops = ops;
- region->size = region_size;
- INIT_LIST_HEAD(&region->snapshot_list);
- mutex_init(&region->snapshot_lock);
- list_add_tail(&region->list, &port->region_list);
- devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
-
- devl_unlock(devlink);
- return region;
-
-unlock:
- devl_unlock(devlink);
- return ERR_PTR(err);
-}
-EXPORT_SYMBOL_GPL(devlink_port_region_create);
-
-/**
- * devl_region_destroy - destroy address region
- *
- * @region: devlink region to destroy
- */
-void devl_region_destroy(struct devlink_region *region)
-{
- struct devlink *devlink = region->devlink;
- struct devlink_snapshot *snapshot, *ts;
-
- devl_assert_locked(devlink);
-
- /* Free all snapshots of region */
- mutex_lock(&region->snapshot_lock);
- list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
- devlink_region_snapshot_del(region, snapshot);
- mutex_unlock(&region->snapshot_lock);
-
- list_del(&region->list);
- mutex_destroy(&region->snapshot_lock);
-
- devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
- kfree(region);
-}
-EXPORT_SYMBOL_GPL(devl_region_destroy);
-
-/**
- * devlink_region_destroy - destroy address region
- *
- * @region: devlink region to destroy
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_region_destroy(struct devlink_region *region)
-{
- struct devlink *devlink = region->devlink;
-
- devl_lock(devlink);
- devl_region_destroy(region);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_region_destroy);
-
-/**
- * devlink_region_snapshot_id_get - get snapshot ID
- *
- * This callback should be called when adding a new snapshot,
- * Driver should use the same id for multiple snapshots taken
- * on multiple regions at the same time/by the same trigger.
- *
- * The caller of this function must use devlink_region_snapshot_id_put
- * when finished creating regions using this id.
- *
- * Returns zero on success, or a negative error code on failure.
- *
- * @devlink: devlink
- * @id: storage to return id
- */
-int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
-{
- return __devlink_region_snapshot_id_get(devlink, id);
-}
-EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get);
-
-/**
- * devlink_region_snapshot_id_put - put snapshot ID reference
- *
- * This should be called by a driver after finishing creating snapshots
- * with an id. Doing so ensures that the ID can later be released in the
- * event that all snapshots using it have been destroyed.
- *
- * @devlink: devlink
- * @id: id to release reference on
- */
-void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id)
-{
- __devlink_snapshot_id_decrement(devlink, id);
-}
-EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put);
-
-/**
- * devlink_region_snapshot_create - create a new snapshot
- * This will add a new snapshot of a region. The snapshot
- * will be stored on the region struct and can be accessed
- * from devlink. This is useful for future analyses of snapshots.
- * Multiple snapshots can be created on a region.
- * The @snapshot_id should be obtained using the getter function.
- *
- * @region: devlink region of the snapshot
- * @data: snapshot data
- * @snapshot_id: snapshot id to be created
- */
-int devlink_region_snapshot_create(struct devlink_region *region,
- u8 *data, u32 snapshot_id)
-{
- int err;
-
- mutex_lock(&region->snapshot_lock);
- err = __devlink_region_snapshot_create(region, data, snapshot_id);
- mutex_unlock(&region->snapshot_lock);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
-
-#define DEVLINK_TRAP(_id, _type) \
- { \
- .type = DEVLINK_TRAP_TYPE_##_type, \
- .id = DEVLINK_TRAP_GENERIC_ID_##_id, \
- .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \
- }
-
-static const struct devlink_trap devlink_trap_generic[] = {
- DEVLINK_TRAP(SMAC_MC, DROP),
- DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP),
- DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP),
- DEVLINK_TRAP(INGRESS_STP_FILTER, DROP),
- DEVLINK_TRAP(EMPTY_TX_LIST, DROP),
- DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP),
- DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP),
- DEVLINK_TRAP(TTL_ERROR, EXCEPTION),
- DEVLINK_TRAP(TAIL_DROP, DROP),
- DEVLINK_TRAP(NON_IP_PACKET, DROP),
- DEVLINK_TRAP(UC_DIP_MC_DMAC, DROP),
- DEVLINK_TRAP(DIP_LB, DROP),
- DEVLINK_TRAP(SIP_MC, DROP),
- DEVLINK_TRAP(SIP_LB, DROP),
- DEVLINK_TRAP(CORRUPTED_IP_HDR, DROP),
- DEVLINK_TRAP(IPV4_SIP_BC, DROP),
- DEVLINK_TRAP(IPV6_MC_DIP_RESERVED_SCOPE, DROP),
- DEVLINK_TRAP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, DROP),
- DEVLINK_TRAP(MTU_ERROR, EXCEPTION),
- DEVLINK_TRAP(UNRESOLVED_NEIGH, EXCEPTION),
- DEVLINK_TRAP(RPF, EXCEPTION),
- DEVLINK_TRAP(REJECT_ROUTE, EXCEPTION),
- DEVLINK_TRAP(IPV4_LPM_UNICAST_MISS, EXCEPTION),
- DEVLINK_TRAP(IPV6_LPM_UNICAST_MISS, EXCEPTION),
- DEVLINK_TRAP(NON_ROUTABLE, DROP),
- DEVLINK_TRAP(DECAP_ERROR, EXCEPTION),
- DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP),
- DEVLINK_TRAP(INGRESS_FLOW_ACTION_DROP, DROP),
- DEVLINK_TRAP(EGRESS_FLOW_ACTION_DROP, DROP),
- DEVLINK_TRAP(STP, CONTROL),
- DEVLINK_TRAP(LACP, CONTROL),
- DEVLINK_TRAP(LLDP, CONTROL),
- DEVLINK_TRAP(IGMP_QUERY, CONTROL),
- DEVLINK_TRAP(IGMP_V1_REPORT, CONTROL),
- DEVLINK_TRAP(IGMP_V2_REPORT, CONTROL),
- DEVLINK_TRAP(IGMP_V3_REPORT, CONTROL),
- DEVLINK_TRAP(IGMP_V2_LEAVE, CONTROL),
- DEVLINK_TRAP(MLD_QUERY, CONTROL),
- DEVLINK_TRAP(MLD_V1_REPORT, CONTROL),
- DEVLINK_TRAP(MLD_V2_REPORT, CONTROL),
- DEVLINK_TRAP(MLD_V1_DONE, CONTROL),
- DEVLINK_TRAP(IPV4_DHCP, CONTROL),
- DEVLINK_TRAP(IPV6_DHCP, CONTROL),
- DEVLINK_TRAP(ARP_REQUEST, CONTROL),
- DEVLINK_TRAP(ARP_RESPONSE, CONTROL),
- DEVLINK_TRAP(ARP_OVERLAY, CONTROL),
- DEVLINK_TRAP(IPV6_NEIGH_SOLICIT, CONTROL),
- DEVLINK_TRAP(IPV6_NEIGH_ADVERT, CONTROL),
- DEVLINK_TRAP(IPV4_BFD, CONTROL),
- DEVLINK_TRAP(IPV6_BFD, CONTROL),
- DEVLINK_TRAP(IPV4_OSPF, CONTROL),
- DEVLINK_TRAP(IPV6_OSPF, CONTROL),
- DEVLINK_TRAP(IPV4_BGP, CONTROL),
- DEVLINK_TRAP(IPV6_BGP, CONTROL),
- DEVLINK_TRAP(IPV4_VRRP, CONTROL),
- DEVLINK_TRAP(IPV6_VRRP, CONTROL),
- DEVLINK_TRAP(IPV4_PIM, CONTROL),
- DEVLINK_TRAP(IPV6_PIM, CONTROL),
- DEVLINK_TRAP(UC_LB, CONTROL),
- DEVLINK_TRAP(LOCAL_ROUTE, CONTROL),
- DEVLINK_TRAP(EXTERNAL_ROUTE, CONTROL),
- DEVLINK_TRAP(IPV6_UC_DIP_LINK_LOCAL_SCOPE, CONTROL),
- DEVLINK_TRAP(IPV6_DIP_ALL_NODES, CONTROL),
- DEVLINK_TRAP(IPV6_DIP_ALL_ROUTERS, CONTROL),
- DEVLINK_TRAP(IPV6_ROUTER_SOLICIT, CONTROL),
- DEVLINK_TRAP(IPV6_ROUTER_ADVERT, CONTROL),
- DEVLINK_TRAP(IPV6_REDIRECT, CONTROL),
- DEVLINK_TRAP(IPV4_ROUTER_ALERT, CONTROL),
- DEVLINK_TRAP(IPV6_ROUTER_ALERT, CONTROL),
- DEVLINK_TRAP(PTP_EVENT, CONTROL),
- DEVLINK_TRAP(PTP_GENERAL, CONTROL),
- DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL),
- DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL),
- DEVLINK_TRAP(EARLY_DROP, DROP),
- DEVLINK_TRAP(VXLAN_PARSING, DROP),
- DEVLINK_TRAP(LLC_SNAP_PARSING, DROP),
- DEVLINK_TRAP(VLAN_PARSING, DROP),
- DEVLINK_TRAP(PPPOE_PPP_PARSING, DROP),
- DEVLINK_TRAP(MPLS_PARSING, DROP),
- DEVLINK_TRAP(ARP_PARSING, DROP),
- DEVLINK_TRAP(IP_1_PARSING, DROP),
- DEVLINK_TRAP(IP_N_PARSING, DROP),
- DEVLINK_TRAP(GRE_PARSING, DROP),
- DEVLINK_TRAP(UDP_PARSING, DROP),
- DEVLINK_TRAP(TCP_PARSING, DROP),
- DEVLINK_TRAP(IPSEC_PARSING, DROP),
- DEVLINK_TRAP(SCTP_PARSING, DROP),
- DEVLINK_TRAP(DCCP_PARSING, DROP),
- DEVLINK_TRAP(GTP_PARSING, DROP),
- DEVLINK_TRAP(ESP_PARSING, DROP),
- DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP),
- DEVLINK_TRAP(DMAC_FILTER, DROP),
- DEVLINK_TRAP(EAPOL, CONTROL),
- DEVLINK_TRAP(LOCKED_PORT, DROP),
-};
-
-#define DEVLINK_TRAP_GROUP(_id) \
- { \
- .id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \
- .name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \
- }
-
-static const struct devlink_trap_group devlink_trap_group_generic[] = {
- DEVLINK_TRAP_GROUP(L2_DROPS),
- DEVLINK_TRAP_GROUP(L3_DROPS),
- DEVLINK_TRAP_GROUP(L3_EXCEPTIONS),
- DEVLINK_TRAP_GROUP(BUFFER_DROPS),
- DEVLINK_TRAP_GROUP(TUNNEL_DROPS),
- DEVLINK_TRAP_GROUP(ACL_DROPS),
- DEVLINK_TRAP_GROUP(STP),
- DEVLINK_TRAP_GROUP(LACP),
- DEVLINK_TRAP_GROUP(LLDP),
- DEVLINK_TRAP_GROUP(MC_SNOOPING),
- DEVLINK_TRAP_GROUP(DHCP),
- DEVLINK_TRAP_GROUP(NEIGH_DISCOVERY),
- DEVLINK_TRAP_GROUP(BFD),
- DEVLINK_TRAP_GROUP(OSPF),
- DEVLINK_TRAP_GROUP(BGP),
- DEVLINK_TRAP_GROUP(VRRP),
- DEVLINK_TRAP_GROUP(PIM),
- DEVLINK_TRAP_GROUP(UC_LB),
- DEVLINK_TRAP_GROUP(LOCAL_DELIVERY),
- DEVLINK_TRAP_GROUP(EXTERNAL_DELIVERY),
- DEVLINK_TRAP_GROUP(IPV6),
- DEVLINK_TRAP_GROUP(PTP_EVENT),
- DEVLINK_TRAP_GROUP(PTP_GENERAL),
- DEVLINK_TRAP_GROUP(ACL_SAMPLE),
- DEVLINK_TRAP_GROUP(ACL_TRAP),
- DEVLINK_TRAP_GROUP(PARSER_ERROR_DROPS),
- DEVLINK_TRAP_GROUP(EAPOL),
-};
-
-static int devlink_trap_generic_verify(const struct devlink_trap *trap)
-{
- if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX)
- return -EINVAL;
-
- if (strcmp(trap->name, devlink_trap_generic[trap->id].name))
- return -EINVAL;
-
- if (trap->type != devlink_trap_generic[trap->id].type)
- return -EINVAL;
-
- return 0;
-}
-
-static int devlink_trap_driver_verify(const struct devlink_trap *trap)
-{
- int i;
-
- if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) {
- if (!strcmp(trap->name, devlink_trap_generic[i].name))
- return -EEXIST;
- }
-
- return 0;
-}
-
-static int devlink_trap_verify(const struct devlink_trap *trap)
-{
- if (!trap || !trap->name)
- return -EINVAL;
-
- if (trap->generic)
- return devlink_trap_generic_verify(trap);
- else
- return devlink_trap_driver_verify(trap);
-}
-
-static int
-devlink_trap_group_generic_verify(const struct devlink_trap_group *group)
-{
- if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
- return -EINVAL;
-
- if (strcmp(group->name, devlink_trap_group_generic[group->id].name))
- return -EINVAL;
-
- return 0;
-}
-
-static int
-devlink_trap_group_driver_verify(const struct devlink_trap_group *group)
-{
- int i;
-
- if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) {
- if (!strcmp(group->name, devlink_trap_group_generic[i].name))
- return -EEXIST;
- }
-
- return 0;
-}
-
-static int devlink_trap_group_verify(const struct devlink_trap_group *group)
-{
- if (group->generic)
- return devlink_trap_group_generic_verify(group);
- else
- return devlink_trap_group_driver_verify(group);
-}
-
-static void
-devlink_trap_group_notify(struct devlink *devlink,
- const struct devlink_trap_group_item *group_item,
- enum devlink_command cmd)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
- cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0,
- 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static int
-devlink_trap_item_group_link(struct devlink *devlink,
- struct devlink_trap_item *trap_item)
-{
- u16 group_id = trap_item->trap->init_group_id;
- struct devlink_trap_group_item *group_item;
-
- group_item = devlink_trap_group_item_lookup_by_id(devlink, group_id);
- if (WARN_ON_ONCE(!group_item))
- return -EINVAL;
-
- trap_item->group_item = group_item;
-
- return 0;
-}
-
-static void devlink_trap_notify(struct devlink *devlink,
- const struct devlink_trap_item *trap_item,
- enum devlink_command cmd)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
- cmd != DEVLINK_CMD_TRAP_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static int
-devlink_trap_register(struct devlink *devlink,
- const struct devlink_trap *trap, void *priv)
-{
- struct devlink_trap_item *trap_item;
- int err;
-
- if (devlink_trap_item_lookup(devlink, trap->name))
- return -EEXIST;
-
- trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL);
- if (!trap_item)
- return -ENOMEM;
-
- trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
- if (!trap_item->stats) {
- err = -ENOMEM;
- goto err_stats_alloc;
- }
-
- trap_item->trap = trap;
- trap_item->action = trap->init_action;
- trap_item->priv = priv;
-
- err = devlink_trap_item_group_link(devlink, trap_item);
- if (err)
- goto err_group_link;
-
- err = devlink->ops->trap_init(devlink, trap, trap_item);
- if (err)
- goto err_trap_init;
-
- list_add_tail(&trap_item->list, &devlink->trap_list);
- devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
-
- return 0;
-
-err_trap_init:
-err_group_link:
- free_percpu(trap_item->stats);
-err_stats_alloc:
- kfree(trap_item);
- return err;
-}
-
-static void devlink_trap_unregister(struct devlink *devlink,
- const struct devlink_trap *trap)
-{
- struct devlink_trap_item *trap_item;
-
- trap_item = devlink_trap_item_lookup(devlink, trap->name);
- if (WARN_ON_ONCE(!trap_item))
- return;
-
- devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
- list_del(&trap_item->list);
- if (devlink->ops->trap_fini)
- devlink->ops->trap_fini(devlink, trap, trap_item);
- free_percpu(trap_item->stats);
- kfree(trap_item);
-}
-
-static void devlink_trap_disable(struct devlink *devlink,
- const struct devlink_trap *trap)
-{
- struct devlink_trap_item *trap_item;
-
- trap_item = devlink_trap_item_lookup(devlink, trap->name);
- if (WARN_ON_ONCE(!trap_item))
- return;
-
- devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP,
- NULL);
- trap_item->action = DEVLINK_TRAP_ACTION_DROP;
-}
-
-/**
- * devl_traps_register - Register packet traps with devlink.
- * @devlink: devlink.
- * @traps: Packet traps.
- * @traps_count: Count of provided packet traps.
- * @priv: Driver private information.
- *
- * Return: Non-zero value on failure.
- */
-int devl_traps_register(struct devlink *devlink,
- const struct devlink_trap *traps,
- size_t traps_count, void *priv)
-{
- int i, err;
-
- if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
- return -EINVAL;
-
- devl_assert_locked(devlink);
- for (i = 0; i < traps_count; i++) {
- const struct devlink_trap *trap = &traps[i];
-
- err = devlink_trap_verify(trap);
- if (err)
- goto err_trap_verify;
-
- err = devlink_trap_register(devlink, trap, priv);
- if (err)
- goto err_trap_register;
- }
-
- return 0;
-
-err_trap_register:
-err_trap_verify:
- for (i--; i >= 0; i--)
- devlink_trap_unregister(devlink, &traps[i]);
- return err;
-}
-EXPORT_SYMBOL_GPL(devl_traps_register);
-
-/**
- * devlink_traps_register - Register packet traps with devlink.
- * @devlink: devlink.
- * @traps: Packet traps.
- * @traps_count: Count of provided packet traps.
- * @priv: Driver private information.
- *
- * Context: Takes and release devlink->lock <mutex>.
- *
- * Return: Non-zero value on failure.
- */
-int devlink_traps_register(struct devlink *devlink,
- const struct devlink_trap *traps,
- size_t traps_count, void *priv)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_traps_register(devlink, traps, traps_count, priv);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_traps_register);
-
-/**
- * devl_traps_unregister - Unregister packet traps from devlink.
- * @devlink: devlink.
- * @traps: Packet traps.
- * @traps_count: Count of provided packet traps.
- */
-void devl_traps_unregister(struct devlink *devlink,
- const struct devlink_trap *traps,
- size_t traps_count)
-{
- int i;
-
- devl_assert_locked(devlink);
- /* Make sure we do not have any packets in-flight while unregistering
- * traps by disabling all of them and waiting for a grace period.
- */
- for (i = traps_count - 1; i >= 0; i--)
- devlink_trap_disable(devlink, &traps[i]);
- synchronize_rcu();
- for (i = traps_count - 1; i >= 0; i--)
- devlink_trap_unregister(devlink, &traps[i]);
-}
-EXPORT_SYMBOL_GPL(devl_traps_unregister);
-
-/**
- * devlink_traps_unregister - Unregister packet traps from devlink.
- * @devlink: devlink.
- * @traps: Packet traps.
- * @traps_count: Count of provided packet traps.
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_traps_unregister(struct devlink *devlink,
- const struct devlink_trap *traps,
- size_t traps_count)
-{
- devl_lock(devlink);
- devl_traps_unregister(devlink, traps, traps_count);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_traps_unregister);
-
-static void
-devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
- size_t skb_len)
-{
- struct devlink_stats *stats;
-
- stats = this_cpu_ptr(trap_stats);
- u64_stats_update_begin(&stats->syncp);
- u64_stats_add(&stats->rx_bytes, skb_len);
- u64_stats_inc(&stats->rx_packets);
- u64_stats_update_end(&stats->syncp);
-}
-
-static void
-devlink_trap_report_metadata_set(struct devlink_trap_metadata *metadata,
- const struct devlink_trap_item *trap_item,
- struct devlink_port *in_devlink_port,
- const struct flow_action_cookie *fa_cookie)
-{
- metadata->trap_name = trap_item->trap->name;
- metadata->trap_group_name = trap_item->group_item->group->name;
- metadata->fa_cookie = fa_cookie;
- metadata->trap_type = trap_item->trap->type;
-
- spin_lock(&in_devlink_port->type_lock);
- if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
- metadata->input_dev = in_devlink_port->type_eth.netdev;
- spin_unlock(&in_devlink_port->type_lock);
-}
-
-/**
- * devlink_trap_report - Report trapped packet to drop monitor.
- * @devlink: devlink.
- * @skb: Trapped packet.
- * @trap_ctx: Trap context.
- * @in_devlink_port: Input devlink port.
- * @fa_cookie: Flow action cookie. Could be NULL.
- */
-void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
- void *trap_ctx, struct devlink_port *in_devlink_port,
- const struct flow_action_cookie *fa_cookie)
-
-{
- struct devlink_trap_item *trap_item = trap_ctx;
-
- devlink_trap_stats_update(trap_item->stats, skb->len);
- devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
-
- if (trace_devlink_trap_report_enabled()) {
- struct devlink_trap_metadata metadata = {};
-
- devlink_trap_report_metadata_set(&metadata, trap_item,
- in_devlink_port, fa_cookie);
- trace_devlink_trap_report(devlink, skb, &metadata);
- }
-}
-EXPORT_SYMBOL_GPL(devlink_trap_report);
-
-/**
- * devlink_trap_ctx_priv - Trap context to driver private information.
- * @trap_ctx: Trap context.
- *
- * Return: Driver private information passed during registration.
- */
-void *devlink_trap_ctx_priv(void *trap_ctx)
-{
- struct devlink_trap_item *trap_item = trap_ctx;
-
- return trap_item->priv;
-}
-EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv);
-
-static int
-devlink_trap_group_item_policer_link(struct devlink *devlink,
- struct devlink_trap_group_item *group_item)
-{
- u32 policer_id = group_item->group->init_policer_id;
- struct devlink_trap_policer_item *policer_item;
-
- if (policer_id == 0)
- return 0;
-
- policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
- if (WARN_ON_ONCE(!policer_item))
- return -EINVAL;
-
- group_item->policer_item = policer_item;
-
- return 0;
-}
-
-static int
-devlink_trap_group_register(struct devlink *devlink,
- const struct devlink_trap_group *group)
-{
- struct devlink_trap_group_item *group_item;
- int err;
-
- if (devlink_trap_group_item_lookup(devlink, group->name))
- return -EEXIST;
-
- group_item = kzalloc(sizeof(*group_item), GFP_KERNEL);
- if (!group_item)
- return -ENOMEM;
-
- group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
- if (!group_item->stats) {
- err = -ENOMEM;
- goto err_stats_alloc;
- }
-
- group_item->group = group;
-
- err = devlink_trap_group_item_policer_link(devlink, group_item);
- if (err)
- goto err_policer_link;
-
- if (devlink->ops->trap_group_init) {
- err = devlink->ops->trap_group_init(devlink, group);
- if (err)
- goto err_group_init;
- }
-
- list_add_tail(&group_item->list, &devlink->trap_group_list);
- devlink_trap_group_notify(devlink, group_item,
- DEVLINK_CMD_TRAP_GROUP_NEW);
-
- return 0;
-
-err_group_init:
-err_policer_link:
- free_percpu(group_item->stats);
-err_stats_alloc:
- kfree(group_item);
- return err;
-}
-
-static void
-devlink_trap_group_unregister(struct devlink *devlink,
- const struct devlink_trap_group *group)
-{
- struct devlink_trap_group_item *group_item;
-
- group_item = devlink_trap_group_item_lookup(devlink, group->name);
- if (WARN_ON_ONCE(!group_item))
- return;
-
- devlink_trap_group_notify(devlink, group_item,
- DEVLINK_CMD_TRAP_GROUP_DEL);
- list_del(&group_item->list);
- free_percpu(group_item->stats);
- kfree(group_item);
-}
-
-/**
- * devl_trap_groups_register - Register packet trap groups with devlink.
- * @devlink: devlink.
- * @groups: Packet trap groups.
- * @groups_count: Count of provided packet trap groups.
- *
- * Return: Non-zero value on failure.
- */
-int devl_trap_groups_register(struct devlink *devlink,
- const struct devlink_trap_group *groups,
- size_t groups_count)
-{
- int i, err;
-
- devl_assert_locked(devlink);
- for (i = 0; i < groups_count; i++) {
- const struct devlink_trap_group *group = &groups[i];
-
- err = devlink_trap_group_verify(group);
- if (err)
- goto err_trap_group_verify;
-
- err = devlink_trap_group_register(devlink, group);
- if (err)
- goto err_trap_group_register;
- }
-
- return 0;
-
-err_trap_group_register:
-err_trap_group_verify:
- for (i--; i >= 0; i--)
- devlink_trap_group_unregister(devlink, &groups[i]);
- return err;
-}
-EXPORT_SYMBOL_GPL(devl_trap_groups_register);
-
-/**
- * devlink_trap_groups_register - Register packet trap groups with devlink.
- * @devlink: devlink.
- * @groups: Packet trap groups.
- * @groups_count: Count of provided packet trap groups.
- *
- * Context: Takes and release devlink->lock <mutex>.
- *
- * Return: Non-zero value on failure.
- */
-int devlink_trap_groups_register(struct devlink *devlink,
- const struct devlink_trap_group *groups,
- size_t groups_count)
-{
- int err;
-
- devl_lock(devlink);
- err = devl_trap_groups_register(devlink, groups, groups_count);
- devl_unlock(devlink);
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_trap_groups_register);
-
-/**
- * devl_trap_groups_unregister - Unregister packet trap groups from devlink.
- * @devlink: devlink.
- * @groups: Packet trap groups.
- * @groups_count: Count of provided packet trap groups.
- */
-void devl_trap_groups_unregister(struct devlink *devlink,
- const struct devlink_trap_group *groups,
- size_t groups_count)
-{
- int i;
-
- devl_assert_locked(devlink);
- for (i = groups_count - 1; i >= 0; i--)
- devlink_trap_group_unregister(devlink, &groups[i]);
-}
-EXPORT_SYMBOL_GPL(devl_trap_groups_unregister);
-
-/**
- * devlink_trap_groups_unregister - Unregister packet trap groups from devlink.
- * @devlink: devlink.
- * @groups: Packet trap groups.
- * @groups_count: Count of provided packet trap groups.
- *
- * Context: Takes and release devlink->lock <mutex>.
- */
-void devlink_trap_groups_unregister(struct devlink *devlink,
- const struct devlink_trap_group *groups,
- size_t groups_count)
-{
- devl_lock(devlink);
- devl_trap_groups_unregister(devlink, groups, groups_count);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister);
-
-static void
-devlink_trap_policer_notify(struct devlink *devlink,
- const struct devlink_trap_policer_item *policer_item,
- enum devlink_command cmd)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
- cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_trap_policer_fill(msg, devlink, policer_item, cmd, 0,
- 0, 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-static int
-devlink_trap_policer_register(struct devlink *devlink,
- const struct devlink_trap_policer *policer)
-{
- struct devlink_trap_policer_item *policer_item;
- int err;
-
- if (devlink_trap_policer_item_lookup(devlink, policer->id))
- return -EEXIST;
-
- policer_item = kzalloc(sizeof(*policer_item), GFP_KERNEL);
- if (!policer_item)
- return -ENOMEM;
-
- policer_item->policer = policer;
- policer_item->rate = policer->init_rate;
- policer_item->burst = policer->init_burst;
-
- if (devlink->ops->trap_policer_init) {
- err = devlink->ops->trap_policer_init(devlink, policer);
- if (err)
- goto err_policer_init;
- }
-
- list_add_tail(&policer_item->list, &devlink->trap_policer_list);
- devlink_trap_policer_notify(devlink, policer_item,
- DEVLINK_CMD_TRAP_POLICER_NEW);
-
- return 0;
-
-err_policer_init:
- kfree(policer_item);
- return err;
-}
-
-static void
-devlink_trap_policer_unregister(struct devlink *devlink,
- const struct devlink_trap_policer *policer)
-{
- struct devlink_trap_policer_item *policer_item;
-
- policer_item = devlink_trap_policer_item_lookup(devlink, policer->id);
- if (WARN_ON_ONCE(!policer_item))
- return;
-
- devlink_trap_policer_notify(devlink, policer_item,
- DEVLINK_CMD_TRAP_POLICER_DEL);
- list_del(&policer_item->list);
- if (devlink->ops->trap_policer_fini)
- devlink->ops->trap_policer_fini(devlink, policer);
- kfree(policer_item);
-}
-
-/**
- * devl_trap_policers_register - Register packet trap policers with devlink.
- * @devlink: devlink.
- * @policers: Packet trap policers.
- * @policers_count: Count of provided packet trap policers.
- *
- * Return: Non-zero value on failure.
- */
-int
-devl_trap_policers_register(struct devlink *devlink,
- const struct devlink_trap_policer *policers,
- size_t policers_count)
-{
- int i, err;
-
- devl_assert_locked(devlink);
- for (i = 0; i < policers_count; i++) {
- const struct devlink_trap_policer *policer = &policers[i];
-
- if (WARN_ON(policer->id == 0 ||
- policer->max_rate < policer->min_rate ||
- policer->max_burst < policer->min_burst)) {
- err = -EINVAL;
- goto err_trap_policer_verify;
- }
-
- err = devlink_trap_policer_register(devlink, policer);
- if (err)
- goto err_trap_policer_register;
- }
- return 0;
-
-err_trap_policer_register:
-err_trap_policer_verify:
- for (i--; i >= 0; i--)
- devlink_trap_policer_unregister(devlink, &policers[i]);
- return err;
-}
-EXPORT_SYMBOL_GPL(devl_trap_policers_register);
-
-/**
- * devl_trap_policers_unregister - Unregister packet trap policers from devlink.
- * @devlink: devlink.
- * @policers: Packet trap policers.
- * @policers_count: Count of provided packet trap policers.
- */
-void
-devl_trap_policers_unregister(struct devlink *devlink,
- const struct devlink_trap_policer *policers,
- size_t policers_count)
-{
- int i;
-
- devl_assert_locked(devlink);
- for (i = policers_count - 1; i >= 0; i--)
- devlink_trap_policer_unregister(devlink, &policers[i]);
-}
-EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
-
-int devlink_compat_phys_port_name_get(struct net_device *dev,
- char *name, size_t len)
-{
- struct devlink_port *devlink_port;
-
- /* RTNL mutex is held here which ensures that devlink_port
- * instance cannot disappear in the middle. No need to take
- * any devlink lock as only permanent values are accessed.
- */
- ASSERT_RTNL();
-
- devlink_port = dev->devlink_port;
- if (!devlink_port)
- return -EOPNOTSUPP;
-
- return __devlink_port_phys_port_name_get(devlink_port, name, len);
-}
-
-int devlink_compat_switch_id_get(struct net_device *dev,
- struct netdev_phys_item_id *ppid)
-{
- struct devlink_port *devlink_port;
-
- /* Caller must hold RTNL mutex or reference to dev, which ensures that
- * devlink_port instance cannot disappear in the middle. No need to take
- * any devlink lock as only permanent values are accessed.
- */
- devlink_port = dev->devlink_port;
- if (!devlink_port || !devlink_port->switch_port)
- return -EOPNOTSUPP;
-
- memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid));
-
- return 0;
-}
diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c
new file mode 100644
index 000000000000..85c32c314b0f
--- /dev/null
+++ b/net/devlink/linecard.c
@@ -0,0 +1,606 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+static struct devlink_linecard *
+devlink_linecard_get_by_index(struct devlink *devlink,
+ unsigned int linecard_index)
+{
+ struct devlink_linecard *devlink_linecard;
+
+ list_for_each_entry(devlink_linecard, &devlink->linecard_list, list) {
+ if (devlink_linecard->index == linecard_index)
+ return devlink_linecard;
+ }
+ return NULL;
+}
+
+static bool devlink_linecard_index_exists(struct devlink *devlink,
+ unsigned int linecard_index)
+{
+ return devlink_linecard_get_by_index(devlink, linecard_index);
+}
+
+static struct devlink_linecard *
+devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
+{
+ if (attrs[DEVLINK_ATTR_LINECARD_INDEX]) {
+ u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]);
+ struct devlink_linecard *linecard;
+
+ linecard = devlink_linecard_get_by_index(devlink, linecard_index);
+ if (!linecard)
+ return ERR_PTR(-ENODEV);
+ return linecard;
+ }
+ return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_linecard *
+devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ return devlink_linecard_get_from_attrs(devlink, info->attrs);
+}
+
+static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
+{
+ struct nlattr *nested_attr;
+
+ nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK);
+ if (!nested_attr)
+ return -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, nested_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, nested_attr);
+ return -EMSGSIZE;
+}
+
+struct devlink_linecard_type {
+ const char *type;
+ const void *priv;
+};
+
+static int devlink_nl_linecard_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_linecard *linecard,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_linecard_type *linecard_type;
+ struct nlattr *attr;
+ void *hdr;
+ int i;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_LINECARD_STATE, linecard->state))
+ goto nla_put_failure;
+ if (linecard->type &&
+ nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, linecard->type))
+ goto nla_put_failure;
+
+ if (linecard->types_count) {
+ attr = nla_nest_start(msg,
+ DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES);
+ if (!attr)
+ goto nla_put_failure;
+ for (i = 0; i < linecard->types_count; i++) {
+ linecard_type = &linecard->types[i];
+ if (nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE,
+ linecard_type->type)) {
+ nla_nest_cancel(msg, attr);
+ goto nla_put_failure;
+ }
+ }
+ nla_nest_end(msg, attr);
+ }
+
+ if (linecard->nested_devlink &&
+ devlink_nl_put_nested_handle(msg, linecard->nested_devlink))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_linecard_notify(struct devlink_linecard *linecard,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = linecard->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW &&
+ cmd != DEVLINK_CMD_LINECARD_DEL);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_linecard_fill(msg, devlink, linecard, cmd, 0, 0, 0,
+ NULL);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_linecards_notify_register(struct devlink *devlink)
+{
+ struct devlink_linecard *linecard;
+
+ list_for_each_entry(linecard, &devlink->linecard_list, list)
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+}
+
+void devlink_linecards_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_linecard *linecard;
+
+ list_for_each_entry_reverse(linecard, &devlink->linecard_list, list)
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
+}
+
+int devlink_nl_linecard_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_linecard *linecard;
+ struct sk_buff *msg;
+ int err;
+
+ linecard = devlink_linecard_get_from_info(devlink, info);
+ if (IS_ERR(linecard))
+ return PTR_ERR(linecard);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ mutex_lock(&linecard->state_lock);
+ err = devlink_nl_linecard_fill(msg, devlink, linecard,
+ DEVLINK_CMD_LINECARD_NEW,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ mutex_unlock(&linecard->state_lock);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_linecard_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_linecard *linecard;
+ int idx = 0;
+ int err = 0;
+
+ list_for_each_entry(linecard, &devlink->linecard_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ mutex_lock(&linecard->state_lock);
+ err = devlink_nl_linecard_fill(msg, devlink, linecard,
+ DEVLINK_CMD_LINECARD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags,
+ cb->extack);
+ mutex_unlock(&linecard->state_lock);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
+ }
+
+ return err;
+}
+
+int devlink_nl_linecard_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_linecard_get_dump_one);
+}
+
+static struct devlink_linecard_type *
+devlink_linecard_type_lookup(struct devlink_linecard *linecard,
+ const char *type)
+{
+ struct devlink_linecard_type *linecard_type;
+ int i;
+
+ for (i = 0; i < linecard->types_count; i++) {
+ linecard_type = &linecard->types[i];
+ if (!strcmp(type, linecard_type->type))
+ return linecard_type;
+ }
+ return NULL;
+}
+
+static int devlink_linecard_type_set(struct devlink_linecard *linecard,
+ const char *type,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_linecard_ops *ops = linecard->ops;
+ struct devlink_linecard_type *linecard_type;
+ int err;
+
+ mutex_lock(&linecard->state_lock);
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
+ NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
+ NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
+ err = -EBUSY;
+ goto out;
+ }
+
+ linecard_type = devlink_linecard_type_lookup(linecard, type);
+ if (!linecard_type) {
+ NL_SET_ERR_MSG(extack, "Unsupported line card type provided");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED &&
+ linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
+ NL_SET_ERR_MSG(extack, "Line card already provisioned");
+ err = -EBUSY;
+ /* Check if the line card is provisioned in the same
+ * way the user asks. In case it is, make the operation
+ * to return success.
+ */
+ if (ops->same_provision &&
+ ops->same_provision(linecard, linecard->priv,
+ linecard_type->type,
+ linecard_type->priv))
+ err = 0;
+ goto out;
+ }
+
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING;
+ linecard->type = linecard_type->type;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ err = ops->provision(linecard, linecard->priv, linecard_type->type,
+ linecard_type->priv, extack);
+ if (err) {
+ /* Provisioning failed. Assume the linecard is unprovisioned
+ * for future operations.
+ */
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ }
+ return err;
+
+out:
+ mutex_unlock(&linecard->state_lock);
+ return err;
+}
+
+static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ mutex_lock(&linecard->state_lock);
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
+ NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
+ NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ err = 0;
+ goto out;
+ }
+
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) {
+ NL_SET_ERR_MSG(extack, "Line card is not provisioned");
+ err = 0;
+ goto out;
+ }
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONING;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ err = linecard->ops->unprovision(linecard, linecard->priv,
+ extack);
+ if (err) {
+ /* Unprovisioning failed. Assume the linecard is unprovisioned
+ * for future operations.
+ */
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ }
+ return err;
+
+out:
+ mutex_unlock(&linecard->state_lock);
+ return err;
+}
+
+int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_linecard *linecard;
+ int err;
+
+ linecard = devlink_linecard_get_from_info(devlink, info);
+ if (IS_ERR(linecard))
+ return PTR_ERR(linecard);
+
+ if (info->attrs[DEVLINK_ATTR_LINECARD_TYPE]) {
+ const char *type;
+
+ type = nla_data(info->attrs[DEVLINK_ATTR_LINECARD_TYPE]);
+ if (strcmp(type, "")) {
+ err = devlink_linecard_type_set(linecard, type, extack);
+ if (err)
+ return err;
+ } else {
+ err = devlink_linecard_type_unset(linecard, extack);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int devlink_linecard_types_init(struct devlink_linecard *linecard)
+{
+ struct devlink_linecard_type *linecard_type;
+ unsigned int count;
+ int i;
+
+ count = linecard->ops->types_count(linecard, linecard->priv);
+ linecard->types = kmalloc_array(count, sizeof(*linecard_type),
+ GFP_KERNEL);
+ if (!linecard->types)
+ return -ENOMEM;
+ linecard->types_count = count;
+
+ for (i = 0; i < count; i++) {
+ linecard_type = &linecard->types[i];
+ linecard->ops->types_get(linecard, linecard->priv, i,
+ &linecard_type->type,
+ &linecard_type->priv);
+ }
+ return 0;
+}
+
+static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
+{
+ kfree(linecard->types);
+}
+
+/**
+ * devl_linecard_create - Create devlink linecard
+ *
+ * @devlink: devlink
+ * @linecard_index: driver-specific numerical identifier of the linecard
+ * @ops: linecards ops
+ * @priv: user priv pointer
+ *
+ * Create devlink linecard instance with provided linecard index.
+ * Caller can use any indexing, even hw-related one.
+ *
+ * Return: Line card structure or an ERR_PTR() encoded error code.
+ */
+struct devlink_linecard *
+devl_linecard_create(struct devlink *devlink, unsigned int linecard_index,
+ const struct devlink_linecard_ops *ops, void *priv)
+{
+ struct devlink_linecard *linecard;
+ int err;
+
+ if (WARN_ON(!ops || !ops->provision || !ops->unprovision ||
+ !ops->types_count || !ops->types_get))
+ return ERR_PTR(-EINVAL);
+
+ if (devlink_linecard_index_exists(devlink, linecard_index))
+ return ERR_PTR(-EEXIST);
+
+ linecard = kzalloc(sizeof(*linecard), GFP_KERNEL);
+ if (!linecard)
+ return ERR_PTR(-ENOMEM);
+
+ linecard->devlink = devlink;
+ linecard->index = linecard_index;
+ linecard->ops = ops;
+ linecard->priv = priv;
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ mutex_init(&linecard->state_lock);
+
+ err = devlink_linecard_types_init(linecard);
+ if (err) {
+ mutex_destroy(&linecard->state_lock);
+ kfree(linecard);
+ return ERR_PTR(err);
+ }
+
+ list_add_tail(&linecard->list, &devlink->linecard_list);
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ return linecard;
+}
+EXPORT_SYMBOL_GPL(devl_linecard_create);
+
+/**
+ * devl_linecard_destroy - Destroy devlink linecard
+ *
+ * @linecard: devlink linecard
+ */
+void devl_linecard_destroy(struct devlink_linecard *linecard)
+{
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
+ list_del(&linecard->list);
+ devlink_linecard_types_fini(linecard);
+ mutex_destroy(&linecard->state_lock);
+ kfree(linecard);
+}
+EXPORT_SYMBOL_GPL(devl_linecard_destroy);
+
+/**
+ * devlink_linecard_provision_set - Set provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ * @type: linecard type
+ *
+ * This is either called directly from the provision() op call or
+ * as a result of the provision() op call asynchronously.
+ */
+void devlink_linecard_provision_set(struct devlink_linecard *linecard,
+ const char *type)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->type && strcmp(linecard->type, type));
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
+ linecard->type = type;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
+
+/**
+ * devlink_linecard_provision_clear - Clear provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ *
+ * This is either called directly from the unprovision() op call or
+ * as a result of the unprovision() op call asynchronously.
+ */
+void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->nested_devlink);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
+
+/**
+ * devlink_linecard_provision_fail - Fail provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ *
+ * This is either called directly from the provision() op call or
+ * as a result of the provision() op call asynchronously.
+ */
+void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->nested_devlink);
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_fail);
+
+/**
+ * devlink_linecard_activate - Set linecard active
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_activate(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->state != DEVLINK_LINECARD_STATE_PROVISIONED);
+ linecard->state = DEVLINK_LINECARD_STATE_ACTIVE;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_activate);
+
+/**
+ * devlink_linecard_deactivate - Set linecard inactive
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_deactivate(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ switch (linecard->state) {
+ case DEVLINK_LINECARD_STATE_ACTIVE:
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ break;
+ case DEVLINK_LINECARD_STATE_UNPROVISIONING:
+ /* Line card is being deactivated as part
+ * of unprovisioning flow.
+ */
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
+
+/**
+ * devlink_linecard_nested_dl_set - Attach/detach nested devlink
+ * instance to linecard.
+ *
+ * @linecard: devlink linecard
+ * @nested_devlink: devlink instance to attach or NULL to detach
+ */
+void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+ struct devlink *nested_devlink)
+{
+ mutex_lock(&linecard->state_lock);
+ linecard->nested_devlink = nested_devlink;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set);
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
index 7a332eb70f70..fc3e7c029a3b 100644
--- a/net/devlink/netlink.c
+++ b/net/devlink/netlink.c
@@ -82,6 +82,21 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG },
};
+int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info)
+{
+ int err;
+
+ if (*msg) {
+ err = genlmsg_reply(*msg, info);
+ if (err)
+ return err;
+ }
+ *msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!*msg)
+ return -ENOMEM;
+ return 0;
+}
+
struct devlink *
devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
{
@@ -109,10 +124,9 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
return ERR_PTR(-ENODEV);
}
-static int devlink_nl_pre_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
+static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info,
+ u8 flags)
{
- struct devlink_linecard *linecard;
struct devlink_port *devlink_port;
struct devlink *devlink;
int err;
@@ -122,42 +136,17 @@ static int devlink_nl_pre_doit(const struct genl_split_ops *ops,
return PTR_ERR(devlink);
info->user_ptr[0] = devlink;
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
+ if (flags & DEVLINK_NL_FLAG_NEED_PORT) {
devlink_port = devlink_port_get_from_info(devlink, info);
if (IS_ERR(devlink_port)) {
err = PTR_ERR(devlink_port);
goto unlock;
}
info->user_ptr[1] = devlink_port;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
+ } else if (flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
devlink_port = devlink_port_get_from_info(devlink, info);
if (!IS_ERR(devlink_port))
info->user_ptr[1] = devlink_port;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
- struct devlink_rate *devlink_rate;
-
- devlink_rate = devlink_rate_get_from_info(devlink, info);
- if (IS_ERR(devlink_rate)) {
- err = PTR_ERR(devlink_rate);
- goto unlock;
- }
- info->user_ptr[1] = devlink_rate;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) {
- struct devlink_rate *rate_node;
-
- rate_node = devlink_rate_node_get_from_info(devlink, info);
- if (IS_ERR(rate_node)) {
- err = PTR_ERR(rate_node);
- goto unlock;
- }
- info->user_ptr[1] = rate_node;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
- linecard = devlink_linecard_get_from_info(devlink, info);
- if (IS_ERR(linecard)) {
- err = PTR_ERR(linecard);
- goto unlock;
- }
- info->user_ptr[1] = linecard;
}
return 0;
@@ -167,8 +156,27 @@ unlock:
return err;
}
-static void devlink_nl_post_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_pre_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ return __devlink_nl_pre_doit(skb, info, ops->internal_flags);
+}
+
+int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_PORT);
+}
+
+int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
+ struct sk_buff *skb,
+ struct genl_info *info)
+{
+ return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT);
+}
+
+void devlink_nl_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink;
@@ -177,42 +185,41 @@ static void devlink_nl_post_doit(const struct genl_split_ops *ops,
devlink_put(devlink);
}
-static const struct devlink_cmd *devl_cmds[] = {
- [DEVLINK_CMD_GET] = &devl_cmd_get,
- [DEVLINK_CMD_PORT_GET] = &devl_cmd_port_get,
- [DEVLINK_CMD_SB_GET] = &devl_cmd_sb_get,
- [DEVLINK_CMD_SB_POOL_GET] = &devl_cmd_sb_pool_get,
- [DEVLINK_CMD_SB_PORT_POOL_GET] = &devl_cmd_sb_port_pool_get,
- [DEVLINK_CMD_SB_TC_POOL_BIND_GET] = &devl_cmd_sb_tc_pool_bind_get,
- [DEVLINK_CMD_PARAM_GET] = &devl_cmd_param_get,
- [DEVLINK_CMD_REGION_GET] = &devl_cmd_region_get,
- [DEVLINK_CMD_INFO_GET] = &devl_cmd_info_get,
- [DEVLINK_CMD_HEALTH_REPORTER_GET] = &devl_cmd_health_reporter_get,
- [DEVLINK_CMD_TRAP_GET] = &devl_cmd_trap_get,
- [DEVLINK_CMD_TRAP_GROUP_GET] = &devl_cmd_trap_group_get,
- [DEVLINK_CMD_TRAP_POLICER_GET] = &devl_cmd_trap_policer_get,
- [DEVLINK_CMD_RATE_GET] = &devl_cmd_rate_get,
- [DEVLINK_CMD_LINECARD_GET] = &devl_cmd_linecard_get,
- [DEVLINK_CMD_SELFTESTS_GET] = &devl_cmd_selftests_get,
-};
+static int devlink_nl_inst_single_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb, int flags,
+ devlink_nl_dump_one_func_t *dump_one,
+ struct nlattr **attrs)
+{
+ struct devlink *devlink;
+ int err;
+
+ devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs);
+ if (IS_ERR(devlink))
+ return PTR_ERR(devlink);
+ err = dump_one(msg, devlink, cb, flags | NLM_F_DUMP_FILTERED);
+
+ devl_unlock(devlink);
+ devlink_put(devlink);
+
+ if (err != -EMSGSIZE)
+ return err;
+ return msg->len;
+}
-int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int devlink_nl_inst_iter_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb, int flags,
+ devlink_nl_dump_one_func_t *dump_one)
{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- const struct devlink_cmd *cmd;
struct devlink *devlink;
int err = 0;
- cmd = devl_cmds[info->op.cmd];
-
while ((devlink = devlinks_xa_find_get(sock_net(msg->sk),
&state->instance))) {
devl_lock(devlink);
if (devl_is_registered(devlink))
- err = cmd->dump_one(msg, devlink, cb);
+ err = dump_one(msg, devlink, cb, flags);
else
err = 0;
@@ -233,6 +240,272 @@ int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
return msg->len;
}
+int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
+ devlink_nl_dump_one_func_t *dump_one)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct nlattr **attrs = info->attrs;
+ int flags = NLM_F_MULTI;
+
+ if (attrs &&
+ (attrs[DEVLINK_ATTR_BUS_NAME] || attrs[DEVLINK_ATTR_DEV_NAME]))
+ return devlink_nl_inst_single_dumpit(msg, cb, flags, dump_one,
+ attrs);
+ else
+ return devlink_nl_inst_iter_dumpit(msg, cb, flags, dump_one);
+}
+
+static const struct genl_small_ops devlink_nl_small_ops[40] = {
+ {
+ .cmd = DEVLINK_CMD_PORT_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_port_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_SET,
+ .doit = devlink_nl_cmd_rate_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_NEW,
+ .doit = devlink_nl_cmd_rate_new_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_DEL,
+ .doit = devlink_nl_cmd_rate_del_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_SPLIT,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_port_split_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_UNSPLIT,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_port_unsplit_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_NEW,
+ .doit = devlink_nl_cmd_port_new_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_DEL,
+ .doit = devlink_nl_cmd_port_del_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+
+ {
+ .cmd = DEVLINK_CMD_LINECARD_SET,
+ .doit = devlink_nl_cmd_linecard_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_POOL_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_sb_pool_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_PORT_POOL_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_sb_port_pool_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_sb_occ_snapshot_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_sb_occ_max_clear_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_ESWITCH_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_eswitch_get_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_ESWITCH_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_eswitch_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_dpipe_table_get,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_dpipe_entries_get,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_dpipe_headers_get,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_dpipe_table_counters_set,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_RESOURCE_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_resource_set,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_RESOURCE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_resource_dump,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_RELOAD,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_reload,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_PARAM_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_param_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_PARAM_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_port_param_get_doit,
+ .dumpit = devlink_nl_cmd_port_param_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_PARAM_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_port_param_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_NEW,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_region_new,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_region_del,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_READ,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
+ .dumpit = devlink_nl_cmd_region_read_dumpit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_health_reporter_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_health_reporter_recover_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_health_reporter_diagnose_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
+ .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_health_reporter_test_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
+ },
+ {
+ .cmd = DEVLINK_CMD_FLASH_UPDATE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_flash_update,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_SET,
+ .doit = devlink_nl_cmd_trap_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_SET,
+ .doit = devlink_nl_cmd_trap_group_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_POLICER_SET,
+ .doit = devlink_nl_cmd_trap_policer_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_RUN,
+ .doit = devlink_nl_cmd_selftests_run,
+ .flags = GENL_ADMIN_PERM,
+ },
+ /* -- No new ops here! Use split ops going forward! -- */
+};
+
struct genl_family devlink_nl_family __ro_after_init = {
.name = DEVLINK_GENL_NAME,
.version = DEVLINK_GENL_VERSION,
@@ -243,8 +516,10 @@ struct genl_family devlink_nl_family __ro_after_init = {
.pre_doit = devlink_nl_pre_doit,
.post_doit = devlink_nl_post_doit,
.module = THIS_MODULE,
- .small_ops = devlink_nl_ops,
- .n_small_ops = ARRAY_SIZE(devlink_nl_ops),
+ .small_ops = devlink_nl_small_ops,
+ .n_small_ops = ARRAY_SIZE(devlink_nl_small_ops),
+ .split_ops = devlink_nl_ops,
+ .n_split_ops = ARRAY_SIZE(devlink_nl_ops),
.resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1,
.mcgrps = devlink_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
new file mode 100644
index 000000000000..467b7a431de1
--- /dev/null
+++ b/net/devlink/netlink_gen.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/devlink.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "netlink_gen.h"
+
+#include <uapi/linux/devlink.h>
+
+/* DEVLINK_CMD_GET - do */
+static const struct nla_policy devlink_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_PORT_GET - do */
+static const struct nla_policy devlink_port_get_do_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_PORT_GET - dump */
+static const struct nla_policy devlink_port_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SB_GET - do */
+static const struct nla_policy devlink_sb_get_do_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_SB_GET - dump */
+static const struct nla_policy devlink_sb_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SB_POOL_GET - do */
+static const struct nla_policy devlink_sb_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, },
+};
+
+/* DEVLINK_CMD_SB_POOL_GET - dump */
+static const struct nla_policy devlink_sb_pool_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SB_PORT_POOL_GET - do */
+static const struct nla_policy devlink_sb_port_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, },
+};
+
+/* DEVLINK_CMD_SB_PORT_POOL_GET - dump */
+static const struct nla_policy devlink_sb_port_pool_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */
+static const struct nla_policy devlink_sb_tc_pool_bind_get_do_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_SB_POOL_TYPE] = NLA_POLICY_MAX(NLA_U8, 1),
+ [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16, },
+};
+
+/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */
+static const struct nla_policy devlink_sb_tc_pool_bind_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_PARAM_GET - do */
+static const struct nla_policy devlink_param_get_do_nl_policy[DEVLINK_ATTR_PARAM_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_PARAM_GET - dump */
+static const struct nla_policy devlink_param_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_REGION_GET - do */
+static const struct nla_policy devlink_region_get_do_nl_policy[DEVLINK_ATTR_REGION_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_REGION_GET - dump */
+static const struct nla_policy devlink_region_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_INFO_GET - do */
+static const struct nla_policy devlink_info_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_HEALTH_REPORTER_GET - do */
+static const struct nla_policy devlink_health_reporter_get_do_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_HEALTH_REPORTER_GET - dump */
+static const struct nla_policy devlink_health_reporter_get_dump_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_TRAP_GET - do */
+static const struct nla_policy devlink_trap_get_do_nl_policy[DEVLINK_ATTR_TRAP_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_TRAP_GET - dump */
+static const struct nla_policy devlink_trap_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_TRAP_GROUP_GET - do */
+static const struct nla_policy devlink_trap_group_get_do_nl_policy[DEVLINK_ATTR_TRAP_GROUP_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_TRAP_GROUP_GET - dump */
+static const struct nla_policy devlink_trap_group_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_TRAP_POLICER_GET - do */
+static const struct nla_policy devlink_trap_policer_get_do_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_TRAP_POLICER_GET - dump */
+static const struct nla_policy devlink_trap_policer_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_RATE_GET - do */
+static const struct nla_policy devlink_rate_get_do_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+ [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_RATE_GET - dump */
+static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_LINECARD_GET - do */
+static const struct nla_policy devlink_linecard_get_do_nl_policy[DEVLINK_ATTR_LINECARD_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32, },
+};
+
+/* DEVLINK_CMD_LINECARD_GET - dump */
+static const struct nla_policy devlink_linecard_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* DEVLINK_CMD_SELFTESTS_GET - do */
+static const struct nla_policy devlink_selftests_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+/* Ops table for devlink */
+const struct genl_split_ops devlink_nl_ops[32] = {
+ {
+ .cmd = DEVLINK_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_DUMP,
+ .dumpit = devlink_nl_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_port_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_port_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_GET,
+ .dumpit = devlink_nl_port_get_dumpit,
+ .policy = devlink_port_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_sb_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_GET,
+ .dumpit = devlink_nl_sb_get_dumpit,
+ .policy = devlink_sb_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_POOL_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_sb_pool_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_pool_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_POOL_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_POOL_GET,
+ .dumpit = devlink_nl_sb_pool_get_dumpit,
+ .policy = devlink_sb_pool_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_sb_port_pool_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_port_pool_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_POOL_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
+ .dumpit = devlink_nl_sb_port_pool_get_dumpit,
+ .policy = devlink_sb_port_pool_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port,
+ .doit = devlink_nl_sb_tc_pool_bind_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_sb_tc_pool_bind_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_SB_TC_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
+ .dumpit = devlink_nl_sb_tc_pool_bind_get_dumpit,
+ .policy = devlink_sb_tc_pool_bind_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_PARAM_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_param_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_param_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_PARAM_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_PARAM_GET,
+ .dumpit = devlink_nl_param_get_dumpit,
+ .policy = devlink_param_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_region_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_region_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_REGION_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_REGION_GET,
+ .dumpit = devlink_nl_region_get_dumpit,
+ .policy = devlink_region_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_INFO_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_info_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_info_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_INFO_GET,
+ .validate = GENL_DONT_VALIDATE_DUMP,
+ .dumpit = devlink_nl_info_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_health_reporter_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_health_reporter_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
+ .dumpit = devlink_nl_health_reporter_get_dumpit,
+ .policy = devlink_health_reporter_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_INDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GET,
+ .dumpit = devlink_nl_trap_get_dumpit,
+ .policy = devlink_trap_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_group_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_group_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_GROUP_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_GET,
+ .dumpit = devlink_nl_trap_group_get_dumpit,
+ .policy = devlink_trap_group_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_POLICER_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_policer_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_policer_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_POLICER_ID,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_POLICER_GET,
+ .dumpit = devlink_nl_trap_policer_get_dumpit,
+ .policy = devlink_trap_policer_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_rate_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_rate_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_RATE_NODE_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_GET,
+ .dumpit = devlink_nl_rate_get_dumpit,
+ .policy = devlink_rate_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_LINECARD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_linecard_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_linecard_get_do_nl_policy,
+ .maxattr = DEVLINK_ATTR_LINECARD_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_LINECARD_GET,
+ .dumpit = devlink_nl_linecard_get_dumpit,
+ .policy = devlink_linecard_get_dump_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_selftests_get_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_selftests_get_nl_policy,
+ .maxattr = DEVLINK_ATTR_DEV_NAME,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_GET,
+ .validate = GENL_DONT_VALIDATE_DUMP,
+ .dumpit = devlink_nl_selftests_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+};
diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h
new file mode 100644
index 000000000000..f8bbc93e39be
--- /dev/null
+++ b/net/devlink/netlink_gen.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/devlink.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_DEVLINK_GEN_H
+#define _LINUX_DEVLINK_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/devlink.h>
+
+/* Ops table for devlink */
+extern const struct genl_split_ops devlink_nl_ops[32];
+
+int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
+ struct sk_buff *skb,
+ struct genl_info *info);
+void
+devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+
+int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_port_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_sb_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_sb_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_sb_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_sb_port_pool_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_sb_port_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_sb_tc_pool_bind_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_param_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_param_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_region_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_region_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_info_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_info_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_health_reporter_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_health_reporter_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_trap_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_trap_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_trap_group_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_trap_group_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_trap_policer_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_trap_policer_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_rate_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_linecard_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_linecard_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_selftests_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+
+#endif /* _LINUX_DEVLINK_GEN_H */
diff --git a/net/devlink/param.c b/net/devlink/param.c
new file mode 100644
index 000000000000..31275f9d4cb7
--- /dev/null
+++ b/net/devlink/param.c
@@ -0,0 +1,865 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+static const struct devlink_param devlink_param_generic[] = {
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+ .name = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME,
+ .type = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+ .name = DEVLINK_PARAM_GENERIC_MAX_MACS_NAME,
+ .type = DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+ .name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
+ .type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
+ .name = DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME,
+ .type = DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
+ .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME,
+ .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
+ .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME,
+ .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
+ .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
+ .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
+ .name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME,
+ .type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
+ .name = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME,
+ .type = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
+ .name = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME,
+ .type = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE,
+ },
+};
+
+static int devlink_param_generic_verify(const struct devlink_param *param)
+{
+ /* verify it match generic parameter by id and name */
+ if (param->id > DEVLINK_PARAM_GENERIC_ID_MAX)
+ return -EINVAL;
+ if (strcmp(param->name, devlink_param_generic[param->id].name))
+ return -ENOENT;
+
+ WARN_ON(param->type != devlink_param_generic[param->id].type);
+
+ return 0;
+}
+
+static int devlink_param_driver_verify(const struct devlink_param *param)
+{
+ int i;
+
+ if (param->id <= DEVLINK_PARAM_GENERIC_ID_MAX)
+ return -EINVAL;
+ /* verify no such name in generic params */
+ for (i = 0; i <= DEVLINK_PARAM_GENERIC_ID_MAX; i++)
+ if (!strcmp(param->name, devlink_param_generic[i].name))
+ return -EEXIST;
+
+ return 0;
+}
+
+static struct devlink_param_item *
+devlink_param_find_by_name(struct xarray *params, const char *param_name)
+{
+ struct devlink_param_item *param_item;
+ unsigned long param_id;
+
+ xa_for_each(params, param_id, param_item) {
+ if (!strcmp(param_item->param->name, param_name))
+ return param_item;
+ }
+ return NULL;
+}
+
+static struct devlink_param_item *
+devlink_param_find_by_id(struct xarray *params, u32 param_id)
+{
+ return xa_load(params, param_id);
+}
+
+static bool
+devlink_param_cmode_is_supported(const struct devlink_param *param,
+ enum devlink_param_cmode cmode)
+{
+ return test_bit(cmode, &param->supported_cmodes);
+}
+
+static int devlink_param_get(struct devlink *devlink,
+ const struct devlink_param *param,
+ struct devlink_param_gset_ctx *ctx)
+{
+ if (!param->get)
+ return -EOPNOTSUPP;
+ return param->get(devlink, param->id, ctx);
+}
+
+static int devlink_param_set(struct devlink *devlink,
+ const struct devlink_param *param,
+ struct devlink_param_gset_ctx *ctx)
+{
+ if (!param->set)
+ return -EOPNOTSUPP;
+ return param->set(devlink, param->id, ctx);
+}
+
+static int
+devlink_param_type_to_nla_type(enum devlink_param_type param_type)
+{
+ switch (param_type) {
+ case DEVLINK_PARAM_TYPE_U8:
+ return NLA_U8;
+ case DEVLINK_PARAM_TYPE_U16:
+ return NLA_U16;
+ case DEVLINK_PARAM_TYPE_U32:
+ return NLA_U32;
+ case DEVLINK_PARAM_TYPE_STRING:
+ return NLA_STRING;
+ case DEVLINK_PARAM_TYPE_BOOL:
+ return NLA_FLAG;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int
+devlink_nl_param_value_fill_one(struct sk_buff *msg,
+ enum devlink_param_type type,
+ enum devlink_param_cmode cmode,
+ union devlink_param_value val)
+{
+ struct nlattr *param_value_attr;
+
+ param_value_attr = nla_nest_start_noflag(msg,
+ DEVLINK_ATTR_PARAM_VALUE);
+ if (!param_value_attr)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_CMODE, cmode))
+ goto value_nest_cancel;
+
+ switch (type) {
+ case DEVLINK_PARAM_TYPE_U8:
+ if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu8))
+ goto value_nest_cancel;
+ break;
+ case DEVLINK_PARAM_TYPE_U16:
+ if (nla_put_u16(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu16))
+ goto value_nest_cancel;
+ break;
+ case DEVLINK_PARAM_TYPE_U32:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32))
+ goto value_nest_cancel;
+ break;
+ case DEVLINK_PARAM_TYPE_STRING:
+ if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA,
+ val.vstr))
+ goto value_nest_cancel;
+ break;
+ case DEVLINK_PARAM_TYPE_BOOL:
+ if (val.vbool &&
+ nla_put_flag(msg, DEVLINK_ATTR_PARAM_VALUE_DATA))
+ goto value_nest_cancel;
+ break;
+ }
+
+ nla_nest_end(msg, param_value_attr);
+ return 0;
+
+value_nest_cancel:
+ nla_nest_cancel(msg, param_value_attr);
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
+ unsigned int port_index,
+ struct devlink_param_item *param_item,
+ enum devlink_command cmd,
+ u32 portid, u32 seq, int flags)
+{
+ union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
+ bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {};
+ const struct devlink_param *param = param_item->param;
+ struct devlink_param_gset_ctx ctx;
+ struct nlattr *param_values_list;
+ struct nlattr *param_attr;
+ int nla_type;
+ void *hdr;
+ int err;
+ int i;
+
+ /* Get value from driver part to driverinit configuration mode */
+ for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
+ if (!devlink_param_cmode_is_supported(param, i))
+ continue;
+ if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) {
+ if (param_item->driverinit_value_new_valid)
+ param_value[i] = param_item->driverinit_value_new;
+ else if (param_item->driverinit_value_valid)
+ param_value[i] = param_item->driverinit_value;
+ else
+ return -EOPNOTSUPP;
+ } else {
+ ctx.cmode = i;
+ err = devlink_param_get(devlink, param, &ctx);
+ if (err)
+ return err;
+ param_value[i] = ctx.val;
+ }
+ param_value_set[i] = true;
+ }
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ if (cmd == DEVLINK_CMD_PORT_PARAM_GET ||
+ cmd == DEVLINK_CMD_PORT_PARAM_NEW ||
+ cmd == DEVLINK_CMD_PORT_PARAM_DEL)
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index))
+ goto genlmsg_cancel;
+
+ param_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PARAM);
+ if (!param_attr)
+ goto genlmsg_cancel;
+ if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name))
+ goto param_nest_cancel;
+ if (param->generic && nla_put_flag(msg, DEVLINK_ATTR_PARAM_GENERIC))
+ goto param_nest_cancel;
+
+ nla_type = devlink_param_type_to_nla_type(param->type);
+ if (nla_type < 0)
+ goto param_nest_cancel;
+ if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type))
+ goto param_nest_cancel;
+
+ param_values_list = nla_nest_start_noflag(msg,
+ DEVLINK_ATTR_PARAM_VALUES_LIST);
+ if (!param_values_list)
+ goto param_nest_cancel;
+
+ for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
+ if (!param_value_set[i])
+ continue;
+ err = devlink_nl_param_value_fill_one(msg, param->type,
+ i, param_value[i]);
+ if (err)
+ goto values_list_nest_cancel;
+ }
+
+ nla_nest_end(msg, param_values_list);
+ nla_nest_end(msg, param_attr);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+values_list_nest_cancel:
+ nla_nest_end(msg, param_values_list);
+param_nest_cancel:
+ nla_nest_cancel(msg, param_attr);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_param_notify(struct devlink *devlink,
+ unsigned int port_index,
+ struct devlink_param_item *param_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL &&
+ cmd != DEVLINK_CMD_PORT_PARAM_NEW &&
+ cmd != DEVLINK_CMD_PORT_PARAM_DEL);
+
+ /* devlink_notify_register() / devlink_notify_unregister()
+ * will replay the notifications if the params are added/removed
+ * outside of the lifetime of the instance.
+ */
+ if (!devl_is_registered(devlink))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+ err = devlink_nl_param_fill(msg, devlink, port_index, param_item, cmd,
+ 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static void devlink_params_notify(struct devlink *devlink,
+ enum devlink_command cmd)
+{
+ struct devlink_param_item *param_item;
+ unsigned long param_id;
+
+ xa_for_each(&devlink->params, param_id, param_item)
+ devlink_param_notify(devlink, 0, param_item, cmd);
+}
+
+void devlink_params_notify_register(struct devlink *devlink)
+{
+ devlink_params_notify(devlink, DEVLINK_CMD_PARAM_NEW);
+}
+
+void devlink_params_notify_unregister(struct devlink *devlink)
+{
+ devlink_params_notify(devlink, DEVLINK_CMD_PARAM_DEL);
+}
+
+static int devlink_nl_param_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_param_item *param_item;
+ unsigned long param_id;
+ int err = 0;
+
+ xa_for_each_start(&devlink->params, param_id, param_item, state->idx) {
+ err = devlink_nl_param_fill(msg, devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = param_id;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int devlink_nl_param_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_param_get_dump_one);
+}
+
+static int
+devlink_param_type_get_from_info(struct genl_info *info,
+ enum devlink_param_type *param_type)
+{
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_TYPE))
+ return -EINVAL;
+
+ switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) {
+ case NLA_U8:
+ *param_type = DEVLINK_PARAM_TYPE_U8;
+ break;
+ case NLA_U16:
+ *param_type = DEVLINK_PARAM_TYPE_U16;
+ break;
+ case NLA_U32:
+ *param_type = DEVLINK_PARAM_TYPE_U32;
+ break;
+ case NLA_STRING:
+ *param_type = DEVLINK_PARAM_TYPE_STRING;
+ break;
+ case NLA_FLAG:
+ *param_type = DEVLINK_PARAM_TYPE_BOOL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+devlink_param_value_get_from_info(const struct devlink_param *param,
+ struct genl_info *info,
+ union devlink_param_value *value)
+{
+ struct nlattr *param_data;
+ int len;
+
+ param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA];
+
+ if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data)
+ return -EINVAL;
+
+ switch (param->type) {
+ case DEVLINK_PARAM_TYPE_U8:
+ if (nla_len(param_data) != sizeof(u8))
+ return -EINVAL;
+ value->vu8 = nla_get_u8(param_data);
+ break;
+ case DEVLINK_PARAM_TYPE_U16:
+ if (nla_len(param_data) != sizeof(u16))
+ return -EINVAL;
+ value->vu16 = nla_get_u16(param_data);
+ break;
+ case DEVLINK_PARAM_TYPE_U32:
+ if (nla_len(param_data) != sizeof(u32))
+ return -EINVAL;
+ value->vu32 = nla_get_u32(param_data);
+ break;
+ case DEVLINK_PARAM_TYPE_STRING:
+ len = strnlen(nla_data(param_data), nla_len(param_data));
+ if (len == nla_len(param_data) ||
+ len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
+ return -EINVAL;
+ strcpy(value->vstr, nla_data(param_data));
+ break;
+ case DEVLINK_PARAM_TYPE_BOOL:
+ if (param_data && nla_len(param_data))
+ return -EINVAL;
+ value->vbool = nla_get_flag(param_data);
+ break;
+ }
+ return 0;
+}
+
+static struct devlink_param_item *
+devlink_param_get_from_info(struct xarray *params, struct genl_info *info)
+{
+ char *param_name;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_NAME))
+ return NULL;
+
+ param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
+ return devlink_param_find_by_name(params, param_name);
+}
+
+int devlink_nl_param_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_param_item *param_item;
+ struct sk_buff *msg;
+ int err;
+
+ param_item = devlink_param_get_from_info(&devlink->params, info);
+ if (!param_item)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_param_fill(msg, devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_GET,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
+ unsigned int port_index,
+ struct xarray *params,
+ struct genl_info *info,
+ enum devlink_command cmd)
+{
+ enum devlink_param_type param_type;
+ struct devlink_param_gset_ctx ctx;
+ enum devlink_param_cmode cmode;
+ struct devlink_param_item *param_item;
+ const struct devlink_param *param;
+ union devlink_param_value value;
+ int err = 0;
+
+ param_item = devlink_param_get_from_info(params, info);
+ if (!param_item)
+ return -EINVAL;
+ param = param_item->param;
+ err = devlink_param_type_get_from_info(info, &param_type);
+ if (err)
+ return err;
+ if (param_type != param->type)
+ return -EINVAL;
+ err = devlink_param_value_get_from_info(param, info, &value);
+ if (err)
+ return err;
+ if (param->validate) {
+ err = param->validate(devlink, param->id, value, info->extack);
+ if (err)
+ return err;
+ }
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_VALUE_CMODE))
+ return -EINVAL;
+ cmode = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]);
+ if (!devlink_param_cmode_is_supported(param, cmode))
+ return -EOPNOTSUPP;
+
+ if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) {
+ param_item->driverinit_value_new = value;
+ param_item->driverinit_value_new_valid = true;
+ } else {
+ if (!param->set)
+ return -EOPNOTSUPP;
+ ctx.val = value;
+ ctx.cmode = cmode;
+ err = devlink_param_set(devlink, param, &ctx);
+ if (err)
+ return err;
+ }
+
+ devlink_param_notify(devlink, port_index, param_item, cmd);
+ return 0;
+}
+
+int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+
+ return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->params,
+ info, DEVLINK_CMD_PARAM_NEW);
+}
+
+int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ NL_SET_ERR_MSG(cb->extack, "Port params are not supported");
+ return msg->len;
+}
+
+int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ NL_SET_ERR_MSG(info->extack, "Port params are not supported");
+ return -EINVAL;
+}
+
+int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ NL_SET_ERR_MSG(info->extack, "Port params are not supported");
+ return -EINVAL;
+}
+
+static int devlink_param_verify(const struct devlink_param *param)
+{
+ if (!param || !param->name || !param->supported_cmodes)
+ return -EINVAL;
+ if (param->generic)
+ return devlink_param_generic_verify(param);
+ else
+ return devlink_param_driver_verify(param);
+}
+
+static int devlink_param_register(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ struct devlink_param_item *param_item;
+ int err;
+
+ WARN_ON(devlink_param_verify(param));
+ WARN_ON(devlink_param_find_by_name(&devlink->params, param->name));
+
+ if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
+ WARN_ON(param->get || param->set);
+ else
+ WARN_ON(!param->get || !param->set);
+
+ param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
+ if (!param_item)
+ return -ENOMEM;
+
+ param_item->param = param;
+
+ err = xa_insert(&devlink->params, param->id, param_item, GFP_KERNEL);
+ if (err)
+ goto err_xa_insert;
+
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+ return 0;
+
+err_xa_insert:
+ kfree(param_item);
+ return err;
+}
+
+static void devlink_param_unregister(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ struct devlink_param_item *param_item;
+
+ param_item = devlink_param_find_by_id(&devlink->params, param->id);
+ if (WARN_ON(!param_item))
+ return;
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL);
+ xa_erase(&devlink->params, param->id);
+ kfree(param_item);
+}
+
+/**
+ * devl_params_register - register configuration parameters
+ *
+ * @devlink: devlink
+ * @params: configuration parameters array
+ * @params_count: number of parameters provided
+ *
+ * Register the configuration parameters supported by the driver.
+ */
+int devl_params_register(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ const struct devlink_param *param = params;
+ int i, err;
+
+ lockdep_assert_held(&devlink->lock);
+
+ for (i = 0; i < params_count; i++, param++) {
+ err = devlink_param_register(devlink, param);
+ if (err)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ if (!i)
+ return err;
+
+ for (param--; i > 0; i--, param--)
+ devlink_param_unregister(devlink, param);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devl_params_register);
+
+int devlink_params_register(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_params_register(devlink, params, params_count);
+ devl_unlock(devlink);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_params_register);
+
+/**
+ * devl_params_unregister - unregister configuration parameters
+ * @devlink: devlink
+ * @params: configuration parameters to unregister
+ * @params_count: number of parameters provided
+ */
+void devl_params_unregister(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ const struct devlink_param *param = params;
+ int i;
+
+ lockdep_assert_held(&devlink->lock);
+
+ for (i = 0; i < params_count; i++, param++)
+ devlink_param_unregister(devlink, param);
+}
+EXPORT_SYMBOL_GPL(devl_params_unregister);
+
+void devlink_params_unregister(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ devl_lock(devlink);
+ devl_params_unregister(devlink, params, params_count);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_params_unregister);
+
+/**
+ * devl_param_driverinit_value_get - get configuration parameter
+ * value for driver initializing
+ *
+ * @devlink: devlink
+ * @param_id: parameter ID
+ * @val: pointer to store the value of parameter in driverinit
+ * configuration mode
+ *
+ * This function should be used by the driver to get driverinit
+ * configuration for initialization after reload command.
+ *
+ * Note that lockless call of this function relies on the
+ * driver to maintain following basic sane behavior:
+ * 1) Driver ensures a call to this function cannot race with
+ * registering/unregistering the parameter with the same parameter ID.
+ * 2) Driver ensures a call to this function cannot race with
+ * devl_param_driverinit_value_set() call with the same parameter ID.
+ * 3) Driver ensures a call to this function cannot race with
+ * reload operation.
+ * If the driver is not able to comply, it has to take the devlink->lock
+ * while calling this.
+ */
+int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+ union devlink_param_value *val)
+{
+ struct devlink_param_item *param_item;
+
+ if (WARN_ON(!devlink_reload_supported(devlink->ops)))
+ return -EOPNOTSUPP;
+
+ param_item = devlink_param_find_by_id(&devlink->params, param_id);
+ if (!param_item)
+ return -EINVAL;
+
+ if (!param_item->driverinit_value_valid)
+ return -EOPNOTSUPP;
+
+ if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT)))
+ return -EOPNOTSUPP;
+
+ *val = param_item->driverinit_value;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_param_driverinit_value_get);
+
+/**
+ * devl_param_driverinit_value_set - set value of configuration
+ * parameter for driverinit
+ * configuration mode
+ *
+ * @devlink: devlink
+ * @param_id: parameter ID
+ * @init_val: value of parameter to set for driverinit configuration mode
+ *
+ * This function should be used by the driver to set driverinit
+ * configuration mode default value.
+ */
+void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+ union devlink_param_value init_val)
+{
+ struct devlink_param_item *param_item;
+
+ devl_assert_locked(devlink);
+
+ param_item = devlink_param_find_by_id(&devlink->params, param_id);
+ if (WARN_ON(!param_item))
+ return;
+
+ if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT)))
+ return;
+
+ param_item->driverinit_value = init_val;
+ param_item->driverinit_value_valid = true;
+
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devl_param_driverinit_value_set);
+
+void devlink_params_driverinit_load_new(struct devlink *devlink)
+{
+ struct devlink_param_item *param_item;
+ unsigned long param_id;
+
+ xa_for_each(&devlink->params, param_id, param_item) {
+ if (!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT) ||
+ !param_item->driverinit_value_new_valid)
+ continue;
+ param_item->driverinit_value = param_item->driverinit_value_new;
+ param_item->driverinit_value_valid = true;
+ param_item->driverinit_value_new_valid = false;
+ }
+}
+
+/**
+ * devl_param_value_changed - notify devlink on a parameter's value
+ * change. Should be called by the driver
+ * right after the change.
+ *
+ * @devlink: devlink
+ * @param_id: parameter ID
+ *
+ * This function should be used by the driver to notify devlink on value
+ * change, excluding driverinit configuration mode.
+ * For driverinit configuration mode driver should use the function
+ */
+void devl_param_value_changed(struct devlink *devlink, u32 param_id)
+{
+ struct devlink_param_item *param_item;
+
+ param_item = devlink_param_find_by_id(&devlink->params, param_id);
+ WARN_ON(!param_item);
+
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devl_param_value_changed);
diff --git a/net/devlink/port.c b/net/devlink/port.c
new file mode 100644
index 000000000000..4763b42885fb
--- /dev/null
+++ b/net/devlink/port.c
@@ -0,0 +1,1515 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
+ (_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
+
+static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
+ [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
+ [DEVLINK_PORT_FN_ATTR_STATE] =
+ NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
+ DEVLINK_PORT_FN_STATE_ACTIVE),
+ [DEVLINK_PORT_FN_ATTR_CAPS] =
+ NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
+};
+
+#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \
+ WARN_ON_ONCE(!(devlink_port)->registered)
+#define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \
+ WARN_ON_ONCE((devlink_port)->registered)
+
+struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
+ unsigned int port_index)
+{
+ return xa_load(&devlink->ports, port_index);
+}
+
+struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
+ struct nlattr **attrs)
+{
+ if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+ struct devlink_port *devlink_port;
+
+ devlink_port = devlink_port_get_by_index(devlink, port_index);
+ if (!devlink_port)
+ return ERR_PTR(-ENODEV);
+ return devlink_port;
+ }
+ return ERR_PTR(-EINVAL);
+}
+
+struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ return devlink_port_get_from_attrs(devlink, info->attrs);
+}
+
+static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
+ u32 cap, bool is_enable)
+{
+ caps->selector |= cap;
+ if (is_enable)
+ caps->value |= cap;
+}
+
+static int devlink_port_fn_roce_fill(struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!devlink_port->ops->port_fn_roce_get)
+ return 0;
+
+ err = devlink_port->ops->port_fn_roce_get(devlink_port, &is_enable,
+ extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_migratable_fill(struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!devlink_port->ops->port_fn_migratable_get ||
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+ return 0;
+
+ err = devlink_port->ops->port_fn_migratable_get(devlink_port,
+ &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_MIGRATABLE, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_ipsec_crypto_fill(struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!devlink_port->ops->port_fn_ipsec_crypto_get ||
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+ return 0;
+
+ err = devlink_port->ops->port_fn_ipsec_crypto_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_ipsec_packet_fill(struct devlink_port *devlink_port,
+ struct nla_bitfield32 *caps,
+ struct netlink_ext_ack *extack)
+{
+ bool is_enable;
+ int err;
+
+ if (!devlink_port->ops->port_fn_ipsec_packet_get ||
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
+ return 0;
+
+ err = devlink_port->ops->port_fn_ipsec_packet_get(devlink_port, &is_enable, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_IPSEC_PACKET, is_enable);
+ return 0;
+}
+
+static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ struct nla_bitfield32 caps = {};
+ int err;
+
+ err = devlink_port_fn_roce_fill(devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ err = devlink_port_fn_migratable_fill(devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ err = devlink_port_fn_ipsec_crypto_fill(devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ err = devlink_port_fn_ipsec_packet_fill(devlink_port, &caps, extack);
+ if (err)
+ return err;
+
+ if (!caps.selector)
+ return 0;
+ err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value,
+ caps.selector);
+ if (err)
+ return err;
+
+ *msg_updated = true;
+ return 0;
+}
+
+int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port)
+{
+ if (devlink_nl_put_handle(msg, devlink_port->devlink))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+ return -EMSGSIZE;
+ return 0;
+}
+
+size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port)
+{
+ struct devlink *devlink = devlink_port->devlink;
+
+ return nla_total_size(strlen(devlink->dev->bus->name) + 1) /* DEVLINK_ATTR_BUS_NAME */
+ + nla_total_size(strlen(dev_name(devlink->dev)) + 1) /* DEVLINK_ATTR_DEV_NAME */
+ + nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */
+}
+
+static int devlink_nl_port_attrs_put(struct sk_buff *msg,
+ struct devlink_port *devlink_port)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+
+ if (!devlink_port->attrs_set)
+ return 0;
+ if (attrs->lanes) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_LANES, attrs->lanes))
+ return -EMSGSIZE;
+ }
+ if (nla_put_u8(msg, DEVLINK_ATTR_PORT_SPLITTABLE, attrs->splittable))
+ return -EMSGSIZE;
+ if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour))
+ return -EMSGSIZE;
+ switch (devlink_port->attrs.flavour) {
+ case DEVLINK_PORT_FLAVOUR_PCI_PF:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+ attrs->pci_pf.controller) ||
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_pf.pf))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_pf.external))
+ return -EMSGSIZE;
+ break;
+ case DEVLINK_PORT_FLAVOUR_PCI_VF:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+ attrs->pci_vf.controller) ||
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_vf.pf) ||
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, attrs->pci_vf.vf))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_vf.external))
+ return -EMSGSIZE;
+ break;
+ case DEVLINK_PORT_FLAVOUR_PCI_SF:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+ attrs->pci_sf.controller) ||
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
+ attrs->pci_sf.pf) ||
+ nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_SF_NUMBER,
+ attrs->pci_sf.sf))
+ return -EMSGSIZE;
+ break;
+ case DEVLINK_PORT_FLAVOUR_PHYSICAL:
+ case DEVLINK_PORT_FLAVOUR_CPU:
+ case DEVLINK_PORT_FLAVOUR_DSA:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
+ attrs->phys.port_number))
+ return -EMSGSIZE;
+ if (!attrs->split)
+ return 0;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
+ attrs->phys.port_number))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,
+ attrs->phys.split_subport_number))
+ return -EMSGSIZE;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int devlink_port_fn_hw_addr_fill(struct devlink_port *port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ u8 hw_addr[MAX_ADDR_LEN];
+ int hw_addr_len;
+ int err;
+
+ if (!port->ops->port_fn_hw_addr_get)
+ return 0;
+
+ err = port->ops->port_fn_hw_addr_get(port, hw_addr, &hw_addr_len,
+ extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+ err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr);
+ if (err)
+ return err;
+ *msg_updated = true;
+ return 0;
+}
+
+static bool
+devlink_port_fn_state_valid(enum devlink_port_fn_state state)
+{
+ return state == DEVLINK_PORT_FN_STATE_INACTIVE ||
+ state == DEVLINK_PORT_FN_STATE_ACTIVE;
+}
+
+static bool
+devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
+{
+ return opstate == DEVLINK_PORT_FN_OPSTATE_DETACHED ||
+ opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
+}
+
+static int devlink_port_fn_state_fill(struct devlink_port *port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ enum devlink_port_fn_opstate opstate;
+ enum devlink_port_fn_state state;
+ int err;
+
+ if (!port->ops->port_fn_state_get)
+ return 0;
+
+ err = port->ops->port_fn_state_get(port, &state, &opstate, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+ if (!devlink_port_fn_state_valid(state)) {
+ WARN_ON_ONCE(1);
+ NL_SET_ERR_MSG(extack, "Invalid state read from driver");
+ return -EINVAL;
+ }
+ if (!devlink_port_fn_opstate_valid(opstate)) {
+ WARN_ON_ONCE(1);
+ NL_SET_ERR_MSG(extack, "Invalid operational state read from driver");
+ return -EINVAL;
+ }
+ if (nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_STATE, state) ||
+ nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_OPSTATE, opstate))
+ return -EMSGSIZE;
+ *msg_updated = true;
+ return 0;
+}
+
+static int
+devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ return devlink_port->ops->port_fn_migratable_set(devlink_port, enable,
+ extack);
+}
+
+static int
+devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ return devlink_port->ops->port_fn_roce_set(devlink_port, enable,
+ extack);
+}
+
+static int
+devlink_port_fn_ipsec_crypto_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ return devlink_port->ops->port_fn_ipsec_crypto_set(devlink_port, enable, extack);
+}
+
+static int
+devlink_port_fn_ipsec_packet_set(struct devlink_port *devlink_port, bool enable,
+ struct netlink_ext_ack *extack)
+{
+ return devlink_port->ops->port_fn_ipsec_packet_set(devlink_port, enable, extack);
+}
+
+static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nla_bitfield32 caps;
+ u32 caps_value;
+ int err;
+
+ caps = nla_get_bitfield32(attr);
+ caps_value = caps.value & caps.selector;
+ if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
+ err = devlink_port_fn_roce_set(devlink_port,
+ caps_value & DEVLINK_PORT_FN_CAP_ROCE,
+ extack);
+ if (err)
+ return err;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+ err = devlink_port_fn_mig_set(devlink_port, caps_value &
+ DEVLINK_PORT_FN_CAP_MIGRATABLE,
+ extack);
+ if (err)
+ return err;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO) {
+ err = devlink_port_fn_ipsec_crypto_set(devlink_port, caps_value &
+ DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO,
+ extack);
+ if (err)
+ return err;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_PACKET) {
+ err = devlink_port_fn_ipsec_packet_set(devlink_port, caps_value &
+ DEVLINK_PORT_FN_CAP_IPSEC_PACKET,
+ extack);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int
+devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *function_attr;
+ bool msg_updated = false;
+ int err;
+
+ function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION);
+ if (!function_attr)
+ return -EMSGSIZE;
+
+ err = devlink_port_fn_hw_addr_fill(port, msg, extack, &msg_updated);
+ if (err)
+ goto out;
+ err = devlink_port_fn_caps_fill(port, msg, extack, &msg_updated);
+ if (err)
+ goto out;
+ err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
+out:
+ if (err || !msg_updated)
+ nla_nest_cancel(msg, function_attr);
+ else
+ nla_nest_end(msg, function_attr);
+ return err;
+}
+
+static int devlink_nl_port_fill(struct sk_buff *msg,
+ struct devlink_port *devlink_port,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags, struct netlink_ext_ack *extack)
+{
+ struct devlink *devlink = devlink_port->devlink;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+ goto nla_put_failure;
+
+ spin_lock_bh(&devlink_port->type_lock);
+ if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
+ goto nla_put_failure_type_locked;
+ if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE,
+ devlink_port->desired_type))
+ goto nla_put_failure_type_locked;
+ if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
+ if (devlink_port->type_eth.netdev &&
+ (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
+ devlink_port->type_eth.ifindex) ||
+ nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
+ devlink_port->type_eth.ifname)))
+ goto nla_put_failure_type_locked;
+ }
+ if (devlink_port->type == DEVLINK_PORT_TYPE_IB) {
+ struct ib_device *ibdev = devlink_port->type_ib.ibdev;
+
+ if (ibdev &&
+ nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME,
+ ibdev->name))
+ goto nla_put_failure_type_locked;
+ }
+ spin_unlock_bh(&devlink_port->type_lock);
+ if (devlink_nl_port_attrs_put(msg, devlink_port))
+ goto nla_put_failure;
+ if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
+ goto nla_put_failure;
+ if (devlink_port->linecard &&
+ nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
+ devlink_port->linecard->index))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure_type_locked:
+ spin_unlock_bh(&devlink_port->type_lock);
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_port_notify(struct devlink_port *devlink_port,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = devlink_port->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static void devlink_ports_notify(struct devlink *devlink,
+ enum devlink_command cmd)
+{
+ struct devlink_port *devlink_port;
+ unsigned long port_index;
+
+ xa_for_each(&devlink->ports, port_index, devlink_port)
+ devlink_port_notify(devlink_port, cmd);
+}
+
+void devlink_ports_notify_register(struct devlink *devlink)
+{
+ devlink_ports_notify(devlink, DEVLINK_CMD_PORT_NEW);
+}
+
+void devlink_ports_notify_unregister(struct devlink *devlink)
+{
+ devlink_ports_notify(devlink, DEVLINK_CMD_PORT_DEL);
+}
+
+int devlink_nl_port_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_port *devlink_port;
+ unsigned long port_index;
+ int err = 0;
+
+ xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
+ err = devlink_nl_port_fill(msg, devlink_port,
+ DEVLINK_CMD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags,
+ cb->extack);
+ if (err) {
+ state->idx = port_index;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int devlink_nl_port_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_port_get_dump_one);
+}
+
+static int devlink_port_type_set(struct devlink_port *devlink_port,
+ enum devlink_port_type port_type)
+
+{
+ int err;
+
+ if (!devlink_port->ops->port_type_set)
+ return -EOPNOTSUPP;
+
+ if (port_type == devlink_port->type)
+ return 0;
+
+ err = devlink_port->ops->port_type_set(devlink_port, port_type);
+ if (err)
+ return err;
+
+ devlink_port->desired_type = port_type;
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ return 0;
+}
+
+static int devlink_port_function_hw_addr_set(struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ const u8 *hw_addr;
+ int hw_addr_len;
+
+ hw_addr = nla_data(attr);
+ hw_addr_len = nla_len(attr);
+ if (hw_addr_len > MAX_ADDR_LEN) {
+ NL_SET_ERR_MSG(extack, "Port function hardware address too long");
+ return -EINVAL;
+ }
+ if (port->type == DEVLINK_PORT_TYPE_ETH) {
+ if (hw_addr_len != ETH_ALEN) {
+ NL_SET_ERR_MSG(extack, "Address must be 6 bytes for Ethernet device");
+ return -EINVAL;
+ }
+ if (!is_unicast_ether_addr(hw_addr)) {
+ NL_SET_ERR_MSG(extack, "Non-unicast hardware address unsupported");
+ return -EINVAL;
+ }
+ }
+
+ return port->ops->port_fn_hw_addr_set(port, hw_addr, hw_addr_len,
+ extack);
+}
+
+static int devlink_port_fn_state_set(struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ enum devlink_port_fn_state state;
+
+ state = nla_get_u8(attr);
+ return port->ops->port_fn_state_set(port, state, extack);
+}
+
+static int devlink_port_function_validate(struct devlink_port *devlink_port,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_port_ops *ops = devlink_port->ops;
+ struct nlattr *attr;
+
+ if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
+ !ops->port_fn_hw_addr_set) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ "Port doesn't support function attributes");
+ return -EOPNOTSUPP;
+ }
+ if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ "Function does not support state setting");
+ return -EOPNOTSUPP;
+ }
+ attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+ if (attr) {
+ struct nla_bitfield32 caps;
+
+ caps = nla_get_bitfield32(attr);
+ if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE &&
+ !ops->port_fn_roce_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support RoCE function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) {
+ if (!ops->port_fn_migratable_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support migratable function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "migratable function attribute supported for VFs only");
+ return -EOPNOTSUPP;
+ }
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_CRYPTO) {
+ if (!ops->port_fn_ipsec_crypto_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support ipsec_crypto function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "ipsec_crypto function attribute supported for VFs only");
+ return -EOPNOTSUPP;
+ }
+ }
+ if (caps.selector & DEVLINK_PORT_FN_CAP_IPSEC_PACKET) {
+ if (!ops->port_fn_ipsec_packet_set) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Port doesn't support ipsec_packet function attribute");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "ipsec_packet function attribute supported for VFs only");
+ return -EOPNOTSUPP;
+ }
+ }
+ }
+ return 0;
+}
+
+static int devlink_port_function_set(struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr,
+ devlink_function_nl_policy, extack);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Fail to parse port function attributes");
+ return err;
+ }
+
+ err = devlink_port_function_validate(port, tb, extack);
+ if (err)
+ return err;
+
+ attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
+ if (attr) {
+ err = devlink_port_function_hw_addr_set(port, attr, extack);
+ if (err)
+ return err;
+ }
+
+ attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
+ if (attr) {
+ err = devlink_port_fn_caps_set(port, attr, extack);
+ if (err)
+ return err;
+ }
+
+ /* Keep this as the last function attribute set, so that when
+ * multiple port function attributes are set along with state,
+ * Those can be applied first before activating the state.
+ */
+ attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
+ if (attr)
+ err = devlink_port_fn_state_set(port, attr, extack);
+
+ if (!err)
+ devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
+ return err;
+}
+
+int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ int err;
+
+ if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
+ enum devlink_port_type port_type;
+
+ port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
+ err = devlink_port_type_set(devlink_port, port_type);
+ if (err)
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_PORT_FUNCTION]) {
+ struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
+ struct netlink_ext_ack *extack = info->extack;
+
+ err = devlink_port_function_set(devlink_port, attr, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
+ u32 count;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_SPLIT_COUNT))
+ return -EINVAL;
+ if (!devlink_port->ops->port_split)
+ return -EOPNOTSUPP;
+
+ count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
+
+ if (!devlink_port->attrs.splittable) {
+ /* Split ports cannot be split. */
+ if (devlink_port->attrs.split)
+ NL_SET_ERR_MSG(info->extack, "Port cannot be split further");
+ else
+ NL_SET_ERR_MSG(info->extack, "Port cannot be split");
+ return -EINVAL;
+ }
+
+ if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) {
+ NL_SET_ERR_MSG(info->extack, "Invalid split count");
+ return -EINVAL;
+ }
+
+ return devlink_port->ops->port_split(devlink, devlink_port, count,
+ info->extack);
+}
+
+int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (!devlink_port->ops->port_unsplit)
+ return -EOPNOTSUPP;
+ return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack);
+}
+
+int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink_port_new_attrs new_attrs = {};
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_port *devlink_port;
+ struct sk_buff *msg;
+ int err;
+
+ if (!devlink->ops->port_new)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] ||
+ !info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) {
+ NL_SET_ERR_MSG(extack, "Port flavour or PCI PF are not specified");
+ return -EINVAL;
+ }
+ new_attrs.flavour = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_FLAVOUR]);
+ new_attrs.pfnum =
+ nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]);
+
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ /* Port index of the new port being created by driver. */
+ new_attrs.port_index =
+ nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+ new_attrs.port_index_valid = true;
+ }
+ if (info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]) {
+ new_attrs.controller =
+ nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]);
+ new_attrs.controller_valid = true;
+ }
+ if (new_attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_SF &&
+ info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]) {
+ new_attrs.sfnum = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]);
+ new_attrs.sfnum_valid = true;
+ }
+
+ err = devlink->ops->port_new(devlink, &new_attrs,
+ extack, &devlink_port);
+ if (err)
+ return err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ err = -ENOMEM;
+ goto err_out_port_del;
+ }
+ err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
+ info->snd_portid, info->snd_seq, 0, NULL);
+ if (WARN_ON_ONCE(err))
+ goto err_out_msg_free;
+ err = genlmsg_reply(msg, info);
+ if (err)
+ goto err_out_port_del;
+ return 0;
+
+err_out_msg_free:
+ nlmsg_free(msg);
+err_out_port_del:
+ devlink_port->ops->port_del(devlink, devlink_port, NULL);
+ return err;
+}
+
+int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (!devlink_port->ops->port_del)
+ return -EOPNOTSUPP;
+
+ return devlink_port->ops->port_del(devlink, devlink_port, extack);
+}
+
+static void devlink_port_type_warn(struct work_struct *work)
+{
+ struct devlink_port *port = container_of(to_delayed_work(work),
+ struct devlink_port,
+ type_warn_dw);
+ dev_warn(port->devlink->dev, "Type was not set for devlink port.");
+}
+
+static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
+{
+ /* Ignore CPU and DSA flavours. */
+ return devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA &&
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_UNUSED;
+}
+
+#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 3600)
+
+static void devlink_port_type_warn_schedule(struct devlink_port *devlink_port)
+{
+ if (!devlink_port_type_should_warn(devlink_port))
+ return;
+ /* Schedule a work to WARN in case driver does not set port
+ * type within timeout.
+ */
+ schedule_delayed_work(&devlink_port->type_warn_dw,
+ DEVLINK_PORT_TYPE_WARN_TIMEOUT);
+}
+
+static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
+{
+ if (!devlink_port_type_should_warn(devlink_port))
+ return;
+ cancel_delayed_work_sync(&devlink_port->type_warn_dw);
+}
+
+/**
+ * devlink_port_init() - Init devlink port
+ *
+ * @devlink: devlink
+ * @devlink_port: devlink port
+ *
+ * Initialize essential stuff that is needed for functions
+ * that may be called before devlink port registration.
+ * Call to this function is optional and not needed
+ * in case the driver does not use such functions.
+ */
+void devlink_port_init(struct devlink *devlink,
+ struct devlink_port *devlink_port)
+{
+ if (devlink_port->initialized)
+ return;
+ devlink_port->devlink = devlink;
+ INIT_LIST_HEAD(&devlink_port->region_list);
+ devlink_port->initialized = true;
+}
+EXPORT_SYMBOL_GPL(devlink_port_init);
+
+/**
+ * devlink_port_fini() - Deinitialize devlink port
+ *
+ * @devlink_port: devlink port
+ *
+ * Deinitialize essential stuff that is in use for functions
+ * that may be called after devlink port unregistration.
+ * Call to this function is optional and not needed
+ * in case the driver does not use such functions.
+ */
+void devlink_port_fini(struct devlink_port *devlink_port)
+{
+ WARN_ON(!list_empty(&devlink_port->region_list));
+}
+EXPORT_SYMBOL_GPL(devlink_port_fini);
+
+static const struct devlink_port_ops devlink_port_dummy_ops = {};
+
+/**
+ * devl_port_register_with_ops() - Register devlink port
+ *
+ * @devlink: devlink
+ * @devlink_port: devlink port
+ * @port_index: driver-specific numerical identifier of the port
+ * @ops: port ops
+ *
+ * Register devlink port with provided port index. User can use
+ * any indexing, even hw-related one. devlink_port structure
+ * is convenient to be embedded inside user driver private structure.
+ * Note that the caller should take care of zeroing the devlink_port
+ * structure.
+ */
+int devl_port_register_with_ops(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index,
+ const struct devlink_port_ops *ops)
+{
+ int err;
+
+ devl_assert_locked(devlink);
+
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ devlink_port_init(devlink, devlink_port);
+ devlink_port->registered = true;
+ devlink_port->index = port_index;
+ devlink_port->ops = ops ? ops : &devlink_port_dummy_ops;
+ spin_lock_init(&devlink_port->type_lock);
+ INIT_LIST_HEAD(&devlink_port->reporter_list);
+ err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL);
+ if (err) {
+ devlink_port->registered = false;
+ return err;
+ }
+
+ INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
+ devlink_port_type_warn_schedule(devlink_port);
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_port_register_with_ops);
+
+/**
+ * devlink_port_register_with_ops - Register devlink port
+ *
+ * @devlink: devlink
+ * @devlink_port: devlink port
+ * @port_index: driver-specific numerical identifier of the port
+ * @ops: port ops
+ *
+ * Register devlink port with provided port index. User can use
+ * any indexing, even hw-related one. devlink_port structure
+ * is convenient to be embedded inside user driver private structure.
+ * Note that the caller should take care of zeroing the devlink_port
+ * structure.
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+int devlink_port_register_with_ops(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index,
+ const struct devlink_port_ops *ops)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_port_register_with_ops(devlink, devlink_port,
+ port_index, ops);
+ devl_unlock(devlink);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_port_register_with_ops);
+
+/**
+ * devl_port_unregister() - Unregister devlink port
+ *
+ * @devlink_port: devlink port
+ */
+void devl_port_unregister(struct devlink_port *devlink_port)
+{
+ lockdep_assert_held(&devlink_port->devlink->lock);
+ WARN_ON(devlink_port->type != DEVLINK_PORT_TYPE_NOTSET);
+
+ devlink_port_type_warn_cancel(devlink_port);
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
+ xa_erase(&devlink_port->devlink->ports, devlink_port->index);
+ WARN_ON(!list_empty(&devlink_port->reporter_list));
+ devlink_port->registered = false;
+}
+EXPORT_SYMBOL_GPL(devl_port_unregister);
+
+/**
+ * devlink_port_unregister - Unregister devlink port
+ *
+ * @devlink_port: devlink port
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_port_unregister(struct devlink_port *devlink_port)
+{
+ struct devlink *devlink = devlink_port->devlink;
+
+ devl_lock(devlink);
+ devl_port_unregister(devlink_port);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_port_unregister);
+
+static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port,
+ struct net_device *netdev)
+{
+ const struct net_device_ops *ops = netdev->netdev_ops;
+
+ /* If driver registers devlink port, it should set devlink port
+ * attributes accordingly so the compat functions are called
+ * and the original ops are not used.
+ */
+ if (ops->ndo_get_phys_port_name) {
+ /* Some drivers use the same set of ndos for netdevs
+ * that have devlink_port registered and also for
+ * those who don't. Make sure that ndo_get_phys_port_name
+ * returns -EOPNOTSUPP here in case it is defined.
+ * Warn if not.
+ */
+ char name[IFNAMSIZ];
+ int err;
+
+ err = ops->ndo_get_phys_port_name(netdev, name, sizeof(name));
+ WARN_ON(err != -EOPNOTSUPP);
+ }
+ if (ops->ndo_get_port_parent_id) {
+ /* Some drivers use the same set of ndos for netdevs
+ * that have devlink_port registered and also for
+ * those who don't. Make sure that ndo_get_port_parent_id
+ * returns -EOPNOTSUPP here in case it is defined.
+ * Warn if not.
+ */
+ struct netdev_phys_item_id ppid;
+ int err;
+
+ err = ops->ndo_get_port_parent_id(netdev, &ppid);
+ WARN_ON(err != -EOPNOTSUPP);
+ }
+}
+
+static void __devlink_port_type_set(struct devlink_port *devlink_port,
+ enum devlink_port_type type,
+ void *type_dev)
+{
+ struct net_device *netdev = type_dev;
+
+ ASSERT_DEVLINK_PORT_REGISTERED(devlink_port);
+
+ if (type == DEVLINK_PORT_TYPE_NOTSET) {
+ devlink_port_type_warn_schedule(devlink_port);
+ } else {
+ devlink_port_type_warn_cancel(devlink_port);
+ if (type == DEVLINK_PORT_TYPE_ETH && netdev)
+ devlink_port_type_netdev_checks(devlink_port, netdev);
+ }
+
+ spin_lock_bh(&devlink_port->type_lock);
+ devlink_port->type = type;
+ switch (type) {
+ case DEVLINK_PORT_TYPE_ETH:
+ devlink_port->type_eth.netdev = netdev;
+ if (netdev) {
+ ASSERT_RTNL();
+ devlink_port->type_eth.ifindex = netdev->ifindex;
+ BUILD_BUG_ON(sizeof(devlink_port->type_eth.ifname) !=
+ sizeof(netdev->name));
+ strcpy(devlink_port->type_eth.ifname, netdev->name);
+ }
+ break;
+ case DEVLINK_PORT_TYPE_IB:
+ devlink_port->type_ib.ibdev = type_dev;
+ break;
+ default:
+ break;
+ }
+ spin_unlock_bh(&devlink_port->type_lock);
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+}
+
+/**
+ * devlink_port_type_eth_set - Set port type to Ethernet
+ *
+ * @devlink_port: devlink port
+ *
+ * If driver is calling this, most likely it is doing something wrong.
+ */
+void devlink_port_type_eth_set(struct devlink_port *devlink_port)
+{
+ dev_warn(devlink_port->devlink->dev,
+ "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n",
+ devlink_port->index);
+ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, NULL);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
+
+/**
+ * devlink_port_type_ib_set - Set port type to InfiniBand
+ *
+ * @devlink_port: devlink port
+ * @ibdev: related IB device
+ */
+void devlink_port_type_ib_set(struct devlink_port *devlink_port,
+ struct ib_device *ibdev)
+{
+ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
+
+/**
+ * devlink_port_type_clear - Clear port type
+ *
+ * @devlink_port: devlink port
+ *
+ * If driver is calling this for clearing Ethernet type, most likely
+ * it is doing something wrong.
+ */
+void devlink_port_type_clear(struct devlink_port *devlink_port)
+{
+ if (devlink_port->type == DEVLINK_PORT_TYPE_ETH)
+ dev_warn(devlink_port->devlink->dev,
+ "devlink port type for port %d cleared without a software interface reference, device type not supported by the kernel?\n",
+ devlink_port->index);
+ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL);
+}
+EXPORT_SYMBOL_GPL(devlink_port_type_clear);
+
+int devlink_port_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct devlink_port *devlink_port = netdev->devlink_port;
+ struct devlink *devlink;
+
+ if (!devlink_port)
+ return NOTIFY_OK;
+ devlink = devlink_port->devlink;
+
+ switch (event) {
+ case NETDEV_POST_INIT:
+ /* Set the type but not netdev pointer. It is going to be set
+ * later on by NETDEV_REGISTER event. Happens once during
+ * netdevice register
+ */
+ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH,
+ NULL);
+ break;
+ case NETDEV_REGISTER:
+ case NETDEV_CHANGENAME:
+ if (devlink_net(devlink) != dev_net(netdev))
+ return NOTIFY_OK;
+ /* Set the netdev on top of previously set type. Note this
+ * event happens also during net namespace change so here
+ * we take into account netdev pointer appearing in this
+ * namespace.
+ */
+ __devlink_port_type_set(devlink_port, devlink_port->type,
+ netdev);
+ break;
+ case NETDEV_UNREGISTER:
+ if (devlink_net(devlink) != dev_net(netdev))
+ return NOTIFY_OK;
+ /* Clear netdev pointer, but not the type. This event happens
+ * also during net namespace change so we need to clear
+ * pointer to netdev that is going to another net namespace.
+ */
+ __devlink_port_type_set(devlink_port, devlink_port->type,
+ NULL);
+ break;
+ case NETDEV_PRE_UNINIT:
+ /* Clear the type and the netdev pointer. Happens one during
+ * netdevice unregister.
+ */
+ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET,
+ NULL);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
+ enum devlink_port_flavour flavour)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+
+ devlink_port->attrs_set = true;
+ attrs->flavour = flavour;
+ if (attrs->switch_id.id_len) {
+ devlink_port->switch_port = true;
+ if (WARN_ON(attrs->switch_id.id_len > MAX_PHYS_ITEM_ID_LEN))
+ attrs->switch_id.id_len = MAX_PHYS_ITEM_ID_LEN;
+ } else {
+ devlink_port->switch_port = false;
+ }
+ return 0;
+}
+
+/**
+ * devlink_port_attrs_set - Set port attributes
+ *
+ * @devlink_port: devlink port
+ * @attrs: devlink port attrs
+ */
+void devlink_port_attrs_set(struct devlink_port *devlink_port,
+ struct devlink_port_attrs *attrs)
+{
+ int ret;
+
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ devlink_port->attrs = *attrs;
+ ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
+ if (ret)
+ return;
+ WARN_ON(attrs->splittable && attrs->split);
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
+
+/**
+ * devlink_port_attrs_pci_pf_set - Set PCI PF port attributes
+ *
+ * @devlink_port: devlink port
+ * @controller: associated controller number for the devlink port instance
+ * @pf: associated PF for the devlink port instance
+ * @external: indicates if the port is for an external controller
+ */
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
+ u16 pf, bool external)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+ int ret;
+
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ ret = __devlink_port_attrs_set(devlink_port,
+ DEVLINK_PORT_FLAVOUR_PCI_PF);
+ if (ret)
+ return;
+ attrs->pci_pf.controller = controller;
+ attrs->pci_pf.pf = pf;
+ attrs->pci_pf.external = external;
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
+
+/**
+ * devlink_port_attrs_pci_vf_set - Set PCI VF port attributes
+ *
+ * @devlink_port: devlink port
+ * @controller: associated controller number for the devlink port instance
+ * @pf: associated PF for the devlink port instance
+ * @vf: associated VF of a PF for the devlink port instance
+ * @external: indicates if the port is for an external controller
+ */
+void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
+ u16 pf, u16 vf, bool external)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+ int ret;
+
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ ret = __devlink_port_attrs_set(devlink_port,
+ DEVLINK_PORT_FLAVOUR_PCI_VF);
+ if (ret)
+ return;
+ attrs->pci_vf.controller = controller;
+ attrs->pci_vf.pf = pf;
+ attrs->pci_vf.vf = vf;
+ attrs->pci_vf.external = external;
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
+
+/**
+ * devlink_port_attrs_pci_sf_set - Set PCI SF port attributes
+ *
+ * @devlink_port: devlink port
+ * @controller: associated controller number for the devlink port instance
+ * @pf: associated PF for the devlink port instance
+ * @sf: associated SF of a PF for the devlink port instance
+ * @external: indicates if the port is for an external controller
+ */
+void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller,
+ u16 pf, u32 sf, bool external)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+ int ret;
+
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ ret = __devlink_port_attrs_set(devlink_port,
+ DEVLINK_PORT_FLAVOUR_PCI_SF);
+ if (ret)
+ return;
+ attrs->pci_sf.controller = controller;
+ attrs->pci_sf.pf = pf;
+ attrs->pci_sf.sf = sf;
+ attrs->pci_sf.external = external;
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
+
+/**
+ * devlink_port_linecard_set - Link port with a linecard
+ *
+ * @devlink_port: devlink port
+ * @linecard: devlink linecard
+ */
+void devlink_port_linecard_set(struct devlink_port *devlink_port,
+ struct devlink_linecard *linecard)
+{
+ ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port);
+
+ devlink_port->linecard = linecard;
+}
+EXPORT_SYMBOL_GPL(devlink_port_linecard_set);
+
+static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
+ char *name, size_t len)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+ int n = 0;
+
+ if (!devlink_port->attrs_set)
+ return -EOPNOTSUPP;
+
+ switch (attrs->flavour) {
+ case DEVLINK_PORT_FLAVOUR_PHYSICAL:
+ if (devlink_port->linecard)
+ n = snprintf(name, len, "l%u",
+ devlink_port->linecard->index);
+ if (n < len)
+ n += snprintf(name + n, len - n, "p%u",
+ attrs->phys.port_number);
+ if (n < len && attrs->split)
+ n += snprintf(name + n, len - n, "s%u",
+ attrs->phys.split_subport_number);
+ break;
+ case DEVLINK_PORT_FLAVOUR_CPU:
+ case DEVLINK_PORT_FLAVOUR_DSA:
+ case DEVLINK_PORT_FLAVOUR_UNUSED:
+ /* As CPU and DSA ports do not have a netdevice associated
+ * case should not ever happen.
+ */
+ WARN_ON(1);
+ return -EINVAL;
+ case DEVLINK_PORT_FLAVOUR_PCI_PF:
+ if (attrs->pci_pf.external) {
+ n = snprintf(name, len, "c%u", attrs->pci_pf.controller);
+ if (n >= len)
+ return -EINVAL;
+ len -= n;
+ name += n;
+ }
+ n = snprintf(name, len, "pf%u", attrs->pci_pf.pf);
+ break;
+ case DEVLINK_PORT_FLAVOUR_PCI_VF:
+ if (attrs->pci_vf.external) {
+ n = snprintf(name, len, "c%u", attrs->pci_vf.controller);
+ if (n >= len)
+ return -EINVAL;
+ len -= n;
+ name += n;
+ }
+ n = snprintf(name, len, "pf%uvf%u",
+ attrs->pci_vf.pf, attrs->pci_vf.vf);
+ break;
+ case DEVLINK_PORT_FLAVOUR_PCI_SF:
+ if (attrs->pci_sf.external) {
+ n = snprintf(name, len, "c%u", attrs->pci_sf.controller);
+ if (n >= len)
+ return -EINVAL;
+ len -= n;
+ name += n;
+ }
+ n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
+ attrs->pci_sf.sf);
+ break;
+ case DEVLINK_PORT_FLAVOUR_VIRTUAL:
+ return -EOPNOTSUPP;
+ }
+
+ if (n >= len)
+ return -EINVAL;
+
+ return 0;
+}
+
+int devlink_compat_phys_port_name_get(struct net_device *dev,
+ char *name, size_t len)
+{
+ struct devlink_port *devlink_port;
+
+ /* RTNL mutex is held here which ensures that devlink_port
+ * instance cannot disappear in the middle. No need to take
+ * any devlink lock as only permanent values are accessed.
+ */
+ ASSERT_RTNL();
+
+ devlink_port = dev->devlink_port;
+ if (!devlink_port)
+ return -EOPNOTSUPP;
+
+ return __devlink_port_phys_port_name_get(devlink_port, name, len);
+}
+
+int devlink_compat_switch_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *ppid)
+{
+ struct devlink_port *devlink_port;
+
+ /* Caller must hold RTNL mutex or reference to dev, which ensures that
+ * devlink_port instance cannot disappear in the middle. No need to take
+ * any devlink lock as only permanent values are accessed.
+ */
+ devlink_port = dev->devlink_port;
+ if (!devlink_port || !devlink_port->switch_port)
+ return -EOPNOTSUPP;
+
+ memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid));
+
+ return 0;
+}
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
new file mode 100644
index 000000000000..dff1593b8406
--- /dev/null
+++ b/net/devlink/rate.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+static inline bool
+devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
+{
+ return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF;
+}
+
+static inline bool
+devlink_rate_is_node(struct devlink_rate *devlink_rate)
+{
+ return devlink_rate->type == DEVLINK_RATE_TYPE_NODE;
+}
+
+static struct devlink_rate *
+devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ struct devlink_rate *devlink_rate;
+ struct devlink_port *devlink_port;
+
+ devlink_port = devlink_port_get_from_attrs(devlink, info->attrs);
+ if (IS_ERR(devlink_port))
+ return ERR_CAST(devlink_port);
+ devlink_rate = devlink_port->devlink_rate;
+ return devlink_rate ?: ERR_PTR(-ENODEV);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name)
+{
+ static struct devlink_rate *devlink_rate;
+
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ if (devlink_rate_is_node(devlink_rate) &&
+ !strcmp(node_name, devlink_rate->name))
+ return devlink_rate;
+ }
+ return ERR_PTR(-ENODEV);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
+{
+ const char *rate_node_name;
+ size_t len;
+
+ if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME])
+ return ERR_PTR(-EINVAL);
+ rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]);
+ len = strlen(rate_node_name);
+ /* Name cannot be empty or decimal number */
+ if (!len || strspn(rate_node_name, "0123456789") == len)
+ return ERR_PTR(-EINVAL);
+
+ return devlink_rate_node_get_by_name(devlink, rate_node_name);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ return devlink_rate_node_get_from_attrs(devlink, info->attrs);
+}
+
+static struct devlink_rate *
+devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ struct nlattr **attrs = info->attrs;
+
+ if (attrs[DEVLINK_ATTR_PORT_INDEX])
+ return devlink_rate_leaf_get_from_info(devlink, info);
+ else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME])
+ return devlink_rate_node_get_from_info(devlink, info);
+ else
+ return ERR_PTR(-EINVAL);
+}
+
+static int devlink_nl_rate_fill(struct sk_buff *msg,
+ struct devlink_rate *devlink_rate,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags, struct netlink_ext_ack *extack)
+{
+ struct devlink *devlink = devlink_rate->devlink;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type))
+ goto nla_put_failure;
+
+ if (devlink_rate_is_leaf(devlink_rate)) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+ devlink_rate->devlink_port->index))
+ goto nla_put_failure;
+ } else if (devlink_rate_is_node(devlink_rate)) {
+ if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME,
+ devlink_rate->name))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
+ devlink_rate->tx_share, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX,
+ devlink_rate->tx_max, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY,
+ devlink_rate->tx_priority))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_WEIGHT,
+ devlink_rate->tx_weight))
+ goto nla_put_failure;
+
+ if (devlink_rate->parent)
+ if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME,
+ devlink_rate->parent->name))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_rate_notify(struct devlink_rate *devlink_rate,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = devlink_rate->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_rate_fill(msg, devlink_rate, cmd, 0, 0, 0, NULL);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_rates_notify_register(struct devlink *devlink)
+{
+ struct devlink_rate *rate_node;
+
+ list_for_each_entry(rate_node, &devlink->rate_list, list)
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+}
+
+void devlink_rates_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_rate *rate_node;
+
+ list_for_each_entry_reverse(rate_node, &devlink->rate_list, list)
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+}
+
+static int
+devlink_nl_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_rate *devlink_rate;
+ int idx = 0;
+ int err = 0;
+
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
+ u32 id = NETLINK_CB(cb->skb).portid;
+
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
+ cb->nlh->nlmsg_seq, flags, NULL);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
+ }
+
+ return err;
+}
+
+int devlink_nl_rate_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_rate_get_dump_one);
+}
+
+int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *devlink_rate;
+ struct sk_buff *msg;
+ int err;
+
+ devlink_rate = devlink_rate_get_from_info(devlink, info);
+ if (IS_ERR(devlink_rate))
+ return PTR_ERR(devlink_rate);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static bool
+devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent)
+{
+ while (parent) {
+ if (parent == devlink_rate)
+ return true;
+ parent = parent->parent;
+ }
+ return false;
+}
+
+static int
+devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
+ struct genl_info *info,
+ struct nlattr *nla_parent)
+{
+ struct devlink *devlink = devlink_rate->devlink;
+ const char *parent_name = nla_data(nla_parent);
+ const struct devlink_ops *ops = devlink->ops;
+ size_t len = strlen(parent_name);
+ struct devlink_rate *parent;
+ int err = -EOPNOTSUPP;
+
+ parent = devlink_rate->parent;
+
+ if (parent && !len) {
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_parent_set(devlink_rate, NULL,
+ devlink_rate->priv, NULL,
+ info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_parent_set(devlink_rate, NULL,
+ devlink_rate->priv, NULL,
+ info->extack);
+ if (err)
+ return err;
+
+ refcount_dec(&parent->refcnt);
+ devlink_rate->parent = NULL;
+ } else if (len) {
+ parent = devlink_rate_node_get_by_name(devlink, parent_name);
+ if (IS_ERR(parent))
+ return -ENODEV;
+
+ if (parent == devlink_rate) {
+ NL_SET_ERR_MSG(info->extack, "Parent to self is not allowed");
+ return -EINVAL;
+ }
+
+ if (devlink_rate_is_node(devlink_rate) &&
+ devlink_rate_is_parent_node(devlink_rate, parent->parent)) {
+ NL_SET_ERR_MSG(info->extack, "Node is already a parent of parent node.");
+ return -EEXIST;
+ }
+
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_parent_set(devlink_rate, parent,
+ devlink_rate->priv, parent->priv,
+ info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_parent_set(devlink_rate, parent,
+ devlink_rate->priv, parent->priv,
+ info->extack);
+ if (err)
+ return err;
+
+ if (devlink_rate->parent)
+ /* we're reassigning to other parent in this case */
+ refcount_dec(&devlink_rate->parent->refcnt);
+
+ refcount_inc(&parent->refcnt);
+ devlink_rate->parent = parent;
+ }
+
+ return 0;
+}
+
+static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
+ const struct devlink_ops *ops,
+ struct genl_info *info)
+{
+ struct nlattr *nla_parent, **attrs = info->attrs;
+ int err = -EOPNOTSUPP;
+ u32 priority;
+ u32 weight;
+ u64 rate;
+
+ if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) {
+ rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]);
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ if (err)
+ return err;
+ devlink_rate->tx_share = rate;
+ }
+
+ if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) {
+ rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]);
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ if (err)
+ return err;
+ devlink_rate->tx_max = rate;
+ }
+
+ if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]) {
+ priority = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]);
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tx_priority_set(devlink_rate, devlink_rate->priv,
+ priority, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tx_priority_set(devlink_rate, devlink_rate->priv,
+ priority, info->extack);
+
+ if (err)
+ return err;
+ devlink_rate->tx_priority = priority;
+ }
+
+ if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]) {
+ weight = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]);
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tx_weight_set(devlink_rate, devlink_rate->priv,
+ weight, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tx_weight_set(devlink_rate, devlink_rate->priv,
+ weight, info->extack);
+
+ if (err)
+ return err;
+ devlink_rate->tx_weight = weight;
+ }
+
+ nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME];
+ if (nla_parent) {
+ err = devlink_nl_rate_parent_node_set(devlink_rate, info,
+ nla_parent);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
+ struct genl_info *info,
+ enum devlink_rate_type type)
+{
+ struct nlattr **attrs = info->attrs;
+
+ if (type == DEVLINK_RATE_TYPE_LEAF) {
+ if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) {
+ NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the leafs");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) {
+ NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the leafs");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
+ !ops->rate_leaf_parent_set) {
+ NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the leafs");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_leaf_tx_priority_set) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[DEVLINK_ATTR_RATE_TX_PRIORITY],
+ "TX priority set isn't supported for the leafs");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_leaf_tx_weight_set) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[DEVLINK_ATTR_RATE_TX_WEIGHT],
+ "TX weight set isn't supported for the leafs");
+ return false;
+ }
+ } else if (type == DEVLINK_RATE_TYPE_NODE) {
+ if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
+ NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) {
+ NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the nodes");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
+ !ops->rate_node_parent_set) {
+ NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the nodes");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_node_tx_priority_set) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[DEVLINK_ATTR_RATE_TX_PRIORITY],
+ "TX priority set isn't supported for the nodes");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_node_tx_weight_set) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[DEVLINK_ATTR_RATE_TX_WEIGHT],
+ "TX weight set isn't supported for the nodes");
+ return false;
+ }
+ } else {
+ WARN(1, "Unknown type of rate object");
+ return false;
+ }
+
+ return true;
+}
+
+int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *devlink_rate;
+ const struct devlink_ops *ops;
+ int err;
+
+ devlink_rate = devlink_rate_get_from_info(devlink, info);
+ if (IS_ERR(devlink_rate))
+ return PTR_ERR(devlink_rate);
+
+ ops = devlink->ops;
+ if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
+ return -EOPNOTSUPP;
+
+ err = devlink_nl_rate_set(devlink_rate, ops, info);
+
+ if (!err)
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+ return err;
+}
+
+int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *rate_node;
+ const struct devlink_ops *ops;
+ int err;
+
+ ops = devlink->ops;
+ if (!ops || !ops->rate_node_new || !ops->rate_node_del) {
+ NL_SET_ERR_MSG(info->extack, "Rate nodes aren't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE))
+ return -EOPNOTSUPP;
+
+ rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs);
+ if (!IS_ERR(rate_node))
+ return -EEXIST;
+ else if (rate_node == ERR_PTR(-EINVAL))
+ return -EINVAL;
+
+ rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
+ if (!rate_node)
+ return -ENOMEM;
+
+ rate_node->devlink = devlink;
+ rate_node->type = DEVLINK_RATE_TYPE_NODE;
+ rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL);
+ if (!rate_node->name) {
+ err = -ENOMEM;
+ goto err_strdup;
+ }
+
+ err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack);
+ if (err)
+ goto err_node_new;
+
+ err = devlink_nl_rate_set(rate_node, ops, info);
+ if (err)
+ goto err_rate_set;
+
+ refcount_set(&rate_node->refcnt, 1);
+ list_add(&rate_node->list, &devlink->rate_list);
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+ return 0;
+
+err_rate_set:
+ ops->rate_node_del(rate_node, rate_node->priv, info->extack);
+err_node_new:
+ kfree(rate_node->name);
+err_strdup:
+ kfree(rate_node);
+ return err;
+}
+
+int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *rate_node;
+ int err;
+
+ rate_node = devlink_rate_node_get_from_info(devlink, info);
+ if (IS_ERR(rate_node))
+ return PTR_ERR(rate_node);
+
+ if (refcount_read(&rate_node->refcnt) > 1) {
+ NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node.");
+ return -EBUSY;
+ }
+
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+ err = devlink->ops->rate_node_del(rate_node, rate_node->priv,
+ info->extack);
+ if (rate_node->parent)
+ refcount_dec(&rate_node->parent->refcnt);
+ list_del(&rate_node->list);
+ kfree(rate_node->name);
+ kfree(rate_node);
+ return err;
+}
+
+int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_rate *devlink_rate;
+
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list)
+ if (devlink_rate_is_node(devlink_rate)) {
+ NL_SET_ERR_MSG(extack, "Rate node(s) exists.");
+ return -EBUSY;
+ }
+ return 0;
+}
+
+/**
+ * devl_rate_node_create - create devlink rate node
+ * @devlink: devlink instance
+ * @priv: driver private data
+ * @node_name: name of the resulting node
+ * @parent: parent devlink_rate struct
+ *
+ * Create devlink rate object of type node
+ */
+struct devlink_rate *
+devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
+ struct devlink_rate *parent)
+{
+ struct devlink_rate *rate_node;
+
+ rate_node = devlink_rate_node_get_by_name(devlink, node_name);
+ if (!IS_ERR(rate_node))
+ return ERR_PTR(-EEXIST);
+
+ rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
+ if (!rate_node)
+ return ERR_PTR(-ENOMEM);
+
+ if (parent) {
+ rate_node->parent = parent;
+ refcount_inc(&rate_node->parent->refcnt);
+ }
+
+ rate_node->type = DEVLINK_RATE_TYPE_NODE;
+ rate_node->devlink = devlink;
+ rate_node->priv = priv;
+
+ rate_node->name = kstrdup(node_name, GFP_KERNEL);
+ if (!rate_node->name) {
+ kfree(rate_node);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ refcount_set(&rate_node->refcnt, 1);
+ list_add(&rate_node->list, &devlink->rate_list);
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+ return rate_node;
+}
+EXPORT_SYMBOL_GPL(devl_rate_node_create);
+
+/**
+ * devl_rate_leaf_create - create devlink rate leaf
+ * @devlink_port: devlink port object to create rate object on
+ * @priv: driver private data
+ * @parent: parent devlink_rate struct
+ *
+ * Create devlink rate object of type leaf on provided @devlink_port.
+ */
+int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
+ struct devlink_rate *parent)
+{
+ struct devlink *devlink = devlink_port->devlink;
+ struct devlink_rate *devlink_rate;
+
+ devl_assert_locked(devlink_port->devlink);
+
+ if (WARN_ON(devlink_port->devlink_rate))
+ return -EBUSY;
+
+ devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
+ if (!devlink_rate)
+ return -ENOMEM;
+
+ if (parent) {
+ devlink_rate->parent = parent;
+ refcount_inc(&devlink_rate->parent->refcnt);
+ }
+
+ devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
+ devlink_rate->devlink = devlink;
+ devlink_rate->devlink_port = devlink_port;
+ devlink_rate->priv = priv;
+ list_add_tail(&devlink_rate->list, &devlink->rate_list);
+ devlink_port->devlink_rate = devlink_rate;
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_rate_leaf_create);
+
+/**
+ * devl_rate_leaf_destroy - destroy devlink rate leaf
+ *
+ * @devlink_port: devlink port linked to the rate object
+ *
+ * Destroy the devlink rate object of type leaf on provided @devlink_port.
+ */
+void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
+{
+ struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
+
+ devl_assert_locked(devlink_port->devlink);
+ if (!devlink_rate)
+ return;
+
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
+ if (devlink_rate->parent)
+ refcount_dec(&devlink_rate->parent->refcnt);
+ list_del(&devlink_rate->list);
+ devlink_port->devlink_rate = NULL;
+ kfree(devlink_rate);
+}
+EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
+
+/**
+ * devl_rate_nodes_destroy - destroy all devlink rate nodes on device
+ * @devlink: devlink instance
+ *
+ * Unset parent for all rate objects and destroy all rate nodes
+ * on specified device.
+ */
+void devl_rate_nodes_destroy(struct devlink *devlink)
+{
+ static struct devlink_rate *devlink_rate, *tmp;
+ const struct devlink_ops *ops = devlink->ops;
+
+ devl_assert_locked(devlink);
+
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ if (!devlink_rate->parent)
+ continue;
+
+ refcount_dec(&devlink_rate->parent->refcnt);
+ if (devlink_rate_is_leaf(devlink_rate))
+ ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv,
+ NULL, NULL);
+ else if (devlink_rate_is_node(devlink_rate))
+ ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv,
+ NULL, NULL);
+ }
+ list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) {
+ if (devlink_rate_is_node(devlink_rate)) {
+ ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL);
+ list_del(&devlink_rate->list);
+ kfree(devlink_rate->name);
+ kfree(devlink_rate);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);
diff --git a/net/devlink/region.c b/net/devlink/region.c
new file mode 100644
index 000000000000..d197cdb662db
--- /dev/null
+++ b/net/devlink/region.c
@@ -0,0 +1,1260 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+struct devlink_region {
+ struct devlink *devlink;
+ struct devlink_port *port;
+ struct list_head list;
+ union {
+ const struct devlink_region_ops *ops;
+ const struct devlink_port_region_ops *port_ops;
+ };
+ struct mutex snapshot_lock; /* protects snapshot_list,
+ * max_snapshots and cur_snapshots
+ * consistency.
+ */
+ struct list_head snapshot_list;
+ u32 max_snapshots;
+ u32 cur_snapshots;
+ u64 size;
+};
+
+struct devlink_snapshot {
+ struct list_head list;
+ struct devlink_region *region;
+ u8 *data;
+ u32 id;
+};
+
+static struct devlink_region *
+devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
+{
+ struct devlink_region *region;
+
+ list_for_each_entry(region, &devlink->region_list, list)
+ if (!strcmp(region->ops->name, region_name))
+ return region;
+
+ return NULL;
+}
+
+static struct devlink_region *
+devlink_port_region_get_by_name(struct devlink_port *port,
+ const char *region_name)
+{
+ struct devlink_region *region;
+
+ list_for_each_entry(region, &port->region_list, list)
+ if (!strcmp(region->ops->name, region_name))
+ return region;
+
+ return NULL;
+}
+
+static struct devlink_snapshot *
+devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
+{
+ struct devlink_snapshot *snapshot;
+
+ list_for_each_entry(snapshot, &region->snapshot_list, list)
+ if (snapshot->id == id)
+ return snapshot;
+
+ return NULL;
+}
+
+static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_snapshot *snapshot)
+{
+ struct nlattr *snap_attr;
+ int err;
+
+ snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
+ if (!snap_attr)
+ return -EINVAL;
+
+ err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id);
+ if (err)
+ goto nla_put_failure;
+
+ nla_nest_end(msg, snap_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, snap_attr);
+ return err;
+}
+
+static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_region *region)
+{
+ struct devlink_snapshot *snapshot;
+ struct nlattr *snapshots_attr;
+ int err;
+
+ snapshots_attr = nla_nest_start_noflag(msg,
+ DEVLINK_ATTR_REGION_SNAPSHOTS);
+ if (!snapshots_attr)
+ return -EINVAL;
+
+ list_for_each_entry(snapshot, &region->snapshot_list, list) {
+ err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(msg, snapshots_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, snapshots_attr);
+ return err;
+}
+
+static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags,
+ struct devlink_region *region)
+{
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = devlink_nl_put_handle(msg, devlink);
+ if (err)
+ goto nla_put_failure;
+
+ if (region->port) {
+ err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+ region->port->index);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->ops->name);
+ if (err)
+ goto nla_put_failure;
+
+ err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+ region->size,
+ DEVLINK_ATTR_PAD);
+ if (err)
+ goto nla_put_failure;
+
+ err = nla_put_u32(msg, DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,
+ region->max_snapshots);
+ if (err)
+ goto nla_put_failure;
+
+ err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+static struct sk_buff *
+devlink_nl_region_notify_build(struct devlink_region *region,
+ struct devlink_snapshot *snapshot,
+ enum devlink_command cmd, u32 portid, u32 seq)
+{
+ struct devlink *devlink = region->devlink;
+ struct sk_buff *msg;
+ void *hdr;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return ERR_PTR(-ENOMEM);
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, 0, cmd);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto out_free_msg;
+ }
+
+ err = devlink_nl_put_handle(msg, devlink);
+ if (err)
+ goto out_cancel_msg;
+
+ if (region->port) {
+ err = nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+ region->port->index);
+ if (err)
+ goto out_cancel_msg;
+ }
+
+ err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME,
+ region->ops->name);
+ if (err)
+ goto out_cancel_msg;
+
+ if (snapshot) {
+ err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID,
+ snapshot->id);
+ if (err)
+ goto out_cancel_msg;
+ } else {
+ err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+ region->size, DEVLINK_ATTR_PAD);
+ if (err)
+ goto out_cancel_msg;
+ }
+ genlmsg_end(msg, hdr);
+
+ return msg;
+
+out_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+out_free_msg:
+ nlmsg_free(msg);
+ return ERR_PTR(err);
+}
+
+static void devlink_nl_region_notify(struct devlink_region *region,
+ struct devlink_snapshot *snapshot,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = region->devlink;
+ struct sk_buff *msg;
+
+ WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0);
+ if (IS_ERR(msg))
+ return;
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_regions_notify_register(struct devlink *devlink)
+{
+ struct devlink_region *region;
+
+ list_for_each_entry(region, &devlink->region_list, list)
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+}
+
+void devlink_regions_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_region *region;
+
+ list_for_each_entry_reverse(region, &devlink->region_list, list)
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+}
+
+/**
+ * __devlink_snapshot_id_increment - Increment number of snapshots using an id
+ * @devlink: devlink instance
+ * @id: the snapshot id
+ *
+ * Track when a new snapshot begins using an id. Load the count for the
+ * given id from the snapshot xarray, increment it, and store it back.
+ *
+ * Called when a new snapshot is created with the given id.
+ *
+ * The id *must* have been previously allocated by
+ * devlink_region_snapshot_id_get().
+ *
+ * Returns 0 on success, or an error on failure.
+ */
+static int __devlink_snapshot_id_increment(struct devlink *devlink, u32 id)
+{
+ unsigned long count;
+ void *p;
+ int err;
+
+ xa_lock(&devlink->snapshot_ids);
+ p = xa_load(&devlink->snapshot_ids, id);
+ if (WARN_ON(!p)) {
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ if (WARN_ON(!xa_is_value(p))) {
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ count = xa_to_value(p);
+ count++;
+
+ err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
+ GFP_ATOMIC));
+unlock:
+ xa_unlock(&devlink->snapshot_ids);
+ return err;
+}
+
+/**
+ * __devlink_snapshot_id_decrement - Decrease number of snapshots using an id
+ * @devlink: devlink instance
+ * @id: the snapshot id
+ *
+ * Track when a snapshot is deleted and stops using an id. Load the count
+ * for the given id from the snapshot xarray, decrement it, and store it
+ * back.
+ *
+ * If the count reaches zero, erase this id from the xarray, freeing it
+ * up for future re-use by devlink_region_snapshot_id_get().
+ *
+ * Called when a snapshot using the given id is deleted, and when the
+ * initial allocator of the id is finished using it.
+ */
+static void __devlink_snapshot_id_decrement(struct devlink *devlink, u32 id)
+{
+ unsigned long count;
+ void *p;
+
+ xa_lock(&devlink->snapshot_ids);
+ p = xa_load(&devlink->snapshot_ids, id);
+ if (WARN_ON(!p))
+ goto unlock;
+
+ if (WARN_ON(!xa_is_value(p)))
+ goto unlock;
+
+ count = xa_to_value(p);
+
+ if (count > 1) {
+ count--;
+ __xa_store(&devlink->snapshot_ids, id, xa_mk_value(count),
+ GFP_ATOMIC);
+ } else {
+ /* If this was the last user, we can erase this id */
+ __xa_erase(&devlink->snapshot_ids, id);
+ }
+unlock:
+ xa_unlock(&devlink->snapshot_ids);
+}
+
+/**
+ * __devlink_snapshot_id_insert - Insert a specific snapshot ID
+ * @devlink: devlink instance
+ * @id: the snapshot id
+ *
+ * Mark the given snapshot id as used by inserting a zero value into the
+ * snapshot xarray.
+ *
+ * This must be called while holding the devlink instance lock. Unlike
+ * devlink_snapshot_id_get, the initial reference count is zero, not one.
+ * It is expected that the id will immediately be used before
+ * releasing the devlink instance lock.
+ *
+ * Returns zero on success, or an error code if the snapshot id could not
+ * be inserted.
+ */
+static int __devlink_snapshot_id_insert(struct devlink *devlink, u32 id)
+{
+ int err;
+
+ xa_lock(&devlink->snapshot_ids);
+ if (xa_load(&devlink->snapshot_ids, id)) {
+ xa_unlock(&devlink->snapshot_ids);
+ return -EEXIST;
+ }
+ err = xa_err(__xa_store(&devlink->snapshot_ids, id, xa_mk_value(0),
+ GFP_ATOMIC));
+ xa_unlock(&devlink->snapshot_ids);
+ return err;
+}
+
+/**
+ * __devlink_region_snapshot_id_get - get snapshot ID
+ * @devlink: devlink instance
+ * @id: storage to return snapshot id
+ *
+ * Allocates a new snapshot id. Returns zero on success, or a negative
+ * error on failure. Must be called while holding the devlink instance
+ * lock.
+ *
+ * Snapshot IDs are tracked using an xarray which stores the number of
+ * users of the snapshot id.
+ *
+ * Note that the caller of this function counts as a 'user', in order to
+ * avoid race conditions. The caller must release its hold on the
+ * snapshot by using devlink_region_snapshot_id_put.
+ */
+static int __devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
+{
+ return xa_alloc(&devlink->snapshot_ids, id, xa_mk_value(1),
+ xa_limit_32b, GFP_KERNEL);
+}
+
+/**
+ * __devlink_region_snapshot_create - create a new snapshot
+ * This will add a new snapshot of a region. The snapshot
+ * will be stored on the region struct and can be accessed
+ * from devlink. This is useful for future analyses of snapshots.
+ * Multiple snapshots can be created on a region.
+ * The @snapshot_id should be obtained using the getter function.
+ *
+ * Must be called only while holding the region snapshot lock.
+ *
+ * @region: devlink region of the snapshot
+ * @data: snapshot data
+ * @snapshot_id: snapshot id to be created
+ */
+static int
+__devlink_region_snapshot_create(struct devlink_region *region,
+ u8 *data, u32 snapshot_id)
+{
+ struct devlink *devlink = region->devlink;
+ struct devlink_snapshot *snapshot;
+ int err;
+
+ lockdep_assert_held(&region->snapshot_lock);
+
+ /* check if region can hold one more snapshot */
+ if (region->cur_snapshots == region->max_snapshots)
+ return -ENOSPC;
+
+ if (devlink_region_snapshot_get_by_id(region, snapshot_id))
+ return -EEXIST;
+
+ snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL);
+ if (!snapshot)
+ return -ENOMEM;
+
+ err = __devlink_snapshot_id_increment(devlink, snapshot_id);
+ if (err)
+ goto err_snapshot_id_increment;
+
+ snapshot->id = snapshot_id;
+ snapshot->region = region;
+ snapshot->data = data;
+
+ list_add_tail(&snapshot->list, &region->snapshot_list);
+
+ region->cur_snapshots++;
+
+ devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_NEW);
+ return 0;
+
+err_snapshot_id_increment:
+ kfree(snapshot);
+ return err;
+}
+
+static void devlink_region_snapshot_del(struct devlink_region *region,
+ struct devlink_snapshot *snapshot)
+{
+ struct devlink *devlink = region->devlink;
+
+ lockdep_assert_held(&region->snapshot_lock);
+
+ devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
+ region->cur_snapshots--;
+ list_del(&snapshot->list);
+ region->ops->destructor(snapshot->data);
+ __devlink_snapshot_id_decrement(devlink, snapshot->id);
+ kfree(snapshot);
+}
+
+int devlink_nl_region_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_port *port = NULL;
+ struct devlink_region *region;
+ const char *region_name;
+ struct sk_buff *msg;
+ unsigned int index;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME))
+ return -EINVAL;
+
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ port = devlink_port_get_by_index(devlink, index);
+ if (!port)
+ return -ENODEV;
+ }
+
+ region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+ if (port)
+ region = devlink_port_region_get_by_name(port, region_name);
+ else
+ region = devlink_region_get_by_name(devlink, region_name);
+
+ if (!region)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_region_fill(msg, devlink, DEVLINK_CMD_REGION_GET,
+ info->snd_portid, info->snd_seq, 0,
+ region);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_region_get_port_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb,
+ struct devlink_port *port,
+ int *idx, int start, int flags)
+{
+ struct devlink_region *region;
+ int err = 0;
+
+ list_for_each_entry(region, &port->region_list, list) {
+ if (*idx < start) {
+ (*idx)++;
+ continue;
+ }
+ err = devlink_nl_region_fill(msg, port->devlink,
+ DEVLINK_CMD_REGION_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ flags, region);
+ if (err)
+ goto out;
+ (*idx)++;
+ }
+
+out:
+ return err;
+}
+
+static int devlink_nl_region_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_region *region;
+ struct devlink_port *port;
+ unsigned long port_index;
+ int idx = 0;
+ int err;
+
+ list_for_each_entry(region, &devlink->region_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_region_fill(msg, devlink,
+ DEVLINK_CMD_REGION_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags,
+ region);
+ if (err) {
+ state->idx = idx;
+ return err;
+ }
+ idx++;
+ }
+
+ xa_for_each(&devlink->ports, port_index, port) {
+ err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, &idx,
+ state->idx, flags);
+ if (err) {
+ state->idx = idx;
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int devlink_nl_region_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_region_get_dump_one);
+}
+
+int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_snapshot *snapshot;
+ struct devlink_port *port = NULL;
+ struct devlink_region *region;
+ const char *region_name;
+ unsigned int index;
+ u32 snapshot_id;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME) ||
+ GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_SNAPSHOT_ID))
+ return -EINVAL;
+
+ region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+ snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
+
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ port = devlink_port_get_by_index(devlink, index);
+ if (!port)
+ return -ENODEV;
+ }
+
+ if (port)
+ region = devlink_port_region_get_by_name(port, region_name);
+ else
+ region = devlink_region_get_by_name(devlink, region_name);
+
+ if (!region)
+ return -EINVAL;
+
+ mutex_lock(&region->snapshot_lock);
+ snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+ if (!snapshot) {
+ mutex_unlock(&region->snapshot_lock);
+ return -EINVAL;
+ }
+
+ devlink_region_snapshot_del(region, snapshot);
+ mutex_unlock(&region->snapshot_lock);
+ return 0;
+}
+
+int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_snapshot *snapshot;
+ struct devlink_port *port = NULL;
+ struct nlattr *snapshot_id_attr;
+ struct devlink_region *region;
+ const char *region_name;
+ unsigned int index;
+ u32 snapshot_id;
+ u8 *data;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) {
+ NL_SET_ERR_MSG(info->extack, "No region name provided");
+ return -EINVAL;
+ }
+
+ region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ port = devlink_port_get_by_index(devlink, index);
+ if (!port)
+ return -ENODEV;
+ }
+
+ if (port)
+ region = devlink_port_region_get_by_name(port, region_name);
+ else
+ region = devlink_region_get_by_name(devlink, region_name);
+
+ if (!region) {
+ NL_SET_ERR_MSG(info->extack, "The requested region does not exist");
+ return -EINVAL;
+ }
+
+ if (!region->ops->snapshot) {
+ NL_SET_ERR_MSG(info->extack, "The requested region does not support taking an immediate snapshot");
+ return -EOPNOTSUPP;
+ }
+
+ mutex_lock(&region->snapshot_lock);
+
+ if (region->cur_snapshots == region->max_snapshots) {
+ NL_SET_ERR_MSG(info->extack, "The region has reached the maximum number of stored snapshots");
+ err = -ENOSPC;
+ goto unlock;
+ }
+
+ snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
+ if (snapshot_id_attr) {
+ snapshot_id = nla_get_u32(snapshot_id_attr);
+
+ if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
+ NL_SET_ERR_MSG(info->extack, "The requested snapshot id is already in use");
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ err = __devlink_snapshot_id_insert(devlink, snapshot_id);
+ if (err)
+ goto unlock;
+ } else {
+ err = __devlink_region_snapshot_id_get(devlink, &snapshot_id);
+ if (err) {
+ NL_SET_ERR_MSG(info->extack, "Failed to allocate a new snapshot id");
+ goto unlock;
+ }
+ }
+
+ if (port)
+ err = region->port_ops->snapshot(port, region->port_ops,
+ info->extack, &data);
+ else
+ err = region->ops->snapshot(devlink, region->ops,
+ info->extack, &data);
+ if (err)
+ goto err_snapshot_capture;
+
+ err = __devlink_region_snapshot_create(region, data, snapshot_id);
+ if (err)
+ goto err_snapshot_create;
+
+ if (!snapshot_id_attr) {
+ struct sk_buff *msg;
+
+ snapshot = devlink_region_snapshot_get_by_id(region,
+ snapshot_id);
+ if (WARN_ON(!snapshot)) {
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ msg = devlink_nl_region_notify_build(region, snapshot,
+ DEVLINK_CMD_REGION_NEW,
+ info->snd_portid,
+ info->snd_seq);
+ err = PTR_ERR_OR_ZERO(msg);
+ if (err)
+ goto err_notify;
+
+ err = genlmsg_reply(msg, info);
+ if (err)
+ goto err_notify;
+ }
+
+ mutex_unlock(&region->snapshot_lock);
+ return 0;
+
+err_snapshot_create:
+ region->ops->destructor(data);
+err_snapshot_capture:
+ __devlink_snapshot_id_decrement(devlink, snapshot_id);
+ mutex_unlock(&region->snapshot_lock);
+ return err;
+
+err_notify:
+ devlink_region_snapshot_del(region, snapshot);
+unlock:
+ mutex_unlock(&region->snapshot_lock);
+ return err;
+}
+
+static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
+ u8 *chunk, u32 chunk_size,
+ u64 addr)
+{
+ struct nlattr *chunk_attr;
+ int err;
+
+ chunk_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_CHUNK);
+ if (!chunk_attr)
+ return -EINVAL;
+
+ err = nla_put(msg, DEVLINK_ATTR_REGION_CHUNK_DATA, chunk_size, chunk);
+ if (err)
+ goto nla_put_failure;
+
+ err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr,
+ DEVLINK_ATTR_PAD);
+ if (err)
+ goto nla_put_failure;
+
+ nla_nest_end(msg, chunk_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, chunk_attr);
+ return err;
+}
+
+#define DEVLINK_REGION_READ_CHUNK_SIZE 256
+
+typedef int devlink_chunk_fill_t(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset,
+ struct netlink_ext_ack *extack);
+
+static int
+devlink_nl_region_read_fill(struct sk_buff *skb, devlink_chunk_fill_t *cb,
+ void *cb_priv, u64 start_offset, u64 end_offset,
+ u64 *new_offset, struct netlink_ext_ack *extack)
+{
+ u64 curr_offset = start_offset;
+ int err = 0;
+ u8 *data;
+
+ /* Allocate and re-use a single buffer */
+ data = kmalloc(DEVLINK_REGION_READ_CHUNK_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ *new_offset = start_offset;
+
+ while (curr_offset < end_offset) {
+ u32 data_size;
+
+ data_size = min_t(u32, end_offset - curr_offset,
+ DEVLINK_REGION_READ_CHUNK_SIZE);
+
+ err = cb(cb_priv, data, data_size, curr_offset, extack);
+ if (err)
+ break;
+
+ err = devlink_nl_cmd_region_read_chunk_fill(skb, data, data_size, curr_offset);
+ if (err)
+ break;
+
+ curr_offset += data_size;
+ }
+ *new_offset = curr_offset;
+
+ kfree(data);
+
+ return err;
+}
+
+static int
+devlink_region_snapshot_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset,
+ struct netlink_ext_ack __always_unused *extack)
+{
+ struct devlink_snapshot *snapshot = cb_priv;
+
+ memcpy(chunk, &snapshot->data[curr_offset], chunk_size);
+
+ return 0;
+}
+
+static int
+devlink_region_port_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset, struct netlink_ext_ack *extack)
+{
+ struct devlink_region *region = cb_priv;
+
+ return region->port_ops->read(region->port, region->port_ops, extack,
+ curr_offset, chunk_size, chunk);
+}
+
+static int
+devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset, struct netlink_ext_ack *extack)
+{
+ struct devlink_region *region = cb_priv;
+
+ return region->ops->read(region->devlink, region->ops, extack,
+ curr_offset, chunk_size, chunk);
+}
+
+int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct nlattr *chunks_attr, *region_attr, *snapshot_attr;
+ u64 ret_offset, start_offset, end_offset = U64_MAX;
+ struct nlattr **attrs = info->info.attrs;
+ struct devlink_port *port = NULL;
+ devlink_chunk_fill_t *region_cb;
+ struct devlink_region *region;
+ const char *region_name;
+ struct devlink *devlink;
+ unsigned int index;
+ void *region_cb_priv;
+ void *hdr;
+ int err;
+
+ start_offset = state->start_offset;
+
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+ if (IS_ERR(devlink))
+ return PTR_ERR(devlink);
+
+ if (!attrs[DEVLINK_ATTR_REGION_NAME]) {
+ NL_SET_ERR_MSG(cb->extack, "No region name provided");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ port = devlink_port_get_by_index(devlink, index);
+ if (!port) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
+ }
+
+ region_attr = attrs[DEVLINK_ATTR_REGION_NAME];
+ region_name = nla_data(region_attr);
+
+ if (port)
+ region = devlink_port_region_get_by_name(port, region_name);
+ else
+ region = devlink_region_get_by_name(devlink, region_name);
+
+ if (!region) {
+ NL_SET_ERR_MSG_ATTR(cb->extack, region_attr, "Requested region does not exist");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ snapshot_attr = attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
+ if (!snapshot_attr) {
+ if (!nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
+ NL_SET_ERR_MSG(cb->extack, "No snapshot id provided");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (!region->ops->read) {
+ NL_SET_ERR_MSG(cb->extack, "Requested region does not support direct read");
+ err = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
+ if (port)
+ region_cb = &devlink_region_port_direct_fill;
+ else
+ region_cb = &devlink_region_direct_fill;
+ region_cb_priv = region;
+ } else {
+ struct devlink_snapshot *snapshot;
+ u32 snapshot_id;
+
+ if (nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
+ NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Direct region read does not use snapshot");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ snapshot_id = nla_get_u32(snapshot_attr);
+ snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+ if (!snapshot) {
+ NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ region_cb = &devlink_region_snapshot_fill;
+ region_cb_priv = snapshot;
+ }
+
+ if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
+ attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
+ if (!start_offset)
+ start_offset =
+ nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+
+ end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+ end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
+ }
+
+ if (end_offset > region->size)
+ end_offset = region->size;
+
+ /* return 0 if there is no further data to read */
+ if (start_offset == end_offset) {
+ err = 0;
+ goto out_unlock;
+ }
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
+ DEVLINK_CMD_REGION_READ);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto out_unlock;
+ }
+
+ err = devlink_nl_put_handle(skb, devlink);
+ if (err)
+ goto nla_put_failure;
+
+ if (region->port) {
+ err = nla_put_u32(skb, DEVLINK_ATTR_PORT_INDEX,
+ region->port->index);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ err = nla_put_string(skb, DEVLINK_ATTR_REGION_NAME, region_name);
+ if (err)
+ goto nla_put_failure;
+
+ chunks_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_REGION_CHUNKS);
+ if (!chunks_attr) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ err = devlink_nl_region_read_fill(skb, region_cb, region_cb_priv,
+ start_offset, end_offset, &ret_offset,
+ cb->extack);
+
+ if (err && err != -EMSGSIZE)
+ goto nla_put_failure;
+
+ /* Check if there was any progress done to prevent infinite loop */
+ if (ret_offset == start_offset) {
+ err = -EINVAL;
+ goto nla_put_failure;
+ }
+
+ state->start_offset = ret_offset;
+
+ nla_nest_end(skb, chunks_attr);
+ genlmsg_end(skb, hdr);
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ return skb->len;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+out_unlock:
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ return err;
+}
+
+/**
+ * devl_region_create - create a new address region
+ *
+ * @devlink: devlink
+ * @ops: region operations and name
+ * @region_max_snapshots: Maximum supported number of snapshots for region
+ * @region_size: size of region
+ */
+struct devlink_region *devl_region_create(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots,
+ u64 region_size)
+{
+ struct devlink_region *region;
+
+ devl_assert_locked(devlink);
+
+ if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
+ return ERR_PTR(-EINVAL);
+
+ if (devlink_region_get_by_name(devlink, ops->name))
+ return ERR_PTR(-EEXIST);
+
+ region = kzalloc(sizeof(*region), GFP_KERNEL);
+ if (!region)
+ return ERR_PTR(-ENOMEM);
+
+ region->devlink = devlink;
+ region->max_snapshots = region_max_snapshots;
+ region->ops = ops;
+ region->size = region_size;
+ INIT_LIST_HEAD(&region->snapshot_list);
+ mutex_init(&region->snapshot_lock);
+ list_add_tail(&region->list, &devlink->region_list);
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+ return region;
+}
+EXPORT_SYMBOL_GPL(devl_region_create);
+
+/**
+ * devlink_region_create - create a new address region
+ *
+ * @devlink: devlink
+ * @ops: region operations and name
+ * @region_max_snapshots: Maximum supported number of snapshots for region
+ * @region_size: size of region
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+struct devlink_region *
+devlink_region_create(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size)
+{
+ struct devlink_region *region;
+
+ devl_lock(devlink);
+ region = devl_region_create(devlink, ops, region_max_snapshots,
+ region_size);
+ devl_unlock(devlink);
+ return region;
+}
+EXPORT_SYMBOL_GPL(devlink_region_create);
+
+/**
+ * devlink_port_region_create - create a new address region for a port
+ *
+ * @port: devlink port
+ * @ops: region operations and name
+ * @region_max_snapshots: Maximum supported number of snapshots for region
+ * @region_size: size of region
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+struct devlink_region *
+devlink_port_region_create(struct devlink_port *port,
+ const struct devlink_port_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size)
+{
+ struct devlink *devlink = port->devlink;
+ struct devlink_region *region;
+ int err = 0;
+
+ ASSERT_DEVLINK_PORT_INITIALIZED(port);
+
+ if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
+ return ERR_PTR(-EINVAL);
+
+ devl_lock(devlink);
+
+ if (devlink_port_region_get_by_name(port, ops->name)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ region = kzalloc(sizeof(*region), GFP_KERNEL);
+ if (!region) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ region->devlink = devlink;
+ region->port = port;
+ region->max_snapshots = region_max_snapshots;
+ region->port_ops = ops;
+ region->size = region_size;
+ INIT_LIST_HEAD(&region->snapshot_list);
+ mutex_init(&region->snapshot_lock);
+ list_add_tail(&region->list, &port->region_list);
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+ devl_unlock(devlink);
+ return region;
+
+unlock:
+ devl_unlock(devlink);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(devlink_port_region_create);
+
+/**
+ * devl_region_destroy - destroy address region
+ *
+ * @region: devlink region to destroy
+ */
+void devl_region_destroy(struct devlink_region *region)
+{
+ struct devlink *devlink = region->devlink;
+ struct devlink_snapshot *snapshot, *ts;
+
+ devl_assert_locked(devlink);
+
+ /* Free all snapshots of region */
+ mutex_lock(&region->snapshot_lock);
+ list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
+ devlink_region_snapshot_del(region, snapshot);
+ mutex_unlock(&region->snapshot_lock);
+
+ list_del(&region->list);
+ mutex_destroy(&region->snapshot_lock);
+
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+ kfree(region);
+}
+EXPORT_SYMBOL_GPL(devl_region_destroy);
+
+/**
+ * devlink_region_destroy - destroy address region
+ *
+ * @region: devlink region to destroy
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_region_destroy(struct devlink_region *region)
+{
+ struct devlink *devlink = region->devlink;
+
+ devl_lock(devlink);
+ devl_region_destroy(region);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_region_destroy);
+
+/**
+ * devlink_region_snapshot_id_get - get snapshot ID
+ *
+ * This callback should be called when adding a new snapshot,
+ * Driver should use the same id for multiple snapshots taken
+ * on multiple regions at the same time/by the same trigger.
+ *
+ * The caller of this function must use devlink_region_snapshot_id_put
+ * when finished creating regions using this id.
+ *
+ * Returns zero on success, or a negative error code on failure.
+ *
+ * @devlink: devlink
+ * @id: storage to return id
+ */
+int devlink_region_snapshot_id_get(struct devlink *devlink, u32 *id)
+{
+ return __devlink_region_snapshot_id_get(devlink, id);
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get);
+
+/**
+ * devlink_region_snapshot_id_put - put snapshot ID reference
+ *
+ * This should be called by a driver after finishing creating snapshots
+ * with an id. Doing so ensures that the ID can later be released in the
+ * event that all snapshots using it have been destroyed.
+ *
+ * @devlink: devlink
+ * @id: id to release reference on
+ */
+void devlink_region_snapshot_id_put(struct devlink *devlink, u32 id)
+{
+ __devlink_snapshot_id_decrement(devlink, id);
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_put);
+
+/**
+ * devlink_region_snapshot_create - create a new snapshot
+ * This will add a new snapshot of a region. The snapshot
+ * will be stored on the region struct and can be accessed
+ * from devlink. This is useful for future analyses of snapshots.
+ * Multiple snapshots can be created on a region.
+ * The @snapshot_id should be obtained using the getter function.
+ *
+ * @region: devlink region of the snapshot
+ * @data: snapshot data
+ * @snapshot_id: snapshot id to be created
+ */
+int devlink_region_snapshot_create(struct devlink_region *region,
+ u8 *data, u32 snapshot_id)
+{
+ int err;
+
+ mutex_lock(&region->snapshot_lock);
+ err = __devlink_region_snapshot_create(region, data, snapshot_id);
+ mutex_unlock(&region->snapshot_lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
diff --git a/net/devlink/resource.c b/net/devlink/resource.c
new file mode 100644
index 000000000000..c8b615e4c385
--- /dev/null
+++ b/net/devlink/resource.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+/**
+ * struct devlink_resource - devlink resource
+ * @name: name of the resource
+ * @id: id, per devlink instance
+ * @size: size of the resource
+ * @size_new: updated size of the resource, reload is needed
+ * @size_valid: valid in case the total size of the resource is valid
+ * including its children
+ * @parent: parent resource
+ * @size_params: size parameters
+ * @list: parent list
+ * @resource_list: list of child resources
+ * @occ_get: occupancy getter callback
+ * @occ_get_priv: occupancy getter callback priv
+ */
+struct devlink_resource {
+ const char *name;
+ u64 id;
+ u64 size;
+ u64 size_new;
+ bool size_valid;
+ struct devlink_resource *parent;
+ struct devlink_resource_size_params size_params;
+ struct list_head list;
+ struct list_head resource_list;
+ devlink_resource_occ_get_t *occ_get;
+ void *occ_get_priv;
+};
+
+static struct devlink_resource *
+devlink_resource_find(struct devlink *devlink,
+ struct devlink_resource *resource, u64 resource_id)
+{
+ struct list_head *resource_list;
+
+ if (resource)
+ resource_list = &resource->resource_list;
+ else
+ resource_list = &devlink->resource_list;
+
+ list_for_each_entry(resource, resource_list, list) {
+ struct devlink_resource *child_resource;
+
+ if (resource->id == resource_id)
+ return resource;
+
+ child_resource = devlink_resource_find(devlink, resource,
+ resource_id);
+ if (child_resource)
+ return child_resource;
+ }
+ return NULL;
+}
+
+static void
+devlink_resource_validate_children(struct devlink_resource *resource)
+{
+ struct devlink_resource *child_resource;
+ bool size_valid = true;
+ u64 parts_size = 0;
+
+ if (list_empty(&resource->resource_list))
+ goto out;
+
+ list_for_each_entry(child_resource, &resource->resource_list, list)
+ parts_size += child_resource->size_new;
+
+ if (parts_size > resource->size_new)
+ size_valid = false;
+out:
+ resource->size_valid = size_valid;
+}
+
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+ struct netlink_ext_ack *extack)
+{
+ u64 reminder;
+ int err = 0;
+
+ if (size > resource->size_params.size_max) {
+ NL_SET_ERR_MSG(extack, "Size larger than maximum");
+ err = -EINVAL;
+ }
+
+ if (size < resource->size_params.size_min) {
+ NL_SET_ERR_MSG(extack, "Size smaller than minimum");
+ err = -EINVAL;
+ }
+
+ div64_u64_rem(size, resource->size_params.size_granularity, &reminder);
+ if (reminder) {
+ NL_SET_ERR_MSG(extack, "Wrong granularity");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_resource *resource;
+ u64 resource_id;
+ u64 size;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_ID) ||
+ GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_RESOURCE_SIZE))
+ return -EINVAL;
+ resource_id = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_ID]);
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (!resource)
+ return -EINVAL;
+
+ size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
+ err = devlink_resource_validate_size(resource, size, info->extack);
+ if (err)
+ return err;
+
+ resource->size_new = size;
+ devlink_resource_validate_children(resource);
+ if (resource->parent)
+ devlink_resource_validate_children(resource->parent);
+ return 0;
+}
+
+static int
+devlink_resource_size_params_put(struct devlink_resource *resource,
+ struct sk_buff *skb)
+{
+ struct devlink_resource_size_params *size_params;
+
+ size_params = &resource->size_params;
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_GRAN,
+ size_params->size_granularity, DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MAX,
+ size_params->size_max, DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_MIN,
+ size_params->size_min, DEVLINK_ATTR_PAD) ||
+ nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_UNIT, size_params->unit))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_resource_occ_put(struct devlink_resource *resource,
+ struct sk_buff *skb)
+{
+ if (!resource->occ_get)
+ return 0;
+ return nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
+ resource->occ_get(resource->occ_get_priv),
+ DEVLINK_ATTR_PAD);
+}
+
+static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
+ struct devlink_resource *resource)
+{
+ struct devlink_resource *child_resource;
+ struct nlattr *child_resource_attr;
+ struct nlattr *resource_attr;
+
+ resource_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE);
+ if (!resource_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_RESOURCE_NAME, resource->name) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE, resource->size,
+ DEVLINK_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (resource->size != resource->size_new &&
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
+ resource->size_new, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (devlink_resource_occ_put(resource, skb))
+ goto nla_put_failure;
+ if (devlink_resource_size_params_put(resource, skb))
+ goto nla_put_failure;
+ if (list_empty(&resource->resource_list))
+ goto out;
+
+ if (nla_put_u8(skb, DEVLINK_ATTR_RESOURCE_SIZE_VALID,
+ resource->size_valid))
+ goto nla_put_failure;
+
+ child_resource_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_RESOURCE_LIST);
+ if (!child_resource_attr)
+ goto nla_put_failure;
+
+ list_for_each_entry(child_resource, &resource->resource_list, list) {
+ if (devlink_resource_put(devlink, skb, child_resource))
+ goto resource_put_failure;
+ }
+
+ nla_nest_end(skb, child_resource_attr);
+out:
+ nla_nest_end(skb, resource_attr);
+ return 0;
+
+resource_put_failure:
+ nla_nest_cancel(skb, child_resource_attr);
+nla_put_failure:
+ nla_nest_cancel(skb, resource_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_resource_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_resource *resource;
+ struct nlattr *resources_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ bool incomplete;
+ void *hdr;
+ int i;
+ int err;
+
+ resource = list_first_entry(&devlink->resource_list,
+ struct devlink_resource, list);
+start_again:
+ err = devlink_nl_msg_reply_and_new(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr) {
+ nlmsg_free(skb);
+ return -EMSGSIZE;
+ }
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+
+ resources_attr = nla_nest_start_noflag(skb,
+ DEVLINK_ATTR_RESOURCE_LIST);
+ if (!resources_attr)
+ goto nla_put_failure;
+
+ incomplete = false;
+ i = 0;
+ list_for_each_entry_from(resource, &devlink->resource_list, list) {
+ err = devlink_resource_put(devlink, skb, resource);
+ if (err) {
+ if (!i)
+ goto err_resource_put;
+ incomplete = true;
+ break;
+ }
+ i++;
+ }
+ nla_nest_end(skb, resources_attr);
+ genlmsg_end(skb, hdr);
+ if (incomplete)
+ goto start_again;
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_nl_msg_reply_and_new(&skb, info);
+ if (err)
+ return err;
+ goto send_done;
+ }
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_resource_put:
+ nlmsg_free(skb);
+ return err;
+}
+
+int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (list_empty(&devlink->resource_list))
+ return -EOPNOTSUPP;
+
+ return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
+}
+
+int devlink_resources_validate(struct devlink *devlink,
+ struct devlink_resource *resource,
+ struct genl_info *info)
+{
+ struct list_head *resource_list;
+ int err = 0;
+
+ if (resource)
+ resource_list = &resource->resource_list;
+ else
+ resource_list = &devlink->resource_list;
+
+ list_for_each_entry(resource, resource_list, list) {
+ if (!resource->size_valid)
+ return -EINVAL;
+ err = devlink_resources_validate(devlink, resource, info);
+ if (err)
+ return err;
+ }
+ return err;
+}
+
+/**
+ * devl_resource_register - devlink resource register
+ *
+ * @devlink: devlink
+ * @resource_name: resource's name
+ * @resource_size: resource's size
+ * @resource_id: resource's id
+ * @parent_resource_id: resource's parent id
+ * @size_params: size parameters
+ *
+ * Generic resources should reuse the same names across drivers.
+ * Please see the generic resources list at:
+ * Documentation/networking/devlink/devlink-resource.rst
+ */
+int devl_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ const struct devlink_resource_size_params *size_params)
+{
+ struct devlink_resource *resource;
+ struct list_head *resource_list;
+ bool top_hierarchy;
+
+ lockdep_assert_held(&devlink->lock);
+
+ top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (resource)
+ return -EINVAL;
+
+ resource = kzalloc(sizeof(*resource), GFP_KERNEL);
+ if (!resource)
+ return -ENOMEM;
+
+ if (top_hierarchy) {
+ resource_list = &devlink->resource_list;
+ } else {
+ struct devlink_resource *parent_resource;
+
+ parent_resource = devlink_resource_find(devlink, NULL,
+ parent_resource_id);
+ if (parent_resource) {
+ resource_list = &parent_resource->resource_list;
+ resource->parent = parent_resource;
+ } else {
+ kfree(resource);
+ return -EINVAL;
+ }
+ }
+
+ resource->name = resource_name;
+ resource->size = resource_size;
+ resource->size_new = resource_size;
+ resource->id = resource_id;
+ resource->size_valid = true;
+ memcpy(&resource->size_params, size_params,
+ sizeof(resource->size_params));
+ INIT_LIST_HEAD(&resource->resource_list);
+ list_add_tail(&resource->list, resource_list);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_resource_register);
+
+/**
+ * devlink_resource_register - devlink resource register
+ *
+ * @devlink: devlink
+ * @resource_name: resource's name
+ * @resource_size: resource's size
+ * @resource_id: resource's id
+ * @parent_resource_id: resource's parent id
+ * @size_params: size parameters
+ *
+ * Generic resources should reuse the same names across drivers.
+ * Please see the generic resources list at:
+ * Documentation/networking/devlink/devlink-resource.rst
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+int devlink_resource_register(struct devlink *devlink,
+ const char *resource_name,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ const struct devlink_resource_size_params *size_params)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_resource_register(devlink, resource_name, resource_size,
+ resource_id, parent_resource_id, size_params);
+ devl_unlock(devlink);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_resource_register);
+
+static void devlink_resource_unregister(struct devlink *devlink,
+ struct devlink_resource *resource)
+{
+ struct devlink_resource *tmp, *child_resource;
+
+ list_for_each_entry_safe(child_resource, tmp, &resource->resource_list,
+ list) {
+ devlink_resource_unregister(devlink, child_resource);
+ list_del(&child_resource->list);
+ kfree(child_resource);
+ }
+}
+
+/**
+ * devl_resources_unregister - free all resources
+ *
+ * @devlink: devlink
+ */
+void devl_resources_unregister(struct devlink *devlink)
+{
+ struct devlink_resource *tmp, *child_resource;
+
+ lockdep_assert_held(&devlink->lock);
+
+ list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list,
+ list) {
+ devlink_resource_unregister(devlink, child_resource);
+ list_del(&child_resource->list);
+ kfree(child_resource);
+ }
+}
+EXPORT_SYMBOL_GPL(devl_resources_unregister);
+
+/**
+ * devlink_resources_unregister - free all resources
+ *
+ * @devlink: devlink
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_resources_unregister(struct devlink *devlink)
+{
+ devl_lock(devlink);
+ devl_resources_unregister(devlink);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_resources_unregister);
+
+/**
+ * devl_resource_size_get - get and update size
+ *
+ * @devlink: devlink
+ * @resource_id: the requested resource id
+ * @p_resource_size: ptr to update
+ */
+int devl_resource_size_get(struct devlink *devlink,
+ u64 resource_id,
+ u64 *p_resource_size)
+{
+ struct devlink_resource *resource;
+
+ lockdep_assert_held(&devlink->lock);
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (!resource)
+ return -EINVAL;
+ *p_resource_size = resource->size_new;
+ resource->size = resource->size_new;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_resource_size_get);
+
+/**
+ * devl_resource_occ_get_register - register occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ * @occ_get: occupancy getter callback
+ * @occ_get_priv: occupancy getter callback priv
+ */
+void devl_resource_occ_get_register(struct devlink *devlink,
+ u64 resource_id,
+ devlink_resource_occ_get_t *occ_get,
+ void *occ_get_priv)
+{
+ struct devlink_resource *resource;
+
+ lockdep_assert_held(&devlink->lock);
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (WARN_ON(!resource))
+ return;
+ WARN_ON(resource->occ_get);
+
+ resource->occ_get = occ_get;
+ resource->occ_get_priv = occ_get_priv;
+}
+EXPORT_SYMBOL_GPL(devl_resource_occ_get_register);
+
+/**
+ * devlink_resource_occ_get_register - register occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ * @occ_get: occupancy getter callback
+ * @occ_get_priv: occupancy getter callback priv
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_resource_occ_get_register(struct devlink *devlink,
+ u64 resource_id,
+ devlink_resource_occ_get_t *occ_get,
+ void *occ_get_priv)
+{
+ devl_lock(devlink);
+ devl_resource_occ_get_register(devlink, resource_id,
+ occ_get, occ_get_priv);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
+
+/**
+ * devl_resource_occ_get_unregister - unregister occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ */
+void devl_resource_occ_get_unregister(struct devlink *devlink,
+ u64 resource_id)
+{
+ struct devlink_resource *resource;
+
+ lockdep_assert_held(&devlink->lock);
+
+ resource = devlink_resource_find(devlink, NULL, resource_id);
+ if (WARN_ON(!resource))
+ return;
+ WARN_ON(!resource->occ_get);
+
+ resource->occ_get = NULL;
+ resource->occ_get_priv = NULL;
+}
+EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister);
+
+/**
+ * devlink_resource_occ_get_unregister - unregister occupancy getter
+ *
+ * @devlink: devlink
+ * @resource_id: resource id
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_resource_occ_get_unregister(struct devlink *devlink,
+ u64 resource_id)
+{
+ devl_lock(devlink);
+ devl_resource_occ_get_unregister(devlink, resource_id);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
diff --git a/net/devlink/sb.c b/net/devlink/sb.c
new file mode 100644
index 000000000000..bd677fff5ec8
--- /dev/null
+++ b/net/devlink/sb.c
@@ -0,0 +1,996 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include "devl_internal.h"
+
+struct devlink_sb {
+ struct list_head list;
+ unsigned int index;
+ u32 size;
+ u16 ingress_pools_count;
+ u16 egress_pools_count;
+ u16 ingress_tc_count;
+ u16 egress_tc_count;
+};
+
+static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb)
+{
+ return devlink_sb->ingress_pools_count + devlink_sb->egress_pools_count;
+}
+
+static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink,
+ unsigned int sb_index)
+{
+ struct devlink_sb *devlink_sb;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ if (devlink_sb->index == sb_index)
+ return devlink_sb;
+ }
+ return NULL;
+}
+
+static bool devlink_sb_index_exists(struct devlink *devlink,
+ unsigned int sb_index)
+{
+ return devlink_sb_get_by_index(devlink, sb_index);
+}
+
+static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink,
+ struct nlattr **attrs)
+{
+ if (attrs[DEVLINK_ATTR_SB_INDEX]) {
+ u32 sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]);
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
+ if (!devlink_sb)
+ return ERR_PTR(-ENODEV);
+ return devlink_sb;
+ }
+ return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ return devlink_sb_get_from_attrs(devlink, info->attrs);
+}
+
+static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb,
+ struct nlattr **attrs,
+ u16 *p_pool_index)
+{
+ u16 val;
+
+ if (!attrs[DEVLINK_ATTR_SB_POOL_INDEX])
+ return -EINVAL;
+
+ val = nla_get_u16(attrs[DEVLINK_ATTR_SB_POOL_INDEX]);
+ if (val >= devlink_sb_pool_count(devlink_sb))
+ return -EINVAL;
+ *p_pool_index = val;
+ return 0;
+}
+
+static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb,
+ struct genl_info *info,
+ u16 *p_pool_index)
+{
+ return devlink_sb_pool_index_get_from_attrs(devlink_sb, info->attrs,
+ p_pool_index);
+}
+
+static int
+devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs,
+ enum devlink_sb_pool_type *p_pool_type)
+{
+ u8 val;
+
+ if (!attrs[DEVLINK_ATTR_SB_POOL_TYPE])
+ return -EINVAL;
+
+ val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_TYPE]);
+ if (val != DEVLINK_SB_POOL_TYPE_INGRESS &&
+ val != DEVLINK_SB_POOL_TYPE_EGRESS)
+ return -EINVAL;
+ *p_pool_type = val;
+ return 0;
+}
+
+static int
+devlink_sb_pool_type_get_from_info(struct genl_info *info,
+ enum devlink_sb_pool_type *p_pool_type)
+{
+ return devlink_sb_pool_type_get_from_attrs(info->attrs, p_pool_type);
+}
+
+static int
+devlink_sb_th_type_get_from_attrs(struct nlattr **attrs,
+ enum devlink_sb_threshold_type *p_th_type)
+{
+ u8 val;
+
+ if (!attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE])
+ return -EINVAL;
+
+ val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]);
+ if (val != DEVLINK_SB_THRESHOLD_TYPE_STATIC &&
+ val != DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC)
+ return -EINVAL;
+ *p_th_type = val;
+ return 0;
+}
+
+static int
+devlink_sb_th_type_get_from_info(struct genl_info *info,
+ enum devlink_sb_threshold_type *p_th_type)
+{
+ return devlink_sb_th_type_get_from_attrs(info->attrs, p_th_type);
+}
+
+static int
+devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
+ struct nlattr **attrs,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_tc_index)
+{
+ u16 val;
+
+ if (!attrs[DEVLINK_ATTR_SB_TC_INDEX])
+ return -EINVAL;
+
+ val = nla_get_u16(attrs[DEVLINK_ATTR_SB_TC_INDEX]);
+ if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS &&
+ val >= devlink_sb->ingress_tc_count)
+ return -EINVAL;
+ if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS &&
+ val >= devlink_sb->egress_tc_count)
+ return -EINVAL;
+ *p_tc_index = val;
+ return 0;
+}
+
+static int
+devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
+ struct genl_info *info,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_tc_index)
+{
+ return devlink_sb_tc_index_get_from_attrs(devlink_sb, info->attrs,
+ pool_type, p_tc_index);
+}
+
+static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT,
+ devlink_sb->ingress_pools_count))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT,
+ devlink_sb->egress_pools_count))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT,
+ devlink_sb->ingress_tc_count))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT,
+ devlink_sb->egress_tc_count))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_sb_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb;
+ struct sk_buff *msg;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
+ DEVLINK_CMD_SB_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_sb *devlink_sb;
+ int idx = 0;
+ int err = 0;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
+ DEVLINK_CMD_SB_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
+ }
+
+ return err;
+}
+
+int devlink_nl_sb_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_sb_get_dump_one);
+}
+
+static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ u16 pool_index, enum devlink_command cmd,
+ u32 portid, u32 seq, int flags)
+{
+ struct devlink_sb_pool_info pool_info;
+ void *hdr;
+ int err;
+
+ err = devlink->ops->sb_pool_get(devlink, devlink_sb->index,
+ pool_index, &pool_info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_info.pool_type))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, pool_info.size))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,
+ pool_info.threshold_type))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_CELL_SIZE,
+ pool_info.cell_size))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb;
+ struct sk_buff *msg;
+ u16 pool_index;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ if (!devlink->ops->sb_pool_get)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index,
+ DEVLINK_CMD_SB_POOL_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
+ struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ u32 portid, u32 seq, int flags)
+{
+ u16 pool_count = devlink_sb_pool_count(devlink_sb);
+ u16 pool_index;
+ int err;
+
+ for (pool_index = 0; pool_index < pool_count; pool_index++) {
+ if (*p_idx < start) {
+ (*p_idx)++;
+ continue;
+ }
+ err = devlink_nl_sb_pool_fill(msg, devlink,
+ devlink_sb,
+ pool_index,
+ DEVLINK_CMD_SB_POOL_NEW,
+ portid, seq, flags);
+ if (err)
+ return err;
+ (*p_idx)++;
+ }
+ return 0;
+}
+
+static int
+devlink_nl_sb_pool_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_sb *devlink_sb;
+ int err = 0;
+ int idx = 0;
+
+ if (!devlink->ops->sb_pool_get)
+ return 0;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_pool_get_dumpit(msg, state->idx, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int devlink_nl_sb_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_sb_pool_get_dump_one);
+}
+
+static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
+ u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack)
+
+{
+ const struct devlink_ops *ops = devlink->ops;
+
+ if (ops->sb_pool_set)
+ return ops->sb_pool_set(devlink, sb_index, pool_index,
+ size, threshold_type, extack);
+ return -EOPNOTSUPP;
+}
+
+int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ enum devlink_sb_threshold_type threshold_type;
+ struct devlink_sb *devlink_sb;
+ u16 pool_index;
+ u32 size;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ err = devlink_sb_th_type_get_from_info(info, &threshold_type);
+ if (err)
+ return err;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_POOL_SIZE))
+ return -EINVAL;
+
+ size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]);
+ return devlink_sb_pool_set(devlink, devlink_sb->index,
+ pool_index, size, threshold_type,
+ info->extack);
+}
+
+static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ struct devlink_sb *devlink_sb,
+ u16 pool_index,
+ enum devlink_command cmd,
+ u32 portid, u32 seq, int flags)
+{
+ const struct devlink_ops *ops = devlink->ops;
+ u32 threshold;
+ void *hdr;
+ int err;
+
+ err = ops->sb_port_pool_get(devlink_port, devlink_sb->index,
+ pool_index, &threshold);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
+ goto nla_put_failure;
+
+ if (ops->sb_occ_port_pool_get) {
+ u32 cur;
+ u32 max;
+
+ err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index,
+ pool_index, &cur, &max);
+ if (err && err != -EOPNOTSUPP)
+ goto sb_occ_get_failure;
+ if (!err) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
+ goto nla_put_failure;
+ }
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+sb_occ_get_failure:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+int devlink_nl_sb_port_pool_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = devlink_port->devlink;
+ struct devlink_sb *devlink_sb;
+ struct sk_buff *msg;
+ u16 pool_index;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ if (!devlink->ops->sb_port_pool_get)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port,
+ devlink_sb, pool_index,
+ DEVLINK_CMD_SB_PORT_POOL_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
+ struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ u32 portid, u32 seq, int flags)
+{
+ struct devlink_port *devlink_port;
+ u16 pool_count = devlink_sb_pool_count(devlink_sb);
+ unsigned long port_index;
+ u16 pool_index;
+ int err;
+
+ xa_for_each(&devlink->ports, port_index, devlink_port) {
+ for (pool_index = 0; pool_index < pool_count; pool_index++) {
+ if (*p_idx < start) {
+ (*p_idx)++;
+ continue;
+ }
+ err = devlink_nl_sb_port_pool_fill(msg, devlink,
+ devlink_port,
+ devlink_sb,
+ pool_index,
+ DEVLINK_CMD_SB_PORT_POOL_NEW,
+ portid, seq, flags);
+ if (err)
+ return err;
+ (*p_idx)++;
+ }
+ }
+ return 0;
+}
+
+static int
+devlink_nl_sb_port_pool_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_sb *devlink_sb;
+ int idx = 0;
+ int err = 0;
+
+ if (!devlink->ops->sb_port_pool_get)
+ return 0;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_port_pool_get_dumpit(msg, state->idx, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int devlink_nl_sb_port_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_sb_port_pool_get_dump_one);
+}
+
+static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 threshold,
+ struct netlink_ext_ack *extack)
+
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ if (ops->sb_port_pool_set)
+ return ops->sb_port_pool_set(devlink_port, sb_index,
+ pool_index, threshold, extack);
+ return -EOPNOTSUPP;
+}
+
+int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_sb *devlink_sb;
+ u16 pool_index;
+ u32 threshold;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
+ return -EINVAL;
+
+ threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
+ return devlink_sb_port_pool_set(devlink_port, devlink_sb->index,
+ pool_index, threshold, info->extack);
+}
+
+static int
+devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ struct devlink_sb *devlink_sb, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ enum devlink_command cmd,
+ u32 portid, u32 seq, int flags)
+{
+ const struct devlink_ops *ops = devlink->ops;
+ u16 pool_index;
+ u32 threshold;
+ void *hdr;
+ int err;
+
+ err = ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index,
+ tc_index, pool_type,
+ &pool_index, &threshold);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type))
+ goto nla_put_failure;
+ if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold))
+ goto nla_put_failure;
+
+ if (ops->sb_occ_tc_port_bind_get) {
+ u32 cur;
+ u32 max;
+
+ err = ops->sb_occ_tc_port_bind_get(devlink_port,
+ devlink_sb->index,
+ tc_index, pool_type,
+ &cur, &max);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ if (!err) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max))
+ goto nla_put_failure;
+ }
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_sb_tc_pool_bind_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = devlink_port->devlink;
+ struct devlink_sb *devlink_sb;
+ struct sk_buff *msg;
+ enum devlink_sb_pool_type pool_type;
+ u16 tc_index;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ err = devlink_sb_pool_type_get_from_info(info, &pool_type);
+ if (err)
+ return err;
+
+ err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
+ pool_type, &tc_index);
+ if (err)
+ return err;
+
+ if (!devlink->ops->sb_tc_pool_bind_get)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port,
+ devlink_sb, tc_index, pool_type,
+ DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+ info->snd_portid,
+ info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
+ int start, int *p_idx,
+ struct devlink *devlink,
+ struct devlink_sb *devlink_sb,
+ u32 portid, u32 seq, int flags)
+{
+ struct devlink_port *devlink_port;
+ unsigned long port_index;
+ u16 tc_index;
+ int err;
+
+ xa_for_each(&devlink->ports, port_index, devlink_port) {
+ for (tc_index = 0;
+ tc_index < devlink_sb->ingress_tc_count; tc_index++) {
+ if (*p_idx < start) {
+ (*p_idx)++;
+ continue;
+ }
+ err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
+ devlink_port,
+ devlink_sb,
+ tc_index,
+ DEVLINK_SB_POOL_TYPE_INGRESS,
+ DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+ portid, seq,
+ flags);
+ if (err)
+ return err;
+ (*p_idx)++;
+ }
+ for (tc_index = 0;
+ tc_index < devlink_sb->egress_tc_count; tc_index++) {
+ if (*p_idx < start) {
+ (*p_idx)++;
+ continue;
+ }
+ err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink,
+ devlink_port,
+ devlink_sb,
+ tc_index,
+ DEVLINK_SB_POOL_TYPE_EGRESS,
+ DEVLINK_CMD_SB_TC_POOL_BIND_NEW,
+ portid, seq,
+ flags);
+ if (err)
+ return err;
+ (*p_idx)++;
+ }
+ }
+ return 0;
+}
+
+static int devlink_nl_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_sb *devlink_sb;
+ int idx = 0;
+ int err = 0;
+
+ if (!devlink->ops->sb_tc_pool_bind_get)
+ return 0;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_tc_pool_bind_get_dumpit(msg, state->idx, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int devlink_nl_sb_tc_pool_bind_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb,
+ devlink_nl_sb_tc_pool_bind_get_dump_one);
+}
+
+static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack)
+
+{
+ const struct devlink_ops *ops = devlink_port->devlink->ops;
+
+ if (ops->sb_tc_pool_bind_set)
+ return ops->sb_tc_pool_bind_set(devlink_port, sb_index,
+ tc_index, pool_type,
+ pool_index, threshold, extack);
+ return -EOPNOTSUPP;
+}
+
+int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port = info->user_ptr[1];
+ struct devlink *devlink = info->user_ptr[0];
+ enum devlink_sb_pool_type pool_type;
+ struct devlink_sb *devlink_sb;
+ u16 tc_index;
+ u16 pool_index;
+ u32 threshold;
+ int err;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ err = devlink_sb_pool_type_get_from_info(info, &pool_type);
+ if (err)
+ return err;
+
+ err = devlink_sb_tc_index_get_from_info(devlink_sb, info,
+ pool_type, &tc_index);
+ if (err)
+ return err;
+
+ err = devlink_sb_pool_index_get_from_info(devlink_sb, info,
+ &pool_index);
+ if (err)
+ return err;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD))
+ return -EINVAL;
+
+ threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]);
+ return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index,
+ tc_index, pool_type,
+ pool_index, threshold, info->extack);
+}
+
+int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const struct devlink_ops *ops = devlink->ops;
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ if (ops->sb_occ_snapshot)
+ return ops->sb_occ_snapshot(devlink, devlink_sb->index);
+ return -EOPNOTSUPP;
+}
+
+int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const struct devlink_ops *ops = devlink->ops;
+ struct devlink_sb *devlink_sb;
+
+ devlink_sb = devlink_sb_get_from_info(devlink, info);
+ if (IS_ERR(devlink_sb))
+ return PTR_ERR(devlink_sb);
+
+ if (ops->sb_occ_max_clear)
+ return ops->sb_occ_max_clear(devlink, devlink_sb->index);
+ return -EOPNOTSUPP;
+}
+
+int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
+ u32 size, u16 ingress_pools_count,
+ u16 egress_pools_count, u16 ingress_tc_count,
+ u16 egress_tc_count)
+{
+ struct devlink_sb *devlink_sb;
+
+ lockdep_assert_held(&devlink->lock);
+
+ if (devlink_sb_index_exists(devlink, sb_index))
+ return -EEXIST;
+
+ devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL);
+ if (!devlink_sb)
+ return -ENOMEM;
+ devlink_sb->index = sb_index;
+ devlink_sb->size = size;
+ devlink_sb->ingress_pools_count = ingress_pools_count;
+ devlink_sb->egress_pools_count = egress_pools_count;
+ devlink_sb->ingress_tc_count = ingress_tc_count;
+ devlink_sb->egress_tc_count = egress_tc_count;
+ list_add_tail(&devlink_sb->list, &devlink->sb_list);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_sb_register);
+
+int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
+ u32 size, u16 ingress_pools_count,
+ u16 egress_pools_count, u16 ingress_tc_count,
+ u16 egress_tc_count)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_sb_register(devlink, sb_index, size, ingress_pools_count,
+ egress_pools_count, ingress_tc_count,
+ egress_tc_count);
+ devl_unlock(devlink);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_sb_register);
+
+void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+{
+ struct devlink_sb *devlink_sb;
+
+ lockdep_assert_held(&devlink->lock);
+
+ devlink_sb = devlink_sb_get_by_index(devlink, sb_index);
+ WARN_ON(!devlink_sb);
+ list_del(&devlink_sb->list);
+ kfree(devlink_sb);
+}
+EXPORT_SYMBOL_GPL(devl_sb_unregister);
+
+void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
+{
+ devl_lock(devlink);
+ devl_sb_unregister(devlink, sb_index);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_sb_unregister);
diff --git a/net/devlink/trap.c b/net/devlink/trap.c
new file mode 100644
index 000000000000..c26bf9b29bca
--- /dev/null
+++ b/net/devlink/trap.c
@@ -0,0 +1,1861 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <trace/events/devlink.h>
+
+#include "devl_internal.h"
+
+struct devlink_stats {
+ u64_stats_t rx_bytes;
+ u64_stats_t rx_packets;
+ struct u64_stats_sync syncp;
+};
+
+/**
+ * struct devlink_trap_policer_item - Packet trap policer attributes.
+ * @policer: Immutable packet trap policer attributes.
+ * @rate: Rate in packets / sec.
+ * @burst: Burst size in packets.
+ * @list: trap_policer_list member.
+ *
+ * Describes packet trap policer attributes. Created by devlink during trap
+ * policer registration.
+ */
+struct devlink_trap_policer_item {
+ const struct devlink_trap_policer *policer;
+ u64 rate;
+ u64 burst;
+ struct list_head list;
+};
+
+/**
+ * struct devlink_trap_group_item - Packet trap group attributes.
+ * @group: Immutable packet trap group attributes.
+ * @policer_item: Associated policer item. Can be NULL.
+ * @list: trap_group_list member.
+ * @stats: Trap group statistics.
+ *
+ * Describes packet trap group attributes. Created by devlink during trap
+ * group registration.
+ */
+struct devlink_trap_group_item {
+ const struct devlink_trap_group *group;
+ struct devlink_trap_policer_item *policer_item;
+ struct list_head list;
+ struct devlink_stats __percpu *stats;
+};
+
+/**
+ * struct devlink_trap_item - Packet trap attributes.
+ * @trap: Immutable packet trap attributes.
+ * @group_item: Associated group item.
+ * @list: trap_list member.
+ * @action: Trap action.
+ * @stats: Trap statistics.
+ * @priv: Driver private information.
+ *
+ * Describes both mutable and immutable packet trap attributes. Created by
+ * devlink during trap registration and used for all trap related operations.
+ */
+struct devlink_trap_item {
+ const struct devlink_trap *trap;
+ struct devlink_trap_group_item *group_item;
+ struct list_head list;
+ enum devlink_trap_action action;
+ struct devlink_stats __percpu *stats;
+ void *priv;
+};
+
+static struct devlink_trap_policer_item *
+devlink_trap_policer_item_lookup(struct devlink *devlink, u32 id)
+{
+ struct devlink_trap_policer_item *policer_item;
+
+ list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
+ if (policer_item->policer->id == id)
+ return policer_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_item *
+devlink_trap_item_lookup(struct devlink *devlink, const char *name)
+{
+ struct devlink_trap_item *trap_item;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (!strcmp(trap_item->trap->name, name))
+ return trap_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_item *
+devlink_trap_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ struct nlattr *attr;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_NAME])
+ return NULL;
+ attr = info->attrs[DEVLINK_ATTR_TRAP_NAME];
+
+ return devlink_trap_item_lookup(devlink, nla_data(attr));
+}
+
+static int
+devlink_trap_action_get_from_info(struct genl_info *info,
+ enum devlink_trap_action *p_trap_action)
+{
+ u8 val;
+
+ val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]);
+ switch (val) {
+ case DEVLINK_TRAP_ACTION_DROP:
+ case DEVLINK_TRAP_ACTION_TRAP:
+ case DEVLINK_TRAP_ACTION_MIRROR:
+ *p_trap_action = val;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_metadata_put(struct sk_buff *msg,
+ const struct devlink_trap *trap)
+{
+ struct nlattr *attr;
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT))
+ goto nla_put_failure;
+ if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE) &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
+ struct devlink_stats *stats)
+{
+ int i;
+
+ memset(stats, 0, sizeof(*stats));
+ for_each_possible_cpu(i) {
+ struct devlink_stats *cpu_stats;
+ u64 rx_packets, rx_bytes;
+ unsigned int start;
+
+ cpu_stats = per_cpu_ptr(trap_stats, i);
+ do {
+ start = u64_stats_fetch_begin(&cpu_stats->syncp);
+ rx_packets = u64_stats_read(&cpu_stats->rx_packets);
+ rx_bytes = u64_stats_read(&cpu_stats->rx_bytes);
+ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start));
+
+ u64_stats_add(&stats->rx_packets, rx_packets);
+ u64_stats_add(&stats->rx_bytes, rx_bytes);
+ }
+}
+
+static int
+devlink_trap_group_stats_put(struct sk_buff *msg,
+ struct devlink_stats __percpu *trap_stats)
+{
+ struct devlink_stats stats;
+ struct nlattr *attr;
+
+ devlink_trap_stats_read(trap_stats, &stats);
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ u64_stats_read(&stats.rx_packets),
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ u64_stats_read(&stats.rx_bytes),
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_item *trap_item)
+{
+ struct devlink_stats stats;
+ struct nlattr *attr;
+ u64 drops = 0;
+ int err;
+
+ if (devlink->ops->trap_drop_counter_get) {
+ err = devlink->ops->trap_drop_counter_get(devlink,
+ trap_item->trap,
+ &drops);
+ if (err)
+ return err;
+ }
+
+ devlink_trap_stats_read(trap_item->stats, &stats);
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (devlink->ops->trap_drop_counter_get &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ u64_stats_read(&stats.rx_packets),
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ u64_stats_read(&stats.rx_bytes),
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ struct devlink_trap_group_item *group_item = trap_item->group_item;
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+ group_item->group->name))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type))
+ goto nla_put_failure;
+
+ if (trap_item->trap->generic &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action))
+ goto nla_put_failure;
+
+ err = devlink_trap_metadata_put(msg, trap_item->trap);
+ if (err)
+ goto nla_put_failure;
+
+ err = devlink_trap_stats_put(msg, devlink, trap_item);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_trap_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_item *trap_item;
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_list))
+ return -EOPNOTSUPP;
+
+ trap_item = devlink_trap_item_get_from_info(devlink, info);
+ if (!trap_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_fill(msg, devlink, trap_item,
+ DEVLINK_CMD_TRAP_NEW, info->snd_portid,
+ info->snd_seq, 0);
+ if (err)
+ goto err_trap_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_trap_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb, int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_trap_item *trap_item;
+ int idx = 0;
+ int err = 0;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_fill(msg, devlink, trap_item,
+ DEVLINK_CMD_TRAP_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
+ }
+
+ return err;
+}
+
+int devlink_nl_trap_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_trap_get_dump_one);
+}
+
+static int __devlink_trap_action_set(struct devlink *devlink,
+ struct devlink_trap_item *trap_item,
+ enum devlink_trap_action trap_action,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (trap_item->action != trap_action &&
+ trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
+ NL_SET_ERR_MSG(extack, "Cannot change action of non-drop traps. Skipping");
+ return 0;
+ }
+
+ err = devlink->ops->trap_action_set(devlink, trap_item->trap,
+ trap_action, extack);
+ if (err)
+ return err;
+
+ trap_item->action = trap_action;
+
+ return 0;
+}
+
+static int devlink_trap_action_set(struct devlink *devlink,
+ struct devlink_trap_item *trap_item,
+ struct genl_info *info)
+{
+ enum devlink_trap_action trap_action;
+ int err;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+ return 0;
+
+ err = devlink_trap_action_get_from_info(info, &trap_action);
+ if (err) {
+ NL_SET_ERR_MSG(info->extack, "Invalid trap action");
+ return -EINVAL;
+ }
+
+ return __devlink_trap_action_set(devlink, trap_item, trap_action,
+ info->extack);
+}
+
+int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_item *trap_item;
+
+ if (list_empty(&devlink->trap_list))
+ return -EOPNOTSUPP;
+
+ trap_item = devlink_trap_item_get_from_info(devlink, info);
+ if (!trap_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap");
+ return -ENOENT;
+ }
+
+ return devlink_trap_action_set(devlink, trap_item, info);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_lookup(struct devlink *devlink, const char *name)
+{
+ struct devlink_trap_group_item *group_item;
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+ if (!strcmp(group_item->group->name, name))
+ return group_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_lookup_by_id(struct devlink *devlink, u16 id)
+{
+ struct devlink_trap_group_item *group_item;
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+ if (group_item->group->id == id)
+ return group_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ char *name;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME])
+ return NULL;
+ name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]);
+
+ return devlink_trap_group_item_lookup(devlink, name);
+}
+
+static int
+devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+ group_item->group->name))
+ goto nla_put_failure;
+
+ if (group_item->group->generic &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+ goto nla_put_failure;
+
+ if (group_item->policer_item &&
+ nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
+ group_item->policer_item->policer->id))
+ goto nla_put_failure;
+
+ err = devlink_trap_group_stats_put(msg, group_item->stats);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_trap_group_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_group_item *group_item;
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_group_list))
+ return -EOPNOTSUPP;
+
+ group_item = devlink_trap_group_item_get_from_info(devlink, info);
+ if (!group_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap group");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err)
+ goto err_trap_group_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_group_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_trap_group_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_trap_group_item *group_item;
+ int idx = 0;
+ int err = 0;
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
+ }
+
+ return err;
+}
+
+int devlink_nl_trap_group_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_trap_group_get_dump_one);
+}
+
+static int
+__devlink_trap_group_action_set(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item,
+ enum devlink_trap_action trap_action,
+ struct netlink_ext_ack *extack)
+{
+ const char *group_name = group_item->group->name;
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ if (devlink->ops->trap_group_action_set) {
+ err = devlink->ops->trap_group_action_set(devlink, group_item->group,
+ trap_action, extack);
+ if (err)
+ return err;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (strcmp(trap_item->group_item->group->name, group_name))
+ continue;
+ if (trap_item->action != trap_action &&
+ trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP)
+ continue;
+ trap_item->action = trap_action;
+ }
+
+ return 0;
+ }
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (strcmp(trap_item->group_item->group->name, group_name))
+ continue;
+ err = __devlink_trap_action_set(devlink, trap_item,
+ trap_action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+devlink_trap_group_action_set(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item,
+ struct genl_info *info, bool *p_modified)
+{
+ enum devlink_trap_action trap_action;
+ int err;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+ return 0;
+
+ err = devlink_trap_action_get_from_info(info, &trap_action);
+ if (err) {
+ NL_SET_ERR_MSG(info->extack, "Invalid trap action");
+ return -EINVAL;
+ }
+
+ err = __devlink_trap_group_action_set(devlink, group_item, trap_action,
+ info->extack);
+ if (err)
+ return err;
+
+ *p_modified = true;
+
+ return 0;
+}
+
+static int devlink_trap_group_set(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item,
+ struct genl_info *info)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct netlink_ext_ack *extack = info->extack;
+ const struct devlink_trap_policer *policer;
+ struct nlattr **attrs = info->attrs;
+ u32 policer_id;
+ int err;
+
+ if (!attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
+ return 0;
+
+ if (!devlink->ops->trap_group_set)
+ return -EOPNOTSUPP;
+
+ policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
+ policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
+ if (policer_id && !policer_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
+ return -ENOENT;
+ }
+ policer = policer_item ? policer_item->policer : NULL;
+
+ err = devlink->ops->trap_group_set(devlink, group_item->group, policer,
+ extack);
+ if (err)
+ return err;
+
+ group_item->policer_item = policer_item;
+
+ return 0;
+}
+
+int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_group_item *group_item;
+ bool modified = false;
+ int err;
+
+ if (list_empty(&devlink->trap_group_list))
+ return -EOPNOTSUPP;
+
+ group_item = devlink_trap_group_item_get_from_info(devlink, info);
+ if (!group_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap group");
+ return -ENOENT;
+ }
+
+ err = devlink_trap_group_action_set(devlink, group_item, info,
+ &modified);
+ if (err)
+ return err;
+
+ err = devlink_trap_group_set(devlink, group_item, info);
+ if (err)
+ goto err_trap_group_set;
+
+ return 0;
+
+err_trap_group_set:
+ if (modified)
+ NL_SET_ERR_MSG(extack, "Trap group set failed, but some changes were committed already");
+ return err;
+}
+
+static struct devlink_trap_policer_item *
+devlink_trap_policer_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ u32 id;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
+ return NULL;
+ id = nla_get_u32(info->attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
+
+ return devlink_trap_policer_item_lookup(devlink, id);
+}
+
+static int
+devlink_trap_policer_stats_put(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_policer *policer)
+{
+ struct nlattr *attr;
+ u64 drops;
+ int err;
+
+ if (!devlink->ops->trap_policer_counter_get)
+ return 0;
+
+ err = devlink->ops->trap_policer_counter_get(devlink, policer, &drops);
+ if (err)
+ return err;
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int
+devlink_nl_trap_policer_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_policer_item *policer_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, DEVLINK_ATTR_TRAP_POLICER_ID,
+ policer_item->policer->id))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_RATE,
+ policer_item->rate, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_TRAP_POLICER_BURST,
+ policer_item->burst, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ err = devlink_trap_policer_stats_put(msg, devlink,
+ policer_item->policer);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_trap_policer_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_policer_list))
+ return -EOPNOTSUPP;
+
+ policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
+ if (!policer_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err)
+ goto err_trap_policer_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_policer_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_trap_policer_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb,
+ int flags)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_trap_policer_item *policer_item;
+ int idx = 0;
+ int err = 0;
+
+ list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, flags);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
+ }
+
+ return err;
+}
+
+int devlink_nl_trap_policer_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_dumpit(skb, cb, devlink_nl_trap_policer_get_dump_one);
+}
+
+static int
+devlink_trap_policer_set(struct devlink *devlink,
+ struct devlink_trap_policer_item *policer_item,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct nlattr **attrs = info->attrs;
+ u64 rate, burst;
+ int err;
+
+ rate = policer_item->rate;
+ burst = policer_item->burst;
+
+ if (attrs[DEVLINK_ATTR_TRAP_POLICER_RATE])
+ rate = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_RATE]);
+
+ if (attrs[DEVLINK_ATTR_TRAP_POLICER_BURST])
+ burst = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_BURST]);
+
+ if (rate < policer_item->policer->min_rate) {
+ NL_SET_ERR_MSG(extack, "Policer rate lower than limit");
+ return -EINVAL;
+ }
+
+ if (rate > policer_item->policer->max_rate) {
+ NL_SET_ERR_MSG(extack, "Policer rate higher than limit");
+ return -EINVAL;
+ }
+
+ if (burst < policer_item->policer->min_burst) {
+ NL_SET_ERR_MSG(extack, "Policer burst size lower than limit");
+ return -EINVAL;
+ }
+
+ if (burst > policer_item->policer->max_burst) {
+ NL_SET_ERR_MSG(extack, "Policer burst size higher than limit");
+ return -EINVAL;
+ }
+
+ err = devlink->ops->trap_policer_set(devlink, policer_item->policer,
+ rate, burst, info->extack);
+ if (err)
+ return err;
+
+ policer_item->rate = rate;
+ policer_item->burst = burst;
+
+ return 0;
+}
+
+int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (list_empty(&devlink->trap_policer_list))
+ return -EOPNOTSUPP;
+
+ if (!devlink->ops->trap_policer_set)
+ return -EOPNOTSUPP;
+
+ policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
+ if (!policer_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
+ return -ENOENT;
+ }
+
+ return devlink_trap_policer_set(devlink, policer_item, info);
+}
+
+#define DEVLINK_TRAP(_id, _type) \
+ { \
+ .type = DEVLINK_TRAP_TYPE_##_type, \
+ .id = DEVLINK_TRAP_GENERIC_ID_##_id, \
+ .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \
+ }
+
+static const struct devlink_trap devlink_trap_generic[] = {
+ DEVLINK_TRAP(SMAC_MC, DROP),
+ DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP),
+ DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP),
+ DEVLINK_TRAP(INGRESS_STP_FILTER, DROP),
+ DEVLINK_TRAP(EMPTY_TX_LIST, DROP),
+ DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP),
+ DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP),
+ DEVLINK_TRAP(TTL_ERROR, EXCEPTION),
+ DEVLINK_TRAP(TAIL_DROP, DROP),
+ DEVLINK_TRAP(NON_IP_PACKET, DROP),
+ DEVLINK_TRAP(UC_DIP_MC_DMAC, DROP),
+ DEVLINK_TRAP(DIP_LB, DROP),
+ DEVLINK_TRAP(SIP_MC, DROP),
+ DEVLINK_TRAP(SIP_LB, DROP),
+ DEVLINK_TRAP(CORRUPTED_IP_HDR, DROP),
+ DEVLINK_TRAP(IPV4_SIP_BC, DROP),
+ DEVLINK_TRAP(IPV6_MC_DIP_RESERVED_SCOPE, DROP),
+ DEVLINK_TRAP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, DROP),
+ DEVLINK_TRAP(MTU_ERROR, EXCEPTION),
+ DEVLINK_TRAP(UNRESOLVED_NEIGH, EXCEPTION),
+ DEVLINK_TRAP(RPF, EXCEPTION),
+ DEVLINK_TRAP(REJECT_ROUTE, EXCEPTION),
+ DEVLINK_TRAP(IPV4_LPM_UNICAST_MISS, EXCEPTION),
+ DEVLINK_TRAP(IPV6_LPM_UNICAST_MISS, EXCEPTION),
+ DEVLINK_TRAP(NON_ROUTABLE, DROP),
+ DEVLINK_TRAP(DECAP_ERROR, EXCEPTION),
+ DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP),
+ DEVLINK_TRAP(INGRESS_FLOW_ACTION_DROP, DROP),
+ DEVLINK_TRAP(EGRESS_FLOW_ACTION_DROP, DROP),
+ DEVLINK_TRAP(STP, CONTROL),
+ DEVLINK_TRAP(LACP, CONTROL),
+ DEVLINK_TRAP(LLDP, CONTROL),
+ DEVLINK_TRAP(IGMP_QUERY, CONTROL),
+ DEVLINK_TRAP(IGMP_V1_REPORT, CONTROL),
+ DEVLINK_TRAP(IGMP_V2_REPORT, CONTROL),
+ DEVLINK_TRAP(IGMP_V3_REPORT, CONTROL),
+ DEVLINK_TRAP(IGMP_V2_LEAVE, CONTROL),
+ DEVLINK_TRAP(MLD_QUERY, CONTROL),
+ DEVLINK_TRAP(MLD_V1_REPORT, CONTROL),
+ DEVLINK_TRAP(MLD_V2_REPORT, CONTROL),
+ DEVLINK_TRAP(MLD_V1_DONE, CONTROL),
+ DEVLINK_TRAP(IPV4_DHCP, CONTROL),
+ DEVLINK_TRAP(IPV6_DHCP, CONTROL),
+ DEVLINK_TRAP(ARP_REQUEST, CONTROL),
+ DEVLINK_TRAP(ARP_RESPONSE, CONTROL),
+ DEVLINK_TRAP(ARP_OVERLAY, CONTROL),
+ DEVLINK_TRAP(IPV6_NEIGH_SOLICIT, CONTROL),
+ DEVLINK_TRAP(IPV6_NEIGH_ADVERT, CONTROL),
+ DEVLINK_TRAP(IPV4_BFD, CONTROL),
+ DEVLINK_TRAP(IPV6_BFD, CONTROL),
+ DEVLINK_TRAP(IPV4_OSPF, CONTROL),
+ DEVLINK_TRAP(IPV6_OSPF, CONTROL),
+ DEVLINK_TRAP(IPV4_BGP, CONTROL),
+ DEVLINK_TRAP(IPV6_BGP, CONTROL),
+ DEVLINK_TRAP(IPV4_VRRP, CONTROL),
+ DEVLINK_TRAP(IPV6_VRRP, CONTROL),
+ DEVLINK_TRAP(IPV4_PIM, CONTROL),
+ DEVLINK_TRAP(IPV6_PIM, CONTROL),
+ DEVLINK_TRAP(UC_LB, CONTROL),
+ DEVLINK_TRAP(LOCAL_ROUTE, CONTROL),
+ DEVLINK_TRAP(EXTERNAL_ROUTE, CONTROL),
+ DEVLINK_TRAP(IPV6_UC_DIP_LINK_LOCAL_SCOPE, CONTROL),
+ DEVLINK_TRAP(IPV6_DIP_ALL_NODES, CONTROL),
+ DEVLINK_TRAP(IPV6_DIP_ALL_ROUTERS, CONTROL),
+ DEVLINK_TRAP(IPV6_ROUTER_SOLICIT, CONTROL),
+ DEVLINK_TRAP(IPV6_ROUTER_ADVERT, CONTROL),
+ DEVLINK_TRAP(IPV6_REDIRECT, CONTROL),
+ DEVLINK_TRAP(IPV4_ROUTER_ALERT, CONTROL),
+ DEVLINK_TRAP(IPV6_ROUTER_ALERT, CONTROL),
+ DEVLINK_TRAP(PTP_EVENT, CONTROL),
+ DEVLINK_TRAP(PTP_GENERAL, CONTROL),
+ DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL),
+ DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL),
+ DEVLINK_TRAP(EARLY_DROP, DROP),
+ DEVLINK_TRAP(VXLAN_PARSING, DROP),
+ DEVLINK_TRAP(LLC_SNAP_PARSING, DROP),
+ DEVLINK_TRAP(VLAN_PARSING, DROP),
+ DEVLINK_TRAP(PPPOE_PPP_PARSING, DROP),
+ DEVLINK_TRAP(MPLS_PARSING, DROP),
+ DEVLINK_TRAP(ARP_PARSING, DROP),
+ DEVLINK_TRAP(IP_1_PARSING, DROP),
+ DEVLINK_TRAP(IP_N_PARSING, DROP),
+ DEVLINK_TRAP(GRE_PARSING, DROP),
+ DEVLINK_TRAP(UDP_PARSING, DROP),
+ DEVLINK_TRAP(TCP_PARSING, DROP),
+ DEVLINK_TRAP(IPSEC_PARSING, DROP),
+ DEVLINK_TRAP(SCTP_PARSING, DROP),
+ DEVLINK_TRAP(DCCP_PARSING, DROP),
+ DEVLINK_TRAP(GTP_PARSING, DROP),
+ DEVLINK_TRAP(ESP_PARSING, DROP),
+ DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP),
+ DEVLINK_TRAP(DMAC_FILTER, DROP),
+ DEVLINK_TRAP(EAPOL, CONTROL),
+ DEVLINK_TRAP(LOCKED_PORT, DROP),
+};
+
+#define DEVLINK_TRAP_GROUP(_id) \
+ { \
+ .id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \
+ .name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \
+ }
+
+static const struct devlink_trap_group devlink_trap_group_generic[] = {
+ DEVLINK_TRAP_GROUP(L2_DROPS),
+ DEVLINK_TRAP_GROUP(L3_DROPS),
+ DEVLINK_TRAP_GROUP(L3_EXCEPTIONS),
+ DEVLINK_TRAP_GROUP(BUFFER_DROPS),
+ DEVLINK_TRAP_GROUP(TUNNEL_DROPS),
+ DEVLINK_TRAP_GROUP(ACL_DROPS),
+ DEVLINK_TRAP_GROUP(STP),
+ DEVLINK_TRAP_GROUP(LACP),
+ DEVLINK_TRAP_GROUP(LLDP),
+ DEVLINK_TRAP_GROUP(MC_SNOOPING),
+ DEVLINK_TRAP_GROUP(DHCP),
+ DEVLINK_TRAP_GROUP(NEIGH_DISCOVERY),
+ DEVLINK_TRAP_GROUP(BFD),
+ DEVLINK_TRAP_GROUP(OSPF),
+ DEVLINK_TRAP_GROUP(BGP),
+ DEVLINK_TRAP_GROUP(VRRP),
+ DEVLINK_TRAP_GROUP(PIM),
+ DEVLINK_TRAP_GROUP(UC_LB),
+ DEVLINK_TRAP_GROUP(LOCAL_DELIVERY),
+ DEVLINK_TRAP_GROUP(EXTERNAL_DELIVERY),
+ DEVLINK_TRAP_GROUP(IPV6),
+ DEVLINK_TRAP_GROUP(PTP_EVENT),
+ DEVLINK_TRAP_GROUP(PTP_GENERAL),
+ DEVLINK_TRAP_GROUP(ACL_SAMPLE),
+ DEVLINK_TRAP_GROUP(ACL_TRAP),
+ DEVLINK_TRAP_GROUP(PARSER_ERROR_DROPS),
+ DEVLINK_TRAP_GROUP(EAPOL),
+};
+
+static int devlink_trap_generic_verify(const struct devlink_trap *trap)
+{
+ if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ if (strcmp(trap->name, devlink_trap_generic[trap->id].name))
+ return -EINVAL;
+
+ if (trap->type != devlink_trap_generic[trap->id].type)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int devlink_trap_driver_verify(const struct devlink_trap *trap)
+{
+ int i;
+
+ if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) {
+ if (!strcmp(trap->name, devlink_trap_generic[i].name))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_verify(const struct devlink_trap *trap)
+{
+ if (!trap || !trap->name)
+ return -EINVAL;
+
+ if (trap->generic)
+ return devlink_trap_generic_verify(trap);
+ else
+ return devlink_trap_driver_verify(trap);
+}
+
+static int
+devlink_trap_group_generic_verify(const struct devlink_trap_group *group)
+{
+ if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ if (strcmp(group->name, devlink_trap_group_generic[group->id].name))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+devlink_trap_group_driver_verify(const struct devlink_trap_group *group)
+{
+ int i;
+
+ if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) {
+ if (!strcmp(group->name, devlink_trap_group_generic[i].name))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_group_verify(const struct devlink_trap_group *group)
+{
+ if (group->generic)
+ return devlink_trap_group_generic_verify(group);
+ else
+ return devlink_trap_group_driver_verify(group);
+}
+
+static void
+devlink_trap_group_notify(struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
+ cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0,
+ 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_trap_groups_notify_register(struct devlink *devlink)
+{
+ struct devlink_trap_group_item *group_item;
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list)
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW);
+}
+
+void devlink_trap_groups_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_trap_group_item *group_item;
+
+ list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list)
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_DEL);
+}
+
+static int
+devlink_trap_item_group_link(struct devlink *devlink,
+ struct devlink_trap_item *trap_item)
+{
+ u16 group_id = trap_item->trap->init_group_id;
+ struct devlink_trap_group_item *group_item;
+
+ group_item = devlink_trap_group_item_lookup_by_id(devlink, group_id);
+ if (WARN_ON_ONCE(!group_item))
+ return -EINVAL;
+
+ trap_item->group_item = group_item;
+
+ return 0;
+}
+
+static void devlink_trap_notify(struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
+ cmd != DEVLINK_CMD_TRAP_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_traps_notify_register(struct devlink *devlink)
+{
+ struct devlink_trap_item *trap_item;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list)
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+}
+
+void devlink_traps_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_trap_item *trap_item;
+
+ list_for_each_entry_reverse(trap_item, &devlink->trap_list, list)
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+}
+
+static int
+devlink_trap_register(struct devlink *devlink,
+ const struct devlink_trap *trap, void *priv)
+{
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ if (devlink_trap_item_lookup(devlink, trap->name))
+ return -EEXIST;
+
+ trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL);
+ if (!trap_item)
+ return -ENOMEM;
+
+ trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+ if (!trap_item->stats) {
+ err = -ENOMEM;
+ goto err_stats_alloc;
+ }
+
+ trap_item->trap = trap;
+ trap_item->action = trap->init_action;
+ trap_item->priv = priv;
+
+ err = devlink_trap_item_group_link(devlink, trap_item);
+ if (err)
+ goto err_group_link;
+
+ err = devlink->ops->trap_init(devlink, trap, trap_item);
+ if (err)
+ goto err_trap_init;
+
+ list_add_tail(&trap_item->list, &devlink->trap_list);
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+
+ return 0;
+
+err_trap_init:
+err_group_link:
+ free_percpu(trap_item->stats);
+err_stats_alloc:
+ kfree(trap_item);
+ return err;
+}
+
+static void devlink_trap_unregister(struct devlink *devlink,
+ const struct devlink_trap *trap)
+{
+ struct devlink_trap_item *trap_item;
+
+ trap_item = devlink_trap_item_lookup(devlink, trap->name);
+ if (WARN_ON_ONCE(!trap_item))
+ return;
+
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+ list_del(&trap_item->list);
+ if (devlink->ops->trap_fini)
+ devlink->ops->trap_fini(devlink, trap, trap_item);
+ free_percpu(trap_item->stats);
+ kfree(trap_item);
+}
+
+static void devlink_trap_disable(struct devlink *devlink,
+ const struct devlink_trap *trap)
+{
+ struct devlink_trap_item *trap_item;
+
+ trap_item = devlink_trap_item_lookup(devlink, trap->name);
+ if (WARN_ON_ONCE(!trap_item))
+ return;
+
+ devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP,
+ NULL);
+ trap_item->action = DEVLINK_TRAP_ACTION_DROP;
+}
+
+/**
+ * devl_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devl_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv)
+{
+ int i, err;
+
+ if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
+ return -EINVAL;
+
+ devl_assert_locked(devlink);
+ for (i = 0; i < traps_count; i++) {
+ const struct devlink_trap *trap = &traps[i];
+
+ err = devlink_trap_verify(trap);
+ if (err)
+ goto err_trap_verify;
+
+ err = devlink_trap_register(devlink, trap, priv);
+ if (err)
+ goto err_trap_register;
+ }
+
+ return 0;
+
+err_trap_register:
+err_trap_verify:
+ for (i--; i >= 0; i--)
+ devlink_trap_unregister(devlink, &traps[i]);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devl_traps_register);
+
+/**
+ * devlink_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_traps_register(devlink, traps, traps_count, priv);
+ devl_unlock(devlink);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_traps_register);
+
+/**
+ * devl_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ */
+void devl_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count)
+{
+ int i;
+
+ devl_assert_locked(devlink);
+ /* Make sure we do not have any packets in-flight while unregistering
+ * traps by disabling all of them and waiting for a grace period.
+ */
+ for (i = traps_count - 1; i >= 0; i--)
+ devlink_trap_disable(devlink, &traps[i]);
+ synchronize_rcu();
+ for (i = traps_count - 1; i >= 0; i--)
+ devlink_trap_unregister(devlink, &traps[i]);
+}
+EXPORT_SYMBOL_GPL(devl_traps_unregister);
+
+/**
+ * devlink_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count)
+{
+ devl_lock(devlink);
+ devl_traps_unregister(devlink, traps, traps_count);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_traps_unregister);
+
+static void
+devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
+ size_t skb_len)
+{
+ struct devlink_stats *stats;
+
+ stats = this_cpu_ptr(trap_stats);
+ u64_stats_update_begin(&stats->syncp);
+ u64_stats_add(&stats->rx_bytes, skb_len);
+ u64_stats_inc(&stats->rx_packets);
+ u64_stats_update_end(&stats->syncp);
+}
+
+static void
+devlink_trap_report_metadata_set(struct devlink_trap_metadata *metadata,
+ const struct devlink_trap_item *trap_item,
+ struct devlink_port *in_devlink_port,
+ const struct flow_action_cookie *fa_cookie)
+{
+ metadata->trap_name = trap_item->trap->name;
+ metadata->trap_group_name = trap_item->group_item->group->name;
+ metadata->fa_cookie = fa_cookie;
+ metadata->trap_type = trap_item->trap->type;
+
+ spin_lock(&in_devlink_port->type_lock);
+ if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
+ metadata->input_dev = in_devlink_port->type_eth.netdev;
+ spin_unlock(&in_devlink_port->type_lock);
+}
+
+/**
+ * devlink_trap_report - Report trapped packet to drop monitor.
+ * @devlink: devlink.
+ * @skb: Trapped packet.
+ * @trap_ctx: Trap context.
+ * @in_devlink_port: Input devlink port.
+ * @fa_cookie: Flow action cookie. Could be NULL.
+ */
+void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
+ void *trap_ctx, struct devlink_port *in_devlink_port,
+ const struct flow_action_cookie *fa_cookie)
+
+{
+ struct devlink_trap_item *trap_item = trap_ctx;
+
+ devlink_trap_stats_update(trap_item->stats, skb->len);
+ devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
+
+ if (tracepoint_enabled(devlink_trap_report)) {
+ struct devlink_trap_metadata metadata = {};
+
+ devlink_trap_report_metadata_set(&metadata, trap_item,
+ in_devlink_port, fa_cookie);
+ trace_devlink_trap_report(devlink, skb, &metadata);
+ }
+}
+EXPORT_SYMBOL_GPL(devlink_trap_report);
+
+/**
+ * devlink_trap_ctx_priv - Trap context to driver private information.
+ * @trap_ctx: Trap context.
+ *
+ * Return: Driver private information passed during registration.
+ */
+void *devlink_trap_ctx_priv(void *trap_ctx)
+{
+ struct devlink_trap_item *trap_item = trap_ctx;
+
+ return trap_item->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv);
+
+static int
+devlink_trap_group_item_policer_link(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item)
+{
+ u32 policer_id = group_item->group->init_policer_id;
+ struct devlink_trap_policer_item *policer_item;
+
+ if (policer_id == 0)
+ return 0;
+
+ policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
+ if (WARN_ON_ONCE(!policer_item))
+ return -EINVAL;
+
+ group_item->policer_item = policer_item;
+
+ return 0;
+}
+
+static int
+devlink_trap_group_register(struct devlink *devlink,
+ const struct devlink_trap_group *group)
+{
+ struct devlink_trap_group_item *group_item;
+ int err;
+
+ if (devlink_trap_group_item_lookup(devlink, group->name))
+ return -EEXIST;
+
+ group_item = kzalloc(sizeof(*group_item), GFP_KERNEL);
+ if (!group_item)
+ return -ENOMEM;
+
+ group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+ if (!group_item->stats) {
+ err = -ENOMEM;
+ goto err_stats_alloc;
+ }
+
+ group_item->group = group;
+
+ err = devlink_trap_group_item_policer_link(devlink, group_item);
+ if (err)
+ goto err_policer_link;
+
+ if (devlink->ops->trap_group_init) {
+ err = devlink->ops->trap_group_init(devlink, group);
+ if (err)
+ goto err_group_init;
+ }
+
+ list_add_tail(&group_item->list, &devlink->trap_group_list);
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW);
+
+ return 0;
+
+err_group_init:
+err_policer_link:
+ free_percpu(group_item->stats);
+err_stats_alloc:
+ kfree(group_item);
+ return err;
+}
+
+static void
+devlink_trap_group_unregister(struct devlink *devlink,
+ const struct devlink_trap_group *group)
+{
+ struct devlink_trap_group_item *group_item;
+
+ group_item = devlink_trap_group_item_lookup(devlink, group->name);
+ if (WARN_ON_ONCE(!group_item))
+ return;
+
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_DEL);
+ list_del(&group_item->list);
+ free_percpu(group_item->stats);
+ kfree(group_item);
+}
+
+/**
+ * devl_trap_groups_register - Register packet trap groups with devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devl_trap_groups_register(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
+{
+ int i, err;
+
+ devl_assert_locked(devlink);
+ for (i = 0; i < groups_count; i++) {
+ const struct devlink_trap_group *group = &groups[i];
+
+ err = devlink_trap_group_verify(group);
+ if (err)
+ goto err_trap_group_verify;
+
+ err = devlink_trap_group_register(devlink, group);
+ if (err)
+ goto err_trap_group_register;
+ }
+
+ return 0;
+
+err_trap_group_register:
+err_trap_group_verify:
+ for (i--; i >= 0; i--)
+ devlink_trap_group_unregister(devlink, &groups[i]);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devl_trap_groups_register);
+
+/**
+ * devlink_trap_groups_register - Register packet trap groups with devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_trap_groups_register(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_trap_groups_register(devlink, groups, groups_count);
+ devl_unlock(devlink);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_trap_groups_register);
+
+/**
+ * devl_trap_groups_unregister - Unregister packet trap groups from devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ */
+void devl_trap_groups_unregister(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
+{
+ int i;
+
+ devl_assert_locked(devlink);
+ for (i = groups_count - 1; i >= 0; i--)
+ devlink_trap_group_unregister(devlink, &groups[i]);
+}
+EXPORT_SYMBOL_GPL(devl_trap_groups_unregister);
+
+/**
+ * devlink_trap_groups_unregister - Unregister packet trap groups from devlink.
+ * @devlink: devlink.
+ * @groups: Packet trap groups.
+ * @groups_count: Count of provided packet trap groups.
+ *
+ * Context: Takes and release devlink->lock <mutex>.
+ */
+void devlink_trap_groups_unregister(struct devlink *devlink,
+ const struct devlink_trap_group *groups,
+ size_t groups_count)
+{
+ devl_lock(devlink);
+ devl_trap_groups_unregister(devlink, groups, groups_count);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_trap_groups_unregister);
+
+static void
+devlink_trap_policer_notify(struct devlink *devlink,
+ const struct devlink_trap_policer_item *policer_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
+ cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_policer_fill(msg, devlink, policer_item, cmd, 0,
+ 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void devlink_trap_policers_notify_register(struct devlink *devlink)
+{
+ struct devlink_trap_policer_item *policer_item;
+
+ list_for_each_entry(policer_item, &devlink->trap_policer_list, list)
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW);
+}
+
+void devlink_trap_policers_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_trap_policer_item *policer_item;
+
+ list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list,
+ list)
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_DEL);
+}
+
+static int
+devlink_trap_policer_register(struct devlink *devlink,
+ const struct devlink_trap_policer *policer)
+{
+ struct devlink_trap_policer_item *policer_item;
+ int err;
+
+ if (devlink_trap_policer_item_lookup(devlink, policer->id))
+ return -EEXIST;
+
+ policer_item = kzalloc(sizeof(*policer_item), GFP_KERNEL);
+ if (!policer_item)
+ return -ENOMEM;
+
+ policer_item->policer = policer;
+ policer_item->rate = policer->init_rate;
+ policer_item->burst = policer->init_burst;
+
+ if (devlink->ops->trap_policer_init) {
+ err = devlink->ops->trap_policer_init(devlink, policer);
+ if (err)
+ goto err_policer_init;
+ }
+
+ list_add_tail(&policer_item->list, &devlink->trap_policer_list);
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW);
+
+ return 0;
+
+err_policer_init:
+ kfree(policer_item);
+ return err;
+}
+
+static void
+devlink_trap_policer_unregister(struct devlink *devlink,
+ const struct devlink_trap_policer *policer)
+{
+ struct devlink_trap_policer_item *policer_item;
+
+ policer_item = devlink_trap_policer_item_lookup(devlink, policer->id);
+ if (WARN_ON_ONCE(!policer_item))
+ return;
+
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_DEL);
+ list_del(&policer_item->list);
+ if (devlink->ops->trap_policer_fini)
+ devlink->ops->trap_policer_fini(devlink, policer);
+ kfree(policer_item);
+}
+
+/**
+ * devl_trap_policers_register - Register packet trap policers with devlink.
+ * @devlink: devlink.
+ * @policers: Packet trap policers.
+ * @policers_count: Count of provided packet trap policers.
+ *
+ * Return: Non-zero value on failure.
+ */
+int
+devl_trap_policers_register(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count)
+{
+ int i, err;
+
+ devl_assert_locked(devlink);
+ for (i = 0; i < policers_count; i++) {
+ const struct devlink_trap_policer *policer = &policers[i];
+
+ if (WARN_ON(policer->id == 0 ||
+ policer->max_rate < policer->min_rate ||
+ policer->max_burst < policer->min_burst)) {
+ err = -EINVAL;
+ goto err_trap_policer_verify;
+ }
+
+ err = devlink_trap_policer_register(devlink, policer);
+ if (err)
+ goto err_trap_policer_register;
+ }
+ return 0;
+
+err_trap_policer_register:
+err_trap_policer_verify:
+ for (i--; i >= 0; i--)
+ devlink_trap_policer_unregister(devlink, &policers[i]);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devl_trap_policers_register);
+
+/**
+ * devl_trap_policers_unregister - Unregister packet trap policers from devlink.
+ * @devlink: devlink.
+ * @policers: Packet trap policers.
+ * @policers_count: Count of provided packet trap policers.
+ */
+void
+devl_trap_policers_unregister(struct devlink *devlink,
+ const struct devlink_trap_policer *policers,
+ size_t policers_count)
+{
+ int i;
+
+ devl_assert_locked(devlink);
+ for (i = policers_count - 1; i >= 0; i--)
+ devlink_trap_policer_unregister(devlink, &policers[i]);
+}
+EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 2f6195d7b741..37ab238e8304 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1568,27 +1568,6 @@ static void dsa_port_phylink_validate(struct phylink_config *config,
phylink_generic_validate(config, supported, state);
}
-static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state)
-{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
- struct dsa_switch *ds = dp->ds;
- int err;
-
- /* Only called for inband modes */
- if (!ds->ops->phylink_mac_link_state) {
- state->link = 0;
- return;
- }
-
- err = ds->ops->phylink_mac_link_state(ds, dp->index, state);
- if (err < 0) {
- dev_err(ds->dev, "p%d: phylink_mac_link_state() failed: %d\n",
- dp->index, err);
- state->link = 0;
- }
-}
-
static struct phylink_pcs *
dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
phy_interface_t interface)
@@ -1646,17 +1625,6 @@ static int dsa_port_phylink_mac_finish(struct phylink_config *config,
return err;
}
-static void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
-{
- struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_an_restart)
- return;
-
- ds->ops->phylink_mac_an_restart(ds, dp->index);
-}
-
static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
unsigned int mode,
phy_interface_t interface)
@@ -1700,11 +1668,9 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
.validate = dsa_port_phylink_validate,
.mac_select_pcs = dsa_port_phylink_mac_select_pcs,
- .mac_pcs_get_state = dsa_port_phylink_mac_pcs_get_state,
.mac_prepare = dsa_port_phylink_mac_prepare,
.mac_config = dsa_port_phylink_mac_config,
.mac_finish = dsa_port_phylink_mac_finish,
- .mac_an_restart = dsa_port_phylink_mac_an_restart,
.mac_link_down = dsa_port_phylink_mac_link_down,
.mac_link_up = dsa_port_phylink_mac_link_up,
};
@@ -1720,21 +1686,18 @@ int dsa_port_phylink_create(struct dsa_port *dp)
if (err)
mode = PHY_INTERFACE_MODE_NA;
- /* Presence of phylink_mac_link_state or phylink_mac_an_restart is
- * an indicator of a legacy phylink driver.
- */
- if (ds->ops->phylink_mac_link_state ||
- ds->ops->phylink_mac_an_restart)
- dp->pl_config.legacy_pre_march2020 = true;
-
if (ds->ops->phylink_get_caps) {
ds->ops->phylink_get_caps(ds, dp->index, &dp->pl_config);
} else {
/* For legacy drivers */
- __set_bit(PHY_INTERFACE_MODE_INTERNAL,
- dp->pl_config.supported_interfaces);
- __set_bit(PHY_INTERFACE_MODE_GMII,
- dp->pl_config.supported_interfaces);
+ if (mode != PHY_INTERFACE_MODE_NA) {
+ __set_bit(mode, dp->pl_config.supported_interfaces);
+ } else {
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+ dp->pl_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ dp->pl_config.supported_interfaces);
+ }
}
pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn),
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 527b1d576460..48db91b33390 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -21,6 +21,7 @@
#include <linux/if_hsr.h>
#include <net/dcbnl.h>
#include <linux/netpoll.h>
+#include <linux/string.h>
#include "dsa.h"
#include "port.h"
@@ -1056,10 +1057,10 @@ static void dsa_slave_get_strings(struct net_device *dev,
if (stringset == ETH_SS_STATS) {
int len = ETH_GSTRING_LEN;
- strncpy(data, "tx_packets", len);
- strncpy(data + len, "tx_bytes", len);
- strncpy(data + 2 * len, "rx_packets", len);
- strncpy(data + 3 * len, "rx_bytes", len);
+ strscpy_pad(data, "tx_packets", len);
+ strscpy_pad(data + len, "tx_bytes", len);
+ strscpy_pad(data + 2 * len, "rx_packets", len);
+ strscpy_pad(data + 3 * len, "rx_bytes", len);
if (ds->ops->get_strings)
ds->ops->get_strings(ds, dp->index, stringset,
data + 4 * len);
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index e757c8de06f1..e5ff7c34e577 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -75,10 +75,6 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
return NULL;
}
- /* Remove QCA tag and recalculate checksum */
- skb_pull_rcsum(skb, QCA_HDR_LEN);
- dsa_strip_etype_header(skb, QCA_HDR_LEN);
-
/* Get source port information */
port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, hdr);
@@ -86,6 +82,10 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
if (!skb->dev)
return NULL;
+ /* Remove QCA tag and recalculate checksum */
+ skb_pull_rcsum(skb, QCA_HDR_LEN);
+ dsa_strip_etype_header(skb, QCA_HDR_LEN);
+
return skb;
}
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index 61c40e889a4d..7b4bbd674bae 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -24,7 +24,7 @@ const struct nla_policy ethnl_channels_get_policy[] = {
static int channels_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct channels_reply_data *data = CHANNELS_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
index 01a59ce211c8..83112c1a71ae 100644
--- a/net/ethtool/coalesce.c
+++ b/net/ethtool/coalesce.c
@@ -59,10 +59,9 @@ const struct nla_policy ethnl_coalesce_get_policy[] = {
static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct net_device *dev = reply_base->dev;
int ret;
@@ -73,7 +72,8 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
ret = dev->ethtool_ops->get_coalesce(dev, &data->coalesce,
- &data->kernel_coalesce, extack);
+ &data->kernel_coalesce,
+ info->extack);
ethnl_ops_complete(dev);
return ret;
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 5fb19050991e..f5598c5f50de 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -665,9 +665,8 @@ const struct ethtool_phy_ops *ethtool_phy_ops;
void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops)
{
- rtnl_lock();
+ ASSERT_RTNL();
ethtool_phy_ops = ops;
- rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ethtool_set_ethtool_phy_ops);
diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c
index e4369769817e..0b2dea56d461 100644
--- a/net/ethtool/debug.c
+++ b/net/ethtool/debug.c
@@ -23,7 +23,7 @@ const struct nla_policy ethnl_debug_get_policy[] = {
static int debug_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct debug_reply_data *data = DEBUG_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c
index 42104bcb0e47..2853394d06a8 100644
--- a/net/ethtool/eee.c
+++ b/net/ethtool/eee.c
@@ -26,7 +26,7 @@ const struct nla_policy ethnl_eee_get_policy[] = {
static int eee_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct eee_reply_data *data = EEE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 49c0a2a77f02..6209c3a9c8f7 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -51,8 +51,7 @@ static int fallback_set_params(struct eeprom_req_info *request,
}
static int eeprom_fallback(struct eeprom_req_info *request,
- struct eeprom_reply_data *reply,
- struct genl_info *info)
+ struct eeprom_reply_data *reply)
{
struct net_device *dev = reply->base.dev;
struct ethtool_modinfo modinfo = {0};
@@ -103,7 +102,7 @@ static int get_module_eeprom_by_page(struct net_device *dev,
static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base);
struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base);
@@ -124,7 +123,7 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
if (ret)
goto err_free;
- ret = get_module_eeprom_by_page(dev, &page_data, info ? info->extack : NULL);
+ ret = get_module_eeprom_by_page(dev, &page_data, info->extack);
if (ret < 0)
goto err_ops;
@@ -140,7 +139,7 @@ err_free:
kfree(page_data.data);
if (ret == -EOPNOTSUPP)
- return eeprom_fallback(request, reply, info);
+ return eeprom_fallback(request, reply);
return ret;
}
diff --git a/net/ethtool/features.c b/net/ethtool/features.c
index 55d449a2d3fc..a79af8c25a07 100644
--- a/net/ethtool/features.c
+++ b/net/ethtool/features.c
@@ -35,7 +35,7 @@ static void ethnl_features_to_bitmap32(u32 *dest, netdev_features_t src)
static int features_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct features_reply_data *data = FEATURES_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c
index 0d9a3d153170..e7d3f2c352a3 100644
--- a/net/ethtool/fec.c
+++ b/net/ethtool/fec.c
@@ -92,7 +92,7 @@ fec_stats_recalc(struct fec_stat_grp *grp, struct ethtool_fec_stat *stats)
static int fec_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(active_fec_modes) = {};
struct fec_reply_data *data = FEC_REPDATA(reply_base);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 4a51e0ec295c..0b0ce4f81c01 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -907,6 +907,38 @@ static int ethtool_rxnfc_copy_to_compat(void __user *useraddr,
return 0;
}
+static int ethtool_rxnfc_copy_struct(u32 cmd, struct ethtool_rxnfc *info,
+ size_t *info_size, void __user *useraddr)
+{
+ /* struct ethtool_rxnfc was originally defined for
+ * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+ * members. User-space might still be using that
+ * definition.
+ */
+ if (cmd == ETHTOOL_GRXFH || cmd == ETHTOOL_SRXFH)
+ *info_size = (offsetof(struct ethtool_rxnfc, data) +
+ sizeof(info->data));
+
+ if (ethtool_rxnfc_copy_from_user(info, useraddr, *info_size))
+ return -EFAULT;
+
+ if ((cmd == ETHTOOL_GRXFH || cmd == ETHTOOL_SRXFH) && info->flow_type & FLOW_RSS) {
+ *info_size = sizeof(*info);
+ if (ethtool_rxnfc_copy_from_user(info, useraddr, *info_size))
+ return -EFAULT;
+ /* Since malicious users may modify the original data,
+ * we need to check whether FLOW_RSS is still requested.
+ */
+ if (!(info->flow_type & FLOW_RSS))
+ return -EINVAL;
+ }
+
+ if (info->cmd != cmd)
+ return -EINVAL;
+
+ return 0;
+}
+
static int ethtool_rxnfc_copy_to_user(void __user *useraddr,
const struct ethtool_rxnfc *rxnfc,
size_t size, const u32 *rule_buf)
@@ -944,16 +976,9 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
if (!dev->ethtool_ops->set_rxnfc)
return -EOPNOTSUPP;
- /* struct ethtool_rxnfc was originally defined for
- * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
- * members. User-space might still be using that
- * definition. */
- if (cmd == ETHTOOL_SRXFH)
- info_size = (offsetof(struct ethtool_rxnfc, data) +
- sizeof(info.data));
-
- if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
- return -EFAULT;
+ rc = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr);
+ if (rc)
+ return rc;
rc = dev->ethtool_ops->set_rxnfc(dev, &info);
if (rc)
@@ -978,33 +1003,9 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (!ops->get_rxnfc)
return -EOPNOTSUPP;
- /* struct ethtool_rxnfc was originally defined for
- * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
- * members. User-space might still be using that
- * definition. */
- if (cmd == ETHTOOL_GRXFH)
- info_size = (offsetof(struct ethtool_rxnfc, data) +
- sizeof(info.data));
-
- if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
- return -EFAULT;
-
- /* If FLOW_RSS was requested then user-space must be using the
- * new definition, as FLOW_RSS is newer.
- */
- if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
- info_size = sizeof(info);
- if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
- return -EFAULT;
- /* Since malicious users may modify the original data,
- * we need to check whether FLOW_RSS is still requested.
- */
- if (!(info.flow_type & FLOW_RSS))
- return -EINVAL;
- }
-
- if (info.cmd != cmd)
- return -EINVAL;
+ ret = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr);
+ if (ret)
+ return ret;
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
@@ -3207,7 +3208,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (v4_m_spec->ip4src ||
v4_m_spec->ip4dst) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS);
match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] =
offsetof(struct ethtool_rx_flow_key, ipv4);
}
@@ -3222,7 +3223,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (v4_m_spec->psrc ||
v4_m_spec->pdst) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_PORTS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_PORTS);
match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
offsetof(struct ethtool_rx_flow_key, tp);
}
@@ -3259,7 +3260,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (!ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6src) ||
!ipv6_addr_any((struct in6_addr *)v6_m_spec->ip6dst)) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] =
offsetof(struct ethtool_rx_flow_key, ipv6);
}
@@ -3274,7 +3275,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (v6_m_spec->psrc ||
v6_m_spec->pdst) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_PORTS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_PORTS);
match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
offsetof(struct ethtool_rx_flow_key, tp);
}
@@ -3282,7 +3283,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
match->key.ip.tos = v6_spec->tclass;
match->mask.ip.tos = v6_m_spec->tclass;
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_IP);
+ BIT_ULL(FLOW_DISSECTOR_KEY_IP);
match->dissector.offset[FLOW_DISSECTOR_KEY_IP] =
offsetof(struct ethtool_rx_flow_key, ip);
}
@@ -3306,7 +3307,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
break;
}
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_BASIC);
match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] =
offsetof(struct ethtool_rx_flow_key, basic);
@@ -3339,7 +3340,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
if (ext_m_spec->vlan_etype ||
ext_m_spec->vlan_tci) {
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_VLAN);
+ BIT_ULL(FLOW_DISSECTOR_KEY_VLAN);
match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
offsetof(struct ethtool_rx_flow_key, vlan);
}
@@ -3354,7 +3355,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
ETH_ALEN);
match->dissector.used_keys |=
- BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS);
+ BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS);
match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] =
offsetof(struct ethtool_rx_flow_key, eth_addrs);
}
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
index 310dfe63292a..5c317d23787b 100644
--- a/net/ethtool/linkinfo.c
+++ b/net/ethtool/linkinfo.c
@@ -23,7 +23,7 @@ const struct nla_policy ethnl_linkinfo_get_policy[] = {
static int linkinfo_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct linkinfo_reply_data *data = LINKINFO_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index 20165e07ef90..b2591db49f7d 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -27,7 +27,7 @@ const struct nla_policy ethnl_linkmodes_get_policy[] = {
static int linkmodes_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index 2158c17a0b32..b2de2108b356 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -81,7 +81,7 @@ static int linkstate_get_link_ext_state(struct net_device *dev,
static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/mm.c b/net/ethtool/mm.c
index 4058a557b5a4..2816bb23c3ad 100644
--- a/net/ethtool/mm.c
+++ b/net/ethtool/mm.c
@@ -27,7 +27,7 @@ const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1] = {
static int mm_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct mm_reply_data *data = MM_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/module.c b/net/ethtool/module.c
index e0d539b21423..ceb575efc290 100644
--- a/net/ethtool/module.c
+++ b/net/ethtool/module.c
@@ -38,10 +38,9 @@ static int module_get_power_mode(struct net_device *dev,
static int module_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct module_reply_data *data = MODULE_REPDATA(reply_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct net_device *dev = reply_base->dev;
int ret;
@@ -49,7 +48,7 @@ static int module_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
- ret = module_get_power_mode(dev, data, extack);
+ ret = module_get_power_mode(dev, data, info->extack);
if (ret < 0)
goto out_complete;
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 39a459b0111b..3bbd5afb7b31 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -252,8 +252,7 @@ int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
* @ops: request ops of currently processed message type
* @req_info: parsed request header of processed request
* @reply_data: data needed to compose the reply
- * @pos_hash: saved iteration position - hashbucket
- * @pos_idx: saved iteration position - index
+ * @pos_ifindex: saved iteration position - ifindex
*
* These parameters are kept in struct netlink_callback as context preserved
* between iterations. They are initialized by ethnl_default_start() and used
@@ -263,8 +262,7 @@ struct ethnl_dump_ctx {
const struct ethnl_request_ops *ops;
struct ethnl_req_info *req_info;
struct ethnl_reply_data *reply_data;
- int pos_hash;
- int pos_idx;
+ unsigned long pos_ifindex;
};
static const struct ethnl_request_ops *
@@ -318,10 +316,8 @@ static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
/**
* ethnl_default_parse() - Parse request message
* @req_info: pointer to structure to put data into
- * @tb: parsed attributes
- * @net: request netns
+ * @info: genl_info from the request
* @request_ops: struct request_ops for request type
- * @extack: netlink extack for error reporting
* @require_dev: fail if no device identified in header
*
* Parse universal request header and call request specific ->parse_request()
@@ -330,19 +326,21 @@ static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
* Return: 0 on success or negative error code
*/
static int ethnl_default_parse(struct ethnl_req_info *req_info,
- struct nlattr **tb, struct net *net,
+ const struct genl_info *info,
const struct ethnl_request_ops *request_ops,
- struct netlink_ext_ack *extack, bool require_dev)
+ bool require_dev)
{
+ struct nlattr **tb = info->attrs;
int ret;
ret = ethnl_parse_header_dev_get(req_info, tb[request_ops->hdr_attr],
- net, extack, require_dev);
+ genl_info_net(info), info->extack,
+ require_dev);
if (ret < 0)
return ret;
if (request_ops->parse_request) {
- ret = request_ops->parse_request(req_info, tb, extack);
+ ret = request_ops->parse_request(req_info, tb, info->extack);
if (ret < 0)
return ret;
}
@@ -395,8 +393,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
}
- ret = ethnl_default_parse(req_info, info->attrs, genl_info_net(info),
- ops, info->extack, !ops->allow_nodev_do);
+ ret = ethnl_default_parse(req_info, info, ops, !ops->allow_nodev_do);
if (ret < 0)
goto err_dev;
ethnl_init_reply_data(reply_data, ops, req_info->dev);
@@ -447,12 +444,12 @@ err_dev:
static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
const struct ethnl_dump_ctx *ctx,
- struct netlink_callback *cb)
+ const struct genl_info *info)
{
void *ehdr;
int ret;
- ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ ehdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
&ethtool_genl_family, NLM_F_MULTI,
ctx->ops->reply_cmd);
if (!ehdr)
@@ -460,7 +457,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev);
rtnl_lock();
- ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL);
+ ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, info);
rtnl_unlock();
if (ret < 0)
goto out;
@@ -490,55 +487,27 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
{
struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
struct net *net = sock_net(skb->sk);
- int s_idx = ctx->pos_idx;
- int h, idx = 0;
+ struct net_device *dev;
int ret = 0;
rtnl_lock();
- for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- struct hlist_head *head;
- struct net_device *dev;
- unsigned int seq;
-
- head = &net->dev_index_head[h];
-
-restart_chain:
- seq = net->dev_base_seq;
- cb->seq = seq;
- idx = 0;
- hlist_for_each_entry(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- dev_hold(dev);
- rtnl_unlock();
-
- ret = ethnl_default_dump_one(skb, dev, ctx, cb);
- dev_put(dev);
- if (ret < 0) {
- if (ret == -EOPNOTSUPP)
- goto lock_and_cont;
- if (likely(skb->len))
- ret = skb->len;
- goto out;
- }
-lock_and_cont:
- rtnl_lock();
- if (net->dev_base_seq != seq) {
- s_idx = idx + 1;
- goto restart_chain;
- }
-cont:
- idx++;
- }
+ for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
+ dev_hold(dev);
+ rtnl_unlock();
+
+ ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb));
+ rtnl_lock();
+ dev_put(dev);
+
+ if (ret < 0 && ret != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ ret = skb->len;
+ break;
+ }
}
rtnl_unlock();
-out:
- ctx->pos_hash = h;
- ctx->pos_idx = idx;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-
return ret;
}
@@ -568,8 +537,7 @@ static int ethnl_default_start(struct netlink_callback *cb)
goto free_req_info;
}
- ret = ethnl_default_parse(req_info, info->attrs, sock_net(cb->skb->sk),
- ops, cb->extack, false);
+ ret = ethnl_default_parse(req_info, &info->info, ops, false);
if (req_info->dev) {
/* We ignore device specification in dump requests but as the
* same parser as for non-dump (doit) requests is used, it
@@ -584,8 +552,7 @@ static int ethnl_default_start(struct netlink_callback *cb)
ctx->ops = ops;
ctx->req_info = req_info;
ctx->reply_data = reply_data;
- ctx->pos_hash = 0;
- ctx->pos_idx = 0;
+ ctx->pos_ifindex = 0;
return 0;
@@ -680,11 +647,14 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
struct ethnl_reply_data *reply_data;
const struct ethnl_request_ops *ops;
struct ethnl_req_info *req_info;
+ struct genl_info info;
struct sk_buff *skb;
void *reply_payload;
int reply_len;
int ret;
+ genl_info_init_ntf(&info, &ethtool_genl_family, cmd);
+
if (WARN_ONCE(cmd > ETHTOOL_MSG_KERNEL_MAX ||
!ethnl_default_notify_ops[cmd],
"unexpected notification type %u\n", cmd))
@@ -703,7 +673,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
req_info->flags |= ETHTOOL_FLAG_COMPACT_BITSETS;
ethnl_init_reply_data(reply_data, ops, dev);
- ret = ops->prepare_data(req_info, reply_data, NULL);
+ ret = ops->prepare_data(req_info, reply_data, &info);
if (ret < 0)
goto err_cleanup;
ret = ops->reply_size(req_info, reply_data);
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 79424b34b553..9a333a8d04c1 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -355,7 +355,7 @@ struct ethnl_request_ops {
struct netlink_ext_ack *extack);
int (*prepare_data)(const struct ethnl_req_info *req_info,
struct ethnl_reply_data *reply_data,
- struct genl_info *info);
+ const struct genl_info *info);
int (*reply_size)(const struct ethnl_req_info *req_info,
const struct ethnl_reply_data *reply_data);
int (*fill_reply)(struct sk_buff *skb,
diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c
index 6657d0b888d8..f7c847aeb1a2 100644
--- a/net/ethtool/pause.c
+++ b/net/ethtool/pause.c
@@ -51,10 +51,9 @@ static int pause_parse_request(struct ethnl_req_info *req_base,
static int pause_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
const struct pause_req_info *req_info = PAUSE_REQINFO(req_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct pause_reply_data *data = PAUSE_REPDATA(reply_base);
enum ethtool_mac_stats_src src = req_info->src;
struct net_device *dev = reply_base->dev;
@@ -74,7 +73,7 @@ static int pause_prepare_data(const struct ethnl_req_info *req_base,
if ((src == ETHTOOL_MAC_STATS_SRC_EMAC ||
src == ETHTOOL_MAC_STATS_SRC_PMAC) &&
!__ethtool_dev_mm_supported(dev)) {
- NL_SET_ERR_MSG_MOD(extack,
+ NL_SET_ERR_MSG_MOD(info->extack,
"Device does not support MAC merge layer");
ethnl_ops_complete(dev);
return -EOPNOTSUPP;
diff --git a/net/ethtool/phc_vclocks.c b/net/ethtool/phc_vclocks.c
index 637b2f5297d5..cadaabed60bd 100644
--- a/net/ethtool/phc_vclocks.c
+++ b/net/ethtool/phc_vclocks.c
@@ -24,7 +24,7 @@ const struct nla_policy ethnl_phc_vclocks_get_policy[] = {
static int phc_vclocks_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct phc_vclocks_reply_data *data = PHC_VCLOCKS_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c
index 5a8cab4df0c9..b238a1afe9ae 100644
--- a/net/ethtool/plca.c
+++ b/net/ethtool/plca.c
@@ -40,7 +40,7 @@ const struct nla_policy ethnl_plca_get_cfg_policy[] = {
static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct plca_reply_data *data = PLCA_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
@@ -183,7 +183,7 @@ const struct nla_policy ethnl_plca_get_status_policy[] = {
static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct plca_reply_data *data = PLCA_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c
index 23264a1ebf12..297be6a13ab9 100644
--- a/net/ethtool/privflags.c
+++ b/net/ethtool/privflags.c
@@ -57,7 +57,7 @@ static int ethnl_get_priv_flags_info(struct net_device *dev,
static int privflags_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index 530b8b99e6df..cc478af77111 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -53,8 +53,8 @@ static int pse_get_pse_attributes(struct net_device *dev,
}
static int pse_prepare_data(const struct ethnl_req_info *req_base,
- struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ struct ethnl_reply_data *reply_base,
+ const struct genl_info *info)
{
struct pse_reply_data *data = PSE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
@@ -64,7 +64,7 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
- ret = pse_get_pse_attributes(dev, info ? info->extack : NULL, data);
+ ret = pse_get_pse_attributes(dev, info->extack, data);
ethnl_ops_complete(dev);
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index 1c4972526142..fb09f774ea01 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -24,10 +24,9 @@ const struct nla_policy ethnl_rings_get_policy[] = {
static int rings_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct rings_reply_data *data = RINGS_REPDATA(reply_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct net_device *dev = reply_base->dev;
int ret;
@@ -39,7 +38,7 @@ static int rings_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
dev->ethtool_ops->get_ringparam(dev, &data->ringparam,
- &data->kernel_ringparam, extack);
+ &data->kernel_ringparam, info->extack);
ethnl_ops_complete(dev);
return 0;
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index be260ab34e58..5764202e6cb6 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -42,7 +42,8 @@ rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
static int
rss_prepare_data(const struct ethnl_req_info *req_base,
- struct ethnl_reply_data *reply_base, struct genl_info *info)
+ struct ethnl_reply_data *reply_base,
+ const struct genl_info *info)
{
struct rss_reply_data *data = RSS_REPDATA(reply_base);
struct rss_req_info *request = RSS_REQINFO(req_base);
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index 010ed19ccc99..912f0c4fff2f 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -114,10 +114,9 @@ static int stats_parse_request(struct ethnl_req_info *req_base,
static int stats_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
const struct stats_req_info *req_info = STATS_REQINFO(req_base);
- struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct stats_reply_data *data = STATS_REPDATA(reply_base);
enum ethtool_mac_stats_src src = req_info->src;
struct net_device *dev = reply_base->dev;
@@ -130,7 +129,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
if ((src == ETHTOOL_MAC_STATS_SRC_EMAC ||
src == ETHTOOL_MAC_STATS_SRC_PMAC) &&
!__ethtool_dev_mm_supported(dev)) {
- NL_SET_ERR_MSG_MOD(extack,
+ NL_SET_ERR_MSG_MOD(info->extack,
"Device does not support MAC merge layer");
ethnl_ops_complete(dev);
return -EOPNOTSUPP;
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index 3f7de54d85fb..c678b484a079 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -274,7 +274,7 @@ static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
static int strset_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
struct strset_reply_data *data = STRSET_REPDATA(reply_base);
diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c
index 63b5814bd460..9daed0aab162 100644
--- a/net/ethtool/tsinfo.c
+++ b/net/ethtool/tsinfo.c
@@ -25,7 +25,7 @@ const struct nla_policy ethnl_tsinfo_get_policy[] = {
static int tsinfo_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c
index 67fb414ca859..b4ce47dd2aa6 100644
--- a/net/ethtool/tunnels.c
+++ b/net/ethtool/tunnels.c
@@ -212,15 +212,14 @@ err_unlock_rtnl:
struct ethnl_tunnel_info_dump_ctx {
struct ethnl_req_info req_info;
- int pos_hash;
- int pos_idx;
+ unsigned long ifindex;
};
int ethnl_tunnel_info_start(struct netlink_callback *cb)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx;
- struct nlattr **tb = info->attrs;
+ struct nlattr **tb = info->info.attrs;
int ret;
BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
@@ -243,57 +242,39 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
- int s_idx = ctx->pos_idx;
- int h, idx = 0;
+ struct net_device *dev;
int ret = 0;
void *ehdr;
rtnl_lock();
- cb->seq = net->dev_base_seq;
- for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- struct hlist_head *head;
- struct net_device *dev;
-
- head = &net->dev_index_head[h];
- idx = 0;
- hlist_for_each_entry(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
-
- ehdr = ethnl_dump_put(skb, cb,
- ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY);
- if (!ehdr) {
- ret = -EMSGSIZE;
- goto out;
- }
-
- ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TUNNEL_INFO_HEADER);
- if (ret < 0) {
- genlmsg_cancel(skb, ehdr);
- goto out;
- }
-
- ctx->req_info.dev = dev;
- ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb);
- ctx->req_info.dev = NULL;
- if (ret < 0) {
- genlmsg_cancel(skb, ehdr);
- if (ret == -EOPNOTSUPP)
- goto cont;
- goto out;
- }
- genlmsg_end(skb, ehdr);
-cont:
- idx++;
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ ehdr = ethnl_dump_put(skb, cb,
+ ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY);
+ if (!ehdr) {
+ ret = -EMSGSIZE;
+ break;
+ }
+
+ ret = ethnl_fill_reply_header(skb, dev,
+ ETHTOOL_A_TUNNEL_INFO_HEADER);
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ break;
}
+
+ ctx->req_info.dev = dev;
+ ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb);
+ ctx->req_info.dev = NULL;
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ if (ret == -EOPNOTSUPP)
+ continue;
+ break;
+ }
+ genlmsg_end(skb, ehdr);
}
-out:
rtnl_unlock();
- ctx->pos_hash = h;
- ctx->pos_idx = idx;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-
if (ret == -EMSGSIZE && skb->len)
return skb->len;
return ret;
diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c
index a4a43d9e6e9d..0ed56c9ac1bc 100644
--- a/net/ethtool/wol.c
+++ b/net/ethtool/wol.c
@@ -24,7 +24,7 @@ const struct nla_policy ethnl_wol_get_policy[] = {
static int wol_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
- struct genl_info *info)
+ const struct genl_info *info)
{
struct wol_reply_data *data = WOL_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
@@ -39,7 +39,8 @@ static int wol_prepare_data(const struct ethnl_req_info *req_base,
dev->ethtool_ops->get_wol(dev, &data->wol);
ethnl_ops_complete(dev);
/* do not include password in notifications */
- data->show_sopass = info && (data->wol.supported & WAKE_MAGICSECURE);
+ data->show_sopass = !genl_info_is_ntf(info) &&
+ (data->wol.supported & WAKE_MAGICSECURE);
return 0;
}
diff --git a/net/handshake/Makefile b/net/handshake/Makefile
index 247d73c6ff6e..ef4d9a2112bd 100644
--- a/net/handshake/Makefile
+++ b/net/handshake/Makefile
@@ -8,6 +8,6 @@
#
obj-y += handshake.o
-handshake-y := genl.o netlink.o request.o tlshd.o trace.o
+handshake-y := alert.o genl.o netlink.o request.o tlshd.o trace.o
obj-$(CONFIG_NET_HANDSHAKE_KUNIT_TEST) += handshake-test.o
diff --git a/net/handshake/alert.c b/net/handshake/alert.c
new file mode 100644
index 000000000000..329d91984683
--- /dev/null
+++ b/net/handshake/alert.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Handle the TLS Alert protocol
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+
+#include <net/sock.h>
+#include <net/handshake.h>
+#include <net/tls.h>
+#include <net/tls_prot.h>
+
+#include "handshake.h"
+
+#include <trace/events/handshake.h>
+
+/**
+ * tls_alert_send - send a TLS Alert on a kTLS socket
+ * @sock: open kTLS socket to send on
+ * @level: TLS Alert level
+ * @description: TLS Alert description
+ *
+ * Returns zero on success or a negative errno.
+ */
+int tls_alert_send(struct socket *sock, u8 level, u8 description)
+{
+ u8 record_type = TLS_RECORD_TYPE_ALERT;
+ u8 buf[CMSG_SPACE(sizeof(record_type))];
+ struct msghdr msg = { 0 };
+ struct cmsghdr *cmsg;
+ struct kvec iov;
+ u8 alert[2];
+ int ret;
+
+ trace_tls_alert_send(sock->sk, level, description);
+
+ alert[0] = level;
+ alert[1] = description;
+ iov.iov_base = alert;
+ iov.iov_len = sizeof(alert);
+
+ memset(buf, 0, sizeof(buf));
+ msg.msg_control = buf;
+ msg.msg_controllen = sizeof(buf);
+ msg.msg_flags = MSG_DONTWAIT;
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_TLS;
+ cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(record_type));
+ memcpy(CMSG_DATA(cmsg), &record_type, sizeof(record_type));
+
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, iov.iov_len);
+ ret = sock_sendmsg(sock, &msg);
+ return ret < 0 ? ret : 0;
+}
+
+/**
+ * tls_get_record_type - Look for TLS RECORD_TYPE information
+ * @sk: socket (for IP address information)
+ * @cmsg: incoming message to be parsed
+ *
+ * Returns zero or a TLS_RECORD_TYPE value.
+ */
+u8 tls_get_record_type(const struct sock *sk, const struct cmsghdr *cmsg)
+{
+ u8 record_type;
+
+ if (cmsg->cmsg_level != SOL_TLS)
+ return 0;
+ if (cmsg->cmsg_type != TLS_GET_RECORD_TYPE)
+ return 0;
+
+ record_type = *((u8 *)CMSG_DATA(cmsg));
+ trace_tls_contenttype(sk, record_type);
+ return record_type;
+}
+EXPORT_SYMBOL(tls_get_record_type);
+
+/**
+ * tls_alert_recv - Parse TLS Alert messages
+ * @sk: socket (for IP address information)
+ * @msg: incoming message to be parsed
+ * @level: OUT - TLS AlertLevel value
+ * @description: OUT - TLS AlertDescription value
+ *
+ */
+void tls_alert_recv(const struct sock *sk, const struct msghdr *msg,
+ u8 *level, u8 *description)
+{
+ const struct kvec *iov;
+ u8 *data;
+
+ iov = msg->msg_iter.kvec;
+ data = iov->iov_base;
+ *level = data[0];
+ *description = data[1];
+
+ trace_tls_alert_recv(sk, *level, *description);
+}
+EXPORT_SYMBOL(tls_alert_recv);
diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c
index 6d37bab35c8f..16ed7bfd29e4 100644
--- a/net/handshake/handshake-test.c
+++ b/net/handshake/handshake-test.c
@@ -235,7 +235,7 @@ static void handshake_req_submit_test4(struct kunit *test)
KUNIT_EXPECT_PTR_EQ(test, req, result);
handshake_req_cancel(sock->sk);
- sock_release(sock);
+ fput(filp);
}
static void handshake_req_submit_test5(struct kunit *test)
@@ -272,7 +272,7 @@ static void handshake_req_submit_test5(struct kunit *test)
/* Assert */
KUNIT_EXPECT_EQ(test, err, -EAGAIN);
- sock_release(sock);
+ fput(filp);
hn->hn_pending = saved;
}
@@ -306,7 +306,7 @@ static void handshake_req_submit_test6(struct kunit *test)
KUNIT_EXPECT_EQ(test, err, -EBUSY);
handshake_req_cancel(sock->sk);
- sock_release(sock);
+ fput(filp);
}
static void handshake_req_cancel_test1(struct kunit *test)
@@ -340,7 +340,7 @@ static void handshake_req_cancel_test1(struct kunit *test)
/* Assert */
KUNIT_EXPECT_TRUE(test, result);
- sock_release(sock);
+ fput(filp);
}
static void handshake_req_cancel_test2(struct kunit *test)
@@ -382,7 +382,7 @@ static void handshake_req_cancel_test2(struct kunit *test)
/* Assert */
KUNIT_EXPECT_TRUE(test, result);
- sock_release(sock);
+ fput(filp);
}
static void handshake_req_cancel_test3(struct kunit *test)
@@ -427,7 +427,7 @@ static void handshake_req_cancel_test3(struct kunit *test)
/* Assert */
KUNIT_EXPECT_FALSE(test, result);
- sock_release(sock);
+ fput(filp);
}
static struct handshake_req *handshake_req_destroy_test;
@@ -471,7 +471,7 @@ static void handshake_req_destroy_test1(struct kunit *test)
handshake_req_cancel(sock->sk);
/* Act */
- sock_release(sock);
+ fput(filp);
/* Assert */
KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req);
diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h
index 4dac965c99df..a48163765a7a 100644
--- a/net/handshake/handshake.h
+++ b/net/handshake/handshake.h
@@ -41,8 +41,11 @@ struct handshake_req {
enum hr_flags_bits {
HANDSHAKE_F_REQ_COMPLETED,
+ HANDSHAKE_F_REQ_SESSION,
};
+struct genl_info;
+
/* Invariants for all handshake requests for one transport layer
* security protocol
*/
@@ -63,6 +66,9 @@ enum hp_flags_bits {
HANDSHAKE_F_PROTO_NOTIFY,
};
+/* alert.c */
+int tls_alert_send(struct socket *sock, u8 level, u8 description);
+
/* netlink.c */
int handshake_genl_notify(struct net *net, const struct handshake_proto *proto,
gfp_t flags);
diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c
index 1086653e1fad..d0bc1dd8e65a 100644
--- a/net/handshake/netlink.c
+++ b/net/handshake/netlink.c
@@ -157,26 +157,24 @@ out_status:
int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
- struct handshake_req *req = NULL;
- struct socket *sock = NULL;
+ struct handshake_req *req;
+ struct socket *sock;
int fd, status, err;
if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))
return -EINVAL;
fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]);
- err = 0;
sock = sockfd_lookup(fd, &err);
- if (err) {
- err = -EBADF;
- goto out_status;
- }
+ if (!sock)
+ return err;
req = handshake_req_hash_lookup(sock->sk);
if (!req) {
err = -EBUSY;
+ trace_handshake_cmd_done_err(net, req, sock->sk, err);
fput(sock->file);
- goto out_status;
+ return err;
}
trace_handshake_cmd_done(net, req, sock->sk, fd);
@@ -188,10 +186,6 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
handshake_complete(req, status, info);
fput(sock->file);
return 0;
-
-out_status:
- trace_handshake_cmd_done_err(net, req, sock->sk, err);
- return err;
}
static unsigned int handshake_net_id;
diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c
index b735f5cced2f..bbfb4095ddd6 100644
--- a/net/handshake/tlshd.c
+++ b/net/handshake/tlshd.c
@@ -18,6 +18,7 @@
#include <net/sock.h>
#include <net/handshake.h>
#include <net/genetlink.h>
+#include <net/tls_prot.h>
#include <uapi/linux/keyctl.h>
#include <uapi/linux/handshake.h>
@@ -100,6 +101,9 @@ static void tls_handshake_done(struct handshake_req *req,
if (info)
tls_handshake_remote_peerids(treq, info);
+ if (!status)
+ set_bit(HANDSHAKE_F_REQ_SESSION, &req->hr_flags);
+
treq->th_consumer_done(treq->th_consumer_data, -status,
treq->th_peerid[0]);
}
@@ -424,3 +428,22 @@ bool tls_handshake_cancel(struct sock *sk)
return handshake_req_cancel(sk);
}
EXPORT_SYMBOL(tls_handshake_cancel);
+
+/**
+ * tls_handshake_close - send a Closure alert
+ * @sock: an open socket
+ *
+ */
+void tls_handshake_close(struct socket *sock)
+{
+ struct handshake_req *req;
+
+ req = handshake_req_hash_lookup(sock->sk);
+ if (!req)
+ return;
+ if (!test_and_clear_bit(HANDSHAKE_F_REQ_SESSION, &req->hr_flags))
+ return;
+ tls_alert_send(sock, TLS_ALERT_LEVEL_WARNING,
+ TLS_ALERT_DESC_CLOSE_NOTIFY);
+}
+EXPORT_SYMBOL(tls_handshake_close);
diff --git a/net/handshake/trace.c b/net/handshake/trace.c
index 1c4d8e27e17a..44432d0857b9 100644
--- a/net/handshake/trace.c
+++ b/net/handshake/trace.c
@@ -8,8 +8,10 @@
*/
#include <linux/types.h>
+#include <linux/ipv6.h>
#include <net/sock.h>
+#include <net/inet_sock.h>
#include <net/netlink.h>
#include <net/genetlink.h>
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 629daacc9607..b71dab630a87 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -594,6 +594,7 @@ static int fill_frame_info(struct hsr_frame_info *frame,
proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto;
/* FIXME: */
netdev_warn_once(skb->dev, "VLAN not yet supported");
+ return -EINVAL;
}
frame->is_from_san = false;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index b77f1189d19d..6d14d935ee82 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -288,13 +288,13 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
/* And leave the HSR tag. */
if (ethhdr->h_proto == htons(ETH_P_HSR)) {
- pull_size = sizeof(struct ethhdr);
+ pull_size = sizeof(struct hsr_tag);
skb_pull(skb, pull_size);
total_pull_size += pull_size;
}
/* And leave the HSR sup tag. */
- pull_size = sizeof(struct hsr_tag);
+ pull_size = sizeof(struct hsr_sup_tag);
skb_pull(skb, pull_size);
total_pull_size += pull_size;
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 6851e33df7d1..18e01791ad79 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -83,7 +83,7 @@ struct hsr_vlan_ethhdr {
struct hsr_sup_tlv {
u8 HSR_TLV_type;
u8 HSR_TLV_length;
-};
+} __packed;
/* HSR/PRP Supervision Frame data types.
* Field names as defined in the IEC:2010 standard for HSR.
diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h
index 501552d9753b..8c99e64e1cea 100644
--- a/net/hsr/hsr_netlink.h
+++ b/net/hsr/hsr_netlink.h
@@ -23,7 +23,5 @@ void __exit hsr_netlink_exit(void);
void hsr_nl_ringerror(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN],
struct hsr_port *port);
void hsr_nl_nodedown(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN]);
-void hsr_nl_framedrop(int dropcount, int dev_idx);
-void hsr_nl_linkdown(int dev_idx);
#endif /* __HSR_NETLINK_H */
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index a91283d1e5bf..6dd960ec558c 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -360,6 +360,7 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
struct ctl_table_header *hdr;
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ size_t table_size = ARRAY_SIZE(lowpan_frags_ns_ctl_table);
table = lowpan_frags_ns_ctl_table;
if (!net_eq(net, &init_net)) {
@@ -369,8 +370,10 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
goto err_alloc;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
+ if (net->user_ns != &init_user_ns) {
table[0].procname = NULL;
+ table_size = 0;
+ }
}
table[0].data = &ieee802154_lowpan->fqdir->high_thresh;
@@ -379,7 +382,8 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
table[1].extra2 = &ieee802154_lowpan->fqdir->high_thresh;
table[2].data = &ieee802154_lowpan->fqdir->timeout;
- hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table);
+ hdr = register_net_sysctl_sz(net, "net/ieee802154/6lowpan", table,
+ table_size);
if (hdr == NULL)
goto err_reg;
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index d610c1886160..1a265a421308 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -262,7 +262,7 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
if (!cb->args[0]) {
*wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk),
- info->attrs);
+ info->info.attrs);
if (IS_ERR(*wpan_dev)) {
err = PTR_ERR(*wpan_dev);
goto out_unlock;
@@ -570,7 +570,7 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
struct nl802154_dump_wpan_phy_state *state)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- struct nlattr **tb = info->attrs;
+ struct nlattr **tb = info->info.attrs;
if (tb[NL802154_ATTR_WPAN_PHY])
state->filter_wpan_phy = nla_get_u32(tb[NL802154_ATTR_WPAN_PHY]);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9b2ca2fcc5a1..3d2e30e20473 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -187,24 +187,13 @@ static int inet_autobind(struct sock *sk)
return 0;
}
-/*
- * Move a socket into listening state.
- */
-int inet_listen(struct socket *sock, int backlog)
+int __inet_listen_sk(struct sock *sk, int backlog)
{
- struct sock *sk = sock->sk;
- unsigned char old_state;
+ unsigned char old_state = sk->sk_state;
int err, tcp_fastopen;
- lock_sock(sk);
-
- err = -EINVAL;
- if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
- goto out;
-
- old_state = sk->sk_state;
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
- goto out;
+ return -EINVAL;
WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
/* Really, if the socket is already in listen state
@@ -227,10 +216,27 @@ int inet_listen(struct socket *sock, int backlog)
err = inet_csk_listen_start(sk);
if (err)
- goto out;
+ return err;
+
tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
}
- err = 0;
+ return 0;
+}
+
+/*
+ * Move a socket into listening state.
+ */
+int inet_listen(struct socket *sock, int backlog)
+{
+ struct sock *sk = sock->sk;
+ int err = -EINVAL;
+
+ lock_sock(sk);
+
+ if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
+ goto out;
+
+ err = __inet_listen_sk(sk, backlog);
out:
release_sock(sk);
@@ -325,14 +331,14 @@ lookup_protocol:
sk->sk_reuse = SK_CAN_REUSE;
inet = inet_sk(sk);
- inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
+ inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
- inet->nodefrag = 0;
+ inet_clear_bit(NODEFRAG, sk);
if (SOCK_RAW == sock->type) {
inet->inet_num = protocol;
if (IPPROTO_RAW == protocol)
- inet->hdrincl = 1;
+ inet_set_bit(HDRINCL, sk);
}
if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
@@ -340,7 +346,7 @@ lookup_protocol:
else
inet->pmtudisc = IP_PMTUDISC_WANT;
- inet->inet_id = 0;
+ atomic_set(&inet->inet_id, 0);
sock_init_data(sock, sk);
@@ -350,9 +356,9 @@ lookup_protocol:
sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
inet->uc_ttl = -1;
- inet->mc_loop = 1;
+ inet_set_bit(MC_LOOP, sk);
inet->mc_ttl = 1;
- inet->mc_all = 1;
+ inet_set_bit(MC_ALL, sk);
inet->mc_index = 0;
inet->mc_list = NULL;
inet->rcv_tos = 0;
@@ -431,9 +437,8 @@ int inet_release(struct socket *sock)
}
EXPORT_SYMBOL(inet_release);
-int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
int err;
@@ -454,6 +459,11 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return __inet_bind(sk, uaddr, addr_len, flags);
}
+
+int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+ return inet_bind_sk(sock->sk, uaddr, addr_len);
+}
EXPORT_SYMBOL(inet_bind);
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
@@ -519,7 +529,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
inet->inet_saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
- if (snum || !(inet->bind_address_no_port ||
+ if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) ||
(flags & BIND_FORCE_ADDRESS_NO_PORT))) {
err = sk->sk_prot->get_port(sk, snum);
if (err) {
@@ -646,7 +656,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
err = -EISCONN;
goto out;
case SS_CONNECTING:
- if (inet_sk(sk)->defer_connect)
+ if (inet_test_bit(DEFER_CONNECT, sk))
err = is_sendmsg ? -EINPROGRESS : -EISCONN;
else
err = -EALREADY;
@@ -669,7 +679,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTING;
- if (!err && inet_sk(sk)->defer_connect)
+ if (!err && inet_test_bit(DEFER_CONNECT, sk))
goto out;
/* Just entered SS_CONNECTING state; the only
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 4406d796cc2f..39dcccf0f174 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -51,8 +51,6 @@ static bool is_unsupported(u32 member_offset)
return false;
}
-extern struct btf *btf_vmlinux;
-
static bool bpf_tcp_ca_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 79ae7204e8ed..d048aa833293 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1881,7 +1881,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
old = rcu_dereference_protected(sk_inet->inet_opt,
lockdep_sock_is_held(sk));
- if (sk_inet->is_icsk) {
+ if (inet_test_bit(IS_ICSK, sk)) {
sk_conn = inet_csk(sk);
if (old)
sk_conn->icsk_ext_hdr_len -= old->opt.optlen;
@@ -2051,7 +2051,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
sk_inet = inet_sk(sk);
hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
- if (sk_inet->is_icsk && hdr_delta > 0) {
+ if (inet_test_bit(IS_ICSK, sk) && hdr_delta > 0) {
struct inet_connection_sock *sk_conn = inet_csk(sk);
sk_conn->icsk_ext_hdr_len -= hdr_delta;
sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 4d1af0cd7d99..cb5dbee9e018 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
reuseport_has_conns_set(sk);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
- inet->inet_id = get_random_u16();
+ atomic_set(&inet->inet_id, get_random_u16());
sk_dst_set(sk, &rt->dst);
err = 0;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5deac0517ef7..ca0ff15dc8fa 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -355,14 +355,14 @@ static void __inet_del_ifa(struct in_device *in_dev,
{
struct in_ifaddr *promote = NULL;
struct in_ifaddr *ifa, *ifa1;
- struct in_ifaddr *last_prim;
+ struct in_ifaddr __rcu **last_prim;
struct in_ifaddr *prev_prom = NULL;
int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
ASSERT_RTNL();
ifa1 = rtnl_dereference(*ifap);
- last_prim = rtnl_dereference(in_dev->ifa_list);
+ last_prim = ifap;
if (in_dev->dead)
goto no_promotions;
@@ -376,7 +376,7 @@ static void __inet_del_ifa(struct in_device *in_dev,
while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
ifa1->ifa_scope <= ifa->ifa_scope)
- last_prim = ifa;
+ last_prim = &ifa->ifa_next;
if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
ifa1->ifa_mask != ifa->ifa_mask ||
@@ -440,9 +440,9 @@ no_promotions:
rcu_assign_pointer(prev_prom->ifa_next, next_sec);
- last_sec = rtnl_dereference(last_prim->ifa_next);
+ last_sec = rtnl_dereference(*last_prim);
rcu_assign_pointer(promote->ifa_next, last_sec);
- rcu_assign_pointer(last_prim->ifa_next, promote);
+ rcu_assign_pointer(*last_prim, promote);
}
promote->ifa_flags &= ~IFA_F_SECONDARY;
@@ -509,6 +509,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
return -EEXIST;
}
if (ifa1->ifa_scope != ifa->ifa_scope) {
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
inet_free_ifa(ifa);
return -EINVAL;
}
@@ -664,6 +665,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifm = nlmsg_data(nlh);
in_dev = inetdev_by_index(net, ifm->ifa_index);
if (!in_dev) {
+ NL_SET_ERR_MSG(extack, "ipv4: Device not found");
err = -ENODEV;
goto errout;
}
@@ -688,6 +690,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
+ NL_SET_ERR_MSG(extack, "ipv4: Address not found");
err = -EADDRNOTAVAIL;
errout:
return err;
@@ -839,13 +842,23 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
ifm = nlmsg_data(nlh);
err = -EINVAL;
- if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
+
+ if (ifm->ifa_prefixlen > 32) {
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
+ goto errout;
+ }
+
+ if (!tb[IFA_LOCAL]) {
+ NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
goto errout;
+ }
dev = __dev_get_by_index(net, ifm->ifa_index);
err = -ENODEV;
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "ipv4: Device not found");
goto errout;
+ }
in_dev = __in_dev_get_rtnl(dev);
err = -ENOBUFS;
@@ -897,6 +910,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
ci = nla_data(tb[IFA_CACHEINFO]);
if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
+ NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
err = -EINVAL;
goto errout_free;
}
@@ -954,6 +968,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
int ret = ip_mc_autojoin_config(net, true, ifa);
if (ret < 0) {
+ NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
inet_free_ifa(ifa);
return ret;
}
@@ -967,8 +982,10 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
inet_free_ifa(ifa);
if (nlh->nlmsg_flags & NLM_F_EXCL ||
- !(nlh->nlmsg_flags & NLM_F_REPLACE))
+ !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
return -EEXIST;
+ }
ifa = ifa_existing;
if (ifa->ifa_rt_priority != new_metric) {
@@ -2720,7 +2737,8 @@ static __net_init int devinet_init_net(struct net *net)
goto err_reg_dflt;
err = -ENOMEM;
- forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
+ forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
+ ARRAY_SIZE(ctl_forward_entry));
if (!forw_hdr)
goto err_reg_ctl;
net->ipv4.forw_hdr = forw_hdr;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 65ba18a91865..eafa4a033515 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -278,7 +278,8 @@ void fib_release_info(struct fib_info *fi)
hlist_del(&nexthop_nh->nh_hash);
} endfor_nexthops(fi)
}
- fi->fib_dead = 1;
+ /* Paired with READ_ONCE() from fib_table_lookup() */
+ WRITE_ONCE(fi->fib_dead, 1);
fib_info_put(fi);
}
spin_unlock_bh(&fib_info_lock);
@@ -1581,6 +1582,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
link_it:
ofi = fib_find_info(fi);
if (ofi) {
+ /* fib_table_lookup() should not see @fi yet. */
fi->fib_dead = 1;
free_fib_info(fi);
refcount_inc(&ofi->fib_treeref);
@@ -1619,6 +1621,7 @@ err_inval:
failure:
if (fi) {
+ /* fib_table_lookup() should not see @fi yet. */
fi->fib_dead = 1;
free_fib_info(fi);
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 74d403dbd2b4..d13fb9e76b97 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1582,7 +1582,8 @@ found:
if (fa->fa_dscp &&
inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
continue;
- if (fi->fib_dead)
+ /* Paired with WRITE_ONCE() in fib_release_info() */
+ if (READ_ONCE(fi->fib_dead))
continue;
if (fa->fa_info->fib_scope < flp->flowi4_scope)
continue;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 48ff5f13e797..418e5fb58fd3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -353,8 +353,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
struct flowi4 fl4;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- unsigned int size = mtu;
+ unsigned int size;
+ size = min(mtu, IP_MAX_MTU);
while (1) {
skb = alloc_skb(size + hlen + tlen,
GFP_ATOMIC | __GFP_NOWARN);
@@ -2658,7 +2659,7 @@ int ip_mc_sf_allow(const struct sock *sk, __be32 loc_addr, __be32 rmt_addr,
(sdif && pmc->multi.imr_ifindex == sdif)))
break;
}
- ret = inet->mc_all;
+ ret = inet_test_bit(MC_ALL, sk);
if (!pmc)
goto unlock;
psl = rcu_dereference(pmc->sflist);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f7426926a104..e13a84433413 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -182,17 +182,17 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
r->idiag_inode = sock_i_ino(sk);
memset(&inet_sockopt, 0, sizeof(inet_sockopt));
- inet_sockopt.recverr = inet->recverr;
- inet_sockopt.is_icsk = inet->is_icsk;
- inet_sockopt.freebind = inet->freebind;
- inet_sockopt.hdrincl = inet->hdrincl;
- inet_sockopt.mc_loop = inet->mc_loop;
- inet_sockopt.transparent = inet->transparent;
- inet_sockopt.mc_all = inet->mc_all;
- inet_sockopt.nodefrag = inet->nodefrag;
- inet_sockopt.bind_address_no_port = inet->bind_address_no_port;
- inet_sockopt.recverr_rfc4884 = inet->recverr_rfc4884;
- inet_sockopt.defer_connect = inet->defer_connect;
+ inet_sockopt.recverr = inet_test_bit(RECVERR, sk);
+ inet_sockopt.is_icsk = inet_test_bit(IS_ICSK, sk);
+ inet_sockopt.freebind = inet_test_bit(FREEBIND, sk);
+ inet_sockopt.hdrincl = inet_test_bit(HDRINCL, sk);
+ inet_sockopt.mc_loop = inet_test_bit(MC_LOOP, sk);
+ inet_sockopt.transparent = inet_test_bit(TRANSPARENT, sk);
+ inet_sockopt.mc_all = inet_test_bit(MC_ALL, sk);
+ inet_sockopt.nodefrag = inet_test_bit(NODEFRAG, sk);
+ inet_sockopt.bind_address_no_port = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
+ inet_sockopt.recverr_rfc4884 = inet_test_bit(RECVERR_RFC4884, sk);
+ inet_sockopt.defer_connect = inet_test_bit(DEFER_CONNECT, sk);
if (nla_put(skb, INET_DIAG_SOCKOPT, sizeof(inet_sockopt),
&inet_sockopt))
goto errout;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 0819d6001b9a..c32f5e28758b 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -28,9 +28,9 @@
#include <net/tcp.h>
#include <net/sock_reuseport.h>
-static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
- const __u16 lport, const __be32 faddr,
- const __be16 fport)
+u32 inet_ehashfn(const struct net *net, const __be32 laddr,
+ const __u16 lport, const __be32 faddr,
+ const __be16 fport)
{
static u32 inet_ehash_secret __read_mostly;
@@ -39,6 +39,7 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
return __inet_ehashfn(laddr, lport, faddr, fport,
inet_ehash_secret + net_hash_mix(net));
}
+EXPORT_SYMBOL_GPL(inet_ehashfn);
/* This function handles inet_sock, but also timewait and request sockets
* for IPv4/IPv6.
@@ -332,20 +333,38 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
-static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb, int doff,
- __be32 saddr, __be16 sport,
- __be32 daddr, unsigned short hnum)
+/**
+ * inet_lookup_reuseport() - execute reuseport logic on AF_INET socket if necessary.
+ * @net: network namespace.
+ * @sk: AF_INET socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP.
+ * @skb: context for a potential SK_REUSEPORT program.
+ * @doff: header offset.
+ * @saddr: source address.
+ * @sport: source port.
+ * @daddr: destination address.
+ * @hnum: destination port in host byte order.
+ * @ehashfn: hash function used to generate the fallback hash.
+ *
+ * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
+ * the selected sock or an error.
+ */
+struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum,
+ inet_ehashfn_t *ehashfn)
{
struct sock *reuse_sk = NULL;
u32 phash;
if (sk->sk_reuseport) {
- phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
+ phash = INDIRECT_CALL_2(ehashfn, udp_ehashfn, inet_ehashfn,
+ net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
}
return reuse_sk;
}
+EXPORT_SYMBOL_GPL(inet_lookup_reuseport);
/*
* Here are some nice properties to exploit here. The BSD API
@@ -369,8 +388,8 @@ static struct sock *inet_lhash2_lookup(struct net *net,
sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
- result = lookup_reuseport(net, sk, skb, doff,
- saddr, sport, daddr, hnum);
+ result = inet_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum, inet_ehashfn);
if (result)
return result;
@@ -382,24 +401,23 @@ static struct sock *inet_lhash2_lookup(struct net *net,
return result;
}
-static inline struct sock *inet_lookup_run_bpf(struct net *net,
- struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
- __be32 saddr, __be16 sport,
- __be32 daddr, u16 hnum, const int dif)
+struct sock *inet_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, u16 hnum, const int dif,
+ inet_ehashfn_t *ehashfn)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
- if (hashinfo != net->ipv4.tcp_death_row.hashinfo)
- return NULL; /* only TCP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport,
+ no_reuseport = bpf_sk_lookup_run_v4(net, protocol, saddr, sport,
daddr, hnum, dif, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;
- reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ reuse_sk = inet_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum,
+ ehashfn);
if (reuse_sk)
sk = reuse_sk;
return sk;
@@ -417,9 +435,11 @@ struct sock *__inet_lookup_listener(struct net *net,
unsigned int hash2;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ hashinfo == net->ipv4.tcp_death_row.hashinfo) {
+ result = inet_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff,
+ saddr, sport, daddr, hnum, dif,
+ inet_ehashfn);
if (result)
goto done;
}
@@ -795,41 +815,45 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
const struct net *net, unsigned short port,
int l3mdev, const struct sock *sk)
{
+ if (!net_eq(ib2_net(tb), net) || tb->port != port ||
+ tb->l3mdev != l3mdev)
+ return false;
+
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family != tb->family)
+ if (sk->sk_family != tb->family) {
+ if (sk->sk_family == AF_INET)
+ return ipv6_addr_v4mapped(&tb->v6_rcv_saddr) &&
+ tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
+
return false;
+ }
if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
- else
+ return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
+ return tb->rcv_saddr == sk->sk_rcv_saddr;
}
bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net,
unsigned short port, int l3mdev, const struct sock *sk)
{
+ if (!net_eq(ib2_net(tb), net) || tb->port != port ||
+ tb->l3mdev != l3mdev)
+ return false;
+
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family != tb->family) {
if (sk->sk_family == AF_INET)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_any(&tb->v6_rcv_saddr);
+ return ipv6_addr_any(&tb->v6_rcv_saddr) ||
+ ipv6_addr_v4mapped_any(&tb->v6_rcv_saddr);
return false;
}
if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_any(&tb->v6_rcv_saddr);
- else
+ return ipv6_addr_any(&tb->v6_rcv_saddr);
#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
+ return tb->rcv_saddr == 0;
}
/* The socket's bhash2 hashbucket spinlock must be held when this is called */
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 2c1b245dba8e..dd37a5bf6881 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -203,7 +203,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
tw->tw_reuseport = sk->sk_reuseport;
tw->tw_hash = sk->sk_hash;
tw->tw_ipv6only = 0;
- tw->tw_transparent = inet->transparent;
+ tw->tw_transparent = inet_test_bit(TRANSPARENT, sk);
tw->tw_prot = sk->sk_prot_creator;
atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, sock_net(sk));
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e18931a6d153..66fac1216d46 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,7 +67,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
struct ip_options *opt = &(IPCB(skb)->opt);
__IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
- __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
#ifdef CONFIG_NET_SWITCHDEV
if (skb->offload_l3_fwd_mark) {
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 69c00ffdcf3e..a4941f53b523 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -615,7 +615,8 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
table[2].data = &net->ipv4.fqdir->timeout;
table[3].data = &net->ipv4.fqdir->max_dist;
- hdr = register_net_sysctl(net, "net/ipv4", table);
+ hdr = register_net_sysctl_sz(net, "net/ipv4", table,
+ ARRAY_SIZE(ip4_frags_ns_ctl_table));
if (!hdr)
goto err_reg;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index fe9ead9ee863..5e9c8156656a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -584,7 +584,8 @@ static void ip_sublist_rcv_finish(struct list_head *head)
static struct sk_buff *ip_extract_route_hint(const struct net *net,
struct sk_buff *skb, int rt_type)
{
- if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
+ if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
+ IPCB(skb)->flags & IPSKB_MULTIPATH)
return NULL;
return skb;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6ba1a0fafbaa..4ab877cf6d35 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -133,7 +133,7 @@ EXPORT_SYMBOL_GPL(ip_local_out);
static inline int ip_select_ttl(const struct inet_sock *inet,
const struct dst_entry *dst)
{
- int ttl = inet->uc_ttl;
+ int ttl = READ_ONCE(inet->uc_ttl);
if (ttl < 0)
ttl = ip4_dst_hoplimit(dst);
@@ -207,6 +207,9 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
} else if (rt->rt_type == RTN_BROADCAST)
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
+ /* OUTOCTETS should be counted after fragment */
+ IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
+
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
if (!skb)
@@ -216,7 +219,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);
- if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+ if (res != LWTUNNEL_XMIT_CONTINUE)
return res;
}
@@ -236,7 +239,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
__func__);
kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
- return -EINVAL;
+ return PTR_ERR(neigh);
}
static int ip_finish_output_gso(struct net *net, struct sock *sk,
@@ -366,8 +369,6 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
/*
* If the indicated interface is up and running, send the packet.
*/
- IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -424,8 +425,6 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
- IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -982,7 +981,7 @@ static int __ip_append_data(struct sock *sk,
paged = !!cork->gso_size;
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1039,7 +1038,7 @@ static int __ip_append_data(struct sock *sk,
}
}
} else if ((flags & MSG_SPLICE_PAGES) && length) {
- if (inet->hdrincl)
+ if (inet_test_bit(HDRINCL, sk))
return -EPERM;
if (rt->dst.dev->features & NETIF_F_SG &&
getfrag == ip_generic_getfrag)
@@ -1467,7 +1466,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
* so icmphdr does not in skb linear region and can not get icmp_type
* by icmp_hdr(skb)->type.
*/
- if (sk->sk_type == SOCK_RAW && !inet_sk(sk)->hdrincl)
+ if (sk->sk_type == SOCK_RAW &&
+ !inet_test_bit(HDRINCL, sk))
icmp_type = fl4->fl4_icmp_type;
else
icmp_type = icmp_hdr(skb)->type;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d41bce8927b2..cce9cb25f3b3 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -171,8 +171,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, int tlen, int offset)
{
- struct inet_sock *inet = inet_sk(sk);
- unsigned int flags = inet->cmsg_flags;
+ unsigned long flags = inet_cmsg_flags(inet_sk(sk));
+
+ if (!flags)
+ return;
/* Ordered by supposed usage frequency */
if (flags & IP_CMSG_PKTINFO) {
@@ -431,7 +433,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
serr->port = port;
if (skb_pull(skb, payload - skb->data)) {
- if (inet_sk(sk)->recverr_rfc4884)
+ if (inet_test_bit(RECVERR_RFC4884, sk))
ipv4_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
skb_reset_transport_header(skb);
@@ -444,12 +446,11 @@ EXPORT_SYMBOL_GPL(ip_icmp_error);
void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
{
- struct inet_sock *inet = inet_sk(sk);
struct sock_exterr_skb *serr;
struct iphdr *iph;
struct sk_buff *skb;
- if (!inet->recverr)
+ if (!inet_test_bit(RECVERR, sk))
return;
skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
@@ -510,7 +511,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
* or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
*/
info = PKTINFO_SKB_CB(skb);
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
+ if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
!info->ipi_ifindex)
return false;
@@ -568,7 +569,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
- if (inet_sk(sk)->cmsg_flags)
+ if (inet_cmsg_flags(inet_sk(sk)))
ip_cmsg_recv(msg, skb);
}
@@ -607,17 +608,13 @@ EXPORT_SYMBOL(ip_sock_set_tos);
void ip_sock_set_freebind(struct sock *sk)
{
- lock_sock(sk);
- inet_sk(sk)->freebind = true;
- release_sock(sk);
+ inet_set_bit(FREEBIND, sk);
}
EXPORT_SYMBOL(ip_sock_set_freebind);
void ip_sock_set_recverr(struct sock *sk)
{
- lock_sock(sk);
- inet_sk(sk)->recverr = true;
- release_sock(sk);
+ inet_set_bit(RECVERR, sk);
}
EXPORT_SYMBOL(ip_sock_set_recverr);
@@ -634,9 +631,7 @@ EXPORT_SYMBOL(ip_sock_set_mtu_discover);
void ip_sock_set_pktinfo(struct sock *sk)
{
- lock_sock(sk);
- inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO;
- release_sock(sk);
+ inet_set_bit(PKTINFO, sk);
}
EXPORT_SYMBOL(ip_sock_set_pktinfo);
@@ -950,6 +945,102 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (ip_mroute_opt(optname))
return ip_mroute_setsockopt(sk, optname, optval, optlen);
+ /* Handle options that can be set without locking the socket. */
+ switch (optname) {
+ case IP_PKTINFO:
+ inet_assign_bit(PKTINFO, sk, val);
+ return 0;
+ case IP_RECVTTL:
+ inet_assign_bit(TTL, sk, val);
+ return 0;
+ case IP_RECVTOS:
+ inet_assign_bit(TOS, sk, val);
+ return 0;
+ case IP_RECVOPTS:
+ inet_assign_bit(RECVOPTS, sk, val);
+ return 0;
+ case IP_RETOPTS:
+ inet_assign_bit(RETOPTS, sk, val);
+ return 0;
+ case IP_PASSSEC:
+ inet_assign_bit(PASSSEC, sk, val);
+ return 0;
+ case IP_RECVORIGDSTADDR:
+ inet_assign_bit(ORIGDSTADDR, sk, val);
+ return 0;
+ case IP_RECVFRAGSIZE:
+ if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
+ return -EINVAL;
+ inet_assign_bit(RECVFRAGSIZE, sk, val);
+ return 0;
+ case IP_RECVERR:
+ inet_assign_bit(RECVERR, sk, val);
+ if (!val)
+ skb_errqueue_purge(&sk->sk_error_queue);
+ return 0;
+ case IP_RECVERR_RFC4884:
+ if (val < 0 || val > 1)
+ return -EINVAL;
+ inet_assign_bit(RECVERR_RFC4884, sk, val);
+ return 0;
+ case IP_FREEBIND:
+ if (optlen < 1)
+ return -EINVAL;
+ inet_assign_bit(FREEBIND, sk, val);
+ return 0;
+ case IP_HDRINCL:
+ if (sk->sk_type != SOCK_RAW)
+ return -ENOPROTOOPT;
+ inet_assign_bit(HDRINCL, sk, val);
+ return 0;
+ case IP_MULTICAST_LOOP:
+ if (optlen < 1)
+ return -EINVAL;
+ inet_assign_bit(MC_LOOP, sk, val);
+ return 0;
+ case IP_MULTICAST_ALL:
+ if (optlen < 1)
+ return -EINVAL;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+ inet_assign_bit(MC_ALL, sk, val);
+ return 0;
+ case IP_TRANSPARENT:
+ if (!!val && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ if (optlen < 1)
+ return -EINVAL;
+ inet_assign_bit(TRANSPARENT, sk, val);
+ return 0;
+ case IP_NODEFRAG:
+ if (sk->sk_type != SOCK_RAW)
+ return -ENOPROTOOPT;
+ inet_assign_bit(NODEFRAG, sk, val);
+ return 0;
+ case IP_BIND_ADDRESS_NO_PORT:
+ inet_assign_bit(BIND_ADDRESS_NO_PORT, sk, val);
+ return 0;
+ case IP_TTL:
+ if (optlen < 1)
+ return -EINVAL;
+ if (val != -1 && (val < 1 || val > 255))
+ return -EINVAL;
+ WRITE_ONCE(inet->uc_ttl, val);
+ return 0;
+ case IP_MINTTL:
+ if (optlen < 1)
+ return -EINVAL;
+ if (val < 0 || val > 255)
+ return -EINVAL;
+
+ if (val)
+ static_branch_enable(&ip4_min_ttl);
+
+ WRITE_ONCE(inet->min_ttl, val);
+ return 0;
+ }
+
err = 0;
if (needs_rtnl)
rtnl_lock();
@@ -967,7 +1058,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
break;
old = rcu_dereference_protected(inet->inet_opt,
lockdep_sock_is_held(sk));
- if (inet->is_icsk) {
+ if (inet_test_bit(IS_ICSK, sk)) {
struct inet_connection_sock *icsk = inet_csk(sk);
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == PF_INET ||
@@ -989,111 +1080,27 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
kfree_rcu(old, rcu);
break;
}
- case IP_PKTINFO:
- if (val)
- inet->cmsg_flags |= IP_CMSG_PKTINFO;
- else
- inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
- break;
- case IP_RECVTTL:
- if (val)
- inet->cmsg_flags |= IP_CMSG_TTL;
- else
- inet->cmsg_flags &= ~IP_CMSG_TTL;
- break;
- case IP_RECVTOS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_TOS;
- else
- inet->cmsg_flags &= ~IP_CMSG_TOS;
- break;
- case IP_RECVOPTS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_RECVOPTS;
- else
- inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
- break;
- case IP_RETOPTS:
- if (val)
- inet->cmsg_flags |= IP_CMSG_RETOPTS;
- else
- inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
- break;
- case IP_PASSSEC:
- if (val)
- inet->cmsg_flags |= IP_CMSG_PASSSEC;
- else
- inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
- break;
- case IP_RECVORIGDSTADDR:
- if (val)
- inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
- else
- inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
- break;
case IP_CHECKSUM:
if (val) {
- if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
+ if (!(inet_test_bit(CHECKSUM, sk))) {
inet_inc_convert_csum(sk);
- inet->cmsg_flags |= IP_CMSG_CHECKSUM;
+ inet_set_bit(CHECKSUM, sk);
}
} else {
- if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
+ if (inet_test_bit(CHECKSUM, sk)) {
inet_dec_convert_csum(sk);
- inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
+ inet_clear_bit(CHECKSUM, sk);
}
}
break;
- case IP_RECVFRAGSIZE:
- if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
- goto e_inval;
- if (val)
- inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
- else
- inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
- break;
case IP_TOS: /* This sets both TOS and Precedence */
__ip_sock_set_tos(sk, val);
break;
- case IP_TTL:
- if (optlen < 1)
- goto e_inval;
- if (val != -1 && (val < 1 || val > 255))
- goto e_inval;
- inet->uc_ttl = val;
- break;
- case IP_HDRINCL:
- if (sk->sk_type != SOCK_RAW) {
- err = -ENOPROTOOPT;
- break;
- }
- inet->hdrincl = val ? 1 : 0;
- break;
- case IP_NODEFRAG:
- if (sk->sk_type != SOCK_RAW) {
- err = -ENOPROTOOPT;
- break;
- }
- inet->nodefrag = val ? 1 : 0;
- break;
- case IP_BIND_ADDRESS_NO_PORT:
- inet->bind_address_no_port = val ? 1 : 0;
- break;
case IP_MTU_DISCOVER:
if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
goto e_inval;
inet->pmtudisc = val;
break;
- case IP_RECVERR:
- inet->recverr = !!val;
- if (!val)
- skb_queue_purge(&sk->sk_error_queue);
- break;
- case IP_RECVERR_RFC4884:
- if (val < 0 || val > 1)
- goto e_inval;
- inet->recverr_rfc4884 = !!val;
- break;
case IP_MULTICAST_TTL:
if (sk->sk_type == SOCK_STREAM)
goto e_inval;
@@ -1105,11 +1112,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
goto e_inval;
inet->mc_ttl = val;
break;
- case IP_MULTICAST_LOOP:
- if (optlen < 1)
- goto e_inval;
- inet->mc_loop = !!val;
- break;
case IP_UNICAST_IF:
{
struct net_device *dev = NULL;
@@ -1214,7 +1216,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
struct ip_mreqn mreq;
err = -EPROTO;
- if (inet_sk(sk)->is_icsk)
+ if (inet_test_bit(IS_ICSK, sk))
break;
if (optlen < sizeof(struct ip_mreq))
@@ -1325,20 +1327,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
else
err = ip_set_mcast_msfilter(sk, optval, optlen);
break;
- case IP_MULTICAST_ALL:
- if (optlen < 1)
- goto e_inval;
- if (val != 0 && val != 1)
- goto e_inval;
- inet->mc_all = val;
- break;
-
- case IP_FREEBIND:
- if (optlen < 1)
- goto e_inval;
- inet->freebind = !!val;
- break;
-
case IP_IPSEC_POLICY:
case IP_XFRM_POLICY:
err = -EPERM;
@@ -1347,32 +1335,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
err = xfrm_user_policy(sk, optname, optval, optlen);
break;
- case IP_TRANSPARENT:
- if (!!val && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
- !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
- err = -EPERM;
- break;
- }
- if (optlen < 1)
- goto e_inval;
- inet->transparent = !!val;
- break;
-
- case IP_MINTTL:
- if (optlen < 1)
- goto e_inval;
- if (val < 0 || val > 255)
- goto e_inval;
-
- if (val)
- static_branch_enable(&ip4_min_ttl);
-
- /* tcp_v4_err() and tcp_v4_rcv() might read min_ttl
- * while we are changint it.
- */
- WRITE_ONCE(inet->min_ttl, val);
- break;
-
case IP_LOCAL_PORT_RANGE:
{
const __u16 lo = val;
@@ -1415,7 +1377,7 @@ e_inval:
void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
{
struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
- bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
+ bool prepare = inet_test_bit(PKTINFO, sk) ||
ipv6_sk_rxinfo(sk);
if (prepare && skb_rtable(skb)) {
@@ -1566,6 +1528,72 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (len < 0)
return -EINVAL;
+ /* Handle options that can be read without locking the socket. */
+ switch (optname) {
+ case IP_PKTINFO:
+ val = inet_test_bit(PKTINFO, sk);
+ goto copyval;
+ case IP_RECVTTL:
+ val = inet_test_bit(TTL, sk);
+ goto copyval;
+ case IP_RECVTOS:
+ val = inet_test_bit(TOS, sk);
+ goto copyval;
+ case IP_RECVOPTS:
+ val = inet_test_bit(RECVOPTS, sk);
+ goto copyval;
+ case IP_RETOPTS:
+ val = inet_test_bit(RETOPTS, sk);
+ goto copyval;
+ case IP_PASSSEC:
+ val = inet_test_bit(PASSSEC, sk);
+ goto copyval;
+ case IP_RECVORIGDSTADDR:
+ val = inet_test_bit(ORIGDSTADDR, sk);
+ goto copyval;
+ case IP_CHECKSUM:
+ val = inet_test_bit(CHECKSUM, sk);
+ goto copyval;
+ case IP_RECVFRAGSIZE:
+ val = inet_test_bit(RECVFRAGSIZE, sk);
+ goto copyval;
+ case IP_RECVERR:
+ val = inet_test_bit(RECVERR, sk);
+ goto copyval;
+ case IP_RECVERR_RFC4884:
+ val = inet_test_bit(RECVERR_RFC4884, sk);
+ goto copyval;
+ case IP_FREEBIND:
+ val = inet_test_bit(FREEBIND, sk);
+ goto copyval;
+ case IP_HDRINCL:
+ val = inet_test_bit(HDRINCL, sk);
+ goto copyval;
+ case IP_MULTICAST_LOOP:
+ val = inet_test_bit(MC_LOOP, sk);
+ goto copyval;
+ case IP_MULTICAST_ALL:
+ val = inet_test_bit(MC_ALL, sk);
+ goto copyval;
+ case IP_TRANSPARENT:
+ val = inet_test_bit(TRANSPARENT, sk);
+ goto copyval;
+ case IP_NODEFRAG:
+ val = inet_test_bit(NODEFRAG, sk);
+ goto copyval;
+ case IP_BIND_ADDRESS_NO_PORT:
+ val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
+ goto copyval;
+ case IP_TTL:
+ val = READ_ONCE(inet->uc_ttl);
+ if (val < 0)
+ val = READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_default_ttl);
+ goto copyval;
+ case IP_MINTTL:
+ val = READ_ONCE(inet->min_ttl);
+ goto copyval;
+ }
+
if (needs_rtnl)
rtnl_lock();
sockopt_lock_sock(sk);
@@ -1600,53 +1628,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
}
- case IP_PKTINFO:
- val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
- break;
- case IP_RECVTTL:
- val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
- break;
- case IP_RECVTOS:
- val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
- break;
- case IP_RECVOPTS:
- val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
- break;
- case IP_RETOPTS:
- val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
- break;
- case IP_PASSSEC:
- val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
- break;
- case IP_RECVORIGDSTADDR:
- val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
- break;
- case IP_CHECKSUM:
- val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
- break;
- case IP_RECVFRAGSIZE:
- val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
- break;
case IP_TOS:
val = inet->tos;
break;
- case IP_TTL:
- {
- struct net *net = sock_net(sk);
- val = (inet->uc_ttl == -1 ?
- READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
- inet->uc_ttl);
- break;
- }
- case IP_HDRINCL:
- val = inet->hdrincl;
- break;
- case IP_NODEFRAG:
- val = inet->nodefrag;
- break;
- case IP_BIND_ADDRESS_NO_PORT:
- val = inet->bind_address_no_port;
- break;
case IP_MTU_DISCOVER:
val = inet->pmtudisc;
break;
@@ -1665,18 +1649,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
}
break;
}
- case IP_RECVERR:
- val = inet->recverr;
- break;
- case IP_RECVERR_RFC4884:
- val = inet->recverr_rfc4884;
- break;
case IP_MULTICAST_TTL:
val = inet->mc_ttl;
break;
- case IP_MULTICAST_LOOP:
- val = inet->mc_loop;
- break;
case IP_UNICAST_IF:
val = (__force int)htonl((__u32) inet->uc_index);
break;
@@ -1715,9 +1690,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
else
err = ip_get_mcast_msfilter(sk, optval, optlen, len);
goto out;
- case IP_MULTICAST_ALL:
- val = inet->mc_all;
- break;
case IP_PKTOPTIONS:
{
struct msghdr msg;
@@ -1737,7 +1709,7 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
msg.msg_controllen = len;
msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0;
- if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
+ if (inet_test_bit(PKTINFO, sk)) {
struct in_pktinfo info;
info.ipi_addr.s_addr = inet->inet_rcv_saddr;
@@ -1745,26 +1717,17 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
info.ipi_ifindex = inet->mc_index;
put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
- if (inet->cmsg_flags & IP_CMSG_TTL) {
+ if (inet_test_bit(TTL, sk)) {
int hlim = inet->mc_ttl;
put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
}
- if (inet->cmsg_flags & IP_CMSG_TOS) {
+ if (inet_test_bit(TOS, sk)) {
int tos = inet->rcv_tos;
put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
}
len -= msg.msg_controllen;
return copy_to_sockptr(optlen, &len, sizeof(int));
}
- case IP_FREEBIND:
- val = inet->freebind;
- break;
- case IP_TRANSPARENT:
- val = inet->transparent;
- break;
- case IP_MINTTL:
- val = inet->min_ttl;
- break;
case IP_LOCAL_PORT_RANGE:
val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
break;
@@ -1776,7 +1739,7 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -ENOPROTOOPT;
}
sockopt_release_sock(sk);
-
+copyval:
if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
unsigned char ucval = (unsigned char)val;
len = 1;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3f0c6d602fb7..9e222a57bc2b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1804,7 +1804,6 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
struct ip_options *opt = &(IPCB(skb)->opt);
IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
if (unlikely(opt->optlen))
ip_forward_options(skb);
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index e61ea428ea18..265b39bc435b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -7,6 +7,7 @@
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/module.h>
+#include <linux/rcupdate.h>
#include <linux/skbuff.h>
#include <net/netns/generic.h>
#include <net/route.h>
@@ -65,7 +66,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
struct sock *sk = skb->sk;
if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) &&
- inet_sk(sk)->nodefrag)
+ inet_test_bit(NODEFRAG, sk))
return NF_ACCEPT;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
@@ -113,17 +114,31 @@ static void __net_exit defrag4_net_exit(struct net *net)
}
}
+static const struct nf_defrag_hook defrag_hook = {
+ .owner = THIS_MODULE,
+ .enable = nf_defrag_ipv4_enable,
+ .disable = nf_defrag_ipv4_disable,
+};
+
static struct pernet_operations defrag4_net_ops = {
.exit = defrag4_net_exit,
};
static int __init nf_defrag_init(void)
{
- return register_pernet_subsys(&defrag4_net_ops);
+ int err;
+
+ err = register_pernet_subsys(&defrag4_net_ops);
+ if (err)
+ return err;
+
+ rcu_assign_pointer(nf_defrag_v4_hook, &defrag_hook);
+ return err;
}
static void __exit nf_defrag_fini(void)
{
+ rcu_assign_pointer(nf_defrag_v4_hook, NULL);
unregister_pernet_subsys(&defrag4_net_ops);
}
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index be5498f5dd31..bbff68b5b5d4 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1152,41 +1152,64 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
return !!(state & NUD_VALID);
}
-static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
+static bool nexthop_is_good_nh(const struct nexthop *nh)
+{
+ struct nh_info *nhi = rcu_dereference(nh->nh_info);
+
+ switch (nhi->family) {
+ case AF_INET:
+ return ipv4_good_nh(&nhi->fib_nh);
+ case AF_INET6:
+ return ipv6_good_nh(&nhi->fib6_nh);
+ }
+
+ return false;
+}
+
+static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
{
- struct nexthop *rc = NULL;
int i;
- for (i = 0; i < nhg->num_nh; ++i) {
+ for (i = 0; i < nhg->num_nh; i++) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- struct nh_info *nhi;
if (hash > atomic_read(&nhge->hthr.upper_bound))
continue;
- nhi = rcu_dereference(nhge->nh->nh_info);
- if (nhi->fdb_nh)
- return nhge->nh;
+ return nhge->nh;
+ }
+
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
+{
+ struct nexthop *rc = NULL;
+ int i;
+
+ if (nhg->fdb_nh)
+ return nexthop_select_path_fdb(nhg, hash);
+
+ for (i = 0; i < nhg->num_nh; ++i) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
/* nexthops always check if it is good and does
* not rely on a sysctl for this behavior
*/
- switch (nhi->family) {
- case AF_INET:
- if (ipv4_good_nh(&nhi->fib_nh))
- return nhge->nh;
- break;
- case AF_INET6:
- if (ipv6_good_nh(&nhi->fib6_nh))
- return nhge->nh;
- break;
- }
+ if (!nexthop_is_good_nh(nhge->nh))
+ continue;
if (!rc)
rc = nhge->nh;
+
+ if (hash > atomic_read(&nhge->hthr.upper_bound))
+ continue;
+
+ return nhge->nh;
}
- return rc;
+ return rc ? : nhg->nh_entries[0].nh;
}
static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
@@ -3186,7 +3209,6 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb,
return err;
}
- ctx->idx++;
return 0;
}
@@ -3314,7 +3336,6 @@ static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh,
struct rtm_dump_res_bucket_ctx {
struct rtm_dump_nh_ctx nh;
u16 bucket_index;
- u32 done_nh_idx; /* 1 + the index of the last fully processed NH. */
};
static struct rtm_dump_res_bucket_ctx *
@@ -3343,9 +3364,6 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
u16 bucket_index;
int err;
- if (dd->ctx->nh.idx < dd->ctx->done_nh_idx)
- return 0;
-
nhg = rtnl_dereference(nh->nh_grp);
res_table = rtnl_dereference(nhg->res_table);
for (bucket_index = dd->ctx->bucket_index;
@@ -3372,7 +3390,6 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
return err;
}
- dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
dd->ctx->bucket_index = 0;
return 0;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 25dd78cee179..75e0aee35eb7 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -580,7 +580,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
* RFC1122: OK. Passes ICMP errors back to application, as per
* 4.1.3.3.
*/
- if ((family == AF_INET && !inet_sock->recverr) ||
+ if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) ||
(family == AF_INET6 && !inet6_sk(sk)->recverr)) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
@@ -894,7 +894,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
*addr_len = sizeof(*sin);
}
- if (isk->cmsg_flags)
+ if (inet_cmsg_flags(isk))
ip_cmsg_recv(msg, skb);
#if IS_ENABLED(CONFIG_IPV6)
@@ -921,7 +921,8 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
if (skb->protocol == htons(ETH_P_IPV6) &&
inet6_sk(sk)->rxopt.all)
pingv6_ops.ip6_datagram_recv_specific_ctl(sk, msg, skb);
- else if (skb->protocol == htons(ETH_P_IP) && isk->cmsg_flags)
+ else if (skb->protocol == htons(ETH_P_IP) &&
+ inet_cmsg_flags(isk))
ip_cmsg_recv(msg, skb);
#endif
} else {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index cb381f5aa464..4b5db5d1edc2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -203,8 +203,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
struct inet_sock *inet = inet_sk(sk);
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
- int err = 0;
int harderr = 0;
+ bool recverr;
+ int err = 0;
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
ipv4_sk_update_pmtu(skb, sk, info);
@@ -218,7 +219,8 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
2. Socket is connected (otherwise the error indication
is useless without ip_recverr and error is hard.
*/
- if (!inet->recverr && sk->sk_state != TCP_ESTABLISHED)
+ recverr = inet_test_bit(RECVERR, sk);
+ if (!recverr && sk->sk_state != TCP_ESTABLISHED)
return;
switch (type) {
@@ -245,16 +247,16 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
}
}
- if (inet->recverr) {
+ if (recverr) {
const struct iphdr *iph = (const struct iphdr *)skb->data;
u8 *payload = skb->data + (iph->ihl << 2);
- if (inet->hdrincl)
+ if (inet_test_bit(HDRINCL, sk))
payload = skb->data;
ip_icmp_error(sk, skb, err, 0, info, payload);
}
- if (inet->recverr || harderr) {
+ if (recverr || harderr) {
sk->sk_err = err;
sk_error_report(sk);
}
@@ -413,7 +415,7 @@ error_free:
kfree_skb(skb);
error:
IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
- if (err == -ENOBUFS && !inet->recverr)
+ if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk))
err = 0;
return err;
}
@@ -489,12 +491,8 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (len > 0xFFFF)
goto out;
- /* hdrincl should be READ_ONCE(inet->hdrincl)
- * but READ_ONCE() doesn't work with bit fields.
- * Doing this indirectly yields the same result.
- */
- hdrincl = inet->hdrincl;
- hdrincl = READ_ONCE(hdrincl);
+ hdrincl = inet_test_bit(HDRINCL, sk);
+
/*
* Check the flags.
*/
@@ -645,7 +643,7 @@ back_from_confirm:
ip_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE)) {
err = ip_push_pending_frames(sk, &fl4);
- if (err == -ENOBUFS && !inet->recverr)
+ if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk))
err = 0;
}
release_sock(sk);
@@ -767,7 +765,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
*addr_len = sizeof(*sin);
}
- if (inet->cmsg_flags)
+ if (inet_cmsg_flags(inet))
ip_cmsg_recv(msg, skb);
if (flags & MSG_TRUNC)
copied = skb->len;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92fede388d52..a57062283219 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -515,13 +515,12 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
__u8 scope = RT_SCOPE_UNIVERSE;
if (sk) {
- const struct inet_sock *inet = inet_sk(sk);
-
oif = sk->sk_bound_dev_if;
mark = READ_ONCE(sk->sk_mark);
tos = ip_sock_rt_tos(sk);
scope = ip_sock_rt_scope(sk);
- prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
+ prot = inet_test_bit(HDRINCL, sk) ? IPPROTO_RAW :
+ sk->sk_protocol;
}
flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope,
@@ -555,7 +554,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
ip_sock_rt_scope(sk),
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ inet_test_bit(HDRINCL, sk) ?
+ IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk),
daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
rcu_read_unlock();
@@ -1213,6 +1213,7 @@ EXPORT_INDIRECT_CALLABLE(ipv4_dst_check);
static void ipv4_send_dest_unreach(struct sk_buff *skb)
{
+ struct net_device *dev;
struct ip_options opt;
int res;
@@ -1230,7 +1231,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb)
opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
rcu_read_lock();
- res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+ dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev;
+ res = __ip_options_compile(dev_net(dev), &opt, skb, NULL);
rcu_read_unlock();
if (res)
@@ -2144,6 +2146,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
+ IPCB(skb)->flags |= IPSKB_MULTIPATH;
}
#endif
@@ -3592,6 +3595,7 @@ static struct ctl_table ipv4_route_netns_table[] = {
static __net_init int sysctl_route_net_init(struct net *net)
{
struct ctl_table *tbl;
+ size_t table_size = ARRAY_SIZE(ipv4_route_netns_table);
tbl = ipv4_route_netns_table;
if (!net_eq(net, &init_net)) {
@@ -3603,8 +3607,10 @@ static __net_init int sysctl_route_net_init(struct net *net)
/* Don't export non-whitelisted sysctls to unprivileged users */
if (net->user_ns != &init_user_ns) {
- if (tbl[0].procname != ipv4_route_flush_procname)
+ if (tbl[0].procname != ipv4_route_flush_procname) {
tbl[0].procname = NULL;
+ table_size = 0;
+ }
}
/* Update the variables to point into the current struct net
@@ -3615,7 +3621,8 @@ static __net_init int sysctl_route_net_init(struct net *net)
}
tbl[0].extra1 = net;
- net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
+ net->ipv4.route_hdr = register_net_sysctl_sz(net, "net/ipv4/route",
+ tbl, table_size);
if (!net->ipv4.route_hdr)
goto err_reg;
return 0;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2afb0870648b..6ac890b4073f 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1519,7 +1519,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
}
}
- net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
+ net->ipv4.ipv4_hdr = register_net_sysctl_sz(net, "net/ipv4", table,
+ ARRAY_SIZE(ipv4_net_table));
if (!net->ipv4.ipv4_hdr)
goto err_reg;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8ed52e1e3c99..0c3040a63ebd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -457,6 +457,7 @@ void tcp_init_sock(struct sock *sk)
WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
+ tcp_scaling_ratio_init(sk);
set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
sk_sockets_allocated_inc(sk);
@@ -582,7 +583,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (urg_data & TCP_URG_VALID)
mask |= EPOLLPRI;
- } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
+ } else if (state == TCP_SYN_SENT &&
+ inet_test_bit(DEFER_CONNECT, sk)) {
/* Active TCP fastopen socket with defer_connect
* Return EPOLLOUT so application can call write()
* in order for kernel to generate SYN+data
@@ -1006,7 +1008,7 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
tp->fastopen_req->size = size;
tp->fastopen_req->uarg = uarg;
- if (inet->defer_connect) {
+ if (inet_test_bit(DEFER_CONNECT, sk)) {
err = tcp_connect(sk);
/* Same failure procedure as in tcp_v4/6_connect */
if (err) {
@@ -1024,7 +1026,7 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
if (tp->fastopen_req) {
*copied = tp->fastopen_req->copied;
tcp_free_fastopen_req(tp);
- inet->defer_connect = 0;
+ inet_clear_bit(DEFER_CONNECT, sk);
}
return err;
}
@@ -1065,7 +1067,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
zc = MSG_SPLICE_PAGES;
}
- if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
+ if (unlikely(flags & MSG_FASTOPEN ||
+ inet_test_bit(DEFER_CONNECT, sk)) &&
!tp->repair) {
err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size, uarg);
if (err == -EINPROGRESS && copied_syn > 0)
@@ -1700,7 +1703,7 @@ EXPORT_SYMBOL(tcp_peek_len);
/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
int tcp_set_rcvlowat(struct sock *sk, int val)
{
- int cap;
+ int space, cap;
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
@@ -1715,10 +1718,10 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
return 0;
- val <<= 1;
- if (val > sk->sk_rcvbuf) {
- WRITE_ONCE(sk->sk_rcvbuf, val);
- tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
+ space = tcp_space_from_win(sk, val);
+ if (space > sk->sk_rcvbuf) {
+ WRITE_ONCE(sk->sk_rcvbuf, space);
+ tcp_sk(sk)->window_clamp = val;
}
return 0;
}
@@ -1739,7 +1742,7 @@ void tcp_update_recv_tstamps(struct sk_buff *skb,
}
#ifdef CONFIG_MMU
-const struct vm_operations_struct tcp_vm_ops = {
+static const struct vm_operations_struct tcp_vm_ops = {
};
int tcp_mmap(struct file *file, struct socket *sock,
@@ -2042,13 +2045,10 @@ static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm,
unsigned long address,
bool *mmap_locked)
{
- struct vm_area_struct *vma = NULL;
+ struct vm_area_struct *vma = lock_vma_under_rcu(mm, address);
-#ifdef CONFIG_PER_VMA_LOCK
- vma = lock_vma_under_rcu(mm, address);
-#endif
if (vma) {
- if (!vma_is_tcp(vma)) {
+ if (vma->vm_ops != &tcp_vm_ops) {
vma_end_read(vma);
return NULL;
}
@@ -2058,7 +2058,7 @@ static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm,
mmap_read_lock(mm);
vma = vma_lookup(mm, address);
- if (!vma || !vma_is_tcp(vma)) {
+ if (!vma || vma->vm_ops != &tcp_vm_ops) {
mmap_read_unlock(mm);
return NULL;
}
@@ -2256,14 +2256,14 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
}
}
- if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
has_timestamping = true;
else
tss->ts[0] = (struct timespec64) {0};
}
if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
- if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
has_timestamping = true;
else
tss->ts[2] = (struct timespec64) {0};
@@ -2864,7 +2864,7 @@ adjudge_to_death:
if (sk->sk_state == TCP_FIN_WAIT2) {
struct tcp_sock *tp = tcp_sk(sk);
- if (tp->linger2 < 0) {
+ if (READ_ONCE(tp->linger2) < 0) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
__NET_INC_STATS(sock_net(sk),
@@ -3087,7 +3087,7 @@ int tcp_disconnect(struct sock *sk, int flags)
/* Clean up fastopen related fields */
tcp_free_fastopen_req(tp);
- inet->defer_connect = 0;
+ inet_clear_bit(DEFER_CONNECT, sk);
tp->fastopen_client_fail = 0;
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
@@ -3290,18 +3290,21 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
if (val < 1 || val > MAX_TCP_SYNCNT)
return -EINVAL;
- lock_sock(sk);
WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
- release_sock(sk);
return 0;
}
EXPORT_SYMBOL(tcp_sock_set_syncnt);
-void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+int tcp_sock_set_user_timeout(struct sock *sk, int val)
{
- lock_sock(sk);
+ /* Cap the max time in ms TCP will retry or probe the window
+ * before giving up and aborting (ETIMEDOUT) a connection.
+ */
+ if (val < 0)
+ return -EINVAL;
+
WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
- release_sock(sk);
+ return 0;
}
EXPORT_SYMBOL(tcp_sock_set_user_timeout);
@@ -3344,9 +3347,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val)
if (val < 1 || val > MAX_TCP_KEEPINTVL)
return -EINVAL;
- lock_sock(sk);
WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
- release_sock(sk);
return 0;
}
EXPORT_SYMBOL(tcp_sock_set_keepintvl);
@@ -3356,10 +3357,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val)
if (val < 1 || val > MAX_TCP_KEEPCNT)
return -EINVAL;
- lock_sock(sk);
/* Paired with READ_ONCE() in keepalive_probes() */
WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
- release_sock(sk);
return 0;
}
EXPORT_SYMBOL(tcp_sock_set_keepcnt);
@@ -3461,6 +3460,32 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
+ /* Handle options that can be set without locking the socket. */
+ switch (optname) {
+ case TCP_SYNCNT:
+ return tcp_sock_set_syncnt(sk, val);
+ case TCP_USER_TIMEOUT:
+ return tcp_sock_set_user_timeout(sk, val);
+ case TCP_KEEPINTVL:
+ return tcp_sock_set_keepintvl(sk, val);
+ case TCP_KEEPCNT:
+ return tcp_sock_set_keepcnt(sk, val);
+ case TCP_LINGER2:
+ if (val < 0)
+ WRITE_ONCE(tp->linger2, -1);
+ else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
+ WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
+ else
+ WRITE_ONCE(tp->linger2, val * HZ);
+ return 0;
+ case TCP_DEFER_ACCEPT:
+ /* Translate value in seconds to number of retransmits */
+ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
+ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+ TCP_RTO_MAX / HZ));
+ return 0;
+ }
+
sockopt_lock_sock(sk);
switch (optname) {
@@ -3556,25 +3581,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_KEEPIDLE:
err = tcp_sock_set_keepidle_locked(sk, val);
break;
- case TCP_KEEPINTVL:
- if (val < 1 || val > MAX_TCP_KEEPINTVL)
- err = -EINVAL;
- else
- WRITE_ONCE(tp->keepalive_intvl, val * HZ);
- break;
- case TCP_KEEPCNT:
- if (val < 1 || val > MAX_TCP_KEEPCNT)
- err = -EINVAL;
- else
- WRITE_ONCE(tp->keepalive_probes, val);
- break;
- case TCP_SYNCNT:
- if (val < 1 || val > MAX_TCP_SYNCNT)
- err = -EINVAL;
- else
- WRITE_ONCE(icsk->icsk_syn_retries, val);
- break;
-
case TCP_SAVE_SYN:
/* 0: disable, 1: enable, 2: start from ether_header */
if (val < 0 || val > 2)
@@ -3583,22 +3589,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
tp->save_syn = val;
break;
- case TCP_LINGER2:
- if (val < 0)
- WRITE_ONCE(tp->linger2, -1);
- else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
- WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
- else
- WRITE_ONCE(tp->linger2, val * HZ);
- break;
-
- case TCP_DEFER_ACCEPT:
- /* Translate value in seconds to number of retransmits */
- WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
- secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
- TCP_RTO_MAX / HZ));
- break;
-
case TCP_WINDOW_CLAMP:
err = tcp_set_window_clamp(sk, val);
break;
@@ -3613,16 +3603,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
break;
#endif
- case TCP_USER_TIMEOUT:
- /* Cap the max time in ms TCP will retry or probe the window
- * before giving up and aborting (ETIMEDOUT) a connection.
- */
- if (val < 0)
- err = -EINVAL;
- else
- WRITE_ONCE(icsk->icsk_user_timeout, val);
- break;
-
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 85e4953f1182..8ed54e7334a9 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -451,7 +451,7 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
if (tp->fastopen_connect && !tp->fastopen_req) {
if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) {
- inet_sk(sk)->defer_connect = 1;
+ inet_set_bit(DEFER_CONNECT, sk);
return true;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 57c8af1859c1..06fe1cf645d5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -237,6 +237,16 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
*/
len = skb_shinfo(skb)->gso_size ? : skb->len;
if (len >= icsk->icsk_ack.rcv_mss) {
+ /* Note: divides are still a bit expensive.
+ * For the moment, only adjust scaling_ratio
+ * when we update icsk_ack.rcv_mss.
+ */
+ if (unlikely(len != icsk->icsk_ack.rcv_mss)) {
+ u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE;
+
+ do_div(val, skb->truesize);
+ tcp_sk(sk)->scaling_ratio = val ? val : 1;
+ }
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
/* Account for possibly-removed options */
@@ -287,7 +297,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.quick = quickacks;
}
-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -295,7 +305,6 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
-EXPORT_SYMBOL(tcp_enter_quickack_mode);
/* Send ACKs quickly, if "quick" count is not exhausted
* and the session is not interactive.
@@ -727,8 +736,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvmem, rcvbuf;
u64 rcvwin, grow;
+ int rcvbuf;
/* minimal window to cope with packet losses, assuming
* steady state. Add some cushion because of small variations.
@@ -740,12 +749,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
do_div(grow, tp->rcvq_space.space);
rcvwin += (grow << 1);
- rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
- rcvmem += 128;
-
- do_div(rcvwin, tp->advmss);
- rcvbuf = min_t(u64, rcvwin * rcvmem,
+ rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
@@ -3521,7 +3525,7 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
{
return after(ack, tp->snd_una) ||
after(ack_seq, tp->snd_wl1) ||
- (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
+ (ack_seq == tp->snd_wl1 && (nwin > tp->snd_wnd || !nwin));
}
/* If we update tp->snd_una, also update tp->bytes_acked */
@@ -4122,9 +4126,8 @@ void tcp_parse_options(const struct net *net,
break;
#ifdef CONFIG_TCP_MD5SIG
case TCPOPT_MD5SIG:
- /*
- * The MD5 Hash has already been
- * checked (see tcp_v{4,6}_do_rcv()).
+ /* The MD5 Hash has already been
+ * checked (see tcp_v{4,6}_rcv()).
*/
break;
#endif
@@ -4308,10 +4311,16 @@ static inline bool tcp_paws_discard(const struct sock *sk,
* (borrowed from freebsd)
*/
-static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
+static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
+ u32 seq, u32 end_seq)
{
- return !before(end_seq, tp->rcv_wup) &&
- !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
+ if (before(end_seq, tp->rcv_wup))
+ return SKB_DROP_REASON_TCP_OLD_SEQUENCE;
+
+ if (after(seq, tp->rcv_nxt + tcp_receive_window(tp)))
+ return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;
+
+ return SKB_NOT_DROPPED_YET;
}
/* When we get a reset we do this. */
@@ -5050,13 +5059,19 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
/* Ok. In sequence. In window. */
queue_and_out:
- if (skb_queue_len(&sk->sk_receive_queue) == 0)
- sk_forced_mem_schedule(sk, skb->truesize);
- else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
- reason = SKB_DROP_REASON_PROTO_MEM;
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
+ if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+ /* TODO: maybe ratelimit these WIN 0 ACK ? */
+ inet_csk(sk)->icsk_ack.pending |=
+ (ICSK_ACK_NOMEM | ICSK_ACK_NOW);
+ inet_csk_schedule_ack(sk);
sk->sk_data_ready(sk);
- goto drop;
+
+ if (skb_queue_len(&sk->sk_receive_queue)) {
+ reason = SKB_DROP_REASON_PROTO_MEM;
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
+ goto drop;
+ }
+ sk_forced_mem_schedule(sk, skb->truesize);
}
eaten = tcp_queue_rcv(sk, skb, &fragstolen);
@@ -5734,7 +5749,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
}
/* Step 1: check sequence number */
- if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
+ reason = tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ if (reason) {
/* RFC793, page 37: "In all states except SYN-SENT, all reset
* (RST) segments are validated by checking their SEQ-fields."
* And page 69: "If an incoming segment is not acceptable,
@@ -5751,7 +5767,6 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
} else if (tcp_reset_check(sk, skb)) {
goto reset;
}
- SKB_DR_SET(reason, TCP_INVALID_SEQUENCE);
goto discard;
}
@@ -6315,7 +6330,7 @@ consume:
if (fastopen_fail)
return -1;
if (sk->sk_write_pending ||
- icsk->icsk_accept_queue.rskq_defer_accept ||
+ READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept) ||
inet_csk_in_pingpong_mode(sk)) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
@@ -6615,7 +6630,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
break;
}
- if (tp->linger2 < 0) {
+ if (READ_ONCE(tp->linger2) < 0) {
tcp_done(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
return 1;
@@ -6985,7 +7000,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb, sk);
- inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
+ inet_rsk(req)->no_srccheck = inet_test_bit(TRANSPARENT, sk);
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a59cc4b83861..27140e5cdc06 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -57,6 +57,7 @@
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
@@ -312,7 +313,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr));
}
- inet->inet_id = get_random_u16();
+ atomic_set(&inet->inet_id, get_random_u16());
if (tcp_fastopen_defer_connect(sk, &err))
return err;
@@ -476,7 +477,6 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
const struct iphdr *iph = (const struct iphdr *)skb->data;
struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
struct tcp_sock *tp;
- struct inet_sock *inet;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct sock *sk;
@@ -624,8 +624,8 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
* --ANK (980905)
*/
- inet = inet_sk(sk);
- if (!sock_owned_by_user(sk) && inet->recverr) {
+ if (!sock_owned_by_user(sk) &&
+ inet_test_bit(RECVERR, sk)) {
WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk);
} else { /* Only an error on timeout */
@@ -1596,7 +1596,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
- newinet->inet_id = get_random_u16();
+ atomic_set(&newinet->inet_id, get_random_u16());
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
@@ -2448,6 +2448,8 @@ static void *established_get_first(struct seq_file *seq)
struct hlist_nulls_node *node;
spinlock_t *lock = inet_ehash_lockp(hinfo, st->bucket);
+ cond_resched();
+
/* Lockless fast path for the common case of empty buckets */
if (empty_bucket(hinfo, st))
continue;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 99ac5efe244d..c196759f1d3b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -990,7 +990,7 @@ static struct genl_family tcp_metrics_nl_family __ro_after_init = {
.resv_start_op = TCP_METRICS_CMD_DEL + 1,
};
-static unsigned int tcpmhash_entries;
+static unsigned int tcpmhash_entries __initdata;
static int __init set_tcpmhash_entries(char *str)
{
ssize_t ret;
@@ -1006,15 +1006,11 @@ static int __init set_tcpmhash_entries(char *str)
}
__setup("tcpmhash_entries=", set_tcpmhash_entries);
-static int __net_init tcp_net_metrics_init(struct net *net)
+static void __init tcp_metrics_hash_alloc(void)
{
+ unsigned int slots = tcpmhash_entries;
size_t size;
- unsigned int slots;
- if (!net_eq(net, &init_net))
- return 0;
-
- slots = tcpmhash_entries;
if (!slots) {
if (totalram_pages() >= 128 * 1024)
slots = 16 * 1024;
@@ -1027,9 +1023,7 @@ static int __net_init tcp_net_metrics_init(struct net *net)
tcp_metrics_hash = kvzalloc(size, GFP_KERNEL);
if (!tcp_metrics_hash)
- return -ENOMEM;
-
- return 0;
+ panic("Could not allocate the tcp_metrics hash table\n");
}
static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list)
@@ -1038,7 +1032,6 @@ static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_lis
}
static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
- .init = tcp_net_metrics_init,
.exit_batch = tcp_net_metrics_exit_batch,
};
@@ -1046,9 +1039,11 @@ void __init tcp_metrics_init(void)
{
int ret;
+ tcp_metrics_hash_alloc();
+
ret = register_pernet_subsys(&tcp_net_metrics_ops);
if (ret < 0)
- panic("Could not allocate the tcp_metrics hash table\n");
+ panic("Could not register tcp_net_metrics_ops\n");
ret = genl_register_family(&tcp_metrics_nl_family);
if (ret < 0)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index c8f2aa003387..b98d476f1594 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -289,9 +289,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (tw) {
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
- struct inet_sock *inet = inet_sk(sk);
- tw->tw_transparent = inet->transparent;
+ tw->tw_transparent = inet_test_bit(TRANSPARENT, sk);
tw->tw_mark = sk->sk_mark;
tw->tw_priority = sk->sk_priority;
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
@@ -570,8 +569,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
- if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))
- newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
@@ -794,7 +791,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
return sk;
/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
- if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+ if (req->num_timeout < READ_ONCE(inet_csk(sk)->icsk_accept_queue.rskq_defer_accept) &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
inet_rsk(req)->acked = 1;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 51d8638d4b4c..ccfc8bbf7455 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -257,11 +257,19 @@ EXPORT_SYMBOL(tcp_select_initial_window);
static u16 tcp_select_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 old_win = tp->rcv_wnd;
- u32 cur_win = tcp_receive_window(tp);
- u32 new_win = __tcp_select_window(sk);
struct net *net = sock_net(sk);
+ u32 old_win = tp->rcv_wnd;
+ u32 cur_win, new_win;
+
+ /* Make the window 0 if we failed to queue the data because we
+ * are out of memory. The window is temporary, so we don't store
+ * it on the socket.
+ */
+ if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM))
+ return 0;
+ cur_win = tcp_receive_window(tp);
+ new_win = __tcp_select_window(sk);
if (new_win < cur_win) {
/* Danger Will Robinson!
* Don't update rcv_wup/rcv_wnd here or else
@@ -1293,14 +1301,21 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
}
tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
- /* if no packet is in qdisc/device queue, then allow XPS to select
- * another queue. We can be called from tcp_tsq_handler()
- * which holds one reference to sk.
- *
- * TODO: Ideally, in-flight pure ACK packets should not matter here.
- * One way to get this would be to set skb->truesize = 2 on them.
+ /* We set skb->ooo_okay to one if this packet can select
+ * a different TX queue than prior packets of this flow,
+ * to avoid self inflicted reorders.
+ * The 'other' queue decision is based on current cpu number
+ * if XPS is enabled, or sk->sk_txhash otherwise.
+ * We can switch to another (and better) queue if:
+ * 1) No packet with payload is in qdisc/device queues.
+ * Delays in TX completion can defeat the test
+ * even if packets were already sent.
+ * 2) Or rtx queue is empty.
+ * This mitigates above case if ACK packets for
+ * all prior packets were already processed.
*/
- skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
+ skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) ||
+ tcp_rtx_queue_empty(sk);
/* If we had to use memory reserve to allocate this skb,
* this might cause drops if packet is looped back :
@@ -3459,7 +3474,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
if (delta <= 0)
return;
amt = sk_mem_pages(delta);
- sk->sk_forward_alloc += amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
@@ -3741,11 +3756,6 @@ static void tcp_connect_init(struct sock *sk)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
-#ifdef CONFIG_TCP_MD5SIG
- if (tp->af_specific->md5_lookup(sk, sk))
- tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
-#endif
-
/* If user gave his TCP_MAXSEG, record it to clamp */
if (tp->rx_opt.user_mss)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 206418b6d7c4..984ab4a0421e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -26,14 +26,15 @@
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- u32 elapsed, start_ts;
+ u32 elapsed, start_ts, user_timeout;
s32 remaining;
start_ts = tcp_sk(sk)->retrans_stamp;
- if (!icsk->icsk_user_timeout)
+ user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+ if (!user_timeout)
return icsk->icsk_rto;
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
- remaining = icsk->icsk_user_timeout - elapsed;
+ remaining = user_timeout - elapsed;
if (remaining <= 0)
return 1; /* user timeout has passed; fire ASAP */
@@ -43,16 +44,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- u32 remaining;
+ u32 remaining, user_timeout;
s32 elapsed;
- if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp)
+ user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+ if (!user_timeout || !icsk->icsk_probes_tstamp)
return when;
elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
if (unlikely(elapsed < 0))
elapsed = 0;
- remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed;
+ remaining = msecs_to_jiffies(user_timeout) - elapsed;
remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);
return min_t(u32, remaining, when);
@@ -239,7 +241,8 @@ static int tcp_write_timeout(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
__dst_negative_advice(sk);
- retry_until = icsk->icsk_syn_retries ? :
+ /* Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */
+ retry_until = READ_ONCE(icsk->icsk_syn_retries) ? :
READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
max_retransmits = retry_until;
@@ -269,7 +272,7 @@ static int tcp_write_timeout(struct sock *sk)
}
if (!expired)
expired = retransmits_timed_out(sk, retry_until,
- icsk->icsk_user_timeout);
+ READ_ONCE(icsk->icsk_user_timeout));
tcp_fastopen_active_detect_blackhole(sk, expired);
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
@@ -383,13 +386,16 @@ static void tcp_probe_timer(struct sock *sk)
* corresponding system limit. We also implement similar policy when
* we use RTO to probe window in tcp_retransmit_timer().
*/
- if (!icsk->icsk_probes_tstamp)
+ if (!icsk->icsk_probes_tstamp) {
icsk->icsk_probes_tstamp = tcp_jiffies32;
- else if (icsk->icsk_user_timeout &&
- (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
- msecs_to_jiffies(icsk->icsk_user_timeout))
- goto abort;
+ } else {
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+ if (user_timeout &&
+ (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
+ msecs_to_jiffies(user_timeout))
+ goto abort;
+ }
max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -421,8 +427,10 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
req->rsk_ops->syn_ack_timeout(req);
- /* add one more retry for fastopen */
- max_retries = icsk->icsk_syn_retries ? :
+ /* Add one more retry for fastopen.
+ * Paired with WRITE_ONCE() in tcp_sock_set_syncnt()
+ */
+ max_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
if (req->num_timeout >= max_retries) {
@@ -446,6 +454,22 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
req->timeout << req->num_timeout, TCP_RTO_MAX);
}
+static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ const int timeout = TCP_RTO_MAX * 2;
+ u32 rcv_delta, rtx_delta;
+
+ rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
+ if (rcv_delta <= timeout)
+ return false;
+
+ rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) -
+ (tp->retrans_stamp ?: tcp_skb_timestamp(skb)));
+
+ return rtx_delta > timeout;
+}
/**
* tcp_retransmit_timer() - The TCP retransmit timeout handler
@@ -495,23 +519,26 @@ void tcp_retransmit_timer(struct sock *sk)
* we cannot allow such beasts to hang infinitely.
*/
struct inet_sock *inet = inet_sk(sk);
+ u32 rtx_delta;
+
+ rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp(skb));
if (sk->sk_family == AF_INET) {
- net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
- &inet->inet_daddr,
- ntohs(inet->inet_dport),
- inet->inet_num,
- tp->snd_una, tp->snd_nxt);
+ net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
+ &inet->inet_daddr, ntohs(inet->inet_dport),
+ inet->inet_num, tp->snd_una, tp->snd_nxt,
+ jiffies_to_msecs(jiffies - tp->rcv_tstamp),
+ rtx_delta);
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
- &sk->sk_v6_daddr,
- ntohs(inet->inet_dport),
- inet->inet_num,
- tp->snd_una, tp->snd_nxt);
+ net_dbg_ratelimited("Probing zero-window on %pI6:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
+ &sk->sk_v6_daddr, ntohs(inet->inet_dport),
+ inet->inet_num, tp->snd_una, tp->snd_nxt,
+ jiffies_to_msecs(jiffies - tp->rcv_tstamp),
+ rtx_delta);
}
#endif
- if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) {
+ if (tcp_rtx_probe0_timed_out(sk, skb)) {
tcp_write_err(sk);
goto out;
}
@@ -708,7 +735,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
tcp_mstamp_refresh(tp);
if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
- if (tp->linger2 >= 0) {
+ if (READ_ONCE(tp->linger2) >= 0) {
const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
if (tmo > 0) {
@@ -733,13 +760,15 @@ static void tcp_keepalive_timer (struct timer_list *t)
elapsed = keepalive_time_elapsed(tp);
if (elapsed >= keepalive_time_when(tp)) {
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+
/* If the TCP_USER_TIMEOUT option is enabled, use that
* to determine when to timeout instead.
*/
- if ((icsk->icsk_user_timeout != 0 &&
- elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
+ if ((user_timeout != 0 &&
+ elapsed >= msecs_to_jiffies(user_timeout) &&
icsk->icsk_probes_out > 0) ||
- (icsk->icsk_user_timeout == 0 &&
+ (user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_write_err(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index abfa860367aa..f39b9c844580 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -407,9 +407,9 @@ static int compute_score(struct sock *sk, struct net *net,
return score;
}
-static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
- const __u16 lport, const __be32 faddr,
- const __be16 fport)
+INDIRECT_CALLABLE_SCOPE
+u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
+ const __be32 faddr, const __be16 fport)
{
static u32 udp_ehash_secret __read_mostly;
@@ -419,22 +419,6 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
udp_ehash_secret + net_hash_mix(net));
}
-static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb,
- __be32 saddr, __be16 sport,
- __be32 daddr, unsigned short hnum)
-{
- struct sock *reuse_sk = NULL;
- u32 hash;
-
- if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
- hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
- reuse_sk = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- }
- return reuse_sk;
-}
-
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
@@ -452,42 +436,36 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- result = lookup_reuseport(net, sk, skb,
- saddr, sport, daddr, hnum);
+ badness = score;
+
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ result = sk;
+ continue;
+ }
+
+ result = inet_lookup_reuseport(net, sk, skb, sizeof(struct udphdr),
+ saddr, sport, daddr, hnum, udp_ehashfn);
+ if (!result) {
+ result = sk;
+ continue;
+ }
+
/* Fall back to scoring if group has connections */
- if (result && !reuseport_has_conns(sk))
+ if (!reuseport_has_conns(sk))
return result;
- result = result ? : sk;
- badness = score;
+ /* Reuseport logic returned an error, keep original score. */
+ if (IS_ERR(result))
+ continue;
+
+ badness = compute_score(result, net, saddr, sport,
+ daddr, hnum, dif, sdif);
+
}
}
return result;
}
-static struct sock *udp4_lookup_run_bpf(struct net *net,
- struct udp_table *udptable,
- struct sk_buff *skb,
- __be32 saddr, __be16 sport,
- __be32 daddr, u16 hnum, const int dif)
-{
- struct sock *sk, *reuse_sk;
- bool no_reuseport;
-
- if (udptable != net->ipv4.udp_table)
- return NULL; /* only UDP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport,
- daddr, hnum, dif, &sk);
- if (no_reuseport || IS_ERR_OR_NULL(sk))
- return sk;
-
- reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
- if (reuse_sk)
- sk = reuse_sk;
- return sk;
-}
-
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
@@ -512,9 +490,11 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
goto done;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- sk = udp4_lookup_run_bpf(net, udptable, skb,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ udptable == net->ipv4.udp_table) {
+ sk = inet_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr),
+ saddr, sport, daddr, hnum, dif,
+ udp_ehashfn);
if (sk) {
result = sk;
goto done;
@@ -799,7 +779,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
(u8 *)(uh+1));
goto out;
}
- if (!inet->recverr) {
+ if (!inet_test_bit(RECVERR, sk)) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else
@@ -982,7 +962,8 @@ csum_partial:
send:
err = ip_send_skb(sock_net(sk), skb);
if (err) {
- if (err == -ENOBUFS && !inet->recverr) {
+ if (err == -ENOBUFS &&
+ !inet_test_bit(RECVERR, sk)) {
UDP_INC_STATS(sock_net(sk),
UDP_MIB_SNDBUFERRORS, is_udplite);
err = 0;
@@ -1433,9 +1414,9 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
spin_lock(&sk_queue->lock);
- sk->sk_forward_alloc += size;
+ sk_forward_alloc_add(sk, size);
amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
- sk->sk_forward_alloc -= amt;
+ sk_forward_alloc_add(sk, -amt);
if (amt)
__sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
@@ -1546,7 +1527,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
goto uncharge_drop;
}
- sk->sk_forward_alloc -= size;
+ sk_forward_alloc_add(sk, -size);
/* no need to setup a destructor, we will explicitly release the
* forward allocated memory on dequeue
@@ -1557,7 +1538,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
spin_unlock(&list->lock);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
+ INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
busylock_release(busy);
return 0;
@@ -1890,7 +1871,7 @@ try_again:
if (udp_sk(sk)->gro_enabled)
udp_cmsg_recv(msg, sk, skb);
- if (inet->cmsg_flags)
+ if (inet_cmsg_flags(inet))
ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
err = copied;
@@ -2412,7 +2393,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (udp4_csum_init(skb, uh, proto))
goto csum_error;
- sk = skb_steal_sock(skb, &refcounted);
+ sk = inet_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest,
+ &refcounted, udp_ehashfn);
+ if (IS_ERR(sk))
+ goto no_sk;
+
if (sk) {
struct dst_entry *dst = skb_dst(skb);
int ret;
@@ -2433,7 +2418,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk)
return udp_unicast_rcv_skb(sk, skb, uh);
-
+no_sk:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
nf_reset_ct(skb);
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 5f8104cf082d..9b18f371af0d 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -63,7 +63,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
struct sock *sk = sock->sk;
/* Disable multicast loopback */
- inet_sk(sk)->mc_loop = 0;
+ inet_clear_bit(MC_LOOP, sk);
/* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
inet_inc_convert_csum(sk);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 9403bbaf1b61..c33bca2c3841 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -124,22 +124,13 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
xfrm_dst_destroy(xdst);
}
-static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int unregister)
-{
- if (!unregister)
- return;
-
- xfrm_dst_ifdown(dst, dev);
-}
-
static struct dst_ops xfrm4_dst_ops_template = {
.family = AF_INET,
.update_pmtu = xfrm4_update_pmtu,
.redirect = xfrm4_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm4_dst_destroy,
- .ifdown = xfrm4_dst_ifdown,
+ .ifdown = xfrm_dst_ifdown,
.local_out = __ip_local_out,
.gc_thresh = 32768,
};
@@ -178,7 +169,8 @@ static __net_init int xfrm4_net_sysctl_init(struct net *net)
table[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh;
}
- hdr = register_net_sysctl(net, "net/ipv4", table);
+ hdr = register_net_sysctl_sz(net, "net/ipv4", table,
+ ARRAY_SIZE(xfrm4_policy_table));
if (!hdr)
goto err_reg;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 94cec2075eee..0b6ee962c84e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -202,6 +202,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
+ .accept_ra_min_lft = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -262,6 +263,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
+ .accept_ra_min_lft = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -1061,20 +1063,28 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
struct fib6_info *f6i = NULL;
int err = 0;
- if (addr_type == IPV6_ADDR_ANY ||
- (addr_type & IPV6_ADDR_MULTICAST &&
- !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) ||
- (!(idev->dev->flags & IFF_LOOPBACK) &&
- !netif_is_l3_master(idev->dev) &&
- addr_type & IPV6_ADDR_LOOPBACK))
+ if (addr_type == IPV6_ADDR_ANY) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid address");
return ERR_PTR(-EADDRNOTAVAIL);
+ } else if (addr_type & IPV6_ADDR_MULTICAST &&
+ !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot assign multicast address without \"IFA_F_MCAUTOJOIN\" flag");
+ return ERR_PTR(-EADDRNOTAVAIL);
+ } else if (!(idev->dev->flags & IFF_LOOPBACK) &&
+ !netif_is_l3_master(idev->dev) &&
+ addr_type & IPV6_ADDR_LOOPBACK) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot assign loopback address on this device");
+ return ERR_PTR(-EADDRNOTAVAIL);
+ }
if (idev->dead) {
- err = -ENODEV; /*XXX*/
+ NL_SET_ERR_MSG_MOD(extack, "device is going away");
+ err = -ENODEV;
goto out;
}
if (idev->cnf.disable_ipv6) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
err = -EACCES;
goto out;
}
@@ -1101,7 +1111,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
goto out;
}
- f6i = addrconf_f6i_alloc(net, idev, cfg->pfx, false, gfp_flags);
+ f6i = addrconf_f6i_alloc(net, idev, cfg->pfx, false, gfp_flags, extack);
if (IS_ERR(f6i)) {
err = PTR_ERR(f6i);
f6i = NULL;
@@ -1368,7 +1378,7 @@ retry:
* idev->desync_factor if it's larger
*/
cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
- max_desync_factor = min_t(__u32,
+ max_desync_factor = min_t(long,
idev->cnf.max_desync_factor,
cnf_temp_preferred_lft - regen_advance);
@@ -2731,6 +2741,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
return;
}
+ if (valid_lft != 0 && valid_lft < in6_dev->cnf.accept_ra_min_lft)
+ goto put;
+
/*
* Two things going on here:
* 1) Add routes for on-link prefixes
@@ -2925,30 +2938,40 @@ static int inet6_addr_add(struct net *net, int ifindex,
ASSERT_RTNL();
- if (cfg->plen > 128)
+ if (cfg->plen > 128) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length");
return -EINVAL;
+ }
/* check the lifetime */
- if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft)
+ if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft) {
+ NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid");
return -EINVAL;
+ }
- if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64)
+ if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64) {
+ NL_SET_ERR_MSG_MOD(extack, "address with \"mngtmpaddr\" flag must have a prefix length of 64");
return -EINVAL;
+ }
dev = __dev_get_by_index(net, ifindex);
if (!dev)
return -ENODEV;
idev = addrconf_add_dev(dev);
- if (IS_ERR(idev))
+ if (IS_ERR(idev)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
return PTR_ERR(idev);
+ }
if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk,
true, cfg->pfx, ifindex);
- if (ret < 0)
+ if (ret < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Multicast auto join failed");
return ret;
+ }
}
cfg->scope = ipv6_addr_scope(cfg->pfx);
@@ -3005,22 +3028,29 @@ static int inet6_addr_add(struct net *net, int ifindex,
}
static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
- const struct in6_addr *pfx, unsigned int plen)
+ const struct in6_addr *pfx, unsigned int plen,
+ struct netlink_ext_ack *extack)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
struct net_device *dev;
- if (plen > 128)
+ if (plen > 128) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length");
return -EINVAL;
+ }
dev = __dev_get_by_index(net, ifindex);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
return -ENODEV;
+ }
idev = __in6_dev_get(dev);
- if (!idev)
+ if (!idev) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
return -ENXIO;
+ }
read_lock_bh(&idev->lock);
list_for_each_entry(ifp, &idev->addr_list, if_list) {
@@ -3043,6 +3073,8 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
}
}
read_unlock_bh(&idev->lock);
+
+ NL_SET_ERR_MSG_MOD(extack, "address not found");
return -EADDRNOTAVAIL;
}
@@ -3085,7 +3117,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
rtnl_lock();
err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
- ireq.ifr6_prefixlen);
+ ireq.ifr6_prefixlen, NULL);
rtnl_unlock();
return err;
}
@@ -3488,7 +3520,7 @@ static int fixup_permanent_addr(struct net *net,
struct fib6_info *f6i, *prev;
f6i = addrconf_f6i_alloc(net, idev, &ifp->addr, false,
- GFP_ATOMIC);
+ GFP_ATOMIC, NULL);
if (IS_ERR(f6i))
return PTR_ERR(f6i);
@@ -4698,7 +4730,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifa_flags &= IFA_F_MANAGETEMPADDR;
return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
- ifm->ifa_prefixlen);
+ ifm->ifa_prefixlen, extack);
}
static int modify_prefix_route(struct inet6_ifaddr *ifp,
@@ -4903,8 +4935,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
}
dev = __dev_get_by_index(net, ifm->ifa_index);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
return -ENODEV;
+ }
if (tb[IFA_FLAGS])
cfg.ifa_flags = nla_get_u32(tb[IFA_FLAGS]);
@@ -4939,10 +4973,12 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
}
if (nlh->nlmsg_flags & NLM_F_EXCL ||
- !(nlh->nlmsg_flags & NLM_F_REPLACE))
+ !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG_MOD(extack, "address already assigned");
err = -EEXIST;
- else
+ } else {
err = inet6_addr_modify(net, ifa, &cfg);
+ }
in6_ifa_put(ifa);
@@ -5602,6 +5638,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
+ array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft;
}
static inline size_t inet6_ifla6_size(void)
@@ -6796,6 +6833,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "accept_ra_min_lft",
+ .data = &ipv6_devconf.accept_ra_min_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "accept_ra_pinfo",
.data = &ipv6_devconf.accept_ra_pinfo,
.maxlen = sizeof(int),
@@ -7091,7 +7135,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name);
- p->sysctl_header = register_net_sysctl(net, path, table);
+ p->sysctl_header = register_net_sysctl_sz(net, path, table,
+ ARRAY_SIZE(addrconf_sysctl));
if (!p->sysctl_header)
goto free;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 5d593ddc0347..368824fe9719 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -102,9 +102,9 @@ bool ipv6_mod_enabled(void)
}
EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
-static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
+static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
- const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
+ const int offset = sk->sk_prot->ipv6_pinfo_offset;
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
@@ -200,12 +200,12 @@ lookup_protocol:
sk->sk_reuse = SK_CAN_REUSE;
inet = inet_sk(sk);
- inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
+ inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
if (SOCK_RAW == sock->type) {
inet->inet_num = protocol;
if (IPPROTO_RAW == protocol)
- inet->hdrincl = 1;
+ inet_set_bit(HDRINCL, sk);
}
sk->sk_destruct = inet6_sock_destruct;
@@ -229,7 +229,7 @@ lookup_protocol:
*/
inet->uc_ttl = -1;
- inet->mc_loop = 1;
+ inet_set_bit(MC_LOOP, sk);
inet->mc_ttl = 1;
inet->mc_index = 0;
RCU_INIT_POINTER(inet->mc_list, NULL);
@@ -399,7 +399,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
sk->sk_ipv6only = 1;
/* Make sure we are allowed to bind here. */
- if (snum || !(inet->bind_address_no_port ||
+ if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) ||
(flags & BIND_FORCE_ADDRESS_NO_PORT))) {
err = sk->sk_prot->get_port(sk, snum);
if (err) {
@@ -435,10 +435,8 @@ out_unlock:
goto out;
}
-/* bind for INET6 API */
-int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct sock *sk = sock->sk;
u32 flags = BIND_WITH_LOCK;
const struct proto *prot;
int err = 0;
@@ -462,6 +460,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
return __inet6_bind(sk, uaddr, addr_len, flags);
}
+
+/* bind for INET6 API */
+int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+ return inet6_bind_sk(sock->sk, uaddr, addr_len);
+}
EXPORT_SYMBOL(inet6_bind);
int inet6_release(struct socket *sock)
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index dacdea7fcb62..bb17f484ee2c 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -305,7 +305,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
}
net = dev_net(idev->dev);
- f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC);
+ f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC, NULL);
if (IS_ERR(f6i)) {
err = PTR_ERR(f6i);
goto out;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9b6818453afe..41ebc4e57473 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -38,10 +38,11 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a)
return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
}
-static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
+static void ip6_datagram_flow_key_init(struct flowi6 *fl6,
+ const struct sock *sk)
{
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
int oif = sk->sk_bound_dev_if;
memset(fl6, 0, sizeof(*fl6));
@@ -523,7 +524,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
} else {
ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
&sin->sin6_addr);
- if (inet_sk(sk)->cmsg_flags)
+ if (inet_cmsg_flags(inet_sk(sk)))
ip_cmsg_recv(msg, skb);
}
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 202fc3aaa83c..4952ae792450 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -612,8 +612,6 @@ looped_back:
kfree(buf);
- skb_dst_drop(skb);
-
ip6_route_input(skb);
if (skb_dst(skb)->error) {
@@ -650,7 +648,6 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
struct inet6_dev *idev = __in6_dev_get(skb->dev);
struct inet6_skb_parm *opt = IP6CB(skb);
struct in6_addr *addr = NULL;
- struct in6_addr daddr;
int n, i;
struct ipv6_rt_hdr *hdr;
struct rt0_hdr *rthdr;
@@ -798,9 +795,7 @@ looped_back:
return -1;
}
- daddr = *addr;
- *addr = ipv6_hdr(skb)->daddr;
- ipv6_hdr(skb)->daddr = daddr;
+ swap(*addr, ipv6_hdr(skb)->daddr);
ip6_route_input(skb);
if (skb_dst(skb)->error) {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 65fa5014bc85..93a594a901d1 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -1034,11 +1034,9 @@ drop_no_count:
return 0;
}
-void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
- u8 type,
+void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- int oif)
+ const struct in6_addr *daddr, int oif)
{
memset(fl6, 0, sizeof(*fl6));
fl6->saddr = *saddr;
@@ -1229,4 +1227,9 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
}
return table;
}
+
+size_t ipv6_icmp_sysctl_table_size(void)
+{
+ return ARRAY_SIZE(ipv6_icmp_table_template);
+}
#endif
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 3faf62530d6a..69caed07315f 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <net/genetlink.h>
-#include <net/ila.h>
#include <net/netns/generic.h>
#include <uapi/linux/genetlink.h>
#include "ila.h"
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index bee45dfeb187..67e8c9440977 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -5,7 +5,6 @@
#include <linux/rhashtable.h>
#include <linux/vmalloc.h>
#include <net/genetlink.h>
-#include <net/ila.h>
#include <net/netns/generic.h>
#include <uapi/linux/genetlink.h>
#include "ila.h"
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b64b49012655..b0e8d278e8a9 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -39,6 +39,7 @@ u32 inet6_ehashfn(const struct net *net,
return __inet6_ehashfn(lhash, lport, fhash, fport,
inet6_ehash_secret + net_hash_mix(net));
}
+EXPORT_SYMBOL_GPL(inet6_ehashfn);
/*
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
@@ -111,22 +112,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
-static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb, int doff,
- const struct in6_addr *saddr,
- __be16 sport,
- const struct in6_addr *daddr,
- unsigned short hnum)
+/**
+ * inet6_lookup_reuseport() - execute reuseport logic on AF_INET6 socket if necessary.
+ * @net: network namespace.
+ * @sk: AF_INET6 socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP.
+ * @skb: context for a potential SK_REUSEPORT program.
+ * @doff: header offset.
+ * @saddr: source address.
+ * @sport: source port.
+ * @daddr: destination address.
+ * @hnum: destination port in host byte order.
+ * @ehashfn: hash function used to generate the fallback hash.
+ *
+ * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
+ * the selected sock or an error.
+ */
+struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned short hnum,
+ inet6_ehashfn_t *ehashfn)
{
struct sock *reuse_sk = NULL;
u32 phash;
if (sk->sk_reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+ phash = INDIRECT_CALL_INET(ehashfn, udp6_ehashfn, inet6_ehashfn,
+ net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
}
return reuse_sk;
}
+EXPORT_SYMBOL_GPL(inet6_lookup_reuseport);
/* called with rcu_read_lock() */
static struct sock *inet6_lhash2_lookup(struct net *net,
@@ -143,8 +162,8 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
- result = lookup_reuseport(net, sk, skb, doff,
- saddr, sport, daddr, hnum);
+ result = inet6_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum, inet6_ehashfn);
if (result)
return result;
@@ -156,30 +175,30 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
return result;
}
-static inline struct sock *inet6_lookup_run_bpf(struct net *net,
- struct inet_hashinfo *hashinfo,
- struct sk_buff *skb, int doff,
- const struct in6_addr *saddr,
- const __be16 sport,
- const struct in6_addr *daddr,
- const u16 hnum, const int dif)
+struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+ int protocol,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport,
+ const struct in6_addr *daddr,
+ const u16 hnum, const int dif,
+ inet6_ehashfn_t *ehashfn)
{
struct sock *sk, *reuse_sk;
bool no_reuseport;
- if (hashinfo != net->ipv4.tcp_death_row.hashinfo)
- return NULL; /* only TCP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport,
+ no_reuseport = bpf_sk_lookup_run_v6(net, protocol, saddr, sport,
daddr, hnum, dif, &sk);
if (no_reuseport || IS_ERR_OR_NULL(sk))
return sk;
- reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+ reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff,
+ saddr, sport, daddr, hnum, ehashfn);
if (reuse_sk)
sk = reuse_sk;
return sk;
}
+EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup);
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
@@ -193,9 +212,11 @@ struct sock *inet6_lookup_listener(struct net *net,
unsigned int hash2;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ hashinfo == net->ipv4.tcp_death_row.hashinfo) {
+ result = inet6_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff,
+ saddr, sport, daddr, hnum, dif,
+ inet6_ehashfn);
if (result)
goto done;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index bac768d36cc1..28b01a068412 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
+ INIT_HLIST_NODE(&f6i->gc_link);
+
return f6i;
}
@@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
net->ipv6.fib6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
+ INIT_HLIST_HEAD(&table->tb6_gc_hlist);
}
return table;
@@ -1057,6 +1060,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
lockdep_is_held(&table->tb6_lock));
}
}
+
+ fib6_clean_expires_locked(rt);
}
/*
@@ -1118,9 +1123,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
if (!(rt->fib6_flags & RTF_EXPIRES))
- fib6_clean_expires(iter);
+ fib6_clean_expires_locked(iter);
else
- fib6_set_expires(iter, rt->expires);
+ fib6_set_expires_locked(iter,
+ rt->expires);
if (rt->fib6_pmtu)
fib6_metric_set(iter, RTAX_MTU,
@@ -1479,6 +1485,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
+
+ if (fib6_has_expires(rt))
+ hlist_add_head(&rt->gc_link, &table->tb6_gc_hlist);
+
fib6_start_gc(info->nl_net, rt);
}
@@ -2285,9 +2295,8 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-static int fib6_age(struct fib6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{
- struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;
/*
@@ -2295,7 +2304,7 @@ static int fib6_age(struct fib6_info *rt, void *arg)
* Routes are expired even if they are in use.
*/
- if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
+ if (fib6_has_expires(rt) && rt->expires) {
if (time_after(now, rt->expires)) {
RT6_TRACE("expiring %p\n", rt);
return -1;
@@ -2312,6 +2321,40 @@ static int fib6_age(struct fib6_info *rt, void *arg)
return 0;
}
+static void fib6_gc_table(struct net *net,
+ struct fib6_table *tb6,
+ struct fib6_gc_args *gc_args)
+{
+ struct fib6_info *rt;
+ struct hlist_node *n;
+ struct nl_info info = {
+ .nl_net = net,
+ .skip_notify = false,
+ };
+
+ hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
+ if (fib6_age(rt, gc_args) == -1)
+ fib6_del(rt, &info);
+}
+
+static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
+{
+ struct fib6_table *table;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+ spin_lock_bh(&table->tb6_lock);
+ fib6_gc_table(net, table, gc_args);
+ spin_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
+
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
struct fib6_gc_args gc_args;
@@ -2327,7 +2370,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = 0;
- fib6_clean_all(net, fib6_age, &gc_args);
+ fib6_gc_all(net, &gc_args);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index d94041bb4287..b8378814532c 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -99,7 +99,8 @@ static bool ip6_can_use_hint(const struct sk_buff *skb,
static struct sk_buff *ip6_extract_route_hint(const struct net *net,
struct sk_buff *skb)
{
- if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
+ if (fib6_routes_require_src(net) || fib6_has_custom_rules(net) ||
+ IP6CB(skb)->flags & IP6SKB_MULTIPATH)
return NULL;
return skb;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1e8c90e97608..54fc4c711f2c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -113,7 +113,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);
- if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+ if (res != LWTUNNEL_XMIT_CONTINUE)
return res;
}
@@ -451,7 +451,6 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
struct dst_entry *dst = skb_dst(skb);
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
- __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
#ifdef CONFIG_NET_SWITCHDEV
if (skb->offload_l3_fwd_mark) {
@@ -1502,7 +1501,7 @@ static int __ip6_append_data(struct sock *sk,
orig_mtu = mtu;
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1591,7 +1590,7 @@ emsgsize:
}
}
} else if ((flags & MSG_SPLICE_PAGES) && length) {
- if (inet_sk(sk)->hdrincl)
+ if (inet_test_bit(HDRINCL, sk))
return -EPERM;
if (rt->dst.dev->features & NETIF_F_SG &&
getfrag == ip_generic_getfrag)
@@ -1693,7 +1692,10 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
copy = datalen - transhdrlen - fraggap - pagedlen;
- if (copy < 0) {
+ /* [!] NOTE: copy may be negative if pagedlen>0
+ * because then the equation may reduces to -fraggap.
+ */
+ if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) {
err = -EINVAL;
goto error;
}
@@ -1744,6 +1746,8 @@ alloc_new_skb:
err = -EFAULT;
kfree_skb(skb);
goto error;
+ } else if (flags & MSG_SPLICE_PAGES) {
+ copy = 0;
}
offset += copy;
@@ -1791,6 +1795,10 @@ alloc_new_skb:
} else if (flags & MSG_SPLICE_PAGES) {
struct msghdr *msg = from;
+ err = -EIO;
+ if (WARN_ON_ONCE(copy > msg->msg_iter.count))
+ goto error;
+
err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
sk->sk_allocation);
if (err < 0)
@@ -1986,7 +1994,8 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
u8 icmp6_type;
- if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
+ if (sk->sk_socket->type == SOCK_RAW &&
+ !inet_test_bit(HDRINCL, sk))
icmp6_type = fl6->fl6_icmp_type;
else
icmp6_type = icmp6_hdr(skb)->icmp6_type;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 67a3b8f6e72b..30ca064b76ef 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2010,8 +2010,6 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
{
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTOCTETS, skb->len);
return dst_output(net, sk, skb);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index ae818ff46224..0e2a0847b387 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -102,7 +102,7 @@ int ip6_ra_control(struct sock *sk, int sel)
struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
struct ipv6_txoptions *opt)
{
- if (inet_sk(sk)->is_icsk) {
+ if (inet_test_bit(IS_ICSK, sk)) {
if (opt &&
!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) {
@@ -474,8 +474,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
WRITE_ONCE(sk->sk_prot, &tcp_prot);
/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific);
- sk->sk_socket->ops = &inet_stream_ops;
- sk->sk_family = PF_INET;
+ WRITE_ONCE(sk->sk_socket->ops, &inet_stream_ops);
+ WRITE_ONCE(sk->sk_family, PF_INET);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
} else {
struct proto *prot = &udp_prot;
@@ -488,8 +488,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
/* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */
WRITE_ONCE(sk->sk_prot, prot);
- sk->sk_socket->ops = &inet_dgram_ops;
- sk->sk_family = PF_INET;
+ WRITE_ONCE(sk->sk_socket->ops, &inet_dgram_ops);
+ WRITE_ONCE(sk->sk_family, PF_INET);
}
/* Disable all options not to allocate memory anymore,
@@ -633,7 +633,7 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
goto e_inval;
/* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */
- inet_sk(sk)->transparent = valbool;
+ inet_assign_bit(TRANSPARENT, sk, valbool);
retv = 0;
break;
@@ -641,7 +641,7 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
goto e_inval;
/* we also don't have a separate freebind bit for IPV6 */
- inet_sk(sk)->freebind = valbool;
+ inet_assign_bit(FREEBIND, sk, valbool);
retv = 0;
break;
@@ -831,7 +831,7 @@ done:
goto e_inval;
retv = -EPROTO;
- if (inet_sk(sk)->is_icsk)
+ if (inet_test_bit(IS_ICSK, sk))
break;
retv = -EFAULT;
@@ -923,7 +923,7 @@ done:
goto e_inval;
np->recverr = valbool;
if (!val)
- skb_queue_purge(&sk->sk_error_queue);
+ skb_errqueue_purge(&sk->sk_error_queue);
retv = 0;
break;
case IPV6_FLOWINFO_SEND:
@@ -1330,11 +1330,11 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
case IPV6_TRANSPARENT:
- val = inet_sk(sk)->transparent;
+ val = inet_test_bit(TRANSPARENT, sk);
break;
case IPV6_FREEBIND:
- val = inet_sk(sk)->freebind;
+ val = inet_test_bit(FREEBIND, sk);
break;
case IPV6_RECVORIGDSTADDR:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 714cdc9e2b8e..5ce25bcb9974 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1699,11 +1699,9 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
-static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
- struct net_device *dev,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- int proto, int len)
+static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb,
+ struct net_device *dev, const struct in6_addr *saddr,
+ const struct in6_addr *daddr, int proto, int len)
{
struct ipv6hdr *hdr;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a42be96ae209..553c8664e0a7 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1267,10 +1267,6 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
}
#endif
- /*
- * set the RA_RECV flag in the interface
- */
-
in6_dev = __in6_dev_get(skb->dev);
if (!in6_dev) {
ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n",
@@ -1328,6 +1324,14 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
goto skip_defrtr;
}
+ lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
+ if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_lft) {
+ ND_PRINTK(2, info,
+ "RA: router lifetime (%ds) is too short: %s\n",
+ lifetime, skb->dev->name);
+ goto skip_defrtr;
+ }
+
/* Do not accept RA with source-addr found on local machine unless
* accept_ra_from_local is set to true.
*/
@@ -1340,8 +1344,6 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
goto skip_defrtr;
}
- lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
-
#ifdef CONFIG_IPV6_ROUTER_PREF
pref = ra_msg->icmph.icmp6_router_pref;
/* 10b is handled as if it were 00b (medium) */
@@ -1517,6 +1519,9 @@ skip_linkparms:
if (ri->prefix_len == 0 &&
!in6_dev->cnf.accept_ra_defrtr)
continue;
+ if (ri->lifetime != 0 &&
+ ntohl(ri->lifetime) < in6_dev->cnf.accept_ra_min_lft)
+ continue;
if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
continue;
if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d13240f13607..b2dd48911c8d 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -87,7 +87,8 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
table[2].data = &nf_frag->fqdir->high_thresh;
table[2].extra1 = &nf_frag->fqdir->low_thresh;
- hdr = register_net_sysctl(net, "net/netfilter", table);
+ hdr = register_net_sysctl_sz(net, "net/netfilter", table,
+ ARRAY_SIZE(nf_ct_frag6_sysctl_table));
if (hdr == NULL)
goto err_reg;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index cb4eb1d2c620..d59b296b4f51 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
+#include <linux/rcupdate.h>
#include <linux/sysctl.h>
#include <net/ipv6_frag.h>
@@ -96,6 +97,12 @@ static void __net_exit defrag6_net_exit(struct net *net)
}
}
+static const struct nf_defrag_hook defrag_hook = {
+ .owner = THIS_MODULE,
+ .enable = nf_defrag_ipv6_enable,
+ .disable = nf_defrag_ipv6_disable,
+};
+
static struct pernet_operations defrag6_net_ops = {
.exit = defrag6_net_exit,
};
@@ -114,6 +121,9 @@ static int __init nf_defrag_init(void)
pr_err("nf_defrag_ipv6: can't register pernet ops\n");
goto cleanup_frag6;
}
+
+ rcu_assign_pointer(nf_defrag_v6_hook, &defrag_hook);
+
return ret;
cleanup_frag6:
@@ -124,6 +134,7 @@ cleanup_frag6:
static void __exit nf_defrag_fini(void)
{
+ rcu_assign_pointer(nf_defrag_v6_hook, NULL);
unregister_pernet_subsys(&defrag6_net_ops);
nf_ct_frag6_cleanup();
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index c2c291827a2c..5831aaa53d75 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -119,7 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EINVAL;
ipcm6_init_sk(&ipc6, np);
- ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_oif = oif;
@@ -215,6 +215,7 @@ struct proto pingv6_prot = {
.get_port = ping_get_port,
.put_port = ping_unhash,
.obj_size = sizeof(struct raw6_sock),
+ .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
};
EXPORT_SYMBOL_GPL(pingv6_prot);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 49381f35b623..42fcec3ecf5e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -291,7 +291,6 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
int err;
int harderr;
@@ -315,7 +314,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
}
if (np->recverr) {
u8 *payload = skb->data;
- if (!inet->hdrincl)
+ if (!inet_test_bit(HDRINCL, sk))
payload += offset;
ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
}
@@ -406,7 +405,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
skb->len,
inet->inet_num, 0));
- if (inet->hdrincl) {
+ if (inet_test_bit(HDRINCL, sk)) {
if (skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
@@ -762,12 +761,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
- /* hdrincl should be READ_ONCE(inet->hdrincl)
- * but READ_ONCE() doesn't work with bit fields.
- * Doing this indirectly yields the same result.
- */
- hdrincl = inet->hdrincl;
- hdrincl = READ_ONCE(hdrincl);
+ hdrincl = inet_test_bit(HDRINCL, sk);
/*
* Get and verify the address.
@@ -778,7 +772,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_uid = sk->sk_uid;
ipcm6_init(&ipc6);
- ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = fl6.flowi6_mark;
if (sin6) {
@@ -1000,7 +994,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
case IPV6_HDRINCL:
if (sk->sk_type != SOCK_RAW)
return -EINVAL;
- inet_sk(sk)->hdrincl = !!val;
+ inet_assign_bit(HDRINCL, sk, val);
return 0;
case IPV6_CHECKSUM:
if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
@@ -1068,7 +1062,7 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case IPV6_HDRINCL:
- val = inet_sk(sk)->hdrincl;
+ val = inet_test_bit(HDRINCL, sk);
break;
case IPV6_CHECKSUM:
/*
@@ -1216,6 +1210,7 @@ struct proto rawv6_prot = {
.hash = raw_hash_sk,
.unhash = raw_unhash_sk,
.obj_size = sizeof(struct raw6_sock),
+ .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6),
.useroffset = offsetof(struct raw6_sock, filter),
.usersize = sizeof_field(struct raw6_sock, filter),
.h.raw_hash = &raw_v6_hashinfo,
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5bc8a28e67f9..5ebc47da1000 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -470,7 +470,8 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
table[1].extra2 = &net->ipv6.fqdir->high_thresh;
table[2].data = &net->ipv6.fqdir->timeout;
- hdr = register_net_sysctl(net, "net/ipv6", table);
+ hdr = register_net_sysctl_sz(net, "net/ipv6", table,
+ ARRAY_SIZE(ip6_frags_ns_ctl_table));
if (!hdr)
goto err_reg;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 56a55585eb79..9c687b357e6a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -90,7 +90,7 @@ unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
- struct net_device *dev, int how);
+ struct net_device *dev);
static void ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
@@ -371,8 +371,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
fib6_info_release(from);
}
-static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int how)
+static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
@@ -424,6 +423,9 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
if (match->nh && have_oif_match && res->nh)
return;
+ if (skb)
+ IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
+
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
*/
@@ -3761,10 +3763,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->dst_nocount = true;
if (cfg->fc_flags & RTF_EXPIRES)
- fib6_set_expires(rt, jiffies +
- clock_t_to_jiffies(cfg->fc_expires));
+ fib6_set_expires_locked(rt, jiffies +
+ clock_t_to_jiffies(cfg->fc_expires));
else
- fib6_clean_expires(rt);
+ fib6_clean_expires_locked(rt);
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
@@ -4544,7 +4546,8 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff
struct fib6_info *addrconf_f6i_alloc(struct net *net,
struct inet6_dev *idev,
const struct in6_addr *addr,
- bool anycast, gfp_t gfp_flags)
+ bool anycast, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
@@ -4566,7 +4569,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
cfg.fc_flags |= RTF_LOCAL;
}
- f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
+ f6i = ip6_route_info_create(&cfg, gfp_flags, extack);
if (!IS_ERR(f6i)) {
f6i->dst_nocount = true;
@@ -4581,21 +4584,19 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
- struct net_device *dev;
struct net *net;
struct in6_addr *addr;
};
static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
{
- struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
if (!rt->nh &&
- ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
rt != net->ipv6.fib6_null_entry &&
- ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
+ ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr) &&
+ !ipv6_chk_addr(net, addr, rt->fib6_nh->fib_nh_dev, 0)) {
spin_lock_bh(&rt6_exception_lock);
/* remove prefsrc entry */
rt->fib6_prefsrc.plen = 0;
@@ -4608,7 +4609,6 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
struct net *net = dev_net(ifp->idev->dev);
struct arg_dev_net_ip adni = {
- .dev = ifp->idev->dev,
.net = net,
.addr = &ifp->addr,
};
@@ -6456,6 +6456,15 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
return table;
}
+
+size_t ipv6_route_sysctl_table_size(struct net *net)
+{
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ return 1;
+
+ return ARRAY_SIZE(ipv6_route_table_template);
+}
#endif
static int __net_init ip6_route_net_init(struct net *net)
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index b1c028df686e..a013b92cbb86 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -272,8 +272,6 @@ static int rpl_input(struct sk_buff *skb)
dst = dst_cache_get(&rlwt->cache);
preempt_enable();
- skb_dst_drop(skb);
-
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
@@ -284,6 +282,7 @@ static int rpl_input(struct sk_buff *skb)
preempt_enable();
}
} else {
+ skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index dd433cc265c8..24e2b4b494cb 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -109,15 +109,19 @@ struct bpf_lwt_prog {
#define next_csid_chk_lcnode_fn_bits(flen) \
next_csid_chk_lcblock_bits(flen)
+/* flag indicating that flavors are set up for a given End* behavior */
+#define SEG6_F_LOCAL_FLAVORS SEG6_F_ATTR(SEG6_LOCAL_FLAVORS)
+
#define SEG6_F_LOCAL_FLV_OP(flvname) BIT(SEG6_LOCAL_FLV_OP_##flvname)
+#define SEG6_F_LOCAL_FLV_NEXT_CSID SEG6_F_LOCAL_FLV_OP(NEXT_CSID)
#define SEG6_F_LOCAL_FLV_PSP SEG6_F_LOCAL_FLV_OP(PSP)
/* Supported RFC8986 Flavor operations are reported in this bitmask */
#define SEG6_LOCAL_FLV8986_SUPP_OPS SEG6_F_LOCAL_FLV_PSP
-/* Supported Flavor operations are reported in this bitmask */
-#define SEG6_LOCAL_FLV_SUPP_OPS (SEG6_F_LOCAL_FLV_OP(NEXT_CSID) | \
+#define SEG6_LOCAL_END_FLV_SUPP_OPS (SEG6_F_LOCAL_FLV_NEXT_CSID | \
SEG6_LOCAL_FLV8986_SUPP_OPS)
+#define SEG6_LOCAL_END_X_FLV_SUPP_OPS SEG6_F_LOCAL_FLV_NEXT_CSID
struct seg6_flavors_info {
/* Flavor operations */
@@ -411,9 +415,72 @@ static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
return input_action_end_finish(skb, slwt);
}
+static int input_action_end_x_finish(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ seg6_lookup_nexthop(skb, &slwt->nh6, 0);
+
+ return dst_input(skb);
+}
+
+static int input_action_end_x_core(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ return input_action_end_x_finish(skb, slwt);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int end_x_next_csid_core(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ const struct seg6_flavors_info *finfo = &slwt->flv_info;
+ struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
+
+ if (seg6_next_csid_is_arg_zero(daddr, finfo))
+ return input_action_end_x_core(skb, slwt);
+
+ /* update DA */
+ seg6_next_csid_advance_arg(daddr, finfo);
+
+ return input_action_end_x_finish(skb, slwt);
+}
+
static bool seg6_next_csid_enabled(__u32 fops)
{
- return fops & BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID);
+ return fops & SEG6_F_LOCAL_FLV_NEXT_CSID;
+}
+
+/* Processing of SRv6 End, End.X, and End.T behaviors can be extended through
+ * the flavors framework. These behaviors must report the subset of (flavor)
+ * operations they currently implement. In this way, if a user specifies a
+ * flavor combination that is not supported by a given End* behavior, the
+ * kernel refuses to instantiate the tunnel reporting the error.
+ */
+static int seg6_flv_supp_ops_by_action(int action, __u32 *fops)
+{
+ switch (action) {
+ case SEG6_LOCAL_ACTION_END:
+ *fops = SEG6_LOCAL_END_FLV_SUPP_OPS;
+ break;
+ case SEG6_LOCAL_ACTION_END_X:
+ *fops = SEG6_LOCAL_END_X_FLV_SUPP_OPS;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
}
/* We describe the packet state in relation to the absence/presence of the SRH
@@ -746,21 +813,14 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
/* regular endpoint, and forward to specified nexthop */
static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
- struct ipv6_sr_hdr *srh;
-
- srh = get_and_validate_srh(skb);
- if (!srh)
- goto drop;
-
- advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
-
- seg6_lookup_nexthop(skb, &slwt->nh6, 0);
+ const struct seg6_flavors_info *finfo = &slwt->flv_info;
+ __u32 fops = finfo->flv_ops;
- return dst_input(skb);
+ /* check for the presence of NEXT-C-SID since it applies first */
+ if (seg6_next_csid_enabled(fops))
+ return end_x_next_csid_core(skb, slwt);
-drop:
- kfree_skb(skb);
- return -EINVAL;
+ return input_action_end_x_core(skb, slwt);
}
static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
@@ -1404,13 +1464,14 @@ static struct seg6_action_desc seg6_action_table[] = {
.action = SEG6_LOCAL_ACTION_END,
.attrs = 0,
.optattrs = SEG6_F_LOCAL_COUNTERS |
- SEG6_F_ATTR(SEG6_LOCAL_FLAVORS),
+ SEG6_F_LOCAL_FLAVORS,
.input = input_action_end,
},
{
.action = SEG6_LOCAL_ACTION_END_X,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6),
- .optattrs = SEG6_F_LOCAL_COUNTERS,
+ .optattrs = SEG6_F_LOCAL_COUNTERS |
+ SEG6_F_LOCAL_FLAVORS,
.input = input_action_end_x,
},
{
@@ -2070,7 +2131,8 @@ static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
{
struct seg6_flavors_info *finfo = &slwt->flv_info;
struct nlattr *tb[SEG6_LOCAL_FLV_MAX + 1];
- unsigned long fops;
+ int action = slwt->action;
+ __u32 fops, supp_fops;
int rc;
rc = nla_parse_nested_deprecated(tb, SEG6_LOCAL_FLV_MAX,
@@ -2086,7 +2148,8 @@ static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
return -EINVAL;
fops = nla_get_u32(tb[SEG6_LOCAL_FLV_OPERATION]);
- if (fops & ~SEG6_LOCAL_FLV_SUPP_OPS) {
+ rc = seg6_flv_supp_ops_by_action(action, &supp_fops);
+ if (rc < 0 || (fops & ~supp_fops)) {
NL_SET_ERR_MSG(extack, "Unsupported Flavor operation(s)");
return -EOPNOTSUPP;
}
@@ -2618,6 +2681,11 @@ int __init seg6_local_init(void)
*/
BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long));
+ /* Check whether the number of defined flavors exceeds the maximum
+ * allowed value.
+ */
+ BUILD_BUG_ON(SEG6_LOCAL_FLV_OP_MAX + 1 > BITS_PER_TYPE(__u32));
+
/* If the default NEXT-C-SID Locator-Block/Node Function lengths (in
* bits) have been changed with invalid values, kernel build stops
* here.
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 94a0a294c6a1..888676163e90 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -275,17 +275,23 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
if (!ipv6_icmp_table)
goto out_ipv6_route_table;
- net->ipv6.sysctl.hdr = register_net_sysctl(net, "net/ipv6", ipv6_table);
+ net->ipv6.sysctl.hdr = register_net_sysctl_sz(net, "net/ipv6",
+ ipv6_table,
+ ARRAY_SIZE(ipv6_table_template));
if (!net->ipv6.sysctl.hdr)
goto out_ipv6_icmp_table;
- net->ipv6.sysctl.route_hdr =
- register_net_sysctl(net, "net/ipv6/route", ipv6_route_table);
+ net->ipv6.sysctl.route_hdr = register_net_sysctl_sz(net,
+ "net/ipv6/route",
+ ipv6_route_table,
+ ipv6_route_sysctl_table_size(net));
if (!net->ipv6.sysctl.route_hdr)
goto out_unregister_ipv6_table;
- net->ipv6.sysctl.icmp_hdr =
- register_net_sysctl(net, "net/ipv6/icmp", ipv6_icmp_table);
+ net->ipv6.sysctl.icmp_hdr = register_net_sysctl_sz(net,
+ "net/ipv6/icmp",
+ ipv6_icmp_table,
+ ipv6_icmp_sysctl_table_size());
if (!net->ipv6.sysctl.icmp_hdr)
goto out_unregister_route_table;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6e86721e1cdb..3a88545a265d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2176,6 +2176,7 @@ struct proto tcpv6_prot = {
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f787e6b8424c..86b5d509a468 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -72,11 +72,12 @@ int udpv6_init_sock(struct sock *sk)
return 0;
}
-static u32 udp6_ehashfn(const struct net *net,
- const struct in6_addr *laddr,
- const u16 lport,
- const struct in6_addr *faddr,
- const __be16 fport)
+INDIRECT_CALLABLE_SCOPE
+u32 udp6_ehashfn(const struct net *net,
+ const struct in6_addr *laddr,
+ const u16 lport,
+ const struct in6_addr *faddr,
+ const __be16 fport)
{
static u32 udp6_ehash_secret __read_mostly;
static u32 udp_ipv6_hash_secret __read_mostly;
@@ -161,24 +162,6 @@ static int compute_score(struct sock *sk, struct net *net,
return score;
}
-static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
- struct sk_buff *skb,
- const struct in6_addr *saddr,
- __be16 sport,
- const struct in6_addr *daddr,
- unsigned int hnum)
-{
- struct sock *reuse_sk = NULL;
- u32 hash;
-
- if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
- hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
- reuse_sk = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- }
- return reuse_sk;
-}
-
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -195,44 +178,35 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- result = lookup_reuseport(net, sk, skb,
- saddr, sport, daddr, hnum);
+ badness = score;
+
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ result = sk;
+ continue;
+ }
+
+ result = inet6_lookup_reuseport(net, sk, skb, sizeof(struct udphdr),
+ saddr, sport, daddr, hnum, udp6_ehashfn);
+ if (!result) {
+ result = sk;
+ continue;
+ }
+
/* Fall back to scoring if group has connections */
- if (result && !reuseport_has_conns(sk))
+ if (!reuseport_has_conns(sk))
return result;
- result = result ? : sk;
- badness = score;
+ /* Reuseport logic returned an error, keep original score. */
+ if (IS_ERR(result))
+ continue;
+
+ badness = compute_score(sk, net, saddr, sport,
+ daddr, hnum, dif, sdif);
}
}
return result;
}
-static inline struct sock *udp6_lookup_run_bpf(struct net *net,
- struct udp_table *udptable,
- struct sk_buff *skb,
- const struct in6_addr *saddr,
- __be16 sport,
- const struct in6_addr *daddr,
- u16 hnum, const int dif)
-{
- struct sock *sk, *reuse_sk;
- bool no_reuseport;
-
- if (udptable != net->ipv4.udp_table)
- return NULL; /* only UDP is supported */
-
- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport,
- daddr, hnum, dif, &sk);
- if (no_reuseport || IS_ERR_OR_NULL(sk))
- return sk;
-
- reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
- if (reuse_sk)
- sk = reuse_sk;
- return sk;
-}
-
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -257,9 +231,11 @@ struct sock *__udp6_lib_lookup(struct net *net,
goto done;
/* Lookup redirect from BPF */
- if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
- sk = udp6_lookup_run_bpf(net, udptable, skb,
- saddr, sport, daddr, hnum, dif);
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+ udptable == net->ipv4.udp_table) {
+ sk = inet6_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr),
+ saddr, sport, daddr, hnum, dif,
+ udp6_ehashfn);
if (sk) {
result = sk;
goto done;
@@ -444,7 +420,7 @@ try_again:
ip6_datagram_recv_common_ctl(sk, msg, skb);
if (is_udp4) {
- if (inet->cmsg_flags)
+ if (inet_cmsg_flags(inet))
ip_cmsg_recv_offset(msg, sk, skb,
sizeof(struct udphdr), off);
} else {
@@ -992,7 +968,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
goto csum_error;
/* Check if the socket is already available, e.g. due to early demux */
- sk = skb_steal_sock(skb, &refcounted);
+ sk = inet6_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest,
+ &refcounted, udp6_ehashfn);
+ if (IS_ERR(sk))
+ goto no_sk;
+
if (sk) {
struct dst_entry *dst = skb_dst(skb);
int ret;
@@ -1026,7 +1006,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
goto report_csum_error;
return udp6_unicast_rcv_skb(sk, skb, uh);
}
-
+no_sk:
reason = SKB_DROP_REASON_NO_SOCKET;
if (!uh->check)
@@ -1359,7 +1339,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.gso_size = READ_ONCE(up->gso_size);
- ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
/* destination address check */
@@ -1802,6 +1782,7 @@ struct proto udpv6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = NULL,
.diag_destroy = udp_abort,
};
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 8e010d07917a..267d491e9707 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -67,6 +67,7 @@ struct proto udplitev6_prot = {
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6),
.h.udp_table = &udplite_table,
};
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index eecc5e59da17..41a680c76d2e 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -124,14 +124,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
xfrm_dst_destroy(xdst);
}
-static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int unregister)
+static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
struct xfrm_dst *xdst;
- if (!unregister)
- return;
-
xdst = (struct xfrm_dst *)dst;
if (xdst->u.rt6.rt6i_idev->dev == dev) {
struct inet6_dev *loopback_idev =
@@ -205,7 +201,8 @@ static int __net_init xfrm6_net_sysctl_init(struct net *net)
table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh;
}
- hdr = register_net_sysctl(net, "net/ipv6", table);
+ hdr = register_net_sysctl_sz(net, "net/ipv6", table,
+ ARRAY_SIZE(xfrm6_policy_table));
if (!hdr)
goto err_reg;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 393f01b2a7e6..dd1d8ffd5f59 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -930,15 +930,18 @@ partial_message:
out_error:
kcm_push(kcm);
- if (copied && sock->type == SOCK_SEQPACKET) {
+ if (sock->type == SOCK_SEQPACKET) {
/* Wrote some bytes before encountering an
* error, return partial success.
*/
- goto partial_message;
- }
-
- if (head != kcm->seq_skb)
+ if (copied)
+ goto partial_message;
+ if (head != kcm->seq_skb)
+ kfree_skb(head);
+ } else {
kfree_skb(head);
+ kcm->seq_skb = NULL;
+ }
err = sk_stream_error(sk, msg->msg_flags, err);
@@ -1859,6 +1862,8 @@ static __net_exit void kcm_exit_net(struct net *net)
* that all multiplexors and psocks have been destroyed.
*/
WARN_ON(!list_empty(&knet->mux_list));
+
+ mutex_destroy(&knet->mutex);
}
static struct pernet_operations kcm_net_ops = {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index b4ea4cf9fad4..d68d01804dc7 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1281,7 +1281,6 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1];
natt->encap_dport = n_port->sadb_x_nat_t_port_port;
}
- memset(&natt->encap_oa, 0, sizeof(natt->encap_oa));
}
err = xfrm_init_state(x);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index f9073bc7281f..9a2a9ed3ba47 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -552,7 +552,7 @@ static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
*addr_len = sizeof(*sin);
}
- if (inet->cmsg_flags)
+ if (inet_cmsg_flags(inet))
ip_cmsg_recv(msg, skb);
if (flags & MSG_TRUNC)
copied = skb->len;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index ff78217f0cb1..ed8ebb6f5909 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -36,9 +36,6 @@ struct l2tp_ip6_sock {
u32 conn_id;
u32 peer_conn_id;
- /* ipv6_pinfo has to be the last member of l2tp_ip6_sock, see
- * inet6_sk_generic
- */
struct ipv6_pinfo inet6;
};
@@ -730,6 +727,7 @@ static struct proto l2tp_ip6_prot = {
.hash = l2tp_ip6_hash,
.unhash = l2tp_ip6_unhash,
.obj_size = sizeof(struct l2tp_ip6_sock),
+ .ipv6_pinfo_offset = offsetof(struct l2tp_ip6_sock, inet6),
};
static const struct proto_ops l2tp_ip6_ops = {
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index d037009ee10f..0a3f5e0bec00 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -14,14 +14,15 @@
#include <linux/init.h>
#include <linux/slab.h>
-#include <net/llc_sap.h>
-#include <net/llc_conn.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <net/llc_c_ev.h>
+#include <net/llc.h>
#include <net/llc_c_ac.h>
+#include <net/llc_c_ev.h>
#include <net/llc_c_st.h>
+#include <net/llc_conn.h>
#include <net/llc_pdu.h>
+#include <net/llc_sap.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
#if 0
#define dprintk(args...) printk(KERN_DEBUG args)
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e7ac24603892..45e7a5d9c7d9 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3648,12 +3648,6 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&local->mtx);
lockdep_assert_held(&local->chanctx_mtx);
- if (sdata->vif.bss_conf.eht_puncturing != sdata->vif.bss_conf.csa_punct_bitmap) {
- sdata->vif.bss_conf.eht_puncturing =
- sdata->vif.bss_conf.csa_punct_bitmap;
- changed |= BSS_CHANGED_EHT_PUNCTURING;
- }
-
/*
* using reservation isn't immediate as it may be deferred until later
* with multi-vif. once reservation is complete it will re-schedule the
@@ -3683,6 +3677,12 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
if (err)
return err;
+ if (sdata->vif.bss_conf.eht_puncturing != sdata->vif.bss_conf.csa_punct_bitmap) {
+ sdata->vif.bss_conf.eht_puncturing =
+ sdata->vif.bss_conf.csa_punct_bitmap;
+ changed |= BSS_CHANGED_EHT_PUNCTURING;
+ }
+
ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed);
if (sdata->deflink.csa_block_tx) {
@@ -4133,19 +4133,20 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
mutex_lock(&local->mtx);
rcu_read_lock();
+ sta = sta_info_get_bss(sdata, peer);
+ if (!sta) {
+ ret = -ENOLINK;
+ goto unlock;
+ }
+
+ qos = sta->sta.wme;
+
chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf);
if (WARN_ON(!chanctx_conf)) {
ret = -EINVAL;
goto unlock;
}
band = chanctx_conf->def.chan->band;
- sta = sta_info_get_bss(sdata, peer);
- if (sta) {
- qos = sta->sta.wme;
- } else {
- ret = -ENOLINK;
- goto unlock;
- }
if (qos) {
fc = cpu_to_le16(IEEE80211_FTYPE_DATA |
diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c
index e1d4cfd99128..912c46f74d24 100644
--- a/net/mac80211/fils_aead.c
+++ b/net/mac80211/fils_aead.c
@@ -5,9 +5,9 @@
*/
#include <crypto/aes.h>
-#include <crypto/algapi.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
+#include <crypto/utils.h>
#include "ieee80211_i.h"
#include "aes_cmac.h"
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 91633a0b723e..06bd406846d2 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1872,7 +1872,6 @@ void ieee80211_send_pspoll(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata);
void ieee80211_recalc_ps(struct ieee80211_local *local);
void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata);
-int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
@@ -2564,7 +2563,6 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
struct ieee80211_link_data *rsvd_for);
bool ieee80211_is_radar_required(struct ieee80211_local *local);
-void ieee80211_dfs_cac_timer(unsigned long data);
void ieee80211_dfs_cac_timer_work(struct work_struct *work);
void ieee80211_dfs_cac_cancel(struct ieee80211_local *local);
void ieee80211_dfs_radar_detected_work(struct work_struct *work);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 21cf5a208910..13050dc9321f 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -9,6 +9,7 @@
* Copyright 2018-2020, 2022-2023 Intel Corporation
*/
+#include <crypto/utils.h>
#include <linux/if_ether.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
@@ -17,7 +18,6 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <net/mac80211.h>
-#include <crypto/algapi.h>
#include <asm/unaligned.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 6c94222a9df5..ad8469293d71 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -212,7 +212,6 @@ int mesh_rmc_check(struct ieee80211_sub_if_data *sdata,
const u8 *addr, struct ieee80211s_hdr *mesh_hdr);
bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *ie);
-void mesh_ids_set_default(struct ieee80211_if_mesh *mesh);
int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
int mesh_add_meshid_ie(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4f707d2a160f..e751cda5eef6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1083,7 +1083,8 @@ static inline bool ieee80211_rx_reorder_ready(struct tid_ampdu_rx *tid_agg_rx,
struct sk_buff *tail = skb_peek_tail(frames);
struct ieee80211_rx_status *status;
- if (tid_agg_rx->reorder_buf_filtered & BIT_ULL(index))
+ if (tid_agg_rx->reorder_buf_filtered &&
+ tid_agg_rx->reorder_buf_filtered & BIT_ULL(index))
return true;
if (!tail)
@@ -1124,7 +1125,8 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
}
no_frame:
- tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
+ if (tid_agg_rx->reorder_buf_filtered)
+ tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num);
}
@@ -3732,6 +3734,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
break;
goto queue;
case WLAN_CATEGORY_S1G:
+ if (len < offsetofend(typeof(*mgmt),
+ u.action.u.s1g.action_code))
+ break;
+
switch (mgmt->u.action.u.s1g.action_code) {
case WLAN_S1G_TWT_SETUP:
case WLAN_S1G_TWT_TEARDOWN:
@@ -4264,6 +4270,7 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
u16 ssn, u64 filtered,
u16 received_mpdus)
{
+ struct ieee80211_local *local;
struct sta_info *sta;
struct tid_ampdu_rx *tid_agg_rx;
struct sk_buff_head frames;
@@ -4281,6 +4288,11 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
sta = container_of(pubsta, struct sta_info, sta);
+ local = sta->sdata->local;
+ WARN_ONCE(local->hw.max_rx_aggregation_subframes > 64,
+ "RX BA marker can't support max_rx_aggregation_subframes %u > 64\n",
+ local->hw.max_rx_aggregation_subframes);
+
if (!ieee80211_rx_data_set_sta(&rx, sta, -1))
return;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 4133496da378..2d8e38b3bcb5 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -15,7 +15,7 @@
#include <asm/unaligned.h>
#include <net/mac80211.h>
#include <crypto/aes.h>
-#include <crypto/algapi.h>
+#include <crypto/utils.h>
#include "ieee80211_i.h"
#include "michael.h"
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index bf6e81d56263..1af29af65388 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1419,7 +1419,8 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
- mdev->sysctl = register_net_sysctl(net, path, table);
+ mdev->sysctl = register_net_sysctl_sz(net, path, table,
+ ARRAY_SIZE(mpls_dev_table));
if (!mdev->sysctl)
goto free;
@@ -2689,7 +2690,8 @@ static int mpls_net_init(struct net *net)
for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
table[i].data = (char *)net + (uintptr_t)table[i].data;
- net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
+ net->mpls.ctl = register_net_sysctl_sz(net, "net/mpls", table,
+ ARRAY_SIZE(mpls_table));
if (net->mpls.ctl == NULL) {
kfree(table);
return -ENOMEM;
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index a3829ce548f9..84e531f86b82 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
- mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o
+ mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 5a0a84ad94af..8a16672b94e2 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -19,3 +19,18 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
return NULL;
}
+
+BTF_SET8_START(bpf_mptcp_fmodret_ids)
+BTF_ID_FLAGS(func, update_socket_protocol)
+BTF_SET8_END(bpf_mptcp_fmodret_ids)
+
+static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_mptcp_fmodret_ids,
+};
+
+static int __init bpf_mptcp_kfunc_init(void)
+{
+ return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
+}
+late_initcall(bpf_mptcp_kfunc_init);
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index ae20b7d92e28..e72b518c5d02 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -32,6 +32,7 @@ struct mptcp_pernet {
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
u8 pm_type;
+ char scheduler[MPTCP_SCHED_NAME_MAX];
};
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -69,6 +70,11 @@ int mptcp_get_pm_type(const struct net *net)
return mptcp_get_pernet(net)->pm_type;
}
+const char *mptcp_get_scheduler(const struct net *net)
+{
+ return mptcp_get_pernet(net)->scheduler;
+}
+
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
@@ -77,6 +83,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
+ strcpy(pernet->scheduler, "default");
}
#ifdef CONFIG_SYSCTL
@@ -128,6 +135,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &mptcp_pm_type_max
},
+ {
+ .procname = "scheduler",
+ .maxlen = MPTCP_SCHED_NAME_MAX,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
{}
};
@@ -149,8 +162,10 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[3].data = &pernet->allow_join_initial_addr_port;
table[4].data = &pernet->stale_loss_cnt;
table[5].data = &pernet->pm_type;
+ table[6].data = &pernet->scheduler;
- hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
+ hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
+ ARRAY_SIZE(mptcp_sysctl_table));
if (!hdr)
goto err_reg;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index c254accb14de..cd15ec73073e 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1269,12 +1269,13 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
if (rcv_wnd == rcv_wnd_old)
break;
- if (before64(rcv_wnd_new, rcv_wnd)) {
+
+ rcv_wnd_old = rcv_wnd;
+ if (before64(rcv_wnd_new, rcv_wnd_old)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE);
goto raise_win;
}
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
- rcv_wnd_old = rcv_wnd;
}
return;
}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 7dbbad1e4f55..d8da5374d9e1 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -299,15 +299,8 @@ void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
msk = mptcp_sk(sk);
- if (subflow->backup != bkup) {
+ if (subflow->backup != bkup)
subflow->backup = bkup;
- mptcp_data_lock(sk);
- if (!sock_owned_by_user(sk))
- msk->last_snd = NULL;
- else
- __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags);
- mptcp_data_unlock(sk);
- }
mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 5692daf57a4d..9661f3812682 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -9,6 +9,7 @@
#include <linux/inet.h>
#include <linux/kernel.h>
#include <net/tcp.h>
+#include <net/inet_common.h>
#include <net/netns/generic.h>
#include <net/mptcp.h>
#include <net/genetlink.h>
@@ -471,9 +472,6 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con
slow = lock_sock_fast(ssk);
if (prio) {
- if (subflow->backup != backup)
- msk->last_snd = NULL;
-
subflow->send_mp_prio = 1;
subflow->backup = backup;
subflow->request_bkup = backup;
@@ -1005,8 +1003,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
bool is_ipv6 = sk->sk_family == AF_INET6;
int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
- struct socket *ssock;
- struct sock *newsk;
+ struct sock *newsk, *ssk;
int backlog = 1024;
int err;
@@ -1032,28 +1029,32 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
&mptcp_keys[is_ipv6]);
lock_sock(newsk);
- ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
+ ssk = __mptcp_nmpc_sk(mptcp_sk(newsk));
release_sock(newsk);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ if (IS_ERR(ssk))
+ return PTR_ERR(ssk);
mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if (entry->addr.family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
+ if (ssk->sk_family == AF_INET)
+ err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ else if (ssk->sk_family == AF_INET6)
+ err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen);
+#endif
if (err)
return err;
inet_sk_state_store(newsk, TCP_LISTEN);
- err = kernel_listen(ssock, backlog);
- if (err)
- return err;
-
- mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
-
- return 0;
+ lock_sock(ssk);
+ err = __inet_listen_sk(ssk, backlog);
+ if (!err)
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
+ release_sock(ssk);
+ return err;
}
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d80658547836..e252539b1e19 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -67,11 +67,11 @@ static bool mptcp_is_tcpsk(struct sock *sk)
* Hand the socket over to tcp so all further socket ops
* bypass mptcp.
*/
- sock->ops = &inet_stream_ops;
+ WRITE_ONCE(sock->ops, &inet_stream_ops);
return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
} else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
- sock->ops = &inet6_stream_ops;
+ WRITE_ONCE(sock->ops, &inet6_stream_ops);
return true;
#endif
}
@@ -90,8 +90,8 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
if (err)
return err;
+ msk->scaling_ratio = tcp_sk(ssock->sk)->scaling_ratio;
WRITE_ONCE(msk->first, ssock->sk);
- WRITE_ONCE(msk->subflow, ssock);
subflow = mptcp_subflow_ctx(ssock->sk);
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
@@ -101,6 +101,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
/* This is the first subflow, always with id 0 */
subflow->local_id_valid = 1;
mptcp_sock_graft(msk->first, sk->sk_socket);
+ iput(SOCK_INODE(ssock));
return 0;
}
@@ -108,7 +109,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
/* If the MPC handshake is not started, returns the first subflow,
* eventually allocating it.
*/
-struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
+struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
int ret;
@@ -116,10 +117,7 @@ struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
return ERR_PTR(-EINVAL);
- if (!msk->subflow) {
- if (msk->first)
- return ERR_PTR(-EINVAL);
-
+ if (!msk->first) {
ret = __mptcp_socket_create(msk);
if (ret)
return ERR_PTR(ret);
@@ -127,7 +125,7 @@ struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk)
mptcp_sockopt_sync(msk, msk->first);
}
- return msk->subflow;
+ return msk->first;
}
static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
@@ -136,9 +134,15 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
+static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size)
+{
+ WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc,
+ mptcp_sk(sk)->rmem_fwd_alloc + size);
+}
+
static void mptcp_rmem_charge(struct sock *sk, int size)
{
- mptcp_sk(sk)->rmem_fwd_alloc -= size;
+ mptcp_rmem_fwd_alloc_add(sk, -size);
}
static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
@@ -179,7 +183,7 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
{
amount >>= PAGE_SHIFT;
- mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT;
+ mptcp_rmem_charge(sk, amount << PAGE_SHIFT);
__sk_mem_reduce_allocated(sk, amount);
}
@@ -188,7 +192,7 @@ static void mptcp_rmem_uncharge(struct sock *sk, int size)
struct mptcp_sock *msk = mptcp_sk(sk);
int reclaimable;
- msk->rmem_fwd_alloc += size;
+ mptcp_rmem_fwd_alloc_add(sk, size);
reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
/* see sk_mem_uncharge() for the rationale behind the following schema */
@@ -343,7 +347,7 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
return false;
- msk->rmem_fwd_alloc += amount;
+ mptcp_rmem_fwd_alloc_add(sk, amount);
return true;
}
@@ -401,7 +405,7 @@ drop:
return false;
}
-static void mptcp_stop_timer(struct sock *sk)
+static void mptcp_stop_rtx_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -766,6 +770,46 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
return moved;
}
+static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
+{
+ int err = sock_error(ssk);
+ int ssk_state;
+
+ if (!err)
+ return false;
+
+ /* only propagate errors on fallen-back sockets or
+ * on MPC connect
+ */
+ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk)))
+ return false;
+
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ inet_sk_state_store(sk, ssk_state);
+ WRITE_ONCE(sk->sk_err, -err);
+
+ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+ smp_wmb();
+ sk_error_report(sk);
+ return true;
+}
+
+void __mptcp_error_report(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow(msk, subflow)
+ if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow)))
+ break;
+}
+
/* In most cases we will be able to lock the mptcp socket. If its already
* owned, we need to defer to the work queue to avoid ABBA deadlock.
*/
@@ -848,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk);
mptcp_subflow_joined(msk, ssk);
+ mptcp_stop_tout_timer(sk);
return true;
}
@@ -867,12 +912,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list
}
}
-static bool mptcp_timer_pending(struct sock *sk)
+static bool mptcp_rtx_timer_pending(struct sock *sk)
{
return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
}
-static void mptcp_reset_timer(struct sock *sk)
+static void mptcp_reset_rtx_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
unsigned long tout;
@@ -1006,10 +1051,10 @@ static void __mptcp_clean_una(struct sock *sk)
out:
if (snd_una == READ_ONCE(msk->snd_nxt) &&
snd_una == READ_ONCE(msk->write_seq)) {
- if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
- mptcp_stop_timer(sk);
+ if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
+ mptcp_stop_rtx_timer(sk);
} else {
- mptcp_reset_timer(sk);
+ mptcp_reset_rtx_timer(sk);
}
}
@@ -1368,7 +1413,7 @@ bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
* returns the subflow that will transmit the next DSS
* additionally updates the rtx timeout
*/
-static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
+struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
{
struct subflow_send_info send_info[SSK_MODE_MAX];
struct mptcp_subflow_context *subflow;
@@ -1379,23 +1424,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
u64 linger_time;
long tout = 0;
- msk_owned_by_me(msk);
-
- if (__mptcp_check_fallback(msk)) {
- if (!msk->first)
- return NULL;
- return __tcp_can_send(msk->first) &&
- sk_stream_memory_free(msk->first) ? msk->first : NULL;
- }
-
- /* re-use last subflow, if the burst allow that */
- if (msk->last_snd && msk->snd_burst > 0 &&
- sk_stream_memory_free(msk->last_snd) &&
- mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
- mptcp_set_timeout(sk);
- return msk->last_snd;
- }
-
/* pick the subflow with the lower wmem/wspace ratio */
for (i = 0; i < SSK_MODE_MAX; ++i) {
send_info[i].ssk = NULL;
@@ -1448,16 +1476,13 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
wmem = READ_ONCE(ssk->sk_wmem_queued);
- if (!burst) {
- msk->last_snd = NULL;
+ if (!burst)
return ssk;
- }
subflow = mptcp_subflow_ctx(ssk);
subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
READ_ONCE(ssk->sk_pacing_rate) * burst,
burst + wmem);
- msk->last_snd = ssk;
msk->snd_burst = burst;
return ssk;
}
@@ -1501,67 +1526,109 @@ void mptcp_check_and_set_pending(struct sock *sk)
mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
}
-void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
+ struct mptcp_sendmsg_info *info)
{
- struct sock *prev_ssk = NULL, *ssk = NULL;
struct mptcp_sock *msk = mptcp_sk(sk);
- struct mptcp_sendmsg_info info = {
- .flags = flags,
- };
- bool do_check_data_fin = false;
struct mptcp_data_frag *dfrag;
- int len;
+ int len, copied = 0, err = 0;
while ((dfrag = mptcp_send_head(sk))) {
- info.sent = dfrag->already_sent;
- info.limit = dfrag->data_len;
+ info->sent = dfrag->already_sent;
+ info->limit = dfrag->data_len;
len = dfrag->data_len - dfrag->already_sent;
while (len > 0) {
int ret = 0;
- prev_ssk = ssk;
- ssk = mptcp_subflow_get_send(msk);
-
- /* First check. If the ssk has changed since
- * the last round, release prev_ssk
- */
- if (ssk != prev_ssk && prev_ssk)
- mptcp_push_release(prev_ssk, &info);
- if (!ssk)
- goto out;
-
- /* Need to lock the new subflow only if different
- * from the previous one, otherwise we are still
- * helding the relevant lock
- */
- if (ssk != prev_ssk)
- lock_sock(ssk);
-
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+ ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info);
if (ret <= 0) {
- if (ret == -EAGAIN)
- continue;
- mptcp_push_release(ssk, &info);
+ err = copied ? : ret;
goto out;
}
- do_check_data_fin = true;
- info.sent += ret;
+ info->sent += ret;
+ copied += ret;
len -= ret;
mptcp_update_post_push(msk, dfrag, ret);
}
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+
+ if (msk->snd_burst <= 0 ||
+ !sk_stream_memory_free(ssk) ||
+ !mptcp_subflow_active(mptcp_subflow_ctx(ssk))) {
+ err = copied;
+ goto out;
+ }
+ mptcp_set_timeout(sk);
+ }
+ err = copied;
+
+out:
+ return err;
+}
+
+void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+{
+ struct sock *prev_ssk = NULL, *ssk = NULL;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_sendmsg_info info = {
+ .flags = flags,
+ };
+ bool do_check_data_fin = false;
+ int push_count = 1;
+
+ while (mptcp_send_head(sk) && (push_count > 0)) {
+ struct mptcp_subflow_context *subflow;
+ int ret = 0;
+
+ if (mptcp_sched_get_send(msk))
+ break;
+
+ push_count = 0;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled)) {
+ mptcp_subflow_set_scheduled(subflow, false);
+
+ prev_ssk = ssk;
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ if (ssk != prev_ssk) {
+ /* First check. If the ssk has changed since
+ * the last round, release prev_ssk
+ */
+ if (prev_ssk)
+ mptcp_push_release(prev_ssk, &info);
+
+ /* Need to lock the new subflow only if different
+ * from the previous one, otherwise we are still
+ * helding the relevant lock
+ */
+ lock_sock(ssk);
+ }
+
+ push_count++;
+
+ ret = __subflow_push_pending(sk, ssk, &info);
+ if (ret <= 0) {
+ if (ret != -EAGAIN ||
+ (1 << ssk->sk_state) &
+ (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSE))
+ push_count--;
+ continue;
+ }
+ do_check_data_fin = true;
+ }
+ }
}
/* at this point we held the socket lock for the last subflow we used */
if (ssk)
mptcp_push_release(ssk, &info);
-out:
/* ensure the rtx timer is running */
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
if (do_check_data_fin)
mptcp_check_send_data_fin(sk);
}
@@ -1572,42 +1639,49 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
struct mptcp_sendmsg_info info = {
.data_lock_held = true,
};
- struct mptcp_data_frag *dfrag;
+ bool keep_pushing = true;
struct sock *xmit_ssk;
- int len, copied = 0;
+ int copied = 0;
info.flags = 0;
- while ((dfrag = mptcp_send_head(sk))) {
- info.sent = dfrag->already_sent;
- info.limit = dfrag->data_len;
- len = dfrag->data_len - dfrag->already_sent;
- while (len > 0) {
- int ret = 0;
-
- /* check for a different subflow usage only after
- * spooling the first chunk of data
- */
- xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
- if (!xmit_ssk)
- goto out;
- if (xmit_ssk != ssk) {
- mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
- MPTCP_DELEGATE_SEND);
- goto out;
- }
+ while (mptcp_send_head(sk) && keep_pushing) {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ int ret = 0;
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+ /* check for a different subflow usage only after
+ * spooling the first chunk of data
+ */
+ if (first) {
+ mptcp_subflow_set_scheduled(subflow, false);
+ ret = __subflow_push_pending(sk, ssk, &info);
+ first = false;
if (ret <= 0)
- goto out;
+ break;
+ copied += ret;
+ continue;
+ }
+
+ if (mptcp_sched_get_send(msk))
+ goto out;
- info.sent += ret;
+ if (READ_ONCE(subflow->scheduled)) {
+ mptcp_subflow_set_scheduled(subflow, false);
+ ret = __subflow_push_pending(sk, ssk, &info);
+ if (ret <= 0)
+ keep_pushing = false;
copied += ret;
- len -= ret;
- first = false;
+ }
- mptcp_update_post_push(msk, dfrag, ret);
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled)) {
+ xmit_ssk = mptcp_subflow_tcp_sock(subflow);
+ if (xmit_ssk != ssk) {
+ mptcp_subflow_delegate(subflow,
+ MPTCP_DELEGATE_SEND);
+ keep_pushing = false;
+ }
+ }
}
- WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
}
out:
@@ -1617,8 +1691,8 @@ out:
if (copied) {
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
if (msk->snd_data_fin_enable &&
msk->snd_nxt + 1 == msk->write_seq)
@@ -1642,7 +1716,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
{
unsigned int saved_flags = msg->msg_flags;
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
struct sock *ssk;
int ret;
@@ -1653,9 +1726,9 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
* fastopen attempt, no need to check for additional subflow status.
*/
if (msg->msg_flags & MSG_FASTOPEN) {
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk))
+ return PTR_ERR(ssk);
}
if (!msk->first)
return -EINVAL;
@@ -1689,7 +1762,7 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
if (!mptcp_disconnect(sk, 0))
sk->sk_socket->state = SS_UNCONNECTED;
}
- inet_sk(sk)->defer_connect = 0;
+ inet_clear_bit(DEFER_CONNECT, sk);
return ret;
}
@@ -1707,7 +1780,8 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
lock_sock(sk);
- if (unlikely(inet_sk(sk)->defer_connect || msg->msg_flags & MSG_FASTOPEN)) {
+ if (unlikely(inet_test_bit(DEFER_CONNECT, sk) ||
+ msg->msg_flags & MSG_FASTOPEN)) {
int copied_syn = 0;
ret = mptcp_sendmsg_fastopen(sk, msg, len, &copied_syn);
@@ -1773,7 +1847,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
/* data successfully copied into the write queue */
- sk->sk_forward_alloc -= total_ts;
+ sk_forward_alloc_add(sk, -total_ts);
copied += psize;
dfrag->data_len += psize;
frag_truesize += psize;
@@ -1881,6 +1955,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
{
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
+ u8 scaling_ratio = U8_MAX;
u32 time, advmss = 1;
u64 rtt_us, mstamp;
@@ -1911,9 +1986,11 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
rtt_us = max(sf_rtt_us, rtt_us);
advmss = max(sf_advmss, advmss);
+ scaling_ratio = min(tp->scaling_ratio, scaling_ratio);
}
msk->rcvq_space.rtt_us = rtt_us;
+ msk->scaling_ratio = scaling_ratio;
if (time < (rtt_us >> 3) || rtt_us == 0)
return;
@@ -1922,8 +1999,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvmem, rcvbuf;
u64 rcvwin, grow;
+ int rcvbuf;
rcvwin = ((u64)msk->rcvq_space.copied << 1) + 16 * advmss;
@@ -1932,18 +2009,13 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
do_div(grow, msk->rcvq_space.space);
rcvwin += (grow << 1);
- rcvmem = SKB_TRUESIZE(advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(sk, rcvmem) < advmss)
- rcvmem += 128;
-
- do_div(rcvwin, advmss);
- rcvbuf = min_t(u64, rcvwin * rcvmem,
+ rcvbuf = min_t(u64, __tcp_space_from_win(scaling_ratio, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
u32 window_clamp;
- window_clamp = tcp_win_from_space(sk, rcvbuf);
+ window_clamp = __tcp_win_from_space(scaling_ratio, rcvbuf);
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make subflows follow along. If we do not do this, we
@@ -2189,7 +2261,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
sock_put(sk);
}
-static void mptcp_timeout_timer(struct timer_list *t)
+static void mptcp_tout_timer(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
@@ -2202,17 +2274,12 @@ static void mptcp_timeout_timer(struct timer_list *t)
*
* A backup subflow is returned only if that is the only kind available.
*/
-static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
+struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
{
struct sock *backup = NULL, *pick = NULL;
struct mptcp_subflow_context *subflow;
int min_stale_count = INT_MAX;
- msk_owned_by_me(msk);
-
- if (__mptcp_check_fallback(msk))
- return NULL;
-
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -2243,14 +2310,6 @@ static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
return min_stale_count > 1 ? backup : NULL;
}
-static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
-{
- if (msk->subflow) {
- iput(SOCK_INODE(msk->subflow));
- WRITE_ONCE(msk->subflow, NULL);
- }
-}
-
bool __mptcp_retransmit_pending_data(struct sock *sk)
{
struct mptcp_data_frag *cur, *rtx_head;
@@ -2311,25 +2370,21 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
bool dispose_it, need_push = false;
/* If the first subflow moved to a close state before accept, e.g. due
- * to an incoming reset, mptcp either:
- * - if either the subflow or the msk are dead, destroy the context
- * (the subflow socket is deleted by inet_child_forget) and the msk
- * - otherwise do nothing at the moment and take action at accept and/or
- * listener shutdown - user-space must be able to accept() the closed
- * socket.
+ * to an incoming reset or listener shutdown, the subflow socket is
+ * already deleted by inet_child_forget() and the mptcp socket can't
+ * survive too.
*/
- if (msk->in_accept_queue && msk->first == ssk) {
- if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
- return;
-
+ if (msk->in_accept_queue && msk->first == ssk &&
+ (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
/* ensure later check in mptcp_worker() will dispose the msk */
+ mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1));
sock_set_flag(sk, SOCK_DEAD);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
mptcp_subflow_drop_ctx(ssk);
goto out_release;
}
- dispose_it = !msk->subflow || ssk != msk->subflow->sk;
+ dispose_it = msk->free_first || ssk != msk->first;
if (dispose_it)
list_del(&subflow->node);
@@ -2350,7 +2405,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
* disconnect should never fail
*/
WARN_ON_ONCE(tcp_disconnect(ssk, 0));
- msk->subflow->state = SS_UNCONNECTED;
mptcp_subflow_ctx_reset(subflow);
release_sock(ssk);
@@ -2375,6 +2429,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
}
out_release:
+ __mptcp_subflow_error_report(sk, ssk);
release_sock(ssk);
sock_put(ssk);
@@ -2383,11 +2438,24 @@ out_release:
WRITE_ONCE(msk->first, NULL);
out:
- if (ssk == msk->last_snd)
- msk->last_snd = NULL;
-
if (need_push)
__mptcp_push_pending(sk, 0);
+
+ /* Catch every 'all subflows closed' scenario, including peers silently
+ * closing them, e.g. due to timeout.
+ * For established sockets, allow an additional timeout before closing,
+ * as the protocol can still create more subflows.
+ */
+ if (list_is_singular(&msk->conn_list) && msk->first &&
+ inet_sk_state_load(msk->first) == TCP_CLOSE) {
+ if (sk->sk_state != TCP_ESTABLISHED ||
+ msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
+ inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_close_wake_up(sk);
+ } else {
+ mptcp_start_tout_timer(sk);
+ }
+ }
}
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2431,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk)
}
-static bool mptcp_should_close(const struct sock *sk)
+static bool mptcp_close_tout_expired(const struct sock *sk)
{
- s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
- struct mptcp_subflow_context *subflow;
-
- if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
- return true;
+ if (!inet_csk(sk)->icsk_mtup.probe_timestamp ||
+ sk->sk_state == TCP_CLOSE)
+ return false;
- /* if all subflows are in closed status don't bother with additional
- * timeout
- */
- mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
- if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) !=
- TCP_CLOSE)
- return false;
- }
- return true;
+ return time_after32(tcp_jiffies32,
+ inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN);
}
static void mptcp_check_fastclose(struct mptcp_sock *msk)
@@ -2502,16 +2561,17 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
static void __mptcp_retrans(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_subflow_context *subflow;
struct mptcp_sendmsg_info info = {};
struct mptcp_data_frag *dfrag;
- size_t copied = 0;
struct sock *ssk;
- int ret;
+ int ret, err;
+ u16 len = 0;
mptcp_clean_una_wakeup(sk);
/* first check ssk: need to kick "stale" logic */
- ssk = mptcp_subflow_get_retrans(msk);
+ err = mptcp_sched_get_retrans(msk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag) {
if (mptcp_data_fin_enabled(msk)) {
@@ -2530,57 +2590,71 @@ static void __mptcp_retrans(struct sock *sk)
goto reset_timer;
}
- if (!ssk)
+ if (err)
goto reset_timer;
- lock_sock(ssk);
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled)) {
+ u16 copied = 0;
- /* limit retransmission to the bytes already sent on some subflows */
- info.sent = 0;
- info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
- while (info.sent < info.limit) {
- ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
- if (ret <= 0)
- break;
+ mptcp_subflow_set_scheduled(subflow, false);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
- copied += ret;
- info.sent += ret;
- }
- if (copied) {
- dfrag->already_sent = max(dfrag->already_sent, info.sent);
- msk->bytes_retrans += copied;
- tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
- info.size_goal);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
+ ssk = mptcp_subflow_tcp_sock(subflow);
+
+ lock_sock(ssk);
+
+ /* limit retransmission to the bytes already sent on some subflows */
+ info.sent = 0;
+ info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len :
+ dfrag->already_sent;
+ while (info.sent < info.limit) {
+ ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+ if (ret <= 0)
+ break;
+
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
+ copied += ret;
+ info.sent += ret;
+ }
+ if (copied) {
+ len = max(copied, len);
+ tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
+ info.size_goal);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
+ }
+
+ release_sock(ssk);
+ }
}
- release_sock(ssk);
+ msk->bytes_retrans += len;
+ dfrag->already_sent = max(dfrag->already_sent, len);
reset_timer:
mptcp_check_and_set_pending(sk);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
}
/* schedule the timeout timer for the relevant event: either close timeout
* or mp_fail timeout. The close timeout takes precedence on the mp_fail one
*/
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout)
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
{
struct sock *sk = (struct sock *)msk;
unsigned long timeout, close_timeout;
- if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
+ if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
return;
- close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
+ close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
+ TCP_TIMEWAIT_LEN;
/* the close timeout takes precedence on the fail one, and here at least one of
* them is active
*/
- timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
+ timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
sk_reset_timer(sk, &sk->sk_timer, timeout);
}
@@ -2599,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
mptcp_subflow_reset(ssk);
WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
unlock_sock_fast(ssk, slow);
-
- mptcp_reset_timeout(msk, 0);
}
static void mptcp_do_fastclose(struct sock *sk)
@@ -2637,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(sk);
- /* There is no point in keeping around an orphaned sk timedout or
- * closed, but we need the msk around to reply to incoming DATA_FIN,
- * even if it is orphaned and in FIN_WAIT2 state
- */
- if (sock_flag(sk, SOCK_DEAD)) {
- if (mptcp_should_close(sk))
- mptcp_do_fastclose(sk);
+ if (mptcp_close_tout_expired(sk)) {
+ mptcp_do_fastclose(sk);
+ mptcp_close_wake_up(sk);
+ }
- if (sk->sk_state == TCP_CLOSE) {
- __mptcp_destroy_sock(sk);
- goto unlock;
- }
+ if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) {
+ __mptcp_destroy_sock(sk);
+ goto unlock;
}
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
@@ -2663,7 +2731,7 @@ unlock:
sock_put(sk);
}
-static int __mptcp_init_sock(struct sock *sk)
+static void __mptcp_init_sock(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2689,9 +2757,7 @@ static int __mptcp_init_sock(struct sock *sk)
/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
- timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
-
- return 0;
+ timer_setup(&sk->sk_timer, mptcp_tout_timer, 0);
}
static void mptcp_ca_reset(struct sock *sk)
@@ -2711,9 +2777,7 @@ static int mptcp_init_sock(struct sock *sk)
struct net *net = sock_net(sk);
int ret;
- ret = __mptcp_init_sock(sk);
- if (ret)
- return ret;
+ __mptcp_init_sock(sk);
if (!mptcp_is_enabled(net))
return -ENOPROTOOPT;
@@ -2721,6 +2785,11 @@ static int mptcp_init_sock(struct sock *sk)
if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net))
return -ENOMEM;
+ ret = mptcp_init_sched(mptcp_sk(sk),
+ mptcp_sched_find(mptcp_get_scheduler(net)));
+ if (ret)
+ return ret;
+
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
@@ -2779,8 +2848,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
} else {
pr_debug("Sending DATA_FIN on subflow %p", ssk);
tcp_send_ack(ssk);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
}
break;
}
@@ -2863,9 +2932,10 @@ static void __mptcp_destroy_sock(struct sock *sk)
might_sleep();
- mptcp_stop_timer(sk);
+ mptcp_stop_rtx_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
+ mptcp_release_sched(msk);
sk->sk_prot->destroy(sk);
@@ -2945,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
cleanup:
/* orphan all the subflows */
- inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
@@ -2982,7 +3051,7 @@ cleanup:
__mptcp_destroy_sock(sk);
do_cancel_work = true;
} else {
- mptcp_reset_timeout(msk, 0);
+ mptcp_start_tout_timer(sk);
}
return do_cancel_work;
@@ -3045,8 +3114,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
- mptcp_stop_timer(sk);
- sk_stop_timer(sk, &sk->sk_timer);
+ mptcp_stop_rtx_timer(sk);
+ mptcp_stop_tout_timer(sk);
if (msk->token)
mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
@@ -3055,7 +3124,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
* subflow
*/
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
- msk->last_snd = NULL;
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
msk->push_pending = 0;
@@ -3111,7 +3179,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
- WRITE_ONCE(msk->subflow, NULL);
msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
@@ -3122,6 +3189,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->snd_una = msk->write_seq;
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
+ mptcp_init_sched(msk, mptcp_sk(sk)->sched);
/* passive msk is created after the first/MPC subflow */
msk->subflow_id = 2;
@@ -3175,25 +3243,17 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}
-static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
+static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
bool kern)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *listener;
struct sock *newsk;
- listener = READ_ONCE(msk->subflow);
- if (WARN_ON_ONCE(!listener)) {
- *err = -EINVAL;
- return NULL;
- }
-
- pr_debug("msk=%p, listener=%p", msk, mptcp_subflow_ctx(listener->sk));
- newsk = inet_csk_accept(listener->sk, flags, err, kern);
+ pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
+ newsk = inet_csk_accept(ssk, flags, err, kern);
if (!newsk)
return NULL;
- pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk));
+ pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
struct sock *new_mptcp_sock;
@@ -3210,9 +3270,9 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
}
newsk = new_mptcp_sock;
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
} else {
- MPTCP_INC_STATS(sock_net(sk),
+ MPTCP_INC_STATS(sock_net(ssk),
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
}
@@ -3242,8 +3302,8 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
/* move all the rx fwd alloc into the sk_mem_reclaim_final in
* inet_sock_destruct() will dispose it
*/
- sk->sk_forward_alloc += msk->rmem_fwd_alloc;
- msk->rmem_fwd_alloc = 0;
+ sk_forward_alloc_add(sk, msk->rmem_fwd_alloc);
+ WRITE_ONCE(msk->rmem_fwd_alloc, 0);
mptcp_token_destroy(msk);
mptcp_pm_free_anno_list(msk);
mptcp_free_local_addr_list(msk);
@@ -3253,10 +3313,8 @@ static void mptcp_destroy(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- /* clears msk->subflow, allowing the following to close
- * even the initial subflow
- */
- mptcp_dispose_initial_subflow(msk);
+ /* allow the following to close even the initial subflow */
+ msk->free_first = 1;
mptcp_destroy_common(msk, 0);
sk_sockets_allocated_dec(sk);
}
@@ -3336,8 +3394,6 @@ static void mptcp_release_cb(struct sock *sk)
__mptcp_set_connected(sk);
if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
__mptcp_error_report(sk);
- if (__test_and_clear_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags))
- msk->last_snd = NULL;
}
__mptcp_update_rmem(sk);
@@ -3406,14 +3462,12 @@ static void mptcp_unhash(struct sock *sk)
static int mptcp_get_port(struct sock *sk, unsigned short snum)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
- ssock = msk->subflow;
- pr_debug("msk=%p, subflow=%p", msk, ssock);
- if (WARN_ON_ONCE(!ssock))
+ pr_debug("msk=%p, ssk=%p", msk, msk->first);
+ if (WARN_ON_ONCE(!msk->first))
return -EINVAL;
- return inet_csk_get_port(ssock->sk, snum);
+ return inet_csk_get_port(msk->first, snum);
}
void mptcp_finish_connect(struct sock *ssk)
@@ -3513,7 +3567,8 @@ static void mptcp_shutdown(struct sock *sk, int how)
static int mptcp_forward_alloc_get(const struct sock *sk)
{
- return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
+ return READ_ONCE(sk->sk_forward_alloc) +
+ READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc);
}
static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
@@ -3588,25 +3643,24 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
- struct socket *ssock;
int err = -EINVAL;
+ struct sock *ssk;
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock))
- return PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk))
+ return PTR_ERR(ssk);
- mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_SYN_SENT);
- subflow = mptcp_subflow_ctx(ssock->sk);
+ subflow = mptcp_subflow_ctx(ssk);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
* TCP option space.
*/
- if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
+ if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
mptcp_subflow_early_fallback(msk, subflow);
#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
- MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
+ if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
mptcp_subflow_early_fallback(msk, subflow);
}
if (likely(!__mptcp_check_fallback(msk)))
@@ -3615,25 +3669,42 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* if reaching here via the fastopen/sendmsg path, the caller already
* acquired the subflow socket lock, too.
*/
- if (msk->fastopening)
- err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
- else
- err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
- inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
+ if (!msk->fastopening)
+ lock_sock(ssk);
+
+ /* the following mirrors closely a very small chunk of code from
+ * __inet_stream_connect()
+ */
+ if (ssk->sk_state != TCP_CLOSE)
+ goto out;
+
+ if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) {
+ err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len);
+ if (err)
+ goto out;
+ }
+
+ err = ssk->sk_prot->connect(ssk, uaddr, addr_len);
+ if (err < 0)
+ goto out;
+
+ inet_assign_bit(DEFER_CONNECT, sk, inet_test_bit(DEFER_CONNECT, ssk));
+
+out:
+ if (!msk->fastopening)
+ release_sock(ssk);
/* on successful connect, the msk state will be moved to established by
* subflow_finish_connect()
*/
- if (unlikely(err && err != -EINPROGRESS)) {
- inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ if (unlikely(err)) {
+ /* avoid leaving a dangling token in an unconnected socket */
+ mptcp_token_destroy(msk);
+ inet_sk_state_store(sk, TCP_CLOSE);
return err;
}
- mptcp_copy_inaddrs(sk, ssock->sk);
-
- /* silence EINPROGRESS and let the caller inet_stream_connect
- * handle the connection in progress
- */
+ mptcp_copy_inaddrs(sk, ssk);
return 0;
}
@@ -3674,22 +3745,27 @@ static struct proto mptcp_prot = {
static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
- struct socket *ssock;
- int err;
+ struct sock *ssk, *sk = sock->sk;
+ int err = -EINVAL;
- lock_sock(sock->sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ lock_sock(sk);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ err = PTR_ERR(ssk);
goto unlock;
}
- err = ssock->ops->bind(ssock, uaddr, addr_len);
+ if (sk->sk_family == AF_INET)
+ err = inet_bind_sk(ssk, uaddr, addr_len);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ else if (sk->sk_family == AF_INET6)
+ err = inet6_bind_sk(ssk, uaddr, addr_len);
+#endif
if (!err)
- mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ mptcp_copy_inaddrs(sk, ssk);
unlock:
- release_sock(sock->sk);
+ release_sock(sk);
return err;
}
@@ -3697,7 +3773,7 @@ static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct sock *sk = sock->sk;
- struct socket *ssock;
+ struct sock *ssk;
int err;
pr_debug("msk=%p", msk);
@@ -3708,22 +3784,24 @@ static int mptcp_listen(struct socket *sock, int backlog)
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
goto unlock;
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
- err = PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ err = PTR_ERR(ssk);
goto unlock;
}
- mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_LISTEN);
sock_set_flag(sk, SOCK_RCU_FREE);
- err = ssock->ops->listen(ssock, backlog);
- inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ lock_sock(ssk);
+ err = __inet_listen_sk(ssk, backlog);
+ release_sock(ssk);
+ inet_sk_state_store(sk, inet_sk_state_load(ssk));
+
if (!err) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- mptcp_copy_inaddrs(sk, ssock->sk);
- mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
+ mptcp_copy_inaddrs(sk, ssk);
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
}
unlock:
@@ -3735,8 +3813,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
int flags, bool kern)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
- struct socket *ssock;
- struct sock *newsk;
+ struct sock *ssk, *newsk;
int err;
pr_debug("msk=%p", msk);
@@ -3744,11 +3821,11 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* Buggy applications can call accept on socket states other then LISTEN
* but no need to allocate the first subflow just to error out.
*/
- ssock = READ_ONCE(msk->subflow);
- if (!ssock)
+ ssk = READ_ONCE(msk->first);
+ if (!ssk)
return -EINVAL;
- newsk = mptcp_accept(sock->sk, flags, &err, kern);
+ newsk = mptcp_accept(ssk, flags, &err, kern);
if (!newsk)
return err;
@@ -3775,11 +3852,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* Do late cleanup for the first subflow as necessary. Also
* deal with bad peers not doing a complete shutdown.
*/
- if (msk->first &&
- unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
+ if (unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
__mptcp_close_ssk(newsk, msk->first,
mptcp_subflow_ctx(msk->first), 0);
- if (unlikely(list_empty(&msk->conn_list)))
+ if (unlikely(list_is_singular(&msk->conn_list)))
inet_sk_state_store(newsk, TCP_CLOSE);
}
}
@@ -3818,12 +3894,12 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
state = inet_sk_state_load(sk);
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
if (state == TCP_LISTEN) {
- struct socket *ssock = READ_ONCE(msk->subflow);
+ struct sock *ssk = READ_ONCE(msk->first);
- if (WARN_ON_ONCE(!ssock || !ssock->sk))
+ if (WARN_ON_ONCE(!ssk))
return 0;
- return inet_csk_listen_poll(ssock->sk);
+ return inet_csk_listen_poll(ssk);
}
shutdown = READ_ONCE(sk->sk_shutdown);
@@ -3838,7 +3914,8 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
mask |= EPOLLOUT | EPOLLWRNORM;
else
mask |= mptcp_check_writeable(msk);
- } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
+ } else if (state == TCP_SYN_SENT &&
+ inet_test_bit(DEFER_CONNECT, sk)) {
/* cf tcp_poll() note about TFO */
mask |= EPOLLOUT | EPOLLWRNORM;
}
@@ -3934,6 +4011,7 @@ void __init mptcp_proto_init(void)
mptcp_subflow_init();
mptcp_pm_init();
+ mptcp_sched_init();
mptcp_token_init();
if (proto_register(&mptcp_prot, 1) != 0)
@@ -3987,6 +4065,7 @@ int __init mptcp_proto_v6_init(void)
strcpy(mptcp_v6_prot.name, "MPTCPv6");
mptcp_v6_prot.slab = NULL;
mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock);
+ mptcp_v6_prot.ipv6_pinfo_offset = offsetof(struct mptcp6_sock, np);
err = proto_register(&mptcp_v6_prot, 1);
if (err)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index ba2a873a4d2e..ed61d6850cce 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -123,7 +123,6 @@
#define MPTCP_RETRANSMIT 4
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_CONNECTED 6
-#define MPTCP_RESET_SCHEDULER 7
struct mptcp_skb_cb {
u64 map_seq;
@@ -269,7 +268,6 @@ struct mptcp_sock {
u64 rcv_data_fin_seq;
u64 bytes_retrans;
int rmem_fwd_alloc;
- struct sock *last_snd;
int snd_burst;
int old_wspace;
u64 recovery_snd_nxt; /* in recovery mode accept up to this seq;
@@ -299,7 +297,8 @@ struct mptcp_sock {
cork:1,
nodelay:1,
fastopening:1,
- in_accept_queue:1;
+ in_accept_queue:1,
+ free_first:1;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -308,19 +307,19 @@ struct mptcp_sock {
struct list_head rtx_queue;
struct mptcp_data_frag *first_pending;
struct list_head join_list;
- struct socket *subflow; /* outgoing connect/listener/!mp_capable
- * The mptcp ops can safely dereference, using suitable
- * ONCE annotation, the subflow outside the socket
- * lock as such sock is freed after close().
- */
- struct sock *first;
+ struct sock *first; /* The mptcp ops can safely dereference, using suitable
+ * ONCE annotation, the subflow outside the socket
+ * lock as such sock is freed after close().
+ */
struct mptcp_pm_data pm;
+ struct mptcp_sched_ops *sched;
struct {
u32 space; /* bytes copied in last measurement window */
u32 copied; /* bytes copied in this measurement window */
u64 time; /* start time of measurement window */
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
+ u8 scaling_ratio;
u32 subflow_id;
u32 setsockopt_seq;
@@ -350,9 +349,14 @@ static inline int __mptcp_rmem(const struct sock *sk)
return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
}
+static inline int mptcp_win_from_space(const struct sock *sk, int space)
+{
+ return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space);
+}
+
static inline int __mptcp_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
+ return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
@@ -487,6 +491,7 @@ struct mptcp_subflow_context {
is_mptfo : 1, /* subflow is doing TFO */
__unused : 9;
enum mptcp_data_avail data_avail;
+ bool scheduled;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -620,6 +625,7 @@ int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
+const char *mptcp_get_scheduler(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
@@ -634,7 +640,7 @@ void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
-struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk);
+struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void __mptcp_unaccepted_force_close(struct sock *sk);
@@ -652,6 +658,19 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
+struct mptcp_sched_ops *mptcp_sched_find(const char *name);
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
+void mptcp_sched_init(void);
+int mptcp_init_sched(struct mptcp_sock *msk,
+ struct mptcp_sched_ops *sched);
+void mptcp_release_sched(struct mptcp_sock *msk);
+void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
+ bool scheduled);
+struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
+struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
+int mptcp_sched_get_send(struct mptcp_sock *msk);
+int mptcp_sched_get_retrans(struct mptcp_sock *msk);
static inline bool __tcp_can_send(const struct sock *ssk)
{
@@ -699,7 +718,29 @@ void mptcp_get_options(const struct sk_buff *skb,
void mptcp_finish_connect(struct sock *sk);
void __mptcp_set_connected(struct sock *sk);
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout);
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);
+
+static inline void mptcp_stop_tout_timer(struct sock *sk)
+{
+ if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
+ return;
+
+ sk_stop_timer(sk, &sk->sk_timer);
+ inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
+}
+
+static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout)
+{
+ /* avoid 0 timestamp, as that means no close timeout */
+ inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1;
+}
+
+static inline void mptcp_start_tout_timer(struct sock *sk)
+{
+ mptcp_set_close_tout(sk, tcp_jiffies32);
+ mptcp_reset_tout_timer(mptcp_sk(sk), 0);
+}
+
static inline bool mptcp_is_fully_established(struct sock *sk)
{
return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
new file mode 100644
index 000000000000..4ab0693c069c
--- /dev/null
+++ b/net/mptcp/sched.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2022, SUSE.
+ */
+
+#define pr_fmt(fmt) "MPTCP: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include "protocol.h"
+
+static DEFINE_SPINLOCK(mptcp_sched_list_lock);
+static LIST_HEAD(mptcp_sched_list);
+
+static int mptcp_sched_default_get_subflow(struct mptcp_sock *msk,
+ struct mptcp_sched_data *data)
+{
+ struct sock *ssk;
+
+ ssk = data->reinject ? mptcp_subflow_get_retrans(msk) :
+ mptcp_subflow_get_send(msk);
+ if (!ssk)
+ return -EINVAL;
+
+ mptcp_subflow_set_scheduled(mptcp_subflow_ctx(ssk), true);
+ return 0;
+}
+
+static struct mptcp_sched_ops mptcp_sched_default = {
+ .get_subflow = mptcp_sched_default_get_subflow,
+ .name = "default",
+ .owner = THIS_MODULE,
+};
+
+/* Must be called with rcu read lock held */
+struct mptcp_sched_ops *mptcp_sched_find(const char *name)
+{
+ struct mptcp_sched_ops *sched, *ret = NULL;
+
+ list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
+ if (!strcmp(sched->name, name)) {
+ ret = sched;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
+{
+ if (!sched->get_subflow)
+ return -EINVAL;
+
+ spin_lock(&mptcp_sched_list_lock);
+ if (mptcp_sched_find(sched->name)) {
+ spin_unlock(&mptcp_sched_list_lock);
+ return -EEXIST;
+ }
+ list_add_tail_rcu(&sched->list, &mptcp_sched_list);
+ spin_unlock(&mptcp_sched_list_lock);
+
+ pr_debug("%s registered", sched->name);
+ return 0;
+}
+
+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched)
+{
+ if (sched == &mptcp_sched_default)
+ return;
+
+ spin_lock(&mptcp_sched_list_lock);
+ list_del_rcu(&sched->list);
+ spin_unlock(&mptcp_sched_list_lock);
+}
+
+void mptcp_sched_init(void)
+{
+ mptcp_register_scheduler(&mptcp_sched_default);
+}
+
+int mptcp_init_sched(struct mptcp_sock *msk,
+ struct mptcp_sched_ops *sched)
+{
+ if (!sched)
+ sched = &mptcp_sched_default;
+
+ if (!bpf_try_module_get(sched, sched->owner))
+ return -EBUSY;
+
+ msk->sched = sched;
+ if (msk->sched->init)
+ msk->sched->init(msk);
+
+ pr_debug("sched=%s", msk->sched->name);
+
+ return 0;
+}
+
+void mptcp_release_sched(struct mptcp_sock *msk)
+{
+ struct mptcp_sched_ops *sched = msk->sched;
+
+ if (!sched)
+ return;
+
+ msk->sched = NULL;
+ if (sched->release)
+ sched->release(msk);
+
+ bpf_module_put(sched, sched->owner);
+}
+
+void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
+ bool scheduled)
+{
+ WRITE_ONCE(subflow->scheduled, scheduled);
+}
+
+int mptcp_sched_get_send(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sched_data data;
+
+ msk_owned_by_me(msk);
+
+ /* the following check is moved out of mptcp_subflow_get_send */
+ if (__mptcp_check_fallback(msk)) {
+ if (msk->first &&
+ __tcp_can_send(msk->first) &&
+ sk_stream_memory_free(msk->first)) {
+ mptcp_subflow_set_scheduled(mptcp_subflow_ctx(msk->first), true);
+ return 0;
+ }
+ return -EINVAL;
+ }
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled))
+ return 0;
+ }
+
+ data.reinject = false;
+ if (msk->sched == &mptcp_sched_default || !msk->sched)
+ return mptcp_sched_default_get_subflow(msk, &data);
+ return msk->sched->get_subflow(msk, &data);
+}
+
+int mptcp_sched_get_retrans(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sched_data data;
+
+ msk_owned_by_me(msk);
+
+ /* the following check is moved out of mptcp_subflow_get_retrans */
+ if (__mptcp_check_fallback(msk))
+ return -EINVAL;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->scheduled))
+ return 0;
+ }
+
+ data.reinject = true;
+ if (msk->sched == &mptcp_sched_default || !msk->sched)
+ return mptcp_sched_default_get_subflow(msk, &data);
+ return msk->sched->get_subflow(msk, &data);
+}
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index a3f1fe810cc9..8260202c0066 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -292,7 +292,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
- struct socket *ssock;
+ struct sock *ssk;
int ret;
switch (optname) {
@@ -301,22 +301,22 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_BINDTODEVICE:
case SO_BINDTOIFINDEX:
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
release_sock(sk);
- return PTR_ERR(ssock);
+ return PTR_ERR(ssk);
}
- ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
+ ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen);
if (ret == 0) {
if (optname == SO_REUSEPORT)
- sk->sk_reuseport = ssock->sk->sk_reuseport;
+ sk->sk_reuseport = ssk->sk_reuseport;
else if (optname == SO_REUSEADDR)
- sk->sk_reuse = ssock->sk->sk_reuse;
+ sk->sk_reuse = ssk->sk_reuse;
else if (optname == SO_BINDTODEVICE)
- sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
+ sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
else if (optname == SO_BINDTOIFINDEX)
- sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
+ sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
}
release_sock(sk);
return ret;
@@ -390,20 +390,20 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
{
struct sock *sk = (struct sock *)msk;
int ret = -EOPNOTSUPP;
- struct socket *ssock;
+ struct sock *ssk;
switch (optname) {
case IPV6_V6ONLY:
case IPV6_TRANSPARENT:
case IPV6_FREEBIND:
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
release_sock(sk);
- return PTR_ERR(ssock);
+ return PTR_ERR(ssk);
}
- ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
+ ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen);
if (ret != 0) {
release_sock(sk);
return ret;
@@ -413,13 +413,15 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
switch (optname) {
case IPV6_V6ONLY:
- sk->sk_ipv6only = ssock->sk->sk_ipv6only;
+ sk->sk_ipv6only = ssk->sk_ipv6only;
break;
case IPV6_TRANSPARENT:
- inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent;
+ inet_assign_bit(TRANSPARENT, sk,
+ inet_test_bit(TRANSPARENT, ssk));
break;
case IPV6_FREEBIND:
- inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind;
+ inet_assign_bit(FREEBIND, sk,
+ inet_test_bit(FREEBIND, ssk));
break;
}
@@ -684,8 +686,7 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
- struct inet_sock *issk;
- struct socket *ssock;
+ struct sock *ssk;
int err;
err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
@@ -694,20 +695,19 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
lock_sock(sk);
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
release_sock(sk);
- return PTR_ERR(ssock);
+ return PTR_ERR(ssk);
}
- issk = inet_sk(ssock->sk);
-
switch (optname) {
case IP_FREEBIND:
- issk->freebind = inet_sk(sk)->freebind;
+ inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
break;
case IP_TRANSPARENT:
- issk->transparent = inet_sk(sk)->transparent;
+ inet_assign_bit(TRANSPARENT, ssk,
+ inet_test_bit(TRANSPARENT, sk));
break;
default:
release_sock(sk);
@@ -763,18 +763,18 @@ static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
- struct socket *sock;
+ struct sock *ssk;
int ret;
/* Limit to first subflow, before the connection establishment */
lock_sock(sk);
- sock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(sock)) {
- ret = PTR_ERR(sock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ ret = PTR_ERR(ssk);
goto unlock;
}
- ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
+ ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
unlock:
release_sock(sk);
@@ -864,9 +864,8 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
char __user *optval, int __user *optlen)
{
struct sock *sk = (struct sock *)msk;
- struct socket *ssock;
- int ret;
struct sock *ssk;
+ int ret;
lock_sock(sk);
ssk = msk->first;
@@ -875,13 +874,13 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
goto out;
}
- ssock = __mptcp_nmpc_socket(msk);
- if (IS_ERR(ssock)) {
- ret = PTR_ERR(ssock);
+ ssk = __mptcp_nmpc_sk(msk);
+ if (IS_ERR(ssk)) {
+ ret = PTR_ERR(ssk);
goto out;
}
- ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen);
+ ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
out:
release_sock(sk);
@@ -1441,8 +1440,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
__tcp_sock_set_cork(ssk, !!msk->cork);
__tcp_sock_set_nodelay(ssk, !!msk->nodelay);
- inet_sk(ssk)->transparent = inet_sk(sk)->transparent;
- inet_sk(ssk)->freebind = inet_sk(sk)->freebind;
+ inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
+ inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
}
static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 94ae7dd01c65..918c1a235790 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1226,7 +1226,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
WRITE_ONCE(subflow->fail_tout, fail_tout);
tcp_send_ack(ssk);
- mptcp_reset_timeout(msk, subflow->fail_tout);
+ mptcp_reset_tout_timer(msk, subflow->fail_tout);
}
static bool subflow_check_data_avail(struct sock *ssk)
@@ -1359,43 +1359,7 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
const struct sock *sk = subflow->conn;
*space = __mptcp_space(sk);
- *full_space = tcp_full_space(sk);
-}
-
-void __mptcp_error_report(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow;
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- int err = sock_error(ssk);
- int ssk_state;
-
- if (!err)
- continue;
-
- /* only propagate errors on fallen-back sockets or
- * on MPC connect
- */
- if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
- continue;
-
- /* We need to propagate only transition to CLOSE state.
- * Orphaned socket will see such state change via
- * subflow_sched_work_if_closed() and that path will properly
- * destroy the msk as needed.
- */
- ssk_state = inet_sk_state_load(ssk);
- if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
- inet_sk_state_store(sk, ssk_state);
- WRITE_ONCE(sk->sk_err, -err);
-
- /* This barrier is coupled with smp_rmb() in mptcp_poll() */
- smp_wmb();
- sk_error_report(sk);
- break;
- }
+ *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
static void subflow_error_report(struct sock *ssk)
@@ -1588,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
WRITE_ONCE(msk->allow_infinite_fallback, false);
+ mptcp_stop_tout_timer(sk);
return 0;
failed_unlink:
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 62fb1031763d..f8854bff286c 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -89,6 +89,11 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
if ((had_link == has_link) || chained)
return 0;
+ if (had_link)
+ netif_carrier_off(ndp->ndev.dev);
+ else
+ netif_carrier_on(ndp->ndev.dev);
+
if (!ndp->multi_package && !nc->package->multi_channel) {
if (had_link) {
ndp->flags |= NCSI_DEV_RESHUFFLE;
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index d27f4eccce6d..a3a6753a1db7 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -563,7 +563,7 @@ int ncsi_send_netlink_timeout(struct ncsi_request *nr,
int ncsi_send_netlink_err(struct net_device *dev,
u32 snd_seq,
u32 snd_portid,
- struct nlmsghdr *nlhdr,
+ const struct nlmsghdr *nlhdr,
int err)
{
struct nlmsghdr *nlh;
diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h
index 39a1a9d7bf77..747767ea0aae 100644
--- a/net/ncsi/ncsi-netlink.h
+++ b/net/ncsi/ncsi-netlink.h
@@ -19,7 +19,7 @@ int ncsi_send_netlink_timeout(struct ncsi_request *nr,
int ncsi_send_netlink_err(struct net_device *dev,
u32 snd_seq,
u32 snd_portid,
- struct nlmsghdr *nlhdr,
+ const struct nlmsghdr *nlhdr,
int err);
#endif /* __NCSI_NETLINK_H__ */
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 5f76ae86a656..ef4e76e5aef9 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -680,6 +680,12 @@ EXPORT_SYMBOL_GPL(nfnl_ct_hook);
const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_hook);
+const struct nf_defrag_hook __rcu *nf_defrag_v4_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_defrag_v4_hook);
+
+const struct nf_defrag_hook __rcu *nf_defrag_v6_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nf_defrag_v6_hook);
+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
u8 nf_ctnetlink_has_listener;
EXPORT_SYMBOL_GPL(nf_ctnetlink_has_listener);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 0b68e2e2824e..35d2f9c9ada0 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -683,6 +683,14 @@ __ip_set_put(struct ip_set *set)
* a separate reference counter
*/
static void
+__ip_set_get_netlink(struct ip_set *set)
+{
+ write_lock_bh(&ip_set_ref_lock);
+ set->ref_netlink++;
+ write_unlock_bh(&ip_set_ref_lock);
+}
+
+static void
__ip_set_put_netlink(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -872,7 +880,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
BUG_ON(!set);
read_lock_bh(&ip_set_ref_lock);
- strncpy(name, set->name, IPSET_MAXNAMELEN);
+ strscpy_pad(name, set->name, IPSET_MAXNAMELEN);
read_unlock_bh(&ip_set_ref_lock);
}
EXPORT_SYMBOL_GPL(ip_set_name_byindex);
@@ -1326,7 +1334,7 @@ static int ip_set_rename(struct sk_buff *skb, const struct nfnl_info *info,
goto out;
}
}
- strncpy(set->name, name2, IPSET_MAXNAMELEN);
+ strscpy_pad(set->name, name2, IPSET_MAXNAMELEN);
out:
write_unlock_bh(&ip_set_ref_lock);
@@ -1380,9 +1388,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
return -EBUSY;
}
- strncpy(from_name, from->name, IPSET_MAXNAMELEN);
- strncpy(from->name, to->name, IPSET_MAXNAMELEN);
- strncpy(to->name, from_name, IPSET_MAXNAMELEN);
+ strscpy_pad(from_name, from->name, IPSET_MAXNAMELEN);
+ strscpy_pad(from->name, to->name, IPSET_MAXNAMELEN);
+ strscpy_pad(to->name, from_name, IPSET_MAXNAMELEN);
swap(from->ref, to->ref);
ip_set(inst, from_id) = to;
@@ -1693,11 +1701,11 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
do {
if (retried) {
- __ip_set_get(set);
+ __ip_set_get_netlink(set);
nfnl_unlock(NFNL_SUBSYS_IPSET);
cond_resched();
nfnl_lock(NFNL_SUBSYS_IPSET);
- __ip_set_put(set);
+ __ip_set_put_netlink(set);
}
ip_set_lock(set);
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 005a7ce87217..bf4f91b78e1d 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -36,6 +36,7 @@ MODULE_ALIAS("ip_set_hash:net,port,net");
#define IP_SET_HASH_WITH_PROTO
#define IP_SET_HASH_WITH_NETS
#define IPSET_NET_COUNT 2
+#define IP_SET_HASH_WITH_NET0
/* IPv4 variant */
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index cb83ca506c5c..3230506ae3ff 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1346,7 +1346,7 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
+ if (sk->sk_family == PF_INET && inet_test_bit(NODEFRAG, sk))
return NF_ACCEPT;
}
@@ -1946,7 +1946,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
+ if (sk->sk_family == PF_INET && inet_test_bit(NODEFRAG, sk))
return NF_ACCEPT;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 4bb0d90eca1c..143a341bbc0a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -4269,6 +4269,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
struct net *net = ipvs->net;
struct ctl_table *tbl;
int idx, ret;
+ size_t ctl_table_size = ARRAY_SIZE(vs_vars);
atomic_set(&ipvs->dropentry, 0);
spin_lock_init(&ipvs->dropentry_lock);
@@ -4285,8 +4286,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
+ if (net->user_ns != &init_user_ns) {
tbl[0].procname = NULL;
+ ctl_table_size = 0;
+ }
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
@@ -4357,7 +4360,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
#endif
ret = -ENOMEM;
- ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
+ ipvs->sysctl_hdr = register_net_sysctl_sz(net, "net/ipv4/vs", tbl,
+ ctl_table_size);
if (!ipvs->sysctl_hdr)
goto err;
ipvs->sysctl_tbl = tbl;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 1b87214d385e..cf78ba4ce5ff 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -550,6 +550,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = {
static int __net_init __ip_vs_lblc_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
+ size_t vars_table_size = ARRAY_SIZE(vs_vars_table);
if (!ipvs)
return -ENOENT;
@@ -562,16 +563,19 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
+ if (net->user_ns != &init_user_ns) {
ipvs->lblc_ctl_table[0].procname = NULL;
+ vars_table_size = 0;
+ }
} else
ipvs->lblc_ctl_table = vs_vars_table;
ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
- ipvs->lblc_ctl_header =
- register_net_sysctl(net, "net/ipv4/vs", ipvs->lblc_ctl_table);
+ ipvs->lblc_ctl_header = register_net_sysctl_sz(net, "net/ipv4/vs",
+ ipvs->lblc_ctl_table,
+ vars_table_size);
if (!ipvs->lblc_ctl_header) {
if (!net_eq(net, &init_net))
kfree(ipvs->lblc_ctl_table);
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index ad8f5fea6d3a..9eddf118b40e 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -736,6 +736,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
static int __net_init __ip_vs_lblcr_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
+ size_t vars_table_size = ARRAY_SIZE(vs_vars_table);
if (!ipvs)
return -ENOENT;
@@ -748,15 +749,18 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
return -ENOMEM;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
+ if (net->user_ns != &init_user_ns) {
ipvs->lblcr_ctl_table[0].procname = NULL;
+ vars_table_size = 0;
+ }
} else
ipvs->lblcr_ctl_table = vs_vars_table;
ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
- ipvs->lblcr_ctl_header =
- register_net_sysctl(net, "net/ipv4/vs", ipvs->lblcr_ctl_table);
+ ipvs->lblcr_ctl_header = register_net_sysctl_sz(net, "net/ipv4/vs",
+ ipvs->lblcr_ctl_table,
+ vars_table_size);
if (!ipvs->lblcr_ctl_header) {
if (!net_eq(net, &init_net))
kfree(ipvs->lblcr_ctl_table);
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 264f2f87a437..da5af28ff57b 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1297,11 +1297,9 @@ static void set_sock_size(struct sock *sk, int mode, int val)
*/
static void set_mcast_loop(struct sock *sk, u_char loop)
{
- struct inet_sock *inet = inet_sk(sk);
-
/* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */
lock_sock(sk);
- inet->mc_loop = loop ? 1 : 0;
+ inet_assign_bit(MC_LOOP, sk, loop);
#ifdef CONFIG_IP_VS_IPV6
if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index c36da56d756f..e502ec00b2fe 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/kmod.h>
+#include <linux/module.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_bpf_link.h>
@@ -23,8 +25,90 @@ struct bpf_nf_link {
struct nf_hook_ops hook_ops;
struct net *net;
u32 dead;
+ const struct nf_defrag_hook *defrag_hook;
};
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+static const struct nf_defrag_hook *
+get_proto_defrag_hook(struct bpf_nf_link *link,
+ const struct nf_defrag_hook __rcu *global_hook,
+ const char *mod)
+{
+ const struct nf_defrag_hook *hook;
+ int err;
+
+ /* RCU protects us from races against module unloading */
+ rcu_read_lock();
+ hook = rcu_dereference(global_hook);
+ if (!hook) {
+ rcu_read_unlock();
+ err = request_module(mod);
+ if (err)
+ return ERR_PTR(err < 0 ? err : -EINVAL);
+
+ rcu_read_lock();
+ hook = rcu_dereference(global_hook);
+ }
+
+ if (hook && try_module_get(hook->owner)) {
+ /* Once we have a refcnt on the module, we no longer need RCU */
+ hook = rcu_pointer_handoff(hook);
+ } else {
+ WARN_ONCE(!hook, "%s has bad registration", mod);
+ hook = ERR_PTR(-ENOENT);
+ }
+ rcu_read_unlock();
+
+ if (!IS_ERR(hook)) {
+ err = hook->enable(link->net);
+ if (err) {
+ module_put(hook->owner);
+ hook = ERR_PTR(err);
+ }
+ }
+
+ return hook;
+}
+#endif
+
+static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
+{
+ const struct nf_defrag_hook __maybe_unused *hook;
+
+ switch (link->hook_ops.pf) {
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+ case NFPROTO_IPV4:
+ hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4");
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ link->defrag_hook = hook;
+ return 0;
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ case NFPROTO_IPV6:
+ hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6");
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ link->defrag_hook = hook;
+ return 0;
+#endif
+ default:
+ return -EAFNOSUPPORT;
+ }
+}
+
+static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
+{
+ const struct nf_defrag_hook *hook = link->defrag_hook;
+
+ if (!hook)
+ return;
+ hook->disable(link->net);
+ module_put(hook->owner);
+}
+
static void bpf_nf_link_release(struct bpf_link *link)
{
struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
@@ -32,11 +116,11 @@ static void bpf_nf_link_release(struct bpf_link *link)
if (nf_link->dead)
return;
- /* prevent hook-not-found warning splat from netfilter core when
- * .detach was already called
- */
- if (!cmpxchg(&nf_link->dead, 0, 1))
+ /* do not double release in case .detach was already called */
+ if (!cmpxchg(&nf_link->dead, 0, 1)) {
nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
+ bpf_nf_disable_defrag(nf_link);
+ }
}
static void bpf_nf_link_dealloc(struct bpf_link *link)
@@ -92,6 +176,8 @@ static const struct bpf_link_ops bpf_nf_link_lops = {
static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
{
+ int prio;
+
switch (attr->link_create.netfilter.pf) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
@@ -102,19 +188,18 @@ static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
return -EAFNOSUPPORT;
}
- if (attr->link_create.netfilter.flags)
+ if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG)
return -EOPNOTSUPP;
- /* make sure conntrack confirm is always last.
- *
- * In the future, if userspace can e.g. request defrag, then
- * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
- * should fail.
- */
- switch (attr->link_create.netfilter.priority) {
- case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */
- case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */
- }
+ /* make sure conntrack confirm is always last */
+ prio = attr->link_create.netfilter.priority;
+ if (prio == NF_IP_PRI_FIRST)
+ return -ERANGE; /* sabotage_in and other warts */
+ else if (prio == NF_IP_PRI_LAST)
+ return -ERANGE; /* e.g. conntrack confirm */
+ else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
+ prio <= NF_IP_PRI_CONNTRACK_DEFRAG)
+ return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */
return 0;
}
@@ -149,6 +234,7 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
link->net = net;
link->dead = false;
+ link->defrag_hook = NULL;
err = bpf_link_prime(&link->link, &link_primer);
if (err) {
@@ -156,8 +242,17 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
return err;
}
+ if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) {
+ err = bpf_nf_enable_defrag(link);
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ return err;
+ }
+ }
+
err = nf_register_net_hook(net, &link->hook_ops);
if (err) {
+ bpf_nf_disable_defrag(link);
bpf_link_cleanup(&link_primer);
return err;
}
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 0d36d7285e3f..b21799d468d2 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/btf_ids.h>
#include <linux/net_namespace.h>
+#include <net/xdp.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -380,6 +381,8 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
struct nf_conn *nfct = (struct nf_conn *)nfct_i;
int err;
+ if (!nf_ct_is_confirmed(nfct))
+ nfct->timeout += nfct_time_stamp;
nfct->status |= IPS_CONFIRMED;
err = nf_conntrack_hash_check_insert(nfct);
if (err < 0) {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 992393102d5f..9f6f2e643575 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1756,7 +1756,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
cnet = nf_ct_pernet(net);
if (cnet->expect_count) {
spin_lock_bh(&nf_conntrack_expect_lock);
- exp = nf_ct_find_expectation(net, zone, tuple);
+ exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl));
if (exp) {
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &ct->status);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 96948e98ec53..81ca348915c9 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
- const struct nf_conntrack_tuple *tuple)
+ const struct nf_conntrack_tuple *tuple, bool unlink)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct nf_conntrack_expect *i, *exp = NULL;
@@ -211,7 +211,7 @@ nf_ct_find_expectation(struct net *net,
!refcount_inc_not_zero(&exp->master->ct_general.use)))
return NULL;
- if (exp->flags & NF_CT_EXPECT_PERMANENT) {
+ if (exp->flags & NF_CT_EXPECT_PERMANENT || !unlink) {
refcount_inc(&exp->use);
return exp;
} else if (del_timer(&exp->timeout)) {
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 0b513f7bf9f3..dd62cc12e775 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -40,10 +40,10 @@ static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = {
[NF_CT_EXT_ECACHE] = sizeof(struct nf_conntrack_ecache),
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
- [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_acct),
+ [NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_tstamp),
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_tstamp),
+ [NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_timeout),
#endif
#ifdef CONFIG_NF_CONNTRACK_LABELS
[NF_CT_EXT_LABELS] = sizeof(struct nf_conn_labels),
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 69c8c8c7e9b8..334db22199c1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1321,15 +1321,11 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
struct nlattr *tb[CTA_IP_MAX+1];
int ret = 0;
- ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr, NULL, NULL);
+ ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr,
+ cta_ip_nla_policy, NULL);
if (ret < 0)
return ret;
- ret = nla_validate_nested_deprecated(attr, CTA_IP_MAX,
- cta_ip_nla_policy, NULL);
- if (ret)
- return ret;
-
switch (tuple->src.l3num) {
case NFPROTO_IPV4:
ret = ipv4_nlattr_to_tuple(tb, tuple, flags);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index d4fd626d2b8c..e2db1f4ec2df 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -69,6 +69,7 @@
#define DCCP_MSL (2 * 60 * HZ)
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
static const char * const dccp_state_names[] = {
[CT_DCCP_NONE] = "NONE",
[CT_DCCP_REQUEST] = "REQUEST",
@@ -81,6 +82,7 @@ static const char * const dccp_state_names[] = {
[CT_DCCP_IGNORE] = "IGNORE",
[CT_DCCP_INVALID] = "INVALID",
};
+#endif
#define sNO CT_DCCP_NONE
#define sRQ CT_DCCP_REQUEST
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 169e16fc2bce..0ee98ce5b816 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1106,7 +1106,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
}
- cnet->sysctl_header = register_net_sysctl(net, "net/netfilter", table);
+ cnet->sysctl_header = register_net_sysctl_sz(net, "net/netfilter",
+ table,
+ ARRAY_SIZE(nf_ct_sysctl_table));
if (!cnet->sysctl_header)
goto out_unregister_netfilter;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 1c26f03fc661..a010b25076ca 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -34,7 +34,7 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
{
struct nf_flow_key *mask = &match->mask;
struct nf_flow_key *key = &match->key;
- unsigned int enc_keys;
+ unsigned long long enc_keys;
if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
return;
@@ -43,8 +43,8 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
mask->enc_key_id.keyid = 0xffffffff;
- enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
- BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
+ enc_keys = BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL);
if (ip_tunnel_info_af(tun_info) == AF_INET) {
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
@@ -55,7 +55,7 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
mask->enc_ipv4.src = 0xffffffff;
if (key->enc_ipv4.dst)
mask->enc_ipv4.dst = 0xffffffff;
- enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
+ enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
} else {
memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
@@ -70,7 +70,7 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
sizeof(struct in6_addr)))
memset(&mask->enc_ipv6.dst, 0xff,
sizeof(struct in6_addr));
- enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
+ enc_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
@@ -163,14 +163,14 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
return -EOPNOTSUPP;
}
mask->control.addr_type = 0xffff;
- match->dissector.used_keys |= BIT(key->control.addr_type);
+ match->dissector.used_keys |= BIT_ULL(key->control.addr_type);
mask->basic.n_proto = 0xffff;
switch (tuple->l4proto) {
case IPPROTO_TCP:
key->tcp.flags = 0;
mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_TCP);
break;
case IPPROTO_UDP:
case IPPROTO_GRE:
@@ -182,9 +182,9 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
key->basic.ip_proto = tuple->l4proto;
mask->basic.ip_proto = 0xff;
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
- BIT(FLOW_DISSECTOR_KEY_CONTROL) |
- BIT(FLOW_DISSECTOR_KEY_BASIC);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_META) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_BASIC);
switch (tuple->l4proto) {
case IPPROTO_TCP:
@@ -194,7 +194,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
key->tp.dst = tuple->dst_port;
mask->tp.dst = 0xffff;
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_PORTS);
break;
}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8a29290149bd..8cc52d2bd31b 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -487,9 +487,10 @@ static int netfilter_log_sysctl_init(struct net *net)
for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
table[i].extra2 = net;
- net->nf.nf_log_dir_header = register_net_sysctl(net,
- "net/netfilter/nf_log",
- table);
+ net->nf.nf_log_dir_header = register_net_sysctl_sz(net,
+ "net/netfilter/nf_log",
+ table,
+ ARRAY_SIZE(nf_log_sysctl_table));
if (!net->nf.nf_log_dir_header)
goto err_reg;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index fadbd4ed3dc0..c4e0516a8dfa 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -327,7 +327,7 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
/* If we source map this tuple so reply looks like reply_tuple, will
* that meet the constraints of range.
*/
-static int in_range(const struct nf_conntrack_tuple *tuple,
+static int nf_in_range(const struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range)
{
/* If we are supposed to map IPs, then we must be in the
@@ -376,7 +376,7 @@ find_appropriate_src(struct net *net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
result->dst = tuple->dst;
- if (in_range(result, range))
+ if (nf_in_range(result, range))
return 1;
}
}
@@ -607,7 +607,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
if (maniptype == NF_NAT_MANIP_SRC &&
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
/* try the original tuple first */
- if (in_range(orig_tuple, range)) {
+ if (nf_in_range(orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
*tuple = *orig_tuple;
return;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3e841e45f2c0..4356189360fb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -102,6 +102,7 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
[NFT_MSG_NEWFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_REGISTER,
[NFT_MSG_GETFLOWTABLE] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_DELFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+ [NFT_MSG_GETSETELEM_RESET] = AUDIT_NFT_OP_SETELEM_RESET,
};
static void nft_validate_state_update(struct nft_table *table, u8 new_validate_state)
@@ -1218,6 +1219,10 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
flags & NFT_TABLE_F_OWNER))
return -EOPNOTSUPP;
+ /* No dormant off/on/off/on games in single transaction */
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return -EINVAL;
+
trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
sizeof(struct nft_trans_table));
if (trans == NULL)
@@ -1373,7 +1378,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
if (table == NULL)
goto err_kzalloc;
- table->validate_state = NFT_VALIDATE_SKIP;
+ table->validate_state = nft_net->validate_state;
table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
if (table->name == NULL)
goto err_strdup;
@@ -1431,7 +1436,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, chain))
continue;
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
continue;
ctx->chain = chain;
@@ -1445,8 +1450,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, set))
continue;
- if (nft_set_is_anonymous(set) &&
- !list_empty(&set->bindings))
+ if (nft_set_is_anonymous(set))
continue;
err = nft_delset(ctx, set);
@@ -1476,7 +1480,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, chain))
continue;
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
continue;
ctx->chain = chain;
@@ -2909,6 +2913,9 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
return PTR_ERR(chain);
}
+ if (nft_chain_binding(chain))
+ return -EOPNOTSUPP;
+
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (nla[NFTA_CHAIN_HOOK]) {
@@ -3421,6 +3428,18 @@ err:
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
+static void audit_log_rule_reset(const struct nft_table *table,
+ unsigned int base_seq,
+ unsigned int nentries)
+{
+ char *buf = kasprintf(GFP_ATOMIC, "%s:%u",
+ table->name, base_seq);
+
+ audit_log_nfcfg(buf, table->family, nentries,
+ AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
+ kfree(buf);
+}
+
struct nft_rule_dump_ctx {
char *table;
char *chain;
@@ -3436,6 +3455,8 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
const struct nft_rule *rule, *prule;
unsigned int s_idx = cb->args[0];
+ unsigned int entries = 0;
+ int ret = 0;
u64 handle;
prule = NULL;
@@ -3458,16 +3479,22 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
table->family,
- table, chain, rule, handle, reset) < 0)
- return 1;
-
+ table, chain, rule, handle, reset) < 0) {
+ ret = 1;
+ break;
+ }
+ entries++;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
prule = rule;
cont_skip:
(*idx)++;
}
- return 0;
+
+ if (reset && entries)
+ audit_log_rule_reset(table, cb->seq, entries);
+
+ return ret;
}
static int nf_tables_dump_rules(struct sk_buff *skb,
@@ -3634,6 +3661,9 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
if (err < 0)
goto err_fill_rule_info;
+ if (reset)
+ audit_log_rule_reset(table, nft_pernet(net)->base_seq, 1);
+
return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
err_fill_rule_info:
@@ -3675,6 +3705,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
return -EMLINK;
list_for_each_entry(rule, &chain->rules, list) {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
if (!nft_is_active_next(ctx->net, rule))
continue;
@@ -3948,6 +3981,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
+ if (nft_chain_binding(chain)) {
+ err = -EOPNOTSUPP;
+ goto err_destroy_flow_rule;
+ }
+
err = nft_delrule(&ctx, old_rule);
if (err < 0)
goto err_destroy_flow_rule;
@@ -4055,7 +4093,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
}
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
return -EOPNOTSUPP;
}
@@ -4089,7 +4127,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
continue;
ctx.chain = chain;
@@ -5621,13 +5659,25 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
}
+static void audit_log_nft_set_reset(const struct nft_table *table,
+ unsigned int base_seq,
+ unsigned int nentries)
+{
+ char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq);
+
+ audit_log_nfcfg(buf, table->family, nentries,
+ AUDIT_NFT_OP_SETELEM_RESET, GFP_ATOMIC);
+ kfree(buf);
+}
+
struct nft_set_dump_ctx {
const struct nft_set *set;
struct nft_ctx ctx;
};
static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
- const struct nft_set *set, bool reset)
+ const struct nft_set *set, bool reset,
+ unsigned int base_seq)
{
struct nft_set_elem_catchall *catchall;
u8 genmask = nft_genmask_cur(net);
@@ -5643,6 +5693,8 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
elem.priv = catchall->elem;
ret = nf_tables_fill_setelem(skb, set, &elem, reset);
+ if (reset && !ret)
+ audit_log_nft_set_reset(set->table, base_seq, 1);
break;
}
@@ -5722,12 +5774,17 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
set->ops->walk(&dump_ctx->ctx, set, &args.iter);
if (!args.iter.err && args.iter.count == cb->args[0])
- args.iter.err = nft_set_catchall_dump(net, skb, set, reset);
- rcu_read_unlock();
-
+ args.iter.err = nft_set_catchall_dump(net, skb, set,
+ reset, cb->seq);
nla_nest_end(skb, nest);
nlmsg_end(skb, nlh);
+ if (reset && args.iter.count > args.iter.skip)
+ audit_log_nft_set_reset(table, cb->seq,
+ args.iter.count - args.iter.skip);
+
+ rcu_read_unlock();
+
if (args.iter.err && args.iter.err != -EMSGSIZE)
return args.iter.err;
if (args.iter.count == cb->args[0])
@@ -5952,13 +6009,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
u8 family = info->nfmsg->nfgen_family;
+ int rem, err = 0, nelems = 0;
struct net *net = info->net;
struct nft_table *table;
struct nft_set *set;
struct nlattr *attr;
struct nft_ctx ctx;
bool reset = false;
- int rem, err = 0;
table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
genmask, 0);
@@ -6001,8 +6058,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
NL_SET_BAD_ATTR(extack, attr);
break;
}
+ nelems++;
}
+ if (reset)
+ audit_log_nft_set_reset(table, nft_pernet(net)->base_seq,
+ nelems);
+
return err;
}
@@ -7136,8 +7198,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings) &&
- (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
+ if (nft_set_is_anonymous(set))
+ return -EOPNOTSUPP;
+
+ if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT))
return -EBUSY;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@@ -9051,9 +9115,8 @@ static int nf_tables_validate(struct net *net)
return -EAGAIN;
nft_validate_state_update(table, NFT_VALIDATE_SKIP);
+ break;
}
-
- break;
}
return 0;
@@ -9457,9 +9520,9 @@ static void nft_trans_gc_work(struct work_struct *work)
struct nft_trans_gc *trans, *next;
LIST_HEAD(trans_gc_list);
- spin_lock(&nf_tables_destroy_list_lock);
+ spin_lock(&nf_tables_gc_list_lock);
list_splice_init(&nf_tables_gc_list, &trans_gc_list);
- spin_unlock(&nf_tables_destroy_list_lock);
+ spin_unlock(&nf_tables_gc_list_lock);
list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
list_del(&trans->list);
@@ -9516,12 +9579,15 @@ static int nft_trans_gc_space(struct nft_trans_gc *trans)
struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
unsigned int gc_seq, gfp_t gfp)
{
+ struct nft_set *set;
+
if (nft_trans_gc_space(gc))
return gc;
+ set = gc->set;
nft_trans_gc_queue_work(gc);
- return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+ return nft_trans_gc_alloc(set, gc_seq, gfp);
}
void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
@@ -9536,15 +9602,18 @@ void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
{
+ struct nft_set *set;
+
if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
return NULL;
if (nft_trans_gc_space(gc))
return gc;
+ set = gc->set;
call_rcu(&gc->rcu, nft_trans_gc_trans_free);
- return nft_trans_gc_alloc(gc->set, 0, gfp);
+ return nft_trans_gc_alloc(set, 0, gfp);
}
void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
@@ -9559,8 +9628,9 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
call_rcu(&trans->rcu, nft_trans_gc_trans_free);
}
-struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
- unsigned int gc_seq)
+static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+ unsigned int gc_seq,
+ bool sync)
{
struct nft_set_elem_catchall *catchall;
const struct nft_set *set = gc->set;
@@ -9576,7 +9646,11 @@ struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
nft_set_elem_dead(ext);
dead_elem:
- gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (sync)
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ else
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+
if (!gc)
return NULL;
@@ -9586,6 +9660,17 @@ dead_elem:
return gc;
}
+struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq)
+{
+ return nft_trans_gc_catchall(gc, gc_seq, false);
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
+{
+ return nft_trans_gc_catchall(gc, 0, true);
+}
+
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -9799,8 +9884,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
/* 0. Validate ruleset, otherwise roll back for error reporting. */
- if (nf_tables_validate(net) < 0)
+ if (nf_tables_validate(net) < 0) {
+ nft_net->validate_state = NFT_VALIDATE_DO;
return -EAGAIN;
+ }
err = nft_flow_rule_offload_commit(net);
if (err < 0)
@@ -10059,6 +10146,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
nft_gc_seq_end(nft_net, gc_seq);
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
nf_tables_commit_release(net);
return 0;
@@ -10335,8 +10423,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
enum nfnl_abort_action action)
{
struct nftables_pernet *nft_net = nft_pernet(net);
- int ret = __nf_tables_abort(net, action);
+ unsigned int gc_seq;
+ int ret;
+ gc_seq = nft_gc_seq_begin(nft_net);
+ ret = __nf_tables_abort(net, action);
+ nft_gc_seq_end(nft_net, gc_seq);
mutex_unlock(&nft_net->commit_mutex);
return ret;
@@ -10479,6 +10571,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
if (ctx->chain == chain)
return -ELOOP;
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
list_for_each_entry(rule, &chain->rules, list) {
nft_rule_for_each_expr(expr, last, rule) {
struct nft_immediate_expr *priv;
@@ -10998,7 +11093,7 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
ctx.family = table->family;
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
- if (nft_chain_is_bound(chain))
+ if (nft_chain_binding(chain))
continue;
ctx.chain = chain;
@@ -11071,7 +11166,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
gc_seq = nft_gc_seq_begin(nft_net);
if (!list_empty(&nf_tables_destroy_list))
- rcu_barrier();
+ nf_tables_trans_destroy_flush_work();
again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
@@ -11115,6 +11210,7 @@ static int __net_init nf_tables_init_net(struct net *net)
mutex_init(&nft_net->commit_mutex);
nft_net->base_seq = 1;
nft_net->gc_seq = 0;
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
return 0;
}
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 910ef881c3b8..12ab78fa5d84 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -35,12 +35,12 @@ void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
struct nft_flow_key *mask = &match->mask;
struct nft_flow_key *key = &match->key;
- if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL))
+ if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL))
return;
key->control.addr_type = addr_type;
mask->control.addr_type = 0xffff;
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL);
match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] =
offsetof(struct nft_flow_key, control);
}
@@ -59,7 +59,7 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
.mask = match->mask.basic.n_proto,
};
- if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_VLAN) &&
+ if (match->dissector.used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) &&
(match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) ||
match->key.vlan.vlan_tpid == htons(ETH_P_8021AD))) {
match->key.basic.n_proto = match->key.cvlan.vlan_tpid;
@@ -70,8 +70,9 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
match->mask.vlan.vlan_tpid = ethertype.mask;
match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] =
offsetof(struct nft_flow_key, cvlan);
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CVLAN);
- } else if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC) &&
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN);
+ } else if (match->dissector.used_keys &
+ BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) &&
(match->key.basic.n_proto == htons(ETH_P_8021Q) ||
match->key.basic.n_proto == htons(ETH_P_8021AD))) {
match->key.basic.n_proto = match->key.vlan.vlan_tpid;
@@ -80,7 +81,7 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
match->mask.vlan.vlan_tpid = ethertype.mask;
match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
offsetof(struct nft_flow_key, vlan);
- match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_VLAN);
+ match->dissector.used_keys |= BIT_ULL(FLOW_DISSECTOR_KEY_VLAN);
}
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e57eb168ee13..53c9e76473ba 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -470,7 +470,6 @@ __build_packet_message(struct nfnl_log_net *log,
sk_buff_data_t old_tail = inst->skb->tail;
struct sock *sk;
const unsigned char *hwhdrp;
- ktime_t tstamp;
nlh = nfnl_msg_put(inst->skb, 0, 0,
nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
@@ -599,10 +598,9 @@ __build_packet_message(struct nfnl_log_net *log,
goto nla_put_failure;
}
- tstamp = skb_tstamp_cond(skb, false);
- if (hooknum <= NF_INET_FORWARD && tstamp) {
+ if (hooknum <= NF_INET_FORWARD) {
+ struct timespec64 kts = ktime_to_timespec64(skb_tstamp_cond(skb, true));
struct nfulnl_msg_packet_timestamp ts;
- struct timespec64 kts = ktime_to_timespec64(tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 8f1bfa6ccc2d..50723ba08289 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -315,6 +315,14 @@ static int nfnl_osf_add_callback(struct sk_buff *skb,
f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+ if (f->opt_num > ARRAY_SIZE(f->opt))
+ return -EINVAL;
+
+ if (!memchr(f->genre, 0, MAXGENRELEN) ||
+ !memchr(f->subtype, 0, MAXGENRELEN) ||
+ !memchr(f->version, 0, MAXGENRELEN))
+ return -EINVAL;
+
kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL);
if (!kf)
return -ENOMEM;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 6eb21a4f5698..cd4652259095 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -162,7 +162,7 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
memcpy(key + reg->offset, data, reg->len);
memcpy(mask + reg->offset, datamask, reg->len);
- flow->match.dissector.used_keys |= BIT(reg->key);
+ flow->match.dissector.used_keys |= BIT_ULL(reg->key);
flow->match.dissector.offset[reg->key] = reg->base_offset;
if (reg->key == FLOW_DISSECTOR_KEY_META &&
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 38958e067aa8..86bb9d7797d9 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -108,7 +108,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
helper = rcu_dereference(help->helper);
if (helper == NULL)
goto err;
- strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
+ strscpy_pad((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
return;
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS: {
@@ -262,6 +262,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
regs->verdict.code = NF_DROP;
return;
}
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
}
nf_ct_set(skb, ct, IP_CT_NEW);
@@ -368,6 +369,7 @@ static bool nft_ct_tmpl_alloc_pcpu(void)
return false;
}
+ __set_bit(IPS_CONFIRMED_BIT, &tmp->status);
per_cpu(nft_ct_pcpu_template, cpu) = tmp;
}
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 7f856ceb3a66..3fbaa7bf41f9 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -35,6 +35,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset)
return opt[offset + 1];
}
+static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
+{
+ if (len % NFT_REG32_SIZE)
+ dest[len / NFT_REG32_SIZE] = 0;
+
+ return skb_copy_bits(skb, offset, dest, len);
+}
+
static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -56,8 +64,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
@@ -153,8 +160,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
@@ -210,7 +216,8 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
*dest = 1;
} else {
- dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
memcpy(dest, opt + offset, priv->len);
}
@@ -238,7 +245,12 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
if (!tcph)
goto err;
+ if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
+ goto err;
+
+ tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
opt = (u8 *)tcph;
+
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
union {
__be16 v16;
@@ -253,15 +265,6 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
goto err;
- if (skb_ensure_writable(pkt->skb,
- nft_thoff(pkt) + i + priv->len))
- goto err;
-
- tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
- &tcphdr_len);
- if (!tcph)
- goto err;
-
offset = i + priv->offset;
switch (priv->len) {
@@ -325,9 +328,9 @@ static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr,
if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
goto drop;
- opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
- if (!opt)
- goto err;
+ tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
+ opt = (u8 *)tcph;
+
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
unsigned int j;
@@ -392,9 +395,8 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
offset + ntohs(sch->length) > pkt->skb->len)
break;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset + priv->offset,
- dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset,
+ dest, priv->len) < 0)
break;
return;
}
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 6e049fd48760..04b51f285332 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -14,17 +14,18 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_fib.h>
+#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
+ NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
+ NFTA_FIB_F_PRESENT)
+
const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
[NFTA_FIB_DREG] = { .type = NLA_U32 },
[NFTA_FIB_RESULT] = { .type = NLA_U32 },
- [NFTA_FIB_FLAGS] = { .type = NLA_U32 },
+ [NFTA_FIB_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NFTA_FIB_F_ALL),
};
EXPORT_SYMBOL(nft_fib_policy);
-#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
- NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \
- NFTA_FIB_F_PRESENT)
-
int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nft_data **data)
{
@@ -77,7 +78,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS]));
- if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL))
+ if (priv->flags == 0)
return -EINVAL;
if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) ==
@@ -150,7 +151,7 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv,
if (priv->flags & NFTA_FIB_F_PRESENT)
*dreg = !!dev;
else
- strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+ strscpy_pad(reg, dev ? dev->name : "", IFNAMSIZ);
break;
default:
WARN_ON_ONCE(1);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 29ac48cdd6db..870e5b113d13 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -90,7 +90,8 @@ static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
[NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 },
[NFTA_LOOKUP_SREG] = { .type = NLA_U32 },
[NFTA_LOOKUP_DREG] = { .type = NLA_U32 },
- [NFTA_LOOKUP_FLAGS] = { .type = NLA_U32 },
+ [NFTA_LOOKUP_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NFT_LOOKUP_F_INV),
};
static int nft_lookup_init(const struct nft_ctx *ctx,
@@ -120,9 +121,6 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (tb[NFTA_LOOKUP_FLAGS]) {
flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS]));
- if (flags & ~NFT_LOOKUP_F_INV)
- return -EINVAL;
-
if (flags & NFT_LOOKUP_F_INV)
priv->invert = true;
}
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index b115d77fbbc7..8a14aaca93bb 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -20,7 +20,8 @@ struct nft_masq {
};
static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
- [NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
+ [NFTA_MASQ_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK),
[NFTA_MASQ_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_MASQ_REG_PROTO_MAX] = { .type = NLA_U32 },
};
@@ -47,11 +48,8 @@ static int nft_masq_init(const struct nft_ctx *ctx,
struct nft_masq *priv = nft_expr_priv(expr);
int err;
- if (tb[NFTA_MASQ_FLAGS]) {
+ if (tb[NFTA_MASQ_FLAGS])
priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EINVAL;
- }
if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN],
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 8fdc7318c03c..f7da7c43333b 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -185,12 +185,12 @@ static noinline bool nft_meta_get_eval_kind(enum nft_meta_keys key,
case NFT_META_IIFKIND:
if (!in || !in->rtnl_link_ops)
return false;
- strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
+ strscpy_pad((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
break;
case NFT_META_OIFKIND:
if (!out || !out->rtnl_link_ops)
return false;
- strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
+ strscpy_pad((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
break;
default:
return false;
@@ -206,7 +206,7 @@ static void nft_meta_store_ifindex(u32 *dest, const struct net_device *dev)
static void nft_meta_store_ifname(u32 *dest, const struct net_device *dev)
{
- strncpy((char *)dest, dev ? dev->name : "", IFNAMSIZ);
+ strscpy_pad((char *)dest, dev ? dev->name : "", IFNAMSIZ);
}
static bool nft_meta_store_iftype(u32 *dest, const struct net_device *dev)
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 5c29915ab028..583885ce7232 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -132,7 +132,8 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
[NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 },
[NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 },
- [NFTA_NAT_FLAGS] = { .type = NLA_U32 },
+ [NFTA_NAT_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK),
};
static int nft_nat_validate(const struct nft_ctx *ctx,
@@ -246,11 +247,8 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
- if (tb[NFTA_NAT_FLAGS]) {
+ if (tb[NFTA_NAT_FLAGS])
priv->flags |= ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EOPNOTSUPP;
- }
return nf_ct_netns_get(ctx->net, family);
}
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 70820c66b591..7f61506e5b44 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -23,7 +23,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
struct nft_osf *priv = nft_expr_priv(expr);
u32 *dest = &regs->data[priv->dreg];
struct sk_buff *skb = pkt->skb;
- char os_match[NFT_OSF_MAXGENRELEN + 1];
+ char os_match[NFT_OSF_MAXGENRELEN];
const struct tcphdr *tcp;
struct nf_osf_data data;
struct tcphdr _tcph;
@@ -45,7 +45,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
}
if (!nf_osf_find(skb, nf_osf_fingers, priv->ttl, &data)) {
- strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
+ strscpy_pad((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
} else {
if (priv->flags & NFT_OSF_F_VERSION)
snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s",
@@ -53,7 +53,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
strscpy(os_match, data.genre, NFT_OSF_MAXGENRELEN);
- strncpy((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
+ strscpy_pad((char *)dest, os_match, NFT_OSF_MAXGENRELEN);
}
}
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index a70196ffcb1e..a58bd8d291ff 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -22,7 +22,8 @@ struct nft_redir {
static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
[NFTA_REDIR_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_REDIR_REG_PROTO_MAX] = { .type = NLA_U32 },
- [NFTA_REDIR_FLAGS] = { .type = NLA_U32 },
+ [NFTA_REDIR_FLAGS] =
+ NLA_POLICY_MASK(NLA_BE32, NF_NAT_RANGE_MASK),
};
static int nft_redir_validate(const struct nft_ctx *ctx,
@@ -68,11 +69,8 @@ static int nft_redir_init(const struct nft_ctx *ctx,
priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
- if (tb[NFTA_REDIR_FLAGS]) {
+ if (tb[NFTA_REDIR_FLAGS])
priv->flags = ntohl(nla_get_be32(tb[NFTA_REDIR_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EINVAL;
- }
return nf_ct_netns_get(ctx->net, ctx->family);
}
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index cef5df846000..2013de934cef 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -326,6 +326,9 @@ static void nft_rhash_gc(struct work_struct *work)
nft_net = nft_pernet(net);
gc_seq = READ_ONCE(nft_net->gc_seq);
+ if (nft_set_gc_is_pending(set))
+ goto done;
+
gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
if (!gc)
goto done;
@@ -335,12 +338,9 @@ static void nft_rhash_gc(struct work_struct *work)
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN) {
- nft_trans_gc_destroy(gc);
- gc = NULL;
- goto try_later;
- }
- continue;
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
}
/* Ruleset has been updated, try later. */
@@ -369,7 +369,7 @@ dead_elem:
nft_trans_gc_elem_add(gc, he);
}
- gc = nft_trans_gc_catchall(gc, gc_seq);
+ gc = nft_trans_gc_catchall_async(gc, gc_seq);
try_later:
/* catchall list iteration requires rcu read side lock. */
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 3757fcc55723..c0dcc40de358 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -902,12 +902,14 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
int mask_bits)
{
- int rule = f->rules++, group, ret, bit_offset = 0;
+ int rule = f->rules, group, ret, bit_offset = 0;
- ret = pipapo_resize(f, f->rules - 1, f->rules);
+ ret = pipapo_resize(f, f->rules, f->rules + 1);
if (ret)
return ret;
+ f->rules++;
+
for (group = 0; group < f->groups; group++) {
int i, v;
u8 mask;
@@ -1052,7 +1054,9 @@ static int pipapo_expand(struct nft_pipapo_field *f,
step++;
if (step >= len) {
if (!masks) {
- pipapo_insert(f, base, 0);
+ err = pipapo_insert(f, base, 0);
+ if (err < 0)
+ return err;
masks = 1;
}
goto out;
@@ -1235,6 +1239,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
ret = pipapo_expand(f, start, end, f->groups * f->bb);
+ if (ret < 0)
+ return ret;
+
if (f->bsize > bsize_max)
bsize_max = f->bsize;
@@ -1589,7 +1596,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (!gc)
- break;
+ return;
nft_pipapo_gc_deactivate(net, set, e);
pipapo_drop(m, rulemap);
@@ -1603,7 +1610,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
}
}
- gc = nft_trans_gc_catchall(gc, 0);
+ gc = nft_trans_gc_catchall_sync(gc);
if (gc) {
nft_trans_gc_queue_sync_done(gc);
priv->last_gc = jiffies;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index f9d4c8fcbbf8..487572dcd614 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -312,6 +312,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
struct rb_node *node, *next, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
+ u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
int d, err;
@@ -357,8 +358,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
if (!nft_set_elem_active(&rbe->ext, genmask))
continue;
- /* perform garbage collection to avoid bogus overlap reports. */
- if (nft_set_elem_expired(&rbe->ext)) {
+ /* perform garbage collection to avoid bogus overlap reports
+ * but skip new elements in this transaction.
+ */
+ if (nft_set_elem_expired(&rbe->ext) &&
+ nft_set_elem_active(&rbe->ext, cur_genmask)) {
err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
if (err < 0)
return err;
@@ -611,12 +615,14 @@ static void nft_rbtree_gc(struct work_struct *work)
nft_net = nft_pernet(net);
gc_seq = READ_ONCE(nft_net->gc_seq);
+ if (nft_set_gc_is_pending(set))
+ goto done;
+
gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
if (!gc)
goto done;
- write_lock_bh(&priv->lock);
- write_seqcount_begin(&priv->count);
+ read_lock_bh(&priv->lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
/* Ruleset has been updated, try later. */
@@ -663,11 +669,10 @@ dead_elem:
nft_trans_gc_elem_add(gc, rbe);
}
- gc = nft_trans_gc_catchall(gc, gc_seq);
+ gc = nft_trans_gc_catchall_async(gc, gc_seq);
try_later:
- write_seqcount_end(&priv->count);
- write_unlock_bh(&priv->lock);
+ read_unlock_bh(&priv->lock);
if (gc)
nft_trans_gc_queue_async_done(gc);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 470282cf3fae..21624d68314f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -768,7 +768,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
m->u.user.match_size = msize;
strscpy(name, match->name, sizeof(name));
module_put(match->me);
- strncpy(m->u.user.name, name, sizeof(m->u.user.name));
+ strscpy_pad(m->u.user.name, name, sizeof(m->u.user.name));
*size += off;
*dstptr += msize;
@@ -1148,7 +1148,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
t->u.user.target_size = tsize;
strscpy(name, target->name, sizeof(name));
module_put(target->me);
- strncpy(t->u.user.name, name, sizeof(t->u.user.name));
+ strscpy_pad(t->u.user.name, name, sizeof(t->u.user.name));
*size += off;
*dstptr += tsize;
@@ -2014,4 +2014,3 @@ static void __exit xt_fini(void)
module_init(xt_init);
module_exit(xt_fini);
-
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
index 68ccbe50bb1e..5d1fb7018dba 100644
--- a/net/netfilter/xt_repldata.h
+++ b/net/netfilter/xt_repldata.h
@@ -29,7 +29,7 @@
if (tbl == NULL) \
return NULL; \
term = (struct type##_error *)&(((char *)tbl)[term_offset]); \
- strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
+ strscpy_pad(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
*term = (struct type##_error)typ2##_ERROR_INIT; \
tbl->repl.valid_hooks = hook_mask; \
tbl->repl.num_entries = nhooks + 1; \
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index e8961094a282..b46a6a512058 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -149,6 +149,8 @@ static int sctp_mt_check(const struct xt_mtchk_param *par)
{
const struct xt_sctp_info *info = par->matchinfo;
+ if (info->flag_count > ARRAY_SIZE(info->flag_info))
+ return -EINVAL;
if (info->flags & ~XT_SCTP_VALID_FLAGS)
return -EINVAL;
if (info->invflags & ~XT_SCTP_VALID_FLAGS)
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index 177b40d08098..117d4615d668 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -96,11 +96,32 @@ static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par)
return ret ^ data->invert;
}
+static int u32_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ const struct xt_u32 *data = par->matchinfo;
+ const struct xt_u32_test *ct;
+ unsigned int i;
+
+ if (data->ntests > ARRAY_SIZE(data->tests))
+ return -EINVAL;
+
+ for (i = 0; i < data->ntests; ++i) {
+ ct = &data->tests[i];
+
+ if (ct->nnums > ARRAY_SIZE(ct->location) ||
+ ct->nvalues > ARRAY_SIZE(ct->value))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static struct xt_match xt_u32_mt_reg __read_mostly = {
.name = "u32",
.revision = 0,
.family = NFPROTO_UNSPEC,
.match = u32_mt,
+ .checkentry = u32_mt_checkentry,
.matchsize = sizeof(struct xt_u32),
.me = THIS_MODULE,
};
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index 85d7ecb05728..9518ab56ec98 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -149,7 +149,4 @@ enum {
/* NetLabel protocol functions */
int netlbl_cipsov4_genl_init(void);
-/* Free the memory associated with a CIPSOv4 DOI definition */
-void netlbl_cipsov4_doi_free(struct rcu_head *entry);
-
#endif
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 383631873748..642b9d382fb4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -84,7 +84,7 @@ struct listeners {
static inline int netlink_is_kernel(struct sock *sk)
{
- return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
+ return nlk_test_bit(KERNEL_SOCKET, sk);
}
struct netlink_table *nl_table __read_mostly;
@@ -349,9 +349,7 @@ static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src,
static void netlink_overrun(struct sock *sk)
{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) {
+ if (!nlk_test_bit(RECV_NO_ENOBUFS, sk)) {
if (!test_and_set_bit(NETLINK_S_CONGESTED,
&nlk_sk(sk)->state)) {
sk->sk_err = ENOBUFS;
@@ -677,6 +675,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
struct netlink_sock *nlk;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
+ void (*release)(struct sock *sock, unsigned long *groups);
int err = 0;
sock->state = SS_UNCONNECTED;
@@ -704,6 +703,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
cb_mutex = nl_table[protocol].cb_mutex;
bind = nl_table[protocol].bind;
unbind = nl_table[protocol].unbind;
+ release = nl_table[protocol].release;
netlink_unlock_table();
if (err < 0)
@@ -719,6 +719,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
nlk->module = module;
nlk->netlink_bind = bind;
nlk->netlink_unbind = unbind;
+ nlk->netlink_release = release;
out:
return err;
@@ -763,6 +764,8 @@ static int netlink_release(struct socket *sock)
* OK. Socket is unlinked, any packets that arrive now
* will be purged.
*/
+ if (nlk->netlink_release)
+ nlk->netlink_release(sk, nlk->groups);
/* must not acquire netlink_table_lock in any way again before unbind
* and notifying genetlink is done as otherwise it might deadlock
@@ -1402,9 +1405,7 @@ EXPORT_SYMBOL_GPL(netlink_has_listeners);
bool netlink_strict_get_check(struct sk_buff *skb)
{
- const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk);
-
- return nlk->flags & NETLINK_F_STRICT_CHK;
+ return nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
}
EXPORT_SYMBOL_GPL(netlink_strict_get_check);
@@ -1432,6 +1433,8 @@ struct netlink_broadcast_data {
int delivered;
gfp_t allocation;
struct sk_buff *skb, *skb2;
+ int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
+ void *tx_data;
};
static void do_one_broadcast(struct sock *sk,
@@ -1448,7 +1451,7 @@ static void do_one_broadcast(struct sock *sk,
return;
if (!net_eq(sock_net(sk), p->net)) {
- if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
+ if (!nlk_test_bit(LISTEN_ALL_NSID, sk))
return;
if (!peernet_has_id(sock_net(sk), p->net))
@@ -1481,10 +1484,17 @@ static void do_one_broadcast(struct sock *sk,
netlink_overrun(sk);
/* Clone failed. Notify ALL listeners. */
p->failure = 1;
- if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
+ if (nlk_test_bit(BROADCAST_SEND_ERROR, sk))
p->delivery_failure = 1;
goto out;
}
+
+ if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+ kfree_skb(p->skb2);
+ p->skb2 = NULL;
+ goto out;
+ }
+
if (sk_filter(sk, p->skb2)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
@@ -1496,7 +1506,7 @@ static void do_one_broadcast(struct sock *sk,
val = netlink_broadcast_deliver(sk, p->skb2);
if (val < 0) {
netlink_overrun(sk);
- if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
+ if (nlk_test_bit(BROADCAST_SEND_ERROR, sk))
p->delivery_failure = 1;
} else {
p->congested |= val;
@@ -1507,8 +1517,12 @@ out:
sock_put(sk);
}
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
- u32 group, gfp_t allocation)
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
+ u32 portid,
+ u32 group, gfp_t allocation,
+ int (*filter)(struct sock *dsk,
+ struct sk_buff *skb, void *data),
+ void *filter_data)
{
struct net *net = sock_net(ssk);
struct netlink_broadcast_data info;
@@ -1527,6 +1541,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
info.allocation = allocation;
info.skb = skb;
info.skb2 = NULL;
+ info.tx_filter = filter;
+ info.tx_data = filter_data;
/* While we sleep in clone, do not allow to change socket list */
@@ -1552,6 +1568,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
}
return -ESRCH;
}
+EXPORT_SYMBOL(netlink_broadcast_filtered);
+
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
+ u32 group, gfp_t allocation)
+{
+ return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
+ NULL, NULL);
+}
EXPORT_SYMBOL(netlink_broadcast);
struct netlink_set_err_data {
@@ -1576,7 +1600,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
!test_bit(p->group - 1, nlk->groups))
goto out;
- if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
+ if (p->code == ENOBUFS && nlk_test_bit(RECV_NO_ENOBUFS, sk)) {
ret = 1;
goto out;
}
@@ -1629,10 +1653,7 @@ static void netlink_update_socket_mc(struct netlink_sock *nlk,
old = test_bit(group - 1, nlk->groups);
subscriptions = nlk->subscriptions - old + new;
- if (new)
- __set_bit(group - 1, nlk->groups);
- else
- __clear_bit(group - 1, nlk->groups);
+ __assign_bit(group - 1, nlk->groups, new);
netlink_update_subscriptions(&nlk->sk, subscriptions);
netlink_update_listeners(&nlk->sk);
}
@@ -1643,7 +1664,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
unsigned int val = 0;
- int err;
+ int nr = -1;
if (level != SOL_NETLINK)
return -ENOPROTOOPT;
@@ -1654,14 +1675,12 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case NETLINK_PKTINFO:
- if (val)
- nlk->flags |= NETLINK_F_RECV_PKTINFO;
- else
- nlk->flags &= ~NETLINK_F_RECV_PKTINFO;
- err = 0;
+ nr = NETLINK_F_RECV_PKTINFO;
break;
case NETLINK_ADD_MEMBERSHIP:
case NETLINK_DROP_MEMBERSHIP: {
+ int err;
+
if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
return -EPERM;
err = netlink_realloc_groups(sk);
@@ -1681,61 +1700,38 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
nlk->netlink_unbind(sock_net(sk), val);
- err = 0;
break;
}
case NETLINK_BROADCAST_ERROR:
- if (val)
- nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR;
- else
- nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR;
- err = 0;
+ nr = NETLINK_F_BROADCAST_SEND_ERROR;
break;
case NETLINK_NO_ENOBUFS:
+ assign_bit(NETLINK_F_RECV_NO_ENOBUFS, &nlk->flags, val);
if (val) {
- nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS;
clear_bit(NETLINK_S_CONGESTED, &nlk->state);
wake_up_interruptible(&nlk->wait);
- } else {
- nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS;
}
- err = 0;
break;
case NETLINK_LISTEN_ALL_NSID:
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
return -EPERM;
-
- if (val)
- nlk->flags |= NETLINK_F_LISTEN_ALL_NSID;
- else
- nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
- err = 0;
+ nr = NETLINK_F_LISTEN_ALL_NSID;
break;
case NETLINK_CAP_ACK:
- if (val)
- nlk->flags |= NETLINK_F_CAP_ACK;
- else
- nlk->flags &= ~NETLINK_F_CAP_ACK;
- err = 0;
+ nr = NETLINK_F_CAP_ACK;
break;
case NETLINK_EXT_ACK:
- if (val)
- nlk->flags |= NETLINK_F_EXT_ACK;
- else
- nlk->flags &= ~NETLINK_F_EXT_ACK;
- err = 0;
+ nr = NETLINK_F_EXT_ACK;
break;
case NETLINK_GET_STRICT_CHK:
- if (val)
- nlk->flags |= NETLINK_F_STRICT_CHK;
- else
- nlk->flags &= ~NETLINK_F_STRICT_CHK;
- err = 0;
+ nr = NETLINK_F_STRICT_CHK;
break;
default:
- err = -ENOPROTOOPT;
+ return -ENOPROTOOPT;
}
- return err;
+ if (nr >= 0)
+ assign_bit(nr, &nlk->flags, val);
+ return 0;
}
static int netlink_getsockopt(struct socket *sock, int level, int optname,
@@ -1802,7 +1798,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & flag ? 1 : 0;
+ val = test_bit(flag, &nlk->flags);
if (put_user(len, optlen) ||
copy_to_user(optval, &val, len))
@@ -1979,9 +1975,9 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
msg->msg_namelen = sizeof(*addr);
}
- if (nlk->flags & NETLINK_F_RECV_PKTINFO)
+ if (nlk_test_bit(RECV_PKTINFO, sk))
netlink_cmsg_recv_pktinfo(msg, skb);
- if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ if (nlk_test_bit(LISTEN_ALL_NSID, sk))
netlink_cmsg_listen_all_nsid(sk, msg, skb);
memset(&scm, 0, sizeof(scm));
@@ -2058,7 +2054,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
goto out_sock_release;
nlk = nlk_sk(sk);
- nlk->flags |= NETLINK_F_KERNEL_SOCKET;
+ set_bit(NETLINK_F_KERNEL_SOCKET, &nlk->flags);
netlink_table_grab();
if (!nl_table[unit].registered) {
@@ -2069,6 +2065,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (cfg) {
nl_table[unit].bind = cfg->bind;
nl_table[unit].unbind = cfg->unbind;
+ nl_table[unit].release = cfg->release;
nl_table[unit].flags = cfg->flags;
}
nl_table[unit].registered = 1;
@@ -2192,7 +2189,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb,
nl_dump_check_consistent(cb, nlh);
memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, sizeof(nlk->dump_done_errno));
- if (extack->_msg && nlk->flags & NETLINK_F_EXT_ACK) {
+ if (extack->_msg && test_bit(NETLINK_F_EXT_ACK, &nlk->flags)) {
nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg))
nlmsg_end(skb, nlh);
@@ -2321,8 +2318,8 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
struct netlink_dump_control *control)
{
- struct netlink_sock *nlk, *nlk2;
struct netlink_callback *cb;
+ struct netlink_sock *nlk;
struct sock *sk;
int ret;
@@ -2357,8 +2354,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->min_dump_alloc = control->min_dump_alloc;
cb->skb = skb;
- nlk2 = nlk_sk(NETLINK_CB(skb).sk);
- cb->strict_check = !!(nlk2->flags & NETLINK_F_STRICT_CHK);
+ cb->strict_check = nlk_test_bit(STRICT_CHK, NETLINK_CB(skb).sk);
if (control->start) {
cb->extack = control->extack;
@@ -2402,7 +2398,7 @@ netlink_ack_tlv_len(struct netlink_sock *nlk, int err,
{
size_t tlvlen;
- if (!extack || !(nlk->flags & NETLINK_F_EXT_ACK))
+ if (!extack || !test_bit(NETLINK_F_EXT_ACK, &nlk->flags))
return 0;
tlvlen = 0;
@@ -2474,7 +2470,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
* requests to cap the error message, and get extra error data if
* requested.
*/
- if (err && !(nlk->flags & NETLINK_F_CAP_ACK))
+ if (err && !test_bit(NETLINK_F_CAP_ACK, &nlk->flags))
payload += nlmsg_len(nlh);
else
flags |= NLM_F_CAPPED;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 90a3198a9b7f..2145979b9986 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -8,14 +8,16 @@
#include <net/sock.h>
/* flags */
-#define NETLINK_F_KERNEL_SOCKET 0x1
-#define NETLINK_F_RECV_PKTINFO 0x2
-#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
-#define NETLINK_F_RECV_NO_ENOBUFS 0x8
-#define NETLINK_F_LISTEN_ALL_NSID 0x10
-#define NETLINK_F_CAP_ACK 0x20
-#define NETLINK_F_EXT_ACK 0x40
-#define NETLINK_F_STRICT_CHK 0x80
+enum {
+ NETLINK_F_KERNEL_SOCKET,
+ NETLINK_F_RECV_PKTINFO,
+ NETLINK_F_BROADCAST_SEND_ERROR,
+ NETLINK_F_RECV_NO_ENOBUFS,
+ NETLINK_F_LISTEN_ALL_NSID,
+ NETLINK_F_CAP_ACK,
+ NETLINK_F_EXT_ACK,
+ NETLINK_F_STRICT_CHK,
+};
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
@@ -23,10 +25,10 @@
struct netlink_sock {
/* struct sock has to be the first member of netlink_sock */
struct sock sk;
+ unsigned long flags;
u32 portid;
u32 dst_portid;
u32 dst_group;
- u32 flags;
u32 subscriptions;
u32 ngroups;
unsigned long *groups;
@@ -42,6 +44,8 @@ struct netlink_sock {
void (*netlink_rcv)(struct sk_buff *skb);
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
+ void (*netlink_release)(struct sock *sk,
+ unsigned long *groups);
struct module *module;
struct rhash_head node;
@@ -54,6 +58,8 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
return container_of(sk, struct netlink_sock, sk);
}
+#define nlk_test_bit(nr, sk) test_bit(NETLINK_F_##nr, &nlk_sk(sk)->flags)
+
struct netlink_table {
struct rhashtable hash;
struct hlist_head mc_list;
@@ -64,6 +70,8 @@ struct netlink_table {
struct module *module;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
+ void (*release)(struct sock *sk,
+ unsigned long *groups);
int registered;
};
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index e4f21b1067bc..9c4f231be275 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -27,15 +27,15 @@ static int sk_diag_put_flags(struct sock *sk, struct sk_buff *skb)
if (nlk->cb_running)
flags |= NDIAG_FLAG_CB_RUNNING;
- if (nlk->flags & NETLINK_F_RECV_PKTINFO)
+ if (nlk_test_bit(RECV_PKTINFO, sk))
flags |= NDIAG_FLAG_PKTINFO;
- if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
+ if (nlk_test_bit(BROADCAST_SEND_ERROR, sk))
flags |= NDIAG_FLAG_BROADCAST_ERROR;
- if (nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)
+ if (nlk_test_bit(RECV_NO_ENOBUFS, sk))
flags |= NDIAG_FLAG_NO_ENOBUFS;
- if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ if (nlk_test_bit(LISTEN_ALL_NSID, sk))
flags |= NDIAG_FLAG_LISTEN_ALL_NSID;
- if (nlk->flags & NETLINK_F_CAP_ACK)
+ if (nlk_test_bit(CAP_ACK, sk))
flags |= NDIAG_FLAG_CAP_ACK;
return nla_put_u32(skb, NETLINK_DIAG_FLAGS, flags);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a157247a1e45..8315d31b53db 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -52,6 +52,18 @@ static void genl_unlock_all(void)
up_write(&cb_lock);
}
+static void genl_op_lock(const struct genl_family *family)
+{
+ if (!family->parallel_ops)
+ genl_lock();
+}
+
+static void genl_op_unlock(const struct genl_family *family)
+{
+ if (!family->parallel_ops)
+ genl_unlock();
+}
+
static DEFINE_IDR(genl_fam_idr);
/*
@@ -593,8 +605,12 @@ static int genl_validate_ops(const struct genl_family *family)
return -EINVAL;
/* Check sort order */
- if (a->cmd < b->cmd)
+ if (a->cmd < b->cmd) {
continue;
+ } else if (a->cmd > b->cmd) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
if (a->internal_flags != b->internal_flags ||
((a->flags ^ b->flags) & ~(GENL_CMD_CAP_DO |
@@ -828,64 +844,63 @@ static int genl_start(struct netlink_callback *cb)
genl_family_rcv_msg_attrs_free(attrs);
return -ENOMEM;
}
- info->family = ctx->family;
info->op = *ops;
- info->attrs = attrs;
+ info->info.family = ctx->family;
+ info->info.snd_seq = cb->nlh->nlmsg_seq;
+ info->info.snd_portid = NETLINK_CB(cb->skb).portid;
+ info->info.nlhdr = cb->nlh;
+ info->info.genlhdr = nlmsg_data(cb->nlh);
+ info->info.attrs = attrs;
+ genl_info_net_set(&info->info, sock_net(cb->skb->sk));
+ info->info.extack = cb->extack;
+ memset(&info->info.user_ptr, 0, sizeof(info->info.user_ptr));
cb->data = info;
if (ops->start) {
- if (!ctx->family->parallel_ops)
- genl_lock();
+ genl_op_lock(ctx->family);
rc = ops->start(cb);
- if (!ctx->family->parallel_ops)
- genl_unlock();
+ genl_op_unlock(ctx->family);
}
if (rc) {
- genl_family_rcv_msg_attrs_free(info->attrs);
+ genl_family_rcv_msg_attrs_free(info->info.attrs);
genl_dumpit_info_free(info);
cb->data = NULL;
}
return rc;
}
-static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+static int genl_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
- const struct genl_split_ops *ops = &genl_dumpit_info(cb)->op;
+ struct genl_dumpit_info *dump_info = cb->data;
+ const struct genl_split_ops *ops = &dump_info->op;
+ struct genl_info *info = &dump_info->info;
int rc;
- genl_lock();
+ info->extack = cb->extack;
+
+ genl_op_lock(info->family);
rc = ops->dumpit(skb, cb);
- genl_unlock();
+ genl_op_unlock(info->family);
return rc;
}
-static int genl_lock_done(struct netlink_callback *cb)
+static int genl_done(struct netlink_callback *cb)
{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- const struct genl_split_ops *ops = &info->op;
+ struct genl_dumpit_info *dump_info = cb->data;
+ const struct genl_split_ops *ops = &dump_info->op;
+ struct genl_info *info = &dump_info->info;
int rc = 0;
+ info->extack = cb->extack;
+
if (ops->done) {
- genl_lock();
+ genl_op_lock(info->family);
rc = ops->done(cb);
- genl_unlock();
+ genl_op_unlock(info->family);
}
genl_family_rcv_msg_attrs_free(info->attrs);
- genl_dumpit_info_free(info);
- return rc;
-}
-
-static int genl_parallel_done(struct netlink_callback *cb)
-{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- const struct genl_split_ops *ops = &info->op;
- int rc = 0;
-
- if (ops->done)
- rc = ops->done(cb);
- genl_family_rcv_msg_attrs_free(info->attrs);
- genl_dumpit_info_free(info);
+ genl_dumpit_info_free(dump_info);
return rc;
}
@@ -897,6 +912,14 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
int hdrlen, struct net *net)
{
struct genl_start_context ctx;
+ struct netlink_dump_control c = {
+ .module = family->module,
+ .data = &ctx,
+ .start = genl_start,
+ .dump = genl_dumpit,
+ .done = genl_done,
+ .extack = extack,
+ };
int err;
ctx.family = family;
@@ -905,31 +928,9 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
ctx.ops = ops;
ctx.hdrlen = hdrlen;
- if (!family->parallel_ops) {
- struct netlink_dump_control c = {
- .module = family->module,
- .data = &ctx,
- .start = genl_start,
- .dump = genl_lock_dumpit,
- .done = genl_lock_done,
- .extack = extack,
- };
-
- genl_unlock();
- err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
- genl_lock();
- } else {
- struct netlink_dump_control c = {
- .module = family->module,
- .data = &ctx,
- .start = genl_start,
- .dump = ops->dumpit,
- .done = genl_parallel_done,
- .extack = extack,
- };
-
- err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
- }
+ genl_op_unlock(family);
+ err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
+ genl_op_lock(family);
return err;
}
@@ -953,9 +954,9 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family,
info.snd_seq = nlh->nlmsg_seq;
info.snd_portid = NETLINK_CB(skb).portid;
+ info.family = family;
info.nlhdr = nlh;
info.genlhdr = nlmsg_data(nlh);
- info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
info.attrs = attrbuf;
info.extack = extack;
genl_info_net_set(&info, net);
@@ -1061,13 +1062,9 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (family == NULL)
return -ENOENT;
- if (!family->parallel_ops)
- genl_lock();
-
+ genl_op_lock(family);
err = genl_family_rcv_msg(family, skb, nlh, extack);
-
- if (!family->parallel_ops)
- genl_unlock();
+ genl_op_unlock(family);
return err;
}
@@ -1392,7 +1389,7 @@ static int ctrl_dumppolicy_start(struct netlink_callback *cb)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct ctrl_dump_policy_ctx *ctx = (void *)cb->ctx;
- struct nlattr **tb = info->attrs;
+ struct nlattr **tb = info->info.attrs;
const struct genl_family *rt;
struct genl_op_iter i;
int err;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index eb8ccbd58df7..96e91ab71573 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -660,6 +660,11 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
goto out_release;
}
+ if (sock->state == SS_CONNECTING) {
+ err = -EALREADY;
+ goto out_release;
+ }
+
sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index cc8fa9e36159..ed1508a9e093 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -172,7 +172,7 @@ static int nci_uart_tty_open(struct tty_struct *tty)
*/
static void nci_uart_tty_close(struct tty_struct *tty)
{
- struct nci_uart *nu = (void *)tty->disc_data;
+ struct nci_uart *nu = tty->disc_data;
/* Detach from the tty */
tty->disc_data = NULL;
@@ -204,7 +204,7 @@ static void nci_uart_tty_close(struct tty_struct *tty)
*/
static void nci_uart_tty_wakeup(struct tty_struct *tty)
{
- struct nci_uart *nu = (void *)tty->disc_data;
+ struct nci_uart *nu = tty->disc_data;
if (!nu)
return;
@@ -296,9 +296,9 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
* Return Value: None
*/
static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data,
- const char *flags, int count)
+ const u8 *flags, size_t count)
{
- struct nci_uart *nu = (void *)tty->disc_data;
+ struct nci_uart *nu = tty->disc_data;
if (!nu || tty != nu->tty)
return;
@@ -325,7 +325,7 @@ static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data,
static int nci_uart_tty_ioctl(struct tty_struct *tty, unsigned int cmd,
unsigned long arg)
{
- struct nci_uart *nu = (void *)tty->disc_data;
+ struct nci_uart *nu = tty->disc_data;
int err = 0;
switch (cmd) {
@@ -345,20 +345,14 @@ static int nci_uart_tty_ioctl(struct tty_struct *tty, unsigned int cmd,
/* We don't provide read/write/poll interface for user space. */
static ssize_t nci_uart_tty_read(struct tty_struct *tty, struct file *file,
- unsigned char *buf, size_t nr,
- void **cookie, unsigned long offset)
+ u8 *buf, size_t nr, void **cookie,
+ unsigned long offset)
{
return 0;
}
static ssize_t nci_uart_tty_write(struct tty_struct *tty, struct file *file,
- const unsigned char *data, size_t count)
-{
- return 0;
-}
-
-static __poll_t nci_uart_tty_poll(struct tty_struct *tty,
- struct file *filp, poll_table *wait)
+ const u8 *data, size_t count)
{
return 0;
}
@@ -435,7 +429,6 @@ static struct tty_ldisc_ops nci_uart_ldisc = {
.close = nci_uart_tty_close,
.read = nci_uart_tty_read,
.write = nci_uart_tty_write,
- .poll = nci_uart_tty_poll,
.receive_buf = nci_uart_tty_receive,
.write_wakeup = nci_uart_tty_wakeup,
.ioctl = nci_uart_tty_ioctl,
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index e9ac6a6f934e..aa1dbf654c3e 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -110,10 +110,10 @@ static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
struct nfc_dev *dev;
u32 idx;
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+ if (!info->info.attrs[NFC_ATTR_DEVICE_INDEX])
return ERR_PTR(-EINVAL);
- idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+ idx = nla_get_u32(info->info.attrs[NFC_ATTR_DEVICE_INDEX]);
dev = nfc_get_device(idx);
if (!dev)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index cab1e02b63e0..fd66014d8a76 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -27,6 +27,7 @@
#include <net/sctp/checksum.h>
#include "datapath.h"
+#include "drop.h"
#include "flow.h"
#include "conntrack.h"
#include "vport.h"
@@ -781,7 +782,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk,
struct vport *vport = data->vport;
if (skb_cow_head(skb, data->l2_len) < 0) {
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
return -ENOMEM;
}
@@ -852,6 +853,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
struct sk_buff *skb, u16 mru,
struct sw_flow_key *key)
{
+ enum ovs_drop_reason reason;
u16 orig_network_offset = 0;
if (eth_p_mpls(skb->protocol)) {
@@ -861,6 +863,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
if (skb_network_offset(skb) > MAX_L2_LEN) {
OVS_NLERR(1, "L2 header too long to fragment");
+ reason = OVS_DROP_FRAG_L2_TOO_LONG;
goto err;
}
@@ -901,12 +904,13 @@ static void ovs_fragment(struct net *net, struct vport *vport,
WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
ovs_vport_name(vport), ntohs(key->eth.type), mru,
vport->dev->mtu);
+ reason = OVS_DROP_FRAG_INVALID_PROTO;
goto err;
}
return;
err:
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, reason);
}
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
@@ -933,10 +937,10 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
ovs_fragment(net, vport, skb, mru, key);
} else {
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
}
} else {
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY);
}
}
@@ -1010,7 +1014,7 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
return clone_execute(dp, skb, key, 0, nla_data(actions),
nla_len(actions), true, false);
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_IP_TTL);
return 0;
}
@@ -1036,7 +1040,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
if ((arg->probability != U32_MAX) &&
(!arg->probability || get_random_u32() > arg->probability)) {
if (last)
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
return 0;
}
@@ -1297,6 +1301,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
if (trace_ovs_do_execute_action_enabled())
trace_ovs_do_execute_action(dp, skb, key, a, rem);
+ /* Actions that rightfully have to consume the skb should do it
+ * and return directly.
+ */
switch (nla_type(a)) {
case OVS_ACTION_ATTR_OUTPUT: {
int port = nla_get_u32(a);
@@ -1332,6 +1339,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
output_userspace(dp, skb, key, a, attr,
len, OVS_CB(skb)->cutlen);
OVS_CB(skb)->cutlen = 0;
+ if (nla_is_last(a, rem)) {
+ consume_skb(skb);
+ return 0;
+ }
break;
case OVS_ACTION_ATTR_HASH:
@@ -1446,7 +1457,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_METER:
if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_METER);
return 0;
}
break;
@@ -1477,15 +1488,24 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return dec_ttl_exception_handler(dp, skb,
key, a);
break;
+
+ case OVS_ACTION_ATTR_DROP: {
+ enum ovs_drop_reason reason = nla_get_u32(a)
+ ? OVS_DROP_EXPLICIT_WITH_ERROR
+ : OVS_DROP_EXPLICIT;
+
+ ovs_kfree_skb_reason(skb, reason);
+ return 0;
+ }
}
if (unlikely(err)) {
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_ACTION_ERROR);
return err;
}
}
- consume_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
return 0;
}
@@ -1547,7 +1567,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
/* Out of per CPU action FIFO space. Drop the 'skb' and
* log an error.
*/
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_DEFERRED_LIMIT);
if (net_ratelimit()) {
if (actions) { /* Sample action */
@@ -1599,7 +1619,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
if (unlikely(level > OVS_RECURSION_LIMIT)) {
net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
ovs_dp_name(dp));
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_RECURSION_LIMIT);
err = -ENETDOWN;
goto out;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 331730fd3580..0b9a785dea45 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -29,6 +29,7 @@
#include <net/netfilter/nf_conntrack_act_ct.h>
#include "datapath.h"
+#include "drop.h"
#include "conntrack.h"
#include "flow.h"
#include "flow_netlink.h"
@@ -455,45 +456,6 @@ static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
return 0;
}
-static struct nf_conntrack_expect *
-ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
- u16 proto, const struct sk_buff *skb)
-{
- struct nf_conntrack_tuple tuple;
- struct nf_conntrack_expect *exp;
-
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
- return NULL;
-
- exp = __nf_ct_expect_find(net, zone, &tuple);
- if (exp) {
- struct nf_conntrack_tuple_hash *h;
-
- /* Delete existing conntrack entry, if it clashes with the
- * expectation. This can happen since conntrack ALGs do not
- * check for clashes between (new) expectations and existing
- * conntrack entries. nf_conntrack_in() will check the
- * expectations only if a conntrack entry can not be found,
- * which can lead to OVS finding the expectation (here) in the
- * init direction, but which will not be removed by the
- * nf_conntrack_in() call, if a matching conntrack entry is
- * found instead. In this case all init direction packets
- * would be reported as new related packets, while reply
- * direction packets would be reported as un-related
- * established packets.
- */
- h = nf_conntrack_find_get(net, zone, &tuple);
- if (h) {
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
- nf_ct_delete(ct, 0, 0);
- nf_ct_put(ct);
- }
- }
-
- return exp;
-}
-
/* This replicates logic from nf_conntrack_core.c that is not exported. */
static enum ip_conntrack_info
ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
@@ -852,36 +814,16 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
- struct nf_conntrack_expect *exp;
-
- /* If we pass an expected packet through nf_conntrack_in() the
- * expectation is typically removed, but the packet could still be
- * lost in upcall processing. To prevent this from happening we
- * perform an explicit expectation lookup. Expected connections are
- * always new, and will be passed through conntrack only when they are
- * committed, as it is OK to remove the expectation at that time.
- */
- exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
- if (exp) {
- u8 state;
-
- /* NOTE: New connections are NATted and Helped only when
- * committed, so we are not calling into NAT here.
- */
- state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
- __ovs_ct_update_key(key, state, &info->zone, exp->master);
- } else {
- struct nf_conn *ct;
- int err;
+ struct nf_conn *ct;
+ int err;
- err = __ovs_ct_lookup(net, key, info, skb);
- if (err)
- return err;
+ err = __ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ return err;
- ct = (struct nf_conn *)skb_nfct(skb);
- if (ct)
- nf_ct_deliver_cached_events(ct);
- }
+ ct = (struct nf_conn *)skb_nfct(skb);
+ if (ct)
+ nf_ct_deliver_cached_events(ct);
return 0;
}
@@ -1094,7 +1036,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
skb_push_rcsum(skb, nh_ofs);
if (err)
- kfree_skb(skb);
+ ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
return err;
}
@@ -1460,7 +1402,8 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (err)
goto err_free_ct;
- __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+ if (ct_info.commit)
+ __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
return 0;
err_free_ct:
__ovs_ct_free_action(&ct_info);
@@ -1662,7 +1605,7 @@ static struct sk_buff *
ovs_ct_limit_cmd_reply_start(struct genl_info *info, u8 cmd,
struct ovs_header **ovs_reply_header)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sk_buff *skb;
skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 3d7a91e64c88..11c69415c605 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -41,6 +41,7 @@
#include <net/pkt_cls.h>
#include "datapath.h"
+#include "drop.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
@@ -589,7 +590,7 @@ out:
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct sw_flow_actions *acts;
@@ -966,7 +967,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sw_flow *flow = NULL, *new_flow;
struct sw_flow_mask mask;
struct sk_buff *reply;
@@ -1213,7 +1214,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sw_flow_key key;
struct sw_flow *flow;
struct sk_buff *reply = NULL;
@@ -1314,7 +1315,7 @@ error:
static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
@@ -1373,7 +1374,7 @@ unlock:
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
@@ -1641,7 +1642,7 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
{
struct datapath *dp;
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
info->attrs);
if (IS_ERR(dp))
return;
@@ -1934,7 +1935,8 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+ info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
goto err_unlock_free;
@@ -1967,7 +1969,8 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+ info->attrs);
err = PTR_ERR(dp);
if (IS_ERR(dp))
goto err_unlock_free;
@@ -2002,7 +2005,8 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
+ info->attrs);
if (IS_ERR(dp)) {
err = PTR_ERR(dp);
goto err_unlock_free;
@@ -2245,7 +2249,7 @@ static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct vport_parms parms;
struct sk_buff *reply;
struct vport *vport;
@@ -2347,7 +2351,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
+ vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock_free;
@@ -2403,7 +2407,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
- vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
+ vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock_free;
@@ -2446,7 +2450,7 @@ exit_unlock_free:
static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct sk_buff *reply;
struct vport *vport;
int err;
@@ -2702,6 +2706,17 @@ static struct pernet_operations ovs_net_ops = {
.size = sizeof(struct ovs_net),
};
+static const char * const ovs_drop_reasons[] = {
+#define S(x) (#x),
+ OVS_DROP_REASONS(S)
+#undef S
+};
+
+static struct drop_reason_list drop_reason_list_ovs = {
+ .reasons = ovs_drop_reasons,
+ .n_reasons = ARRAY_SIZE(ovs_drop_reasons),
+};
+
static int __init dp_init(void)
{
int err;
@@ -2743,6 +2758,9 @@ static int __init dp_init(void)
if (err < 0)
goto error_unreg_netdev;
+ drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
+ &drop_reason_list_ovs);
+
return 0;
error_unreg_netdev:
@@ -2769,6 +2787,7 @@ static void dp_cleanup(void)
ovs_netdev_exit();
unregister_netdevice_notifier(&ovs_dp_device_notifier);
unregister_pernet_device(&ovs_net_ops);
+ drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);
rcu_barrier();
ovs_vport_exit();
ovs_flow_exit();
diff --git a/net/openvswitch/drop.h b/net/openvswitch/drop.h
new file mode 100644
index 000000000000..cedf9b7b5796
--- /dev/null
+++ b/net/openvswitch/drop.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * OpenvSwitch drop reason list.
+ */
+
+#ifndef OPENVSWITCH_DROP_H
+#define OPENVSWITCH_DROP_H
+#include <linux/skbuff.h>
+#include <net/dropreason.h>
+
+#define OVS_DROP_REASONS(R) \
+ R(OVS_DROP_LAST_ACTION) \
+ R(OVS_DROP_ACTION_ERROR) \
+ R(OVS_DROP_EXPLICIT) \
+ R(OVS_DROP_EXPLICIT_WITH_ERROR) \
+ R(OVS_DROP_METER) \
+ R(OVS_DROP_RECURSION_LIMIT) \
+ R(OVS_DROP_DEFERRED_LIMIT) \
+ R(OVS_DROP_FRAG_L2_TOO_LONG) \
+ R(OVS_DROP_FRAG_INVALID_PROTO) \
+ R(OVS_DROP_CONNTRACK) \
+ R(OVS_DROP_IP_TTL) \
+ /* deliberate comment for trailing \ */
+
+enum ovs_drop_reason {
+ __OVS_DROP_REASON = SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
+ SKB_DROP_REASON_SUBSYS_SHIFT,
+#define ENUM(x) x,
+ OVS_DROP_REASONS(ENUM)
+#undef ENUM
+
+ OVS_DROP_MAX,
+};
+
+static inline void
+ovs_kfree_skb_reason(struct sk_buff *skb, enum ovs_drop_reason reason)
+{
+ kfree_skb_reason(skb, (u32)reason);
+}
+
+#endif /* OPENVSWITCH_DROP_H */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 41116361433d..88965e2068ac 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -38,6 +38,7 @@
#include <net/tun_proto.h>
#include <net/erspan.h>
+#include "drop.h"
#include "flow_netlink.h"
struct ovs_len_tbl {
@@ -61,6 +62,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_RECIRC:
case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_USERSPACE:
+ case OVS_ACTION_ATTR_DROP:
break;
case OVS_ACTION_ATTR_CT:
@@ -2394,7 +2396,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
/* Whenever new actions are added, the need to update this
* function should be considered.
*/
- BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
+ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24);
if (!actions)
return;
@@ -3182,6 +3184,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
+ [OVS_ACTION_ATTR_DROP] = sizeof(u32),
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -3453,6 +3456,11 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_DROP:
+ if (!nla_is_last(a, rem))
+ return -EINVAL;
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index c4ebf810e4b1..cc08e0403909 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -211,7 +211,7 @@ ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
struct ovs_header **ovs_reply_header)
{
struct sk_buff *skb;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb)
@@ -272,7 +272,7 @@ error:
static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr *nla, *band_nla;
struct sk_buff *reply;
@@ -409,7 +409,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct dp_meter *meter, *old_meter;
struct sk_buff *reply;
struct ovs_header *ovs_reply_header;
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct dp_meter_table *meter_tbl;
struct datapath *dp;
int err;
@@ -482,7 +482,7 @@ exit_free_meter:
static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr **a = info->attrs;
struct dp_meter *meter;
@@ -535,7 +535,7 @@ exit_unlock:
static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
- struct ovs_header *ovs_header = info->userhdr;
+ struct ovs_header *ovs_header = genl_info_userhdr(info);
struct ovs_header *ovs_reply_header;
struct nlattr **a = info->attrs;
struct dp_meter *old_meter;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a2935bd18ed9..8f97648d652f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2931,8 +2931,10 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
if (prepad + len < PAGE_SIZE || !linear)
linear = len;
+ if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+ linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER);
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
- err, 0);
+ err, PAGE_ALLOC_COSTLY_ORDER);
if (!skb)
return NULL;
diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c
index 78beb74146e7..41ece61eb57a 100644
--- a/net/qrtr/af_qrtr.c
+++ b/net/qrtr/af_qrtr.c
@@ -23,6 +23,8 @@
#define QRTR_EPH_PORT_RANGE \
XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET)
+#define QRTR_PORT_CTRL_LEGACY 0xffff
+
/**
* struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
* @version: protocol version
@@ -495,6 +497,9 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
goto err;
}
+ if (cb->dst_port == QRTR_PORT_CTRL_LEGACY)
+ cb->dst_port = QRTR_PORT_CTRL;
+
if (!size || len != ALIGN(size, 4) + hdrlen)
goto err;
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 0f7a729f1a1f..b1db0b519179 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -16,7 +16,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/qrtr.h>
-static RADIX_TREE(nodes, GFP_KERNEL);
+static DEFINE_XARRAY(nodes);
static struct {
struct socket *sock;
@@ -66,14 +66,14 @@ struct qrtr_server {
struct qrtr_node {
unsigned int id;
- struct radix_tree_root servers;
+ struct xarray servers;
};
static struct qrtr_node *node_get(unsigned int node_id)
{
struct qrtr_node *node;
- node = radix_tree_lookup(&nodes, node_id);
+ node = xa_load(&nodes, node_id);
if (node)
return node;
@@ -83,8 +83,9 @@ static struct qrtr_node *node_get(unsigned int node_id)
return NULL;
node->id = node_id;
+ xa_init(&node->servers);
- if (radix_tree_insert(&nodes, node_id, node)) {
+ if (xa_store(&nodes, node_id, node, GFP_KERNEL)) {
kfree(node);
return NULL;
}
@@ -193,40 +194,23 @@ static void lookup_notify(struct sockaddr_qrtr *to, struct qrtr_server *srv,
static int announce_servers(struct sockaddr_qrtr *sq)
{
- struct radix_tree_iter iter;
struct qrtr_server *srv;
struct qrtr_node *node;
- void __rcu **slot;
+ unsigned long index;
int ret;
node = node_get(qrtr_ns.local_node);
if (!node)
return 0;
- rcu_read_lock();
/* Announce the list of servers registered in this node */
- radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
-
+ xa_for_each(&node->servers, index, srv) {
ret = service_announce_new(sq, srv);
if (ret < 0) {
pr_err("failed to announce new service\n");
return ret;
}
-
- rcu_read_lock();
}
-
- rcu_read_unlock();
-
return 0;
}
@@ -256,14 +240,17 @@ static struct qrtr_server *server_add(unsigned int service,
goto err;
/* Delete the old server on the same port */
- old = radix_tree_lookup(&node->servers, port);
+ old = xa_store(&node->servers, port, srv, GFP_KERNEL);
if (old) {
- radix_tree_delete(&node->servers, port);
- kfree(old);
+ if (xa_is_err(old)) {
+ pr_err("failed to add server [0x%x:0x%x] ret:%d\n",
+ srv->service, srv->instance, xa_err(old));
+ goto err;
+ } else {
+ kfree(old);
+ }
}
- radix_tree_insert(&node->servers, port, srv);
-
trace_qrtr_ns_server_add(srv->service, srv->instance,
srv->node, srv->port);
@@ -280,11 +267,11 @@ static int server_del(struct qrtr_node *node, unsigned int port, bool bcast)
struct qrtr_server *srv;
struct list_head *li;
- srv = radix_tree_lookup(&node->servers, port);
+ srv = xa_load(&node->servers, port);
if (!srv)
return -ENOENT;
- radix_tree_delete(&node->servers, port);
+ xa_erase(&node->servers, port);
/* Broadcast the removal of local servers */
if (srv->node == qrtr_ns.local_node && bcast)
@@ -344,13 +331,12 @@ static int ctrl_cmd_hello(struct sockaddr_qrtr *sq)
static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
{
struct qrtr_node *local_node;
- struct radix_tree_iter iter;
struct qrtr_ctrl_pkt pkt;
struct qrtr_server *srv;
struct sockaddr_qrtr sq;
struct msghdr msg = { };
struct qrtr_node *node;
- void __rcu **slot;
+ unsigned long index;
struct kvec iv;
int ret;
@@ -361,22 +347,9 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
if (!node)
return 0;
- rcu_read_lock();
/* Advertise removal of this client to all servers of remote node */
- radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
+ xa_for_each(&node->servers, index, srv)
server_del(node, srv->port, true);
- rcu_read_lock();
- }
- rcu_read_unlock();
/* Advertise the removal of this client to all local servers */
local_node = node_get(qrtr_ns.local_node);
@@ -387,18 +360,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE);
pkt.client.node = cpu_to_le32(from->sq_node);
- rcu_read_lock();
- radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
-
+ xa_for_each(&local_node->servers, index, srv) {
sq.sq_family = AF_QIPCRTR;
sq.sq_node = srv->node;
sq.sq_port = srv->port;
@@ -411,11 +373,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
pr_err("failed to send bye cmd\n");
return ret;
}
- rcu_read_lock();
}
-
- rcu_read_unlock();
-
return 0;
}
@@ -423,7 +381,6 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
unsigned int node_id, unsigned int port)
{
struct qrtr_node *local_node;
- struct radix_tree_iter iter;
struct qrtr_lookup *lookup;
struct qrtr_ctrl_pkt pkt;
struct msghdr msg = { };
@@ -432,7 +389,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
struct qrtr_node *node;
struct list_head *tmp;
struct list_head *li;
- void __rcu **slot;
+ unsigned long index;
struct kvec iv;
int ret;
@@ -477,18 +434,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
pkt.client.node = cpu_to_le32(node_id);
pkt.client.port = cpu_to_le32(port);
- rcu_read_lock();
- radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) {
- srv = radix_tree_deref_slot(slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
- slot = radix_tree_iter_resume(slot, &iter);
- rcu_read_unlock();
-
+ xa_for_each(&local_node->servers, index, srv) {
sq.sq_family = AF_QIPCRTR;
sq.sq_node = srv->node;
sq.sq_port = srv->port;
@@ -501,11 +447,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
pr_err("failed to send del client cmd\n");
return ret;
}
- rcu_read_lock();
}
-
- rcu_read_unlock();
-
return 0;
}
@@ -576,13 +518,12 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from,
static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
unsigned int service, unsigned int instance)
{
- struct radix_tree_iter node_iter;
struct qrtr_server_filter filter;
- struct radix_tree_iter srv_iter;
struct qrtr_lookup *lookup;
+ struct qrtr_server *srv;
struct qrtr_node *node;
- void __rcu **node_slot;
- void __rcu **srv_slot;
+ unsigned long node_idx;
+ unsigned long srv_idx;
/* Accept only local observers */
if (from->sq_node != qrtr_ns.local_node)
@@ -601,40 +542,14 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
filter.service = service;
filter.instance = instance;
- rcu_read_lock();
- radix_tree_for_each_slot(node_slot, &nodes, &node_iter, 0) {
- node = radix_tree_deref_slot(node_slot);
- if (!node)
- continue;
- if (radix_tree_deref_retry(node)) {
- node_slot = radix_tree_iter_retry(&node_iter);
- continue;
- }
- node_slot = radix_tree_iter_resume(node_slot, &node_iter);
-
- radix_tree_for_each_slot(srv_slot, &node->servers,
- &srv_iter, 0) {
- struct qrtr_server *srv;
-
- srv = radix_tree_deref_slot(srv_slot);
- if (!srv)
- continue;
- if (radix_tree_deref_retry(srv)) {
- srv_slot = radix_tree_iter_retry(&srv_iter);
- continue;
- }
-
+ xa_for_each(&nodes, node_idx, node) {
+ xa_for_each(&node->servers, srv_idx, srv) {
if (!server_match(srv, &filter))
continue;
- srv_slot = radix_tree_iter_resume(srv_slot, &srv_iter);
-
- rcu_read_unlock();
lookup_notify(from, srv, true);
- rcu_read_lock();
}
}
- rcu_read_unlock();
/* Empty notification, to indicate end of listing */
lookup_notify(from, NULL, true);
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index d36f3f6b4351..b15cf316b23a 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -86,11 +86,13 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_ADDR_RESOLVED:
- rdma_set_service_type(cm_id, conn->c_tos);
- rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32);
- /* XXX do we need to clean up if this fails? */
- ret = rdma_resolve_route(cm_id,
- RDS_RDMA_RESOLVE_TIMEOUT_MS);
+ if (conn) {
+ rdma_set_service_type(cm_id, conn->c_tos);
+ rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32);
+ /* XXX do we need to clean up if this fails? */
+ ret = rdma_resolve_route(cm_id,
+ RDS_RDMA_RESOLVE_TIMEOUT_MS);
+ }
break;
case RDMA_CM_EVENT_ROUTE_RESOLVED:
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index ca4c3a667091..d2fdb1529585 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -17,7 +17,6 @@
*/
#define RDS_RDMA_REJ_INCOMPAT 1
-int rds_rdma_conn_connect(struct rds_connection *conn);
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
diff --git a/net/rds/rds.h b/net/rds/rds.h
index d35d1fc39807..dc360252c515 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -863,7 +863,6 @@ int rds_message_next_extension(struct rds_header *hdr,
unsigned int *pos, void *buf, unsigned int *buflen);
int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
-void rds_message_inc_free(struct rds_incoming *inc);
void rds_message_addref(struct rds_message *rm);
void rds_message_put(struct rds_message *rm);
void rds_message_wait(struct rds_message *rm);
@@ -1013,7 +1012,5 @@ void rds_trans_put(struct rds_transport *trans);
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
unsigned int avail);
struct rds_transport *rds_trans_get(int t_type);
-int rds_trans_init(void);
-void rds_trans_exit(void);
#endif
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index c5b86066ff66..2dba7505b414 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -565,7 +565,8 @@ static __net_init int rds_tcp_init_net(struct net *net)
}
tbl[RDS_TCP_SNDBUF].data = &rtn->sndbuf_size;
tbl[RDS_TCP_RCVBUF].data = &rtn->rcvbuf_size;
- rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp", tbl);
+ rtn->rds_tcp_sysctl = register_net_sysctl_sz(net, "net/rds/tcp", tbl,
+ ARRAY_SIZE(rds_tcp_sysctl_table));
if (!rtn->rds_tcp_sysctl) {
pr_warn("could not register sysctl\n");
err = -ENOMEM;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index f8b5930d7b34..053aa7da87ef 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -56,7 +56,6 @@ void rds_tcp_restore_callbacks(struct socket *sock,
struct rds_tcp_connection *tc);
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc);
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
-u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
extern struct rds_transport rds_tcp_transport;
void rds_tcp_accept_work(struct sock *sk);
int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 4b95cb1ac435..470c70deffe2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -347,8 +347,7 @@ config NET_SCH_FQ_PIE
config NET_SCH_INGRESS
tristate "Ingress/classifier-action Qdisc"
depends on NET_CLS_ACT
- select NET_INGRESS
- select NET_EGRESS
+ select NET_XGRESS
help
Say Y here if you want to use classifiers for incoming and/or outgoing
packets. This qdisc doesn't do anything else besides running classifiers,
@@ -679,6 +678,7 @@ config NET_EMATCH_IPT
config NET_CLS_ACT
bool "Actions"
select NET_CLS
+ select NET_XGRESS
help
Say Y here if you want to use traffic control actions. Actions
get attached to classifiers and are invoked after a successful
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index abc71a06d634..7c652d14528b 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1238,7 +1238,8 @@ static int tcf_ct_fill_params(struct net *net,
}
}
- __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+ if (p->ct_action & TCA_CT_ACT_COMMIT)
+ __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
return 0;
err:
nf_ct_put(p->tmpl);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9f0711da9c95..e5314a31f75a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -72,6 +72,7 @@ struct fl_flow_key {
struct flow_dissector_key_num_of_vlans num_of_vlans;
struct flow_dissector_key_pppoe pppoe;
struct flow_dissector_key_l2tpv3 l2tpv3;
+ struct flow_dissector_key_ipsec ipsec;
struct flow_dissector_key_cfm cfm;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
@@ -726,6 +727,8 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_PPPOE_SID] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_PPP_PROTO] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_L2TPV3_SID] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_SPI] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_SPI_MASK] = { .type = NLA_U32 },
[TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1),
[TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED },
};
@@ -796,6 +799,24 @@ static void fl_set_key_val(struct nlattr **tb,
nla_memcpy(mask, tb[mask_type], len);
}
+static int fl_set_key_spi(struct nlattr **tb, struct fl_flow_key *key,
+ struct fl_flow_key *mask,
+ struct netlink_ext_ack *extack)
+{
+ if (key->basic.ip_proto != IPPROTO_ESP &&
+ key->basic.ip_proto != IPPROTO_AH) {
+ NL_SET_ERR_MSG(extack,
+ "Protocol must be either ESP or AH");
+ return -EINVAL;
+ }
+
+ fl_set_key_val(tb, &key->ipsec.spi,
+ TCA_FLOWER_KEY_SPI,
+ &mask->ipsec.spi, TCA_FLOWER_KEY_SPI_MASK,
+ sizeof(key->ipsec.spi));
+ return 0;
+}
+
static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@ -1895,6 +1916,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
return ret;
}
+ if (tb[TCA_FLOWER_KEY_SPI]) {
+ ret = fl_set_key_spi(tb, key, mask, extack);
+ if (ret)
+ return ret;
+ }
+
if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -2068,6 +2095,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_L2TPV3, l2tpv3);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPSEC, ipsec);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_CFM, cfm);
skb_flow_dissector_init(dissector, keys, cnt);
@@ -3365,6 +3394,12 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
sizeof(key->l2tpv3.session_id)))
goto nla_put_failure;
+ if (key->ipsec.spi &&
+ fl_dump_key_val(skb, &key->ipsec.spi, TCA_FLOWER_KEY_SPI,
+ &mask->ipsec.spi, TCA_FLOWER_KEY_SPI_MASK,
+ sizeof(key->ipsec.spi)))
+ goto nla_put_failure;
+
if ((key->basic.ip_proto == IPPROTO_TCP ||
key->basic.ip_proto == IPPROTO_UDP ||
key->basic.ip_proto == IPPROTO_SCTP) &&
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 6fdba069f6bf..da34fd4c9269 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -502,7 +502,7 @@ META_COLLECTOR(int_sk_lingertime)
*err = -1;
return;
}
- dst->value = sk->sk_lingertime / HZ;
+ dst->value = READ_ONCE(sk->sk_lingertime) / HZ;
}
META_COLLECTOR(int_sk_err_qlen)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index aa6b1fe65151..e9eaf637220e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1547,10 +1547,28 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
return 0;
}
+static bool req_create_or_replace(struct nlmsghdr *n)
+{
+ return (n->nlmsg_flags & NLM_F_CREATE &&
+ n->nlmsg_flags & NLM_F_REPLACE);
+}
+
+static bool req_create_exclusive(struct nlmsghdr *n)
+{
+ return (n->nlmsg_flags & NLM_F_CREATE &&
+ n->nlmsg_flags & NLM_F_EXCL);
+}
+
+static bool req_change(struct nlmsghdr *n)
+{
+ return (!(n->nlmsg_flags & NLM_F_CREATE) &&
+ !(n->nlmsg_flags & NLM_F_REPLACE) &&
+ !(n->nlmsg_flags & NLM_F_EXCL));
+}
+
/*
* Create/change qdisc.
*/
-
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
@@ -1644,27 +1662,35 @@ replay:
*
* We know, that some child q is already
* attached to this parent and have choice:
- * either to change it or to create/graft new one.
+ * 1) change it or 2) create/graft new one.
+ * If the requested qdisc kind is different
+ * than the existing one, then we choose graft.
+ * If they are the same then this is "change"
+ * operation - just let it fallthrough..
*
* 1. We are allowed to create/graft only
- * if CREATE and REPLACE flags are set.
+ * if the request is explicitly stating
+ * "please create if it doesn't exist".
*
- * 2. If EXCL is set, requestor wanted to say,
- * that qdisc tcm_handle is not expected
+ * 2. If the request is to exclusive create
+ * then the qdisc tcm_handle is not expected
* to exist, so that we choose create/graft too.
*
* 3. The last case is when no flags are set.
+ * This will happen when for example tc
+ * utility issues a "change" command.
* Alas, it is sort of hole in API, we
* cannot decide what to do unambiguously.
- * For now we select create/graft, if
- * user gave KIND, which does not match existing.
+ * For now we select create/graft.
*/
- if ((n->nlmsg_flags & NLM_F_CREATE) &&
- (n->nlmsg_flags & NLM_F_REPLACE) &&
- ((n->nlmsg_flags & NLM_F_EXCL) ||
- (tca[TCA_KIND] &&
- nla_strcmp(tca[TCA_KIND], q->ops->id))))
- goto create_n_graft;
+ if (tca[TCA_KIND] &&
+ nla_strcmp(tca[TCA_KIND], q->ops->id)) {
+ if (req_create_or_replace(n) ||
+ req_create_exclusive(n))
+ goto create_n_graft;
+ else if (req_change(n))
+ goto create_n_graft2;
+ }
}
}
} else {
@@ -1698,6 +1724,7 @@ create_n_graft:
NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
return -ENOENT;
}
+create_n_graft2:
if (clid == TC_H_INGRESS) {
if (dev_ingress_queue(dev)) {
q = qdisc_create(dev, dev_ingress_queue(dev),
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e35a4e90f4e6..19901e77cd3b 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -17,7 +17,6 @@
struct drr_class {
struct Qdisc_class_common common;
- unsigned int filter_cnt;
struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
@@ -150,8 +149,10 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg,
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl = (struct drr_class *)arg;
- if (cl->filter_cnt > 0)
+ if (qdisc_class_in_use(&cl->common)) {
+ NL_SET_ERR_MSG(extack, "DRR class is in use");
return -EBUSY;
+ }
sch_tree_lock(sch);
@@ -187,8 +188,8 @@ static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
{
struct drr_class *cl = drr_find_class(sch, classid);
- if (cl != NULL)
- cl->filter_cnt++;
+ if (cl)
+ qdisc_class_get(&cl->common);
return (unsigned long)cl;
}
@@ -197,7 +198,7 @@ static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
struct drr_class *cl = (struct drr_class *)arg;
- cl->filter_cnt--;
+ qdisc_class_put(&cl->common);
}
static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 591d87d5e5c0..68e6acd0f130 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -61,6 +61,7 @@ struct fq_pie_sched_data {
struct pie_params p_params;
u32 ecn_prob;
u32 flows_cnt;
+ u32 flows_cursor;
u32 quantum;
u32 memory_limit;
u32 new_flow_count;
@@ -375,22 +376,32 @@ flow_error:
static void fq_pie_timer(struct timer_list *t)
{
struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
+ unsigned long next, tupdate;
struct Qdisc *sch = q->sch;
spinlock_t *root_lock; /* to lock qdisc for probability calculations */
- u32 idx;
+ int max_cnt, i;
rcu_read_lock();
root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
- for (idx = 0; idx < q->flows_cnt; idx++)
- pie_calculate_probability(&q->p_params, &q->flows[idx].vars,
- q->flows[idx].backlog);
-
- /* reset the timer to fire after 'tupdate' jiffies. */
- if (q->p_params.tupdate)
- mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);
+ /* Limit this expensive loop to 2048 flows per round. */
+ max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048);
+ for (i = 0; i < max_cnt; i++) {
+ pie_calculate_probability(&q->p_params,
+ &q->flows[q->flows_cursor].vars,
+ q->flows[q->flows_cursor].backlog);
+ q->flows_cursor++;
+ }
+ tupdate = q->p_params.tupdate;
+ next = 0;
+ if (q->flows_cursor >= q->flows_cnt) {
+ q->flows_cursor = 0;
+ next = tupdate;
+ }
+ if (tupdate)
+ mod_timer(&q->adapt_timer, jiffies + next);
spin_unlock(root_lock);
rcu_read_unlock();
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 70b0c5873d32..3554085bc2be 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -116,7 +116,6 @@ struct hfsc_class {
struct net_rate_estimator __rcu *rate_est;
struct tcf_proto __rcu *filter_list; /* filter list */
struct tcf_block *block;
- unsigned int filter_cnt; /* filter count */
unsigned int level; /* class level in hierarchy */
struct hfsc_sched *sched; /* scheduler data */
@@ -1012,6 +1011,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (parent == NULL)
return -ENOENT;
}
+ if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
+ NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC");
+ return -EINVAL;
+ }
if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
return -EINVAL;
@@ -1094,8 +1097,11 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg,
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl = (struct hfsc_class *)arg;
- if (cl->level > 0 || cl->filter_cnt > 0 || cl == &q->root)
+ if (cl->level > 0 || qdisc_class_in_use(&cl->cl_common) ||
+ cl == &q->root) {
+ NL_SET_ERR_MSG(extack, "HFSC class in use");
return -EBUSY;
+ }
sch_tree_lock(sch);
@@ -1223,7 +1229,7 @@ hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid)
if (cl != NULL) {
if (p != NULL && p->level <= cl->level)
return 0;
- cl->filter_cnt++;
+ qdisc_class_get(&cl->cl_common);
}
return (unsigned long)cl;
@@ -1234,7 +1240,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
struct hfsc_class *cl = (struct hfsc_class *)arg;
- cl->filter_cnt--;
+ qdisc_class_put(&cl->cl_common);
}
static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 325c29041c7d..0d947414e616 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -102,7 +102,6 @@ struct htb_class {
struct tcf_proto __rcu *filter_list; /* class attached filters */
struct tcf_block *block;
- int filter_cnt;
int level; /* our level (see above) */
unsigned int children;
@@ -1710,8 +1709,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
* tc subsys guarantee us that in htb_destroy it holds no class
* refs so that we can remove children safely there ?
*/
- if (cl->children || cl->filter_cnt)
+ if (cl->children || qdisc_class_in_use(&cl->common)) {
+ NL_SET_ERR_MSG(extack, "HTB class in use");
return -EBUSY;
+ }
if (!cl->level && htb_parent_last_child(cl))
last_child = 1;
@@ -1810,10 +1811,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
goto failure;
}
- if (hopt->quantum) {
- NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
- goto failure;
- }
}
/* Keeping backward compatible with rate_table based iproute2 tc */
@@ -1910,6 +1907,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
.prio = hopt->prio,
+ .quantum = hopt->quantum,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -1931,6 +1929,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
.prio = hopt->prio,
+ .quantum = hopt->quantum,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -2017,6 +2016,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
.prio = hopt->prio,
+ .quantum = hopt->quantum,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -2108,7 +2108,7 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
* be broken by class during destroy IIUC.
*/
if (cl)
- cl->filter_cnt++;
+ qdisc_class_get(&cl->common);
return (unsigned long)cl;
}
@@ -2116,8 +2116,7 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
struct htb_class *cl = (struct htb_class *)arg;
- if (cl)
- cl->filter_cnt--;
+ qdisc_class_put(&cl->common);
}
static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index e43a45499372..a463a63192c3 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -13,6 +13,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tcx.h>
struct ingress_sched_data {
struct tcf_block *block;
@@ -78,6 +79,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
{
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *entry;
+ bool created;
int err;
if (sch->parent != TC_H_INGRESS)
@@ -85,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
net_inc_ingress_queue();
- mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
+ entry = tcx_entry_fetch_or_create(dev, true, &created);
+ if (!entry)
+ return -ENOMEM;
+ tcx_miniq_set_active(entry, true);
+ mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq);
+ if (created)
+ tcx_entry_update(dev, entry, true);
q->block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
q->block_info.chain_head_change = clsact_chain_head_change;
@@ -103,11 +112,22 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *entry = rtnl_dereference(dev->tcx_ingress);
if (sch->parent != TC_H_INGRESS)
return;
tcf_block_put_ext(q->block, sch, &q->block_info);
+
+ if (entry) {
+ tcx_miniq_set_active(entry, false);
+ if (!tcx_entry_is_active(entry)) {
+ tcx_entry_update(dev, NULL, true);
+ tcx_entry_free(entry);
+ }
+ }
+
net_dec_ingress_queue();
}
@@ -223,6 +243,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
{
struct clsact_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *entry;
+ bool created;
int err;
if (sch->parent != TC_H_CLSACT)
@@ -231,7 +253,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
net_inc_ingress_queue();
net_inc_egress_queue();
- mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
+ entry = tcx_entry_fetch_or_create(dev, true, &created);
+ if (!entry)
+ return -ENOMEM;
+ tcx_miniq_set_active(entry, true);
+ mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq);
+ if (created)
+ tcx_entry_update(dev, entry, true);
q->ingress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
q->ingress_block_info.chain_head_change = clsact_chain_head_change;
@@ -244,7 +272,13 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block);
- mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
+ entry = tcx_entry_fetch_or_create(dev, false, &created);
+ if (!entry)
+ return -ENOMEM;
+ tcx_miniq_set_active(entry, true);
+ mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq);
+ if (created)
+ tcx_entry_update(dev, entry, false);
q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
q->egress_block_info.chain_head_change = clsact_chain_head_change;
@@ -256,12 +290,31 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
static void clsact_destroy(struct Qdisc *sch)
{
struct clsact_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct bpf_mprog_entry *ingress_entry = rtnl_dereference(dev->tcx_ingress);
+ struct bpf_mprog_entry *egress_entry = rtnl_dereference(dev->tcx_egress);
if (sch->parent != TC_H_CLSACT)
return;
- tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
+ tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
+
+ if (ingress_entry) {
+ tcx_miniq_set_active(ingress_entry, false);
+ if (!tcx_entry_is_active(ingress_entry)) {
+ tcx_entry_update(dev, NULL, true);
+ tcx_entry_free(ingress_entry);
+ }
+ }
+
+ if (egress_entry) {
+ tcx_miniq_set_active(egress_entry, false);
+ if (!tcx_entry_is_active(egress_entry)) {
+ tcx_entry_update(dev, NULL, false);
+ tcx_entry_free(egress_entry);
+ }
+ }
net_dec_ingress_queue();
net_dec_egress_queue();
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 38d9aa0cd30e..4ad39a4a3cf5 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -105,6 +105,11 @@ struct netem_sched_data {
u32 rho;
} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
+ struct prng {
+ u64 seed;
+ struct rnd_state prng_state;
+ } prng;
+
struct disttable *delay_dist;
enum {
@@ -179,15 +184,16 @@ static void init_crandom(struct crndstate *state, unsigned long rho)
* Next number depends on last value.
* rho is scaled to avoid floating point.
*/
-static u32 get_crandom(struct crndstate *state)
+static u32 get_crandom(struct crndstate *state, struct prng *p)
{
u64 value, rho;
unsigned long answer;
+ struct rnd_state *s = &p->prng_state;
if (!state || state->rho == 0) /* no correlation */
- return get_random_u32();
+ return prandom_u32_state(s);
- value = get_random_u32();
+ value = prandom_u32_state(s);
rho = (u64)state->rho + 1;
answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
state->last = answer;
@@ -201,7 +207,7 @@ static u32 get_crandom(struct crndstate *state)
static bool loss_4state(struct netem_sched_data *q)
{
struct clgstate *clg = &q->clg;
- u32 rnd = get_random_u32();
+ u32 rnd = prandom_u32_state(&q->prng.prng_state);
/*
* Makes a comparison between rnd and the transition
@@ -266,18 +272,19 @@ static bool loss_4state(struct netem_sched_data *q)
static bool loss_gilb_ell(struct netem_sched_data *q)
{
struct clgstate *clg = &q->clg;
+ struct rnd_state *s = &q->prng.prng_state;
switch (clg->state) {
case GOOD_STATE:
- if (get_random_u32() < clg->a1)
+ if (prandom_u32_state(s) < clg->a1)
clg->state = BAD_STATE;
- if (get_random_u32() < clg->a4)
+ if (prandom_u32_state(s) < clg->a4)
return true;
break;
case BAD_STATE:
- if (get_random_u32() < clg->a2)
+ if (prandom_u32_state(s) < clg->a2)
clg->state = GOOD_STATE;
- if (get_random_u32() > clg->a3)
+ if (prandom_u32_state(s) > clg->a3)
return true;
}
@@ -289,7 +296,7 @@ static bool loss_event(struct netem_sched_data *q)
switch (q->loss_model) {
case CLG_RANDOM:
/* Random packet drop 0 => none, ~0 => all */
- return q->loss && q->loss >= get_crandom(&q->loss_cor);
+ return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng);
case CLG_4_STATES:
/* 4state loss model algorithm (used also for GI model)
@@ -318,6 +325,7 @@ static bool loss_event(struct netem_sched_data *q)
*/
static s64 tabledist(s64 mu, s32 sigma,
struct crndstate *state,
+ struct prng *prng,
const struct disttable *dist)
{
s64 x;
@@ -327,7 +335,7 @@ static s64 tabledist(s64 mu, s32 sigma,
if (sigma == 0)
return mu;
- rnd = get_crandom(state);
+ rnd = get_crandom(state, prng);
/* default uniform distribution */
if (dist == NULL)
@@ -449,7 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
skb->prev = NULL;
/* Random duplication */
- if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng))
++count;
/* Drop packet? */
@@ -492,7 +500,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
* If packet is going to be hardware checksummed, then
* do it now in software before we mangle it.
*/
- if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) {
if (skb_is_gso(skb)) {
skb = netem_segment(skb, sch, to_free);
if (!skb)
@@ -530,12 +538,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
cb = netem_skb_cb(skb);
if (q->gap == 0 || /* not doing reordering */
q->counter < q->gap - 1 || /* inside last reordering gap */
- q->reorder < get_crandom(&q->reorder_cor)) {
+ q->reorder < get_crandom(&q->reorder_cor, &q->prng)) {
u64 now;
s64 delay;
delay = tabledist(q->latency, q->jitter,
- &q->delay_cor, q->delay_dist);
+ &q->delay_cor, &q->prng, q->delay_dist);
now = ktime_get_ns();
@@ -639,7 +647,7 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
else
next_delay = tabledist(q->slot_config.dist_delay,
(s32)(q->slot_config.dist_jitter),
- NULL, q->slot_dist);
+ NULL, &q->prng, q->slot_dist);
q->slot.slot_next = now + next_delay;
q->slot.packets_left = q->slot_config.max_packets;
@@ -922,6 +930,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_LATENCY64] = { .type = NLA_S64 },
[TCA_NETEM_JITTER64] = { .type = NLA_S64 },
[TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) },
+ [TCA_NETEM_PRNG_SEED] = { .type = NLA_U64 },
};
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -1040,6 +1049,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
/* capping jitter to the range acceptable by tabledist() */
q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
+ if (tb[TCA_NETEM_PRNG_SEED])
+ q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]);
+ else
+ q->prng.seed = get_random_u64();
+ prandom_seed_state(&q->prng.prng_state, q->prng.seed);
+
unlock:
sch_tree_unlock(sch);
@@ -1203,6 +1218,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure;
}
+ if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed,
+ TCA_NETEM_PAD))
+ goto nla_put_failure;
+
return nla_nest_end(skb, nla);
nla_put_failure:
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index ea8c4a7174bb..35f49edf63db 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -207,7 +207,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct plug_sched_data),
.enqueue = plug_enqueue,
.dequeue = plug_dequeue,
- .peek = qdisc_peek_head,
+ .peek = qdisc_peek_dequeued,
.init = plug_init,
.change = plug_change,
.reset = qdisc_reset_queue,
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index befaf74b33ca..546c10adcacd 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -130,8 +130,6 @@ struct qfq_aggregate;
struct qfq_class {
struct Qdisc_class_common common;
- unsigned int filter_cnt;
-
struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
struct net_rate_estimator __rcu *rate_est;
@@ -545,8 +543,10 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl = (struct qfq_class *)arg;
- if (cl->filter_cnt > 0)
+ if (qdisc_class_in_use(&cl->common)) {
+ NL_SET_ERR_MSG_MOD(extack, "QFQ class in use");
return -EBUSY;
+ }
sch_tree_lock(sch);
@@ -580,8 +580,8 @@ static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
{
struct qfq_class *cl = qfq_find_class(sch, classid);
- if (cl != NULL)
- cl->filter_cnt++;
+ if (cl)
+ qdisc_class_get(&cl->common);
return (unsigned long)cl;
}
@@ -590,7 +590,7 @@ static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
struct qfq_class *cl = (struct qfq_class *)arg;
- cl->filter_cnt--;
+ qdisc_class_put(&cl->common);
}
static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
@@ -974,10 +974,13 @@ static void qfq_update_eligible(struct qfq_sched *q)
}
/* Dequeue head packet of the head class in the DRR queue of the aggregate. */
-static void agg_dequeue(struct qfq_aggregate *agg,
- struct qfq_class *cl, unsigned int len)
+static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
+ struct qfq_class *cl, unsigned int len)
{
- qdisc_dequeue_peeked(cl->qdisc);
+ struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc);
+
+ if (!skb)
+ return NULL;
cl->deficit -= (int) len;
@@ -987,6 +990,8 @@ static void agg_dequeue(struct qfq_aggregate *agg,
cl->deficit += agg->lmax;
list_move_tail(&cl->alist, &agg->active);
}
+
+ return skb;
}
static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
@@ -1132,11 +1137,18 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
if (!skb)
return NULL;
- qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
+
+ skb = agg_dequeue(in_serv_agg, cl, len);
+
+ if (!skb) {
+ sch->q.qlen++;
+ return NULL;
+ }
+
+ qdisc_qstats_backlog_dec(sch, skb);
qdisc_bstats_update(sch, skb);
- agg_dequeue(in_serv_agg, cl, len);
/* If lmax is lowered, through qfq_change_class, for a class
* owning pending packets with larger size than the new value
* of lmax, then the following condition may hold.
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 8c9cfff7fd05..1cb5e41c0ec7 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -2099,11 +2099,8 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
return -EOPNOTSUPP;
}
- /* pre-allocate qdisc, attachment can't fail */
- q->qdiscs = kcalloc(dev->num_tx_queues,
- sizeof(q->qdiscs[0]),
+ q->qdiscs = kcalloc(dev->num_tx_queues, sizeof(q->qdiscs[0]),
GFP_KERNEL);
-
if (!q->qdiscs)
return -ENOMEM;
@@ -2145,25 +2142,32 @@ static void taprio_attach(struct Qdisc *sch)
/* Attach underlying qdisc */
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
- struct Qdisc *qdisc = q->qdiscs[ntx];
- struct Qdisc *old;
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+ struct Qdisc *old, *dev_queue_qdisc;
if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+ struct Qdisc *qdisc = q->qdiscs[ntx];
+
+ /* In offload mode, the root taprio qdisc is bypassed
+ * and the netdev TX queues see the children directly
+ */
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
- old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+ dev_queue_qdisc = qdisc;
} else {
- old = dev_graft_qdisc(qdisc->dev_queue, sch);
- qdisc_refcount_inc(sch);
+ /* In software mode, attach the root taprio qdisc
+ * to all netdev TX queues, so that dev_qdisc_enqueue()
+ * goes through taprio_enqueue().
+ */
+ dev_queue_qdisc = sch;
}
+ old = dev_graft_qdisc(dev_queue, dev_queue_qdisc);
+ /* The qdisc's refcount requires to be elevated once
+ * for each netdev TX queue it is grafted onto
+ */
+ qdisc_refcount_inc(dev_queue_qdisc);
if (old)
qdisc_put(old);
}
-
- /* access to the child qdiscs is not needed in offload mode */
- if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
- kfree(q->qdiscs);
- q->qdiscs = NULL;
- }
}
static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
@@ -2192,13 +2196,23 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl,
if (dev->flags & IFF_UP)
dev_deactivate(dev);
+ /* In offload mode, the child Qdisc is directly attached to the netdev
+ * TX queue, and thus, we need to keep its refcount elevated in order
+ * to counteract qdisc_graft()'s call to qdisc_put() once per TX queue.
+ * However, save the reference to the new qdisc in the private array in
+ * both software and offload cases, to have an up-to-date reference to
+ * our children.
+ */
+ *old = q->qdiscs[cl - 1];
if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
- *old = dev_graft_qdisc(dev_queue, new);
- } else {
- *old = q->qdiscs[cl - 1];
- q->qdiscs[cl - 1] = new;
+ WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old);
+ if (new)
+ qdisc_refcount_inc(new);
+ if (*old)
+ qdisc_put(*old);
}
+ q->qdiscs[cl - 1] = new;
if (new)
new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
@@ -2436,12 +2450,14 @@ start_error:
static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
{
- struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned int ntx = cl - 1;
- if (!dev_queue)
+ if (ntx >= dev->num_tx_queues)
return NULL;
- return rtnl_dereference(dev_queue->qdisc_sleeping);
+ return q->qdiscs[ntx];
}
static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
@@ -2456,11 +2472,11 @@ static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
struct sk_buff *skb, struct tcmsg *tcm)
{
- struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+ struct Qdisc *child = taprio_leaf(sch, cl);
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle |= TC_H_MIN(cl);
- tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
+ tcm->tcm_info = child->handle;
return 0;
}
@@ -2470,16 +2486,14 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
__releases(d->lock)
__acquires(d->lock)
{
- struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+ struct Qdisc *child = taprio_leaf(sch, cl);
struct tc_taprio_qopt_offload offload = {
.cmd = TAPRIO_CMD_QUEUE_STATS,
.queue_stats = {
.queue = cl - 1,
},
};
- struct Qdisc *child;
- child = rtnl_dereference(dev_queue->qdisc_sleeping);
if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 ||
qdisc_qstats_copy(d, child) < 0)
return -1;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 2613c4d74b16..17fcaa9b0df9 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -581,7 +581,7 @@ static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb,
default:
return;
}
- if (!sock_owned_by_user(sk) && inet_sk(sk)->recverr) {
+ if (!sock_owned_by_user(sk) && inet_test_bit(RECVERR, sk)) {
sk->sk_err = err;
sk_error_report(sk);
} else { /* Only an error on timeout */
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index f13d6a34f32f..ec00ee75d59a 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -282,7 +282,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
assoc->init_retries, assoc->shutdown_retries,
assoc->rtx_data_chunks,
refcount_read(&sk->sk_wmem_alloc),
- sk->sk_wmem_queued,
+ READ_ONCE(sk->sk_wmem_queued),
sk->sk_sndbuf,
sk->sk_rcvbuf);
seq_printf(seq, "\n");
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 274d07bd774f..2185f44198de 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -360,7 +360,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
ret = inet_addr_type_table(net, addr->v4.sin_addr.s_addr, tb_id);
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
ret != RTN_LOCAL &&
- !sp->inet.freebind &&
+ !inet_test_bit(FREEBIND, sk) &&
!READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
return 0;
@@ -435,7 +435,8 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl4->fl4_dport = daddr->v4.sin_port;
fl4->flowi4_proto = IPPROTO_SCTP;
if (asoc) {
- fl4->flowi4_tos = RT_CONN_FLAGS_TOS(asoc->base.sk, tos);
+ fl4->flowi4_tos = RT_TOS(tos);
+ fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk);
fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
fl4->fl4_sport = htons(asoc->base.bind_addr.port);
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9388d98aebc0..ab943e8fb1db 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -69,7 +69,7 @@
#include <net/sctp/stream_sched.h>
/* Forward declarations for internal helper functions. */
-static bool sctp_writeable(struct sock *sk);
+static bool sctp_writeable(const struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
size_t msg_len);
@@ -99,7 +99,7 @@ struct percpu_counter sctp_sockets_allocated;
static void sctp_enter_memory_pressure(struct sock *sk)
{
- sctp_memory_pressure = 1;
+ WRITE_ONCE(sctp_memory_pressure, 1);
}
@@ -140,7 +140,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk);
- sk->sk_wmem_queued += chunk->skb->truesize + sizeof(struct sctp_chunk);
+ sk_wmem_queued_add(sk, chunk->skb->truesize + sizeof(struct sctp_chunk));
sk_mem_charge(sk, chunk->skb->truesize);
}
@@ -9144,7 +9144,7 @@ static void sctp_wfree(struct sk_buff *skb)
struct sock *sk = asoc->base.sk;
sk_mem_uncharge(sk, skb->truesize);
- sk->sk_wmem_queued -= skb->truesize + sizeof(struct sctp_chunk);
+ sk_wmem_queued_add(sk, -(skb->truesize + sizeof(struct sctp_chunk)));
asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk);
WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk),
&sk->sk_wmem_alloc));
@@ -9299,9 +9299,9 @@ void sctp_write_space(struct sock *sk)
* UDP-style sockets or TCP-style sockets, this code should work.
* - Daisy
*/
-static bool sctp_writeable(struct sock *sk)
+static bool sctp_writeable(const struct sock *sk)
{
- return sk->sk_sndbuf > sk->sk_wmem_queued;
+ return READ_ONCE(sk->sk_sndbuf) > READ_ONCE(sk->sk_wmem_queued);
}
/* Wait for an association to go into ESTABLISHED state. If timeout is 0,
@@ -9479,10 +9479,10 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
newinet->inet_dport = htons(asoc->peer.port);
newinet->pmtudisc = inet->pmtudisc;
- newinet->inet_id = get_random_u16();
+ atomic_set(&newinet->inet_id, get_random_u16());
newinet->uc_ttl = inet->uc_ttl;
- newinet->mc_loop = 1;
+ inet_set_bit(MC_LOOP, newsk);
newinet->mc_ttl = 1;
newinet->mc_index = 0;
newinet->mc_list = NULL;
@@ -9732,6 +9732,7 @@ struct proto sctpv6_prot = {
.unhash = sctp_unhash,
.no_autobind = true,
.obj_size = sizeof(struct sctp6_sock),
+ .ipv6_pinfo_offset = offsetof(struct sctp6_sock, inet6),
.useroffset = offsetof(struct sctp6_sock, sctp.subscribe),
.usersize = offsetof(struct sctp6_sock, sctp.initmsg) -
offsetof(struct sctp6_sock, sctp.subscribe) +
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index a7a9136198fd..f65d6f92afcb 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -612,7 +612,9 @@ int sctp_sysctl_net_register(struct net *net)
table[SCTP_PF_RETRANS_IDX].extra2 = &net->sctp.ps_retrans;
table[SCTP_PS_RETRANS_IDX].extra1 = &net->sctp.pf_retrans;
- net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table);
+ net->sctp.sysctl_header = register_net_sysctl_sz(net, "net/sctp",
+ table,
+ ARRAY_SIZE(sctp_net_table));
if (net->sctp.sysctl_header == NULL) {
kfree(table);
return -ENOMEM;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index f5834af5fad5..bacdd971615e 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -641,20 +641,22 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
smc_llc_link_active(link);
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
- /* optional 2nd link, receive ADD LINK request from server */
- qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
- SMC_LLC_ADD_LINK);
- if (!qentry) {
- struct smc_clc_msg_decline dclc;
-
- rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
- SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
- if (rc == -EAGAIN)
- rc = 0; /* no DECLINE received, go with one link */
- return rc;
+ if (link->lgr->max_links > 1) {
+ /* optional 2nd link, receive ADD LINK request from server */
+ qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+ SMC_LLC_ADD_LINK);
+ if (!qentry) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
+ if (rc == -EAGAIN)
+ rc = 0; /* no DECLINE received, go with one link */
+ return rc;
+ }
+ smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
+ smc_llc_cli_add_link(link, qentry);
}
- smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
- smc_llc_cli_add_link(link, qentry);
return 0;
}
@@ -1144,7 +1146,7 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
#define SMC_CLC_MAX_ACCEPT_LEN \
(sizeof(struct smc_clc_msg_accept_confirm_v2) + \
- sizeof(struct smc_clc_first_contact_ext) + \
+ sizeof(struct smc_clc_first_contact_ext_v2x) + \
sizeof(struct smc_clc_msg_trail))
/* CLC handshake during connect */
@@ -1198,8 +1200,8 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
struct smc_clc_first_contact_ext *fce =
- (struct smc_clc_first_contact_ext *)
- (((u8 *)clc_v2) + sizeof(*clc_v2));
+ smc_get_clc_first_contact_ext(clc_v2, false);
+ int rc;
if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
return 0;
@@ -1218,6 +1220,12 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
return SMC_CLC_DECL_NOINDIRECT;
}
}
+
+ ini->release_nr = fce->release;
+ rc = smc_clc_clnt_v2x_features_validate(fce, ini);
+ if (rc)
+ return rc;
+
return 0;
}
@@ -1236,6 +1244,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
+ ini->max_conns = SMC_CONN_PER_LGR_MAX;
+ ini->max_links = SMC_LINKS_ADD_LNK_MAX;
reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
if (reason_code)
@@ -1386,6 +1396,16 @@ static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
+ if (ini->first_contact_peer) {
+ struct smc_clc_first_contact_ext *fce =
+ smc_get_clc_first_contact_ext(aclc_v2, true);
+
+ ini->release_nr = fce->release;
+ rc = smc_clc_clnt_v2x_features_validate(fce, ini);
+ if (rc)
+ return rc;
+ }
+
rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
if (rc)
return rc;
@@ -1420,7 +1440,7 @@ static int smc_connect_ism(struct smc_sock *smc,
}
rc = smc_clc_send_confirm(smc, ini->first_contact_local,
- aclc->hdr.version, eid, NULL);
+ aclc->hdr.version, eid, ini);
if (rc)
goto connect_abort;
mutex_unlock(&smc_server_lgr_pending);
@@ -1820,7 +1840,7 @@ void smc_close_non_accepted(struct sock *sk)
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
- sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
+ WRITE_ONCE(sk->sk_lingertime, SMC_MAX_STREAM_WAIT_TIMEOUT);
__smc_release(smc);
release_sock(sk);
sock_put(sk); /* sock_hold above */
@@ -1870,10 +1890,12 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
smc_llc_link_active(link);
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
- down_write(&link->lgr->llc_conf_mutex);
- /* initial contact - try to establish second link */
- smc_llc_srv_add_link(link, NULL);
- up_write(&link->lgr->llc_conf_mutex);
+ if (link->lgr->max_links > 1) {
+ down_write(&link->lgr->llc_conf_mutex);
+ /* initial contact - try to establish second link */
+ smc_llc_srv_add_link(link, NULL);
+ up_write(&link->lgr->llc_conf_mutex);
+ }
return 0;
}
@@ -1996,6 +2018,10 @@ static int smc_listen_v2_check(struct smc_sock *new_smc,
}
}
+ ini->release_nr = pclc_v2_ext->hdr.flag.release;
+ if (pclc_v2_ext->hdr.flag.release > SMC_RELEASE)
+ ini->release_nr = SMC_RELEASE;
+
out:
if (!ini->smcd_version && !ini->smcr_version)
return rc;
@@ -2430,6 +2456,10 @@ static void smc_listen_work(struct work_struct *work)
if (rc)
goto out_decl;
+ rc = smc_clc_srv_v2x_features_validate(pclc, ini);
+ if (rc)
+ goto out_decl;
+
mutex_lock(&smc_server_lgr_pending);
smc_close_init(new_smc);
smc_rx_init(new_smc);
@@ -2443,7 +2473,7 @@ static void smc_listen_work(struct work_struct *work)
/* send SMC Accept CLC message */
accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
- accept_version, ini->negotiated_eid);
+ accept_version, ini->negotiated_eid, ini);
if (rc)
goto out_unlock;
@@ -2462,6 +2492,18 @@ static void smc_listen_work(struct work_struct *work)
goto out_decl;
}
+ rc = smc_clc_v2x_features_confirm_check(cclc, ini);
+ if (rc) {
+ if (!ini->is_smcd)
+ goto out_unlock;
+ goto out_decl;
+ }
+
+ /* fce smc release version is needed in smc_listen_rdma_finish,
+ * so save fce info here.
+ */
+ smc_conn_save_peer_info_fce(new_smc, cclc);
+
/* finish worker */
if (!ini->is_smcd) {
rc = smc_listen_rdma_finish(new_smc, cclc,
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 1f2b912c43d1..24745fde4ac2 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -21,7 +21,10 @@
#define SMC_V1 1 /* SMC version V1 */
#define SMC_V2 2 /* SMC version V2 */
-#define SMC_RELEASE 0
+
+#define SMC_RELEASE_0 0
+#define SMC_RELEASE_1 1
+#define SMC_RELEASE SMC_RELEASE_1 /* the latest release version */
#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index c90d9e5dda54..8deb46c28f1d 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -391,9 +391,7 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
return false;
} else {
if (hdr->typev1 == SMC_TYPE_D &&
- ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 &&
- (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 +
- sizeof(struct smc_clc_first_contact_ext)))
+ ntohs(hdr->length) < SMCD_CLC_ACCEPT_CONFIRM_LEN_V2)
return false;
if (hdr->typev1 == SMC_TYPE_R &&
ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2)
@@ -420,13 +418,29 @@ smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc)
return true;
}
-static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len)
+static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce,
+ struct smc_init_info *ini)
{
+ int ret = sizeof(*fce);
+
memset(fce, 0, sizeof(*fce));
- fce->os_type = SMC_CLC_OS_LINUX;
- fce->release = SMC_RELEASE;
- memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname));
- (*len) += sizeof(*fce);
+ fce->fce_v2_base.os_type = SMC_CLC_OS_LINUX;
+ fce->fce_v2_base.release = ini->release_nr;
+ memcpy(fce->fce_v2_base.hostname, smc_hostname, sizeof(smc_hostname));
+ if (ini->is_smcd && ini->release_nr < SMC_RELEASE_1) {
+ ret = sizeof(struct smc_clc_first_contact_ext);
+ goto out;
+ }
+
+ if (ini->release_nr >= SMC_RELEASE_1) {
+ if (!ini->is_smcd) {
+ fce->max_conns = ini->max_conns;
+ fce->max_links = ini->max_links;
+ }
+ }
+
+out:
+ return ret;
}
/* check if received message has a correct header length and contains valid
@@ -927,8 +941,11 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
sizeof(struct smc_clc_smcd_gid_chid);
}
}
- if (smcr_indicated(ini->smc_type_v2))
+ if (smcr_indicated(ini->smc_type_v2)) {
memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE);
+ v2_ext->max_conns = SMC_CONN_PER_LGR_PREFER;
+ v2_ext->max_links = SMC_LINKS_PER_LGR_MAX_PREFER;
+ }
pclc_base->hdr.length = htons(plen);
memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
@@ -986,13 +1003,13 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
u8 *eid, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
+ struct smc_clc_first_contact_ext_v2x fce;
struct smc_clc_msg_accept_confirm *clc;
- struct smc_clc_first_contact_ext fce;
struct smc_clc_fce_gid_ext gle;
struct smc_clc_msg_trail trl;
+ int i, len, fce_len;
struct kvec vec[5];
struct msghdr msg;
- int i, len;
/* send SMC Confirm CLC msg */
clc = (struct smc_clc_msg_accept_confirm *)clc_v2;
@@ -1018,8 +1035,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
if (eid && eid[0])
memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN);
len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
- if (first_contact)
- smc_clc_fill_fce(&fce, &len);
+ if (first_contact) {
+ fce_len = smc_clc_fill_fce(&fce, ini);
+ len += fce_len;
+ }
clc_v2->hdr.length = htons(len);
}
memcpy(trl.eyecatcher, SMCD_EYECATCHER,
@@ -1063,15 +1082,14 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN);
len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
if (first_contact) {
- smc_clc_fill_fce(&fce, &len);
- fce.v2_direct = !link->lgr->uses_gateway;
- memset(&gle, 0, sizeof(gle));
- if (ini && clc->hdr.type == SMC_CLC_CONFIRM) {
+ fce_len = smc_clc_fill_fce(&fce, ini);
+ len += fce_len;
+ fce.fce_v2_base.v2_direct = !link->lgr->uses_gateway;
+ if (clc->hdr.type == SMC_CLC_CONFIRM) {
+ memset(&gle, 0, sizeof(gle));
gle.gid_cnt = ini->smcrv2.gidlist.len;
len += sizeof(gle);
len += gle.gid_cnt * sizeof(gle.gid[0]);
- } else {
- len += sizeof(gle.reserved);
}
}
clc_v2->hdr.length = htons(len);
@@ -1094,7 +1112,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
sizeof(trl);
if (version > SMC_V1 && first_contact) {
vec[i].iov_base = &fce;
- vec[i++].iov_len = sizeof(fce);
+ vec[i++].iov_len = fce_len;
if (!conn->lgr->is_smcd) {
if (clc->hdr.type == SMC_CLC_CONFIRM) {
vec[i].iov_base = &gle;
@@ -1102,9 +1120,6 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
vec[i].iov_base = &ini->smcrv2.gidlist.list;
vec[i++].iov_len = gle.gid_cnt *
sizeof(gle.gid[0]);
- } else {
- vec[i].iov_base = &gle.reserved;
- vec[i++].iov_len = sizeof(gle.reserved);
}
}
}
@@ -1141,7 +1156,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
/* send CLC ACCEPT message across internal TCP socket */
int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
- u8 version, u8 *negotiated_eid)
+ u8 version, u8 *negotiated_eid, struct smc_init_info *ini)
{
struct smc_clc_msg_accept_confirm_v2 aclc_v2;
int len;
@@ -1149,13 +1164,95 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
memset(&aclc_v2, 0, sizeof(aclc_v2));
aclc_v2.hdr.type = SMC_CLC_ACCEPT;
len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
- version, negotiated_eid, NULL);
+ version, negotiated_eid, ini);
if (len < ntohs(aclc_v2.hdr.length))
len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
return len > 0 ? 0 : len;
}
+int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_v2_extension *pclc_v2_ext;
+
+ ini->max_conns = SMC_CONN_PER_LGR_MAX;
+ ini->max_links = SMC_LINKS_ADD_LNK_MAX;
+
+ if ((!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) ||
+ ini->release_nr < SMC_RELEASE_1)
+ return 0;
+
+ pclc_v2_ext = smc_get_clc_v2_ext(pclc);
+ if (!pclc_v2_ext)
+ return SMC_CLC_DECL_NOV2EXT;
+
+ if (ini->smcr_version & SMC_V2) {
+ ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, SMC_CONN_PER_LGR_PREFER);
+ if (ini->max_conns < SMC_CONN_PER_LGR_MIN)
+ return SMC_CLC_DECL_MAXCONNERR;
+
+ ini->max_links = min_t(u8, pclc_v2_ext->max_links, SMC_LINKS_PER_LGR_MAX_PREFER);
+ if (ini->max_links < SMC_LINKS_ADD_LNK_MIN)
+ return SMC_CLC_DECL_MAXLINKERR;
+ }
+
+ return 0;
+}
+
+int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_first_contact_ext_v2x *fce_v2x =
+ (struct smc_clc_first_contact_ext_v2x *)fce;
+
+ if (ini->release_nr < SMC_RELEASE_1)
+ return 0;
+
+ if (!ini->is_smcd) {
+ if (fce_v2x->max_conns < SMC_CONN_PER_LGR_MIN)
+ return SMC_CLC_DECL_MAXCONNERR;
+ ini->max_conns = fce_v2x->max_conns;
+
+ if (fce_v2x->max_links > SMC_LINKS_ADD_LNK_MAX ||
+ fce_v2x->max_links < SMC_LINKS_ADD_LNK_MIN)
+ return SMC_CLC_DECL_MAXLINKERR;
+ ini->max_links = fce_v2x->max_links;
+ }
+
+ return 0;
+}
+
+int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)cclc;
+ struct smc_clc_first_contact_ext *fce =
+ smc_get_clc_first_contact_ext(clc_v2, ini->is_smcd);
+ struct smc_clc_first_contact_ext_v2x *fce_v2x =
+ (struct smc_clc_first_contact_ext_v2x *)fce;
+
+ if (cclc->hdr.version == SMC_V1 ||
+ !(cclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
+ return 0;
+
+ if (ini->release_nr != fce->release)
+ return SMC_CLC_DECL_RELEASEERR;
+
+ if (fce->release < SMC_RELEASE_1)
+ return 0;
+
+ if (!ini->is_smcd) {
+ if (fce_v2x->max_conns != ini->max_conns)
+ return SMC_CLC_DECL_MAXCONNERR;
+ if (fce_v2x->max_links != ini->max_links)
+ return SMC_CLC_DECL_MAXLINKERR;
+ }
+
+ return 0;
+}
+
void smc_clc_get_hostname(u8 **host)
{
*host = &smc_hostname[0];
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 5fee545c9a10..c5c8e7db775a 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -45,6 +45,9 @@
#define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */
#define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */
#define SMC_CLC_DECL_NOUEID 0x03030008 /* peer sent no UEID */
+#define SMC_CLC_DECL_RELEASEERR 0x03030009 /* release version negotiate failed */
+#define SMC_CLC_DECL_MAXCONNERR 0x0303000a /* max connections negotiate failed */
+#define SMC_CLC_DECL_MAXLINKERR 0x0303000b /* max links negotiate failed */
#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
@@ -133,7 +136,9 @@ struct smc_clc_smcd_gid_chid {
struct smc_clc_v2_extension {
struct smc_clnt_opts_area_hdr hdr;
u8 roce[16]; /* RoCEv2 GID */
- u8 reserved[16];
+ u8 max_conns;
+ u8 max_links;
+ u8 reserved[14];
u8 user_eids[][SMC_MAX_EID_LEN];
};
@@ -147,7 +152,9 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
struct smc_clc_msg_smcd { /* SMC-D GID information */
struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */
__be16 v2_ext_offset; /* SMC Version 2 Extension Offset */
- u8 reserved[28];
+ u8 vendor_oui[3]; /* vendor organizationally unique identifier */
+ u8 vendor_exp_options[5];
+ u8 reserved[20];
};
struct smc_clc_smcd_v2_extension {
@@ -231,8 +238,19 @@ struct smc_clc_first_contact_ext {
u8 hostname[SMC_MAX_HOSTNAME_LEN];
};
+struct smc_clc_first_contact_ext_v2x {
+ struct smc_clc_first_contact_ext fce_v2_base;
+ u8 max_conns; /* for SMC-R only */
+ u8 max_links; /* for SMC-R only */
+ u8 reserved3[2];
+ __be32 vendor_exp_options;
+ u8 reserved4[8];
+} __packed; /* format defined in
+ * IBM Shared Memory Communications Version 2 (Third Edition)
+ * (https://www.ibm.com/support/pages/node/7009315)
+ */
+
struct smc_clc_fce_gid_ext {
- u8 reserved[16];
u8 gid_cnt;
u8 reserved2[3];
u8 gid[][SMC_GID_SIZE];
@@ -370,6 +388,27 @@ smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext)
ntohs(prop_v2ext->hdr.smcd_v2_ext_offset));
}
+static inline struct smc_clc_first_contact_ext *
+smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm_v2 *clc_v2,
+ bool is_smcd)
+{
+ int clc_v2_len;
+
+ if (clc_v2->hdr.version == SMC_V1 ||
+ !(clc_v2->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
+ return NULL;
+
+ if (is_smcd)
+ clc_v2_len =
+ offsetofend(struct smc_clc_msg_accept_confirm_v2, d1);
+ else
+ clc_v2_len =
+ offsetofend(struct smc_clc_msg_accept_confirm_v2, r1);
+
+ return (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) +
+ clc_v2_len);
+}
+
struct smcd_dev;
struct smc_init_info;
@@ -382,7 +421,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version, u8 *eid, struct smc_init_info *ini);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
- u8 version, u8 *negotiated_eid);
+ u8 version, u8 *negotiated_eid, struct smc_init_info *ini);
+int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini);
+int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
+ struct smc_init_info *ini);
+int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
+ struct smc_init_info *ini);
void smc_clc_init(void) __init;
void smc_clc_exit(void);
void smc_clc_get_hostname(u8 **host);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 6b78075404d7..d520ee62c8ec 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -319,6 +319,10 @@ static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
goto errv2attr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_CONNS, lgr->max_conns))
+ goto errv2attr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_LINKS, lgr->max_links))
+ goto errv2attr;
nla_nest_end(skb, v2_attrs);
return 0;
@@ -895,9 +899,13 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->uses_gateway = ini->smcrv2.uses_gateway;
memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
ETH_ALEN);
+ lgr->max_conns = ini->max_conns;
+ lgr->max_links = ini->max_links;
} else {
ibdev = ini->ib_dev;
ibport = ini->ib_port;
+ lgr->max_conns = SMC_CONN_PER_LGR_MAX;
+ lgr->max_links = SMC_LINKS_ADD_LNK_MAX;
}
memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
SMC_MAX_PNETID_LEN);
@@ -1654,6 +1662,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link_group *lgr, *n;
+ spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
struct smc_link *link;
@@ -1664,11 +1673,15 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
!rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
continue;
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1)
+ continue;
+
/* trigger local add link processing */
link = smc_llc_usable_link(lgr);
if (link)
smc_llc_add_link_local(link);
}
+ spin_unlock_bh(&smc_lgr_list.lock);
}
/* link is down - switch connections to alternate link,
@@ -1888,7 +1901,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
(ini->smcd_version == SMC_V2 ||
lgr->vlan_id == ini->vlan_id) &&
(role == SMC_CLNT || ini->is_smcd ||
- (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
+ (lgr->conns_num < lgr->max_conns &&
!bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
/* link group found */
ini->first_contact_local = 0;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 1645fba0d2d3..120027d40469 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -22,6 +22,15 @@
#include "smc_ib.h"
#define SMC_RMBS_PER_LGR_MAX 255 /* max. # of RMBs per link group */
+#define SMC_CONN_PER_LGR_MIN 16 /* min. # of connections per link group */
+#define SMC_CONN_PER_LGR_MAX 255 /* max. # of connections per link group,
+ * also is the default value for SMC-R v1 and v2.0
+ */
+#define SMC_CONN_PER_LGR_PREFER 255 /* Preferred connections per link group used for
+ * SMC-R v2.1 and later negotiation, vendors or
+ * distrubutions may modify it to a value between
+ * 16-255 as needed.
+ */
struct smc_lgr_list { /* list of link group definition */
struct list_head list;
@@ -164,6 +173,15 @@ struct smc_link {
*/
#define SMC_LINKS_PER_LGR_MAX 3
#define SMC_SINGLE_LINK 0
+#define SMC_LINKS_ADD_LNK_MIN 1 /* min. # of links per link group */
+#define SMC_LINKS_ADD_LNK_MAX 2 /* max. # of links per link group, also is the
+ * default value for smc-r v1.0 and v2.0
+ */
+#define SMC_LINKS_PER_LGR_MAX_PREFER 2 /* Preferred max links per link group used for
+ * SMC-R v2.1 and later negotiation, vendors or
+ * distrubutions may modify it to a value between
+ * 1-2 as needed.
+ */
/* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */
struct smc_buf_desc {
@@ -331,6 +349,10 @@ struct smc_link_group {
__be32 saddr;
/* net namespace */
struct net *net;
+ u8 max_conns;
+ /* max conn can be assigned to lgr */
+ u8 max_links;
+ /* max links can be added in lgr */
};
struct { /* SMC-D */
u64 peer_gid;
@@ -374,6 +396,9 @@ struct smc_init_info {
u8 is_smcd;
u8 smc_type_v1;
u8 smc_type_v2;
+ u8 release_nr;
+ u8 max_conns;
+ u8 max_links;
u8 first_contact_peer;
u8 first_contact_local;
unsigned short vlan_id;
@@ -539,7 +564,6 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini);
-void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
int smc_core_init(void);
void smc_core_exit(void);
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 034295676e88..4df5f8c8a0a1 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -96,7 +96,6 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk);
int smc_ib_create_queue_pair(struct smc_link *lnk);
int smc_ib_ready_link(struct smc_link *lnk);
int smc_ib_modify_qp_rts(struct smc_link *lnk);
-int smc_ib_modify_qp_reset(struct smc_link *lnk);
int smc_ib_modify_qp_error(struct smc_link *lnk);
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 90f0b60b196a..018ce8133b02 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -52,14 +52,13 @@ struct smc_llc_msg_confirm_link { /* type 0x01 */
u8 link_num;
u8 link_uid[SMC_LGR_ID_SIZE];
u8 max_links;
- u8 reserved[9];
+ u8 max_conns;
+ u8 reserved[8];
};
#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
-#define SMC_LLC_ADD_LNK_MAX_LINKS 2
-
struct smc_llc_msg_add_link { /* type 0x02 */
struct smc_llc_hdr hd;
u8 sender_mac[ETH_ALEN];
@@ -471,7 +470,12 @@ int smc_llc_send_confirm_link(struct smc_link *link,
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
confllc->link_num = link->link_id;
memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE);
- confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
+ confllc->max_links = SMC_LINKS_ADD_LNK_MAX;
+ if (link->lgr->smc_version == SMC_V2 &&
+ link->lgr->peer_smc_release >= SMC_RELEASE_1) {
+ confllc->max_conns = link->lgr->max_conns;
+ confllc->max_links = link->lgr->max_links;
+ }
/* send llc message */
rc = smc_wr_tx_send(link, pend);
put_out:
@@ -1041,6 +1045,11 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
goto out_reject;
}
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) {
+ rc = 0;
+ goto out_reject;
+ }
+
ini->vlan_id = lgr->vlan_id;
if (lgr->smc_version == SMC_V2) {
ini->check_smcrv2 = true;
@@ -1165,6 +1174,9 @@ static void smc_llc_cli_add_link_invite(struct smc_link *link,
lgr->type == SMC_LGR_ASYMMETRIC_PEER)
goto out;
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1)
+ goto out;
+
ini = kzalloc(sizeof(*ini), GFP_KERNEL);
if (!ini)
goto out;
@@ -1410,6 +1422,11 @@ int smc_llc_srv_add_link(struct smc_link *link,
goto out;
}
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) {
+ rc = 0;
+ goto out;
+ }
+
/* ignore client add link recommendation, start new flow */
ini->vlan_id = lgr->vlan_id;
if (lgr->smc_version == SMC_V2) {
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
index b60fe1eb37ab..aa8928975cc6 100644
--- a/net/smc/smc_stats.h
+++ b/net/smc/smc_stats.h
@@ -243,8 +243,9 @@ while (0)
#define SMC_STAT_SERV_SUCC_INC(net, _ini) \
do { \
typeof(_ini) i = (_ini); \
- bool is_v2 = (i->smcd_version & SMC_V2); \
bool is_smcd = (i->is_smcd); \
+ u8 version = is_smcd ? i->smcd_version : i->smcr_version; \
+ bool is_v2 = (version & SMC_V2); \
typeof(net->smc.smc_stats) smc_stats = (net)->smc.smc_stats; \
if (is_v2 && is_smcd) \
this_cpu_inc(smc_stats->smc[SMC_TYPE_D].srv_v2_succ_cnt); \
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 0b2a957ca5f5..5cbc18c6e62b 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -87,7 +87,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
table[i].data += (void *)net - (void *)&init_net;
}
- net->smc.smc_hdr = register_net_sysctl(net, "net/smc", table);
+ net->smc.smc_hdr = register_net_sysctl_sz(net, "net/smc", table,
+ ARRAY_SIZE(smc_table));
if (!net->smc.smc_hdr)
goto err_reg;
diff --git a/net/socket.c b/net/socket.c
index 2b0e54b2405c..c8b08b32f097 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -88,6 +88,7 @@
#include <linux/xattr.h>
#include <linux/nospec.h>
#include <linux/indirect_call_wrapper.h>
+#include <linux/io_uring.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -136,9 +137,10 @@ static void sock_splice_eof(struct file *file);
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
struct socket *sock = f->private_data;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
- if (sock->ops->show_fdinfo)
- sock->ops->show_fdinfo(m, sock);
+ if (ops->show_fdinfo)
+ ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
@@ -159,6 +161,7 @@ static const struct file_operations socket_file_ops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_sock_ioctl,
#endif
+ .uring_cmd = io_uring_cmd_sock,
.mmap = sock_mmap,
.release = sock_close,
.fasync = sock_fasync,
@@ -646,12 +649,14 @@ EXPORT_SYMBOL(sock_alloc);
static void __sock_release(struct socket *sock, struct inode *inode)
{
- if (sock->ops) {
- struct module *owner = sock->ops->owner;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
+
+ if (ops) {
+ struct module *owner = ops->owner;
if (inode)
inode_lock(inode);
- sock->ops->release(sock);
+ ops->release(sock);
sock->sk = NULL;
if (inode)
inode_unlock(inode);
@@ -722,7 +727,7 @@ static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
- int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
+ int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg,
inet_sendmsg, sock, msg,
msg_data_left(msg));
BUG_ON(ret == -EIOCBQUEUED);
@@ -786,13 +791,14 @@ int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
struct socket *sock = sk->sk_socket;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
- if (!sock->ops->sendmsg_locked)
+ if (!ops->sendmsg_locked)
return sock_no_sendmsg_locked(sk, msg, size);
iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
- return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
+ return ops->sendmsg_locked(sk, msg, msg_data_left(msg));
}
EXPORT_SYMBOL(kernel_sendmsg_locked);
@@ -821,7 +827,7 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
{
- bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
+ bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
struct net_device *orig_dev;
ktime_t hwtstamp;
@@ -873,12 +879,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
struct scm_timestamping_internal tss;
-
int empty = 1, false_tstamp = 0;
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
int if_index;
ktime_t hwtstamp;
+ u32 tsflags;
/* Race occurred between timestamp enabling and packet
receiving. Fill in the current time for now. */
@@ -920,11 +926,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
}
memset(&tss, 0, sizeof(tss));
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+ tsflags = READ_ONCE(sk->sk_tsflags);
+ if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
empty = 0;
if (shhwtstamps &&
- (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+ (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
!skb_is_swtx_tstamp(skb, false_tstamp)) {
if_index = 0;
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
@@ -932,14 +939,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
else
hwtstamp = shhwtstamps->hwtstamp;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
hwtstamp = ptp_convert_timestamp(&hwtstamp,
- sk->sk_bind_phc);
+ READ_ONCE(sk->sk_bind_phc));
if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
empty = 0;
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+ if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
!skb_is_err_queue(skb))
put_ts_pktinfo(msg, skb, if_index);
}
@@ -1017,7 +1024,8 @@ static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int f
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
int flags)
{
- int ret = INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
+ int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->recvmsg,
+ inet6_recvmsg,
inet_recvmsg, sock, msg,
msg_data_left(msg), flags);
if (trace_sock_recv_length_enabled())
@@ -1072,19 +1080,23 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
unsigned int flags)
{
struct socket *sock = file->private_data;
+ const struct proto_ops *ops;
- if (unlikely(!sock->ops->splice_read))
+ ops = READ_ONCE(sock->ops);
+ if (unlikely(!ops->splice_read))
return copy_splice_read(file, ppos, pipe, len, flags);
- return sock->ops->splice_read(sock, ppos, pipe, len, flags);
+ return ops->splice_read(sock, ppos, pipe, len, flags);
}
static void sock_splice_eof(struct file *file)
{
struct socket *sock = file->private_data;
+ const struct proto_ops *ops;
- if (sock->ops->splice_eof)
- sock->ops->splice_eof(sock);
+ ops = READ_ONCE(sock->ops);
+ if (ops->splice_eof)
+ ops->splice_eof(sock);
}
static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -1181,13 +1193,14 @@ EXPORT_SYMBOL(vlan_ioctl_set);
static long sock_do_ioctl(struct net *net, struct socket *sock,
unsigned int cmd, unsigned long arg)
{
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
struct ifreq ifr;
bool need_copyout;
int err;
void __user *argp = (void __user *)arg;
void __user *data;
- err = sock->ops->ioctl(sock, cmd, arg);
+ err = ops->ioctl(sock, cmd, arg);
/*
* If this ioctl is unknown try to hand it down
@@ -1216,6 +1229,7 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
+ const struct proto_ops *ops;
struct socket *sock;
struct sock *sk;
void __user *argp = (void __user *)arg;
@@ -1223,6 +1237,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
struct net *net;
sock = file->private_data;
+ ops = READ_ONCE(sock->ops);
sk = sock->sk;
net = sock_net(sk);
if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
@@ -1280,23 +1295,23 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
break;
case SIOCGSTAMP_OLD:
case SIOCGSTAMPNS_OLD:
- if (!sock->ops->gettstamp) {
+ if (!ops->gettstamp) {
err = -ENOIOCTLCMD;
break;
}
- err = sock->ops->gettstamp(sock, argp,
- cmd == SIOCGSTAMP_OLD,
- !IS_ENABLED(CONFIG_64BIT));
+ err = ops->gettstamp(sock, argp,
+ cmd == SIOCGSTAMP_OLD,
+ !IS_ENABLED(CONFIG_64BIT));
break;
case SIOCGSTAMP_NEW:
case SIOCGSTAMPNS_NEW:
- if (!sock->ops->gettstamp) {
+ if (!ops->gettstamp) {
err = -ENOIOCTLCMD;
break;
}
- err = sock->ops->gettstamp(sock, argp,
- cmd == SIOCGSTAMP_NEW,
- false);
+ err = ops->gettstamp(sock, argp,
+ cmd == SIOCGSTAMP_NEW,
+ false);
break;
case SIOCGIFCONF:
@@ -1357,9 +1372,10 @@ EXPORT_SYMBOL(sock_create_lite);
static __poll_t sock_poll(struct file *file, poll_table *wait)
{
struct socket *sock = file->private_data;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
__poll_t events = poll_requested_events(wait), flag = 0;
- if (!sock->ops->poll)
+ if (!ops->poll)
return 0;
if (sk_can_busy_loop(sock->sk)) {
@@ -1371,14 +1387,14 @@ static __poll_t sock_poll(struct file *file, poll_table *wait)
flag = POLL_BUSY_LOOP;
}
- return sock->ops->poll(file, sock, wait) | flag;
+ return ops->poll(file, sock, wait) | flag;
}
static int sock_mmap(struct file *file, struct vm_area_struct *vma)
{
struct socket *sock = file->private_data;
- return sock->ops->mmap(file, sock, vma);
+ return READ_ONCE(sock->ops)->mmap(file, sock, vma);
}
static int sock_close(struct inode *inode, struct file *filp)
@@ -1644,12 +1660,36 @@ struct file *__sys_socket_file(int family, int type, int protocol)
return sock_alloc_file(sock, flags, NULL);
}
+/* A hook for bpf progs to attach to and update socket protocol.
+ *
+ * A static noinline declaration here could cause the compiler to
+ * optimize away the function. A global noinline declaration will
+ * keep the definition, but may optimize away the callsite.
+ * Therefore, __weak is needed to ensure that the call is still
+ * emitted, by telling the compiler that we don't know what the
+ * function might eventually be.
+ *
+ * __diag_* below are needed to dismiss the missing prototype warning.
+ */
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "A fmod_ret entry point for BPF programs");
+
+__weak noinline int update_socket_protocol(int family, int type, int protocol)
+{
+ return protocol;
+}
+
+__diag_pop();
+
int __sys_socket(int family, int type, int protocol)
{
struct socket *sock;
int flags;
- sock = __sys_socket_create(family, type, protocol);
+ sock = __sys_socket_create(family, type,
+ update_socket_protocol(family, type, protocol));
if (IS_ERR(sock))
return PTR_ERR(sock);
@@ -1728,7 +1768,7 @@ int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
goto out;
}
- err = sock1->ops->socketpair(sock1, sock2);
+ err = READ_ONCE(sock1->ops)->socketpair(sock1, sock2);
if (unlikely(err < 0)) {
sock_release(sock2);
sock_release(sock1);
@@ -1789,7 +1829,7 @@ int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
(struct sockaddr *)&address,
addrlen);
if (!err)
- err = sock->ops->bind(sock,
+ err = READ_ONCE(sock->ops)->bind(sock,
(struct sockaddr *)
&address, addrlen);
}
@@ -1823,7 +1863,7 @@ int __sys_listen(int fd, int backlog)
err = security_socket_listen(sock, backlog);
if (!err)
- err = sock->ops->listen(sock, backlog);
+ err = READ_ONCE(sock->ops)->listen(sock, backlog);
fput_light(sock->file, fput_needed);
}
@@ -1843,6 +1883,7 @@ struct file *do_accept(struct file *file, unsigned file_flags,
struct file *newfile;
int err, len;
struct sockaddr_storage address;
+ const struct proto_ops *ops;
sock = sock_from_file(file);
if (!sock)
@@ -1851,15 +1892,16 @@ struct file *do_accept(struct file *file, unsigned file_flags,
newsock = sock_alloc();
if (!newsock)
return ERR_PTR(-ENFILE);
+ ops = READ_ONCE(sock->ops);
newsock->type = sock->type;
- newsock->ops = sock->ops;
+ newsock->ops = ops;
/*
* We don't need try_module_get here, as the listening socket (sock)
* has the protocol module (sock->ops->owner) held.
*/
- __module_get(newsock->ops->owner);
+ __module_get(ops->owner);
newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
if (IS_ERR(newfile))
@@ -1869,14 +1911,13 @@ struct file *do_accept(struct file *file, unsigned file_flags,
if (err)
goto out_fd;
- err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
+ err = ops->accept(sock, newsock, sock->file->f_flags | file_flags,
false);
if (err < 0)
goto out_fd;
if (upeer_sockaddr) {
- len = newsock->ops->getname(newsock,
- (struct sockaddr *)&address, 2);
+ len = ops->getname(newsock, (struct sockaddr *)&address, 2);
if (len < 0) {
err = -ECONNABORTED;
goto out_fd;
@@ -1989,8 +2030,8 @@ int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
if (err)
goto out;
- err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
- sock->file->f_flags | file_flags);
+ err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)address,
+ addrlen, sock->file->f_flags | file_flags);
out:
return err;
}
@@ -2039,7 +2080,7 @@ int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
if (err)
goto out_put;
- err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
+ err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 0);
if (err < 0)
goto out_put;
/* "err" is actually length in this case */
@@ -2071,13 +2112,15 @@ int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
+
err = security_socket_getpeername(sock);
if (err) {
fput_light(sock->file, fput_needed);
return err;
}
- err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
+ err = ops->getname(sock, (struct sockaddr *)&address, 1);
if (err >= 0)
/* "err" is actually length in this case */
err = move_addr_to_user(&address, err, usockaddr,
@@ -2227,6 +2270,7 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
int optlen)
{
sockptr_t optval = USER_SOCKPTR(user_optval);
+ const struct proto_ops *ops;
char *kernel_optval = NULL;
int err, fput_needed;
struct socket *sock;
@@ -2255,12 +2299,13 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
if (kernel_optval)
optval = KERNEL_SOCKPTR(kernel_optval);
+ ops = READ_ONCE(sock->ops);
if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
err = sock_setsockopt(sock, level, optname, optval, optlen);
- else if (unlikely(!sock->ops->setsockopt))
+ else if (unlikely(!ops->setsockopt))
err = -EOPNOTSUPP;
else
- err = sock->ops->setsockopt(sock, level, optname, optval,
+ err = ops->setsockopt(sock, level, optname, optval,
optlen);
kfree(kernel_optval);
out_put:
@@ -2285,6 +2330,7 @@ int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
int __user *optlen)
{
int max_optlen __maybe_unused;
+ const struct proto_ops *ops;
int err, fput_needed;
struct socket *sock;
@@ -2299,12 +2345,13 @@ int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
if (!in_compat_syscall())
max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
+ ops = READ_ONCE(sock->ops);
if (level == SOL_SOCKET)
err = sock_getsockopt(sock, level, optname, optval, optlen);
- else if (unlikely(!sock->ops->getsockopt))
+ else if (unlikely(!ops->getsockopt))
err = -EOPNOTSUPP;
else
- err = sock->ops->getsockopt(sock, level, optname, optval,
+ err = ops->getsockopt(sock, level, optname, optval,
optlen);
if (!in_compat_syscall())
@@ -2332,7 +2379,7 @@ int __sys_shutdown_sock(struct socket *sock, int how)
err = security_socket_shutdown(sock, how);
if (!err)
- err = sock->ops->shutdown(sock, how);
+ err = READ_ONCE(sock->ops)->shutdown(sock, how);
return err;
}
@@ -3324,6 +3371,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
void __user *argp = compat_ptr(arg);
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
+ const struct proto_ops *ops;
if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
return sock_ioctl(file, cmd, (unsigned long)argp);
@@ -3333,10 +3381,11 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
return compat_siocwandev(net, argp);
case SIOCGSTAMP_OLD:
case SIOCGSTAMPNS_OLD:
- if (!sock->ops->gettstamp)
+ ops = READ_ONCE(sock->ops);
+ if (!ops->gettstamp)
return -ENOIOCTLCMD;
- return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
- !COMPAT_USE_64BIT_TIME);
+ return ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
+ !COMPAT_USE_64BIT_TIME);
case SIOCETHTOOL:
case SIOCBONDSLAVEINFOQUERY:
@@ -3417,6 +3466,7 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct socket *sock = file->private_data;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
int ret = -ENOIOCTLCMD;
struct sock *sk;
struct net *net;
@@ -3424,8 +3474,8 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
sk = sock->sk;
net = sock_net(sk);
- if (sock->ops->compat_ioctl)
- ret = sock->ops->compat_ioctl(sock, cmd, arg);
+ if (ops->compat_ioctl)
+ ret = ops->compat_ioctl(sock, cmd, arg);
if (ret == -ENOIOCTLCMD &&
(cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
@@ -3449,7 +3499,7 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
- return sock->ops->bind(sock, addr, addrlen);
+ return READ_ONCE(sock->ops)->bind(sock, addr, addrlen);
}
EXPORT_SYMBOL(kernel_bind);
@@ -3463,7 +3513,7 @@ EXPORT_SYMBOL(kernel_bind);
int kernel_listen(struct socket *sock, int backlog)
{
- return sock->ops->listen(sock, backlog);
+ return READ_ONCE(sock->ops)->listen(sock, backlog);
}
EXPORT_SYMBOL(kernel_listen);
@@ -3481,6 +3531,7 @@ EXPORT_SYMBOL(kernel_listen);
int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
{
struct sock *sk = sock->sk;
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
int err;
err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
@@ -3488,15 +3539,15 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
if (err < 0)
goto done;
- err = sock->ops->accept(sock, *newsock, flags, true);
+ err = ops->accept(sock, *newsock, flags, true);
if (err < 0) {
sock_release(*newsock);
*newsock = NULL;
goto done;
}
- (*newsock)->ops = sock->ops;
- __module_get((*newsock)->ops->owner);
+ (*newsock)->ops = ops;
+ __module_get(ops->owner);
done:
return err;
@@ -3519,7 +3570,12 @@ EXPORT_SYMBOL(kernel_accept);
int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags)
{
- return sock->ops->connect(sock, addr, addrlen, flags);
+ struct sockaddr_storage address;
+
+ memcpy(&address, addr, addrlen);
+
+ return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address,
+ addrlen, flags);
}
EXPORT_SYMBOL(kernel_connect);
@@ -3534,7 +3590,7 @@ EXPORT_SYMBOL(kernel_connect);
int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, 0);
+ return READ_ONCE(sock->ops)->getname(sock, addr, 0);
}
EXPORT_SYMBOL(kernel_getsockname);
@@ -3549,7 +3605,7 @@ EXPORT_SYMBOL(kernel_getsockname);
int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
- return sock->ops->getname(sock, addr, 1);
+ return READ_ONCE(sock->ops)->getname(sock, addr, 1);
}
EXPORT_SYMBOL(kernel_getpeername);
@@ -3563,7 +3619,7 @@ EXPORT_SYMBOL(kernel_getpeername);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
{
- return sock->ops->shutdown(sock, how);
+ return READ_ONCE(sock->ops)->shutdown(sock, how);
}
EXPORT_SYMBOL(kernel_sock_shutdown);
diff --git a/net/sunrpc/.kunitconfig b/net/sunrpc/.kunitconfig
index a55a00fa649b..eb02b906c295 100644
--- a/net/sunrpc/.kunitconfig
+++ b/net/sunrpc/.kunitconfig
@@ -23,7 +23,6 @@ CONFIG_NFS_FS=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
CONFIG_RPCSEC_GSS_KRB5=y
-CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES=y
CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1=y
CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA=y
CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2=y
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 4afc5fd71d44..2d8b67dac7b5 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -34,38 +34,6 @@ config RPCSEC_GSS_KRB5
If unsure, say Y.
-config RPCSEC_GSS_KRB5_SIMPLIFIED
- bool
- depends on RPCSEC_GSS_KRB5
-
-config RPCSEC_GSS_KRB5_CRYPTOSYSTEM
- bool
- depends on RPCSEC_GSS_KRB5
-
-config RPCSEC_GSS_KRB5_ENCTYPES_DES
- bool "Enable Kerberos enctypes based on DES (deprecated)"
- depends on RPCSEC_GSS_KRB5
- depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_ECB
- depends on CRYPTO_HMAC && CRYPTO_MD5 && CRYPTO_SHA1
- depends on CRYPTO_DES
- default n
- select RPCSEC_GSS_KRB5_SIMPLIFIED
- help
- Choose Y to enable the use of deprecated Kerberos 5
- encryption types that utilize Data Encryption Standard
- (DES) based ciphers. These include des-cbc-md5,
- des-cbc-crc, and des-cbc-md4, which were deprecated by
- RFC 6649, and des3-cbc-sha1, which was deprecated by RFC
- 8429.
-
- These encryption types are known to be insecure, therefore
- the default setting of this option is N. Support for these
- encryption types is available only for compatibility with
- legacy NFS client and server implementations.
-
- Removal of support is planned for a subsequent kernel
- release.
-
config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1
bool "Enable Kerberos enctypes based on AES and SHA-1"
depends on RPCSEC_GSS_KRB5
@@ -73,7 +41,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1
depends on CRYPTO_HMAC && CRYPTO_SHA1
depends on CRYPTO_AES
default y
- select RPCSEC_GSS_KRB5_CRYPTOSYSTEM
help
Choose Y to enable the use of Kerberos 5 encryption types
that utilize Advanced Encryption Standard (AES) ciphers and
@@ -86,7 +53,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA
depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_CAMELLIA
depends on CRYPTO_CMAC
default n
- select RPCSEC_GSS_KRB5_CRYPTOSYSTEM
help
Choose Y to enable the use of Kerberos 5 encryption types
that utilize Camellia ciphers (RFC 3713) and CMAC digests
@@ -100,7 +66,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2
depends on CRYPTO_HMAC && CRYPTO_SHA256 && CRYPTO_SHA512
depends on CRYPTO_AES
default n
- select RPCSEC_GSS_KRB5_CRYPTOSYSTEM
help
Choose Y to enable the use of Kerberos 5 encryption types
that utilize Advanced Encryption Standard (AES) ciphers and
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 012ae1720689..ad1736d93b76 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -12,6 +12,6 @@ auth_rpcgss-y := auth_gss.o gss_generic_token.o \
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
- gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
+ gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
obj-$(CONFIG_RPCSEC_GSS_KRB5_KUNIT_TEST) += gss_krb5_test.o
diff --git a/net/sunrpc/auth_gss/gss_krb5_internal.h b/net/sunrpc/auth_gss/gss_krb5_internal.h
index b673e2626acb..3afd4065bf3d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_internal.h
+++ b/net/sunrpc/auth_gss/gss_krb5_internal.h
@@ -33,7 +33,6 @@ struct gss_krb5_enctype {
const u32 Ke_length; /* encryption subkey length, in octets */
const u32 Ki_length; /* integrity subkey length, in octets */
- int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask);
int (*derive_key)(const struct gss_krb5_enctype *gk5e,
const struct xdr_netobj *in,
struct xdr_netobj *out,
@@ -85,24 +84,15 @@ struct krb5_ctx {
* GSS Kerberos 5 mechanism Per-Message calls.
*/
-u32 gss_krb5_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
- struct xdr_netobj *token);
u32 gss_krb5_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
struct xdr_netobj *token);
-u32 gss_krb5_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
- struct xdr_netobj *read_token);
u32 gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
struct xdr_netobj *read_token);
-u32 gss_krb5_wrap_v1(struct krb5_ctx *kctx, int offset,
- struct xdr_buf *buf, struct page **pages);
u32 gss_krb5_wrap_v2(struct krb5_ctx *kctx, int offset,
struct xdr_buf *buf, struct page **pages);
-u32 gss_krb5_unwrap_v1(struct krb5_ctx *kctx, int offset, int len,
- struct xdr_buf *buf, unsigned int *slack,
- unsigned int *align);
u32 gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len,
struct xdr_buf *buf, unsigned int *slack,
unsigned int *align);
@@ -113,12 +103,6 @@ u32 gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len,
/* Key Derivation Functions */
-int krb5_derive_key_v1(const struct gss_krb5_enctype *gk5e,
- const struct xdr_netobj *inkey,
- struct xdr_netobj *outkey,
- const struct xdr_netobj *label,
- gfp_t gfp_mask);
-
int krb5_derive_key_v2(const struct gss_krb5_enctype *gk5e,
const struct xdr_netobj *inkey,
struct xdr_netobj *outkey,
@@ -169,13 +153,6 @@ static inline int krb5_derive_key(struct krb5_ctx *kctx,
return gk5e->derive_key(gk5e, inkey, outkey, &label, gfp_mask);
}
-s32 krb5_make_seq_num(struct krb5_ctx *kctx, struct crypto_sync_skcipher *key,
- int direction, u32 seqnum, unsigned char *cksum,
- unsigned char *buf);
-
-s32 krb5_get_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
- unsigned char *buf, int *direction, u32 *seqnum);
-
void krb5_make_confounder(u8 *p, int conflen);
u32 make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 5347fe1cc93f..06d8ee0db000 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -222,90 +222,6 @@ err_return:
return ret;
}
-#define smask(step) ((1<<step)-1)
-#define pstep(x, step) (((x)&smask(step))^(((x)>>step)&smask(step)))
-#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1)
-
-static void mit_des_fixup_key_parity(u8 key[8])
-{
- int i;
- for (i = 0; i < 8; i++) {
- key[i] &= 0xfe;
- key[i] |= 1^parity_char(key[i]);
- }
-}
-
-static int krb5_random_to_key_v1(const struct gss_krb5_enctype *gk5e,
- struct xdr_netobj *randombits,
- struct xdr_netobj *key)
-{
- int i, ret = -EINVAL;
-
- if (key->len != 24) {
- dprintk("%s: key->len is %d\n", __func__, key->len);
- goto err_out;
- }
- if (randombits->len != 21) {
- dprintk("%s: randombits->len is %d\n",
- __func__, randombits->len);
- goto err_out;
- }
-
- /* take the seven bytes, move them around into the top 7 bits of the
- 8 key bytes, then compute the parity bits. Do this three times. */
-
- for (i = 0; i < 3; i++) {
- memcpy(key->data + i*8, randombits->data + i*7, 7);
- key->data[i*8+7] = (((key->data[i*8]&1)<<1) |
- ((key->data[i*8+1]&1)<<2) |
- ((key->data[i*8+2]&1)<<3) |
- ((key->data[i*8+3]&1)<<4) |
- ((key->data[i*8+4]&1)<<5) |
- ((key->data[i*8+5]&1)<<6) |
- ((key->data[i*8+6]&1)<<7));
-
- mit_des_fixup_key_parity(key->data + i*8);
- }
- ret = 0;
-err_out:
- return ret;
-}
-
-/**
- * krb5_derive_key_v1 - Derive a subkey for an RFC 3961 enctype
- * @gk5e: Kerberos 5 enctype profile
- * @inkey: base protocol key
- * @outkey: OUT: derived key
- * @label: subkey usage label
- * @gfp_mask: memory allocation control flags
- *
- * Caller sets @outkey->len to the desired length of the derived key.
- *
- * On success, returns 0 and fills in @outkey. A negative errno value
- * is returned on failure.
- */
-int krb5_derive_key_v1(const struct gss_krb5_enctype *gk5e,
- const struct xdr_netobj *inkey,
- struct xdr_netobj *outkey,
- const struct xdr_netobj *label,
- gfp_t gfp_mask)
-{
- struct xdr_netobj inblock;
- int ret;
-
- inblock.len = gk5e->keybytes;
- inblock.data = kmalloc(inblock.len, gfp_mask);
- if (!inblock.data)
- return -ENOMEM;
-
- ret = krb5_DK(gk5e, inkey, inblock.data, label, gfp_mask);
- if (!ret)
- ret = krb5_random_to_key_v1(gk5e, &inblock, outkey);
-
- kfree_sensitive(inblock.data);
- return ret;
-}
-
/*
* This is the identity function, with some sanity checking.
*/
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 20e21d08badb..e31cfdf7eadc 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -30,61 +30,7 @@
static struct gss_api_mech gss_kerberos_mech;
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-static int gss_krb5_import_ctx_des(struct krb5_ctx *ctx, gfp_t gfp_mask);
-static int gss_krb5_import_ctx_v1(struct krb5_ctx *ctx, gfp_t gfp_mask);
-#endif
-#if defined(CONFIG_RPCSEC_GSS_KRB5_CRYPTOSYSTEM)
-static int gss_krb5_import_ctx_v2(struct krb5_ctx *ctx, gfp_t gfp_mask);
-#endif
-
static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
-#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES)
- /*
- * DES (All DES enctypes are mapped to the same gss functionality)
- */
- {
- .etype = ENCTYPE_DES_CBC_RAW,
- .ctype = CKSUMTYPE_RSA_MD5,
- .name = "des-cbc-crc",
- .encrypt_name = "cbc(des)",
- .cksum_name = "md5",
- .import_ctx = gss_krb5_import_ctx_des,
- .get_mic = gss_krb5_get_mic_v1,
- .verify_mic = gss_krb5_verify_mic_v1,
- .wrap = gss_krb5_wrap_v1,
- .unwrap = gss_krb5_unwrap_v1,
- .signalg = SGN_ALG_DES_MAC_MD5,
- .sealalg = SEAL_ALG_DES,
- .keybytes = 7,
- .keylength = 8,
- .cksumlength = 8,
- .keyed_cksum = 0,
- },
- /*
- * 3DES
- */
- {
- .etype = ENCTYPE_DES3_CBC_RAW,
- .ctype = CKSUMTYPE_HMAC_SHA1_DES3,
- .name = "des3-hmac-sha1",
- .encrypt_name = "cbc(des3_ede)",
- .cksum_name = "hmac(sha1)",
- .import_ctx = gss_krb5_import_ctx_v1,
- .derive_key = krb5_derive_key_v1,
- .get_mic = gss_krb5_get_mic_v1,
- .verify_mic = gss_krb5_verify_mic_v1,
- .wrap = gss_krb5_wrap_v1,
- .unwrap = gss_krb5_unwrap_v1,
- .signalg = SGN_ALG_HMAC_SHA1_DES3_KD,
- .sealalg = SEAL_ALG_DES3KD,
- .keybytes = 21,
- .keylength = 24,
- .cksumlength = 20,
- .keyed_cksum = 1,
- },
-#endif
-
#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1)
/*
* AES-128 with SHA-1 (RFC 3962)
@@ -96,7 +42,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.encrypt_name = "cts(cbc(aes))",
.aux_cipher = "cbc(aes)",
.cksum_name = "hmac(sha1)",
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_derive_key_v2,
.encrypt = gss_krb5_aes_encrypt,
.decrypt = gss_krb5_aes_decrypt,
@@ -126,7 +71,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.encrypt_name = "cts(cbc(aes))",
.aux_cipher = "cbc(aes)",
.cksum_name = "hmac(sha1)",
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_derive_key_v2,
.encrypt = gss_krb5_aes_encrypt,
.decrypt = gss_krb5_aes_decrypt,
@@ -166,7 +110,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.Ke_length = BITS2OCTETS(128),
.Ki_length = BITS2OCTETS(128),
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_kdf_feedback_cmac,
.encrypt = gss_krb5_aes_encrypt,
.decrypt = gss_krb5_aes_decrypt,
@@ -193,7 +136,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.Ke_length = BITS2OCTETS(256),
.Ki_length = BITS2OCTETS(256),
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_kdf_feedback_cmac,
.encrypt = gss_krb5_aes_encrypt,
.decrypt = gss_krb5_aes_decrypt,
@@ -223,7 +165,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.Ke_length = BITS2OCTETS(128),
.Ki_length = BITS2OCTETS(128),
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_kdf_hmac_sha2,
.encrypt = krb5_etm_encrypt,
.decrypt = krb5_etm_decrypt,
@@ -250,7 +191,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.Ke_length = BITS2OCTETS(256),
.Ki_length = BITS2OCTETS(192),
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_kdf_hmac_sha2,
.encrypt = krb5_etm_encrypt,
.decrypt = krb5_etm_decrypt,
@@ -284,12 +224,6 @@ static void gss_krb5_prepare_enctype_priority_list(void)
ENCTYPE_AES256_CTS_HMAC_SHA1_96,
ENCTYPE_AES128_CTS_HMAC_SHA1_96,
#endif
-#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES)
- ENCTYPE_DES3_CBC_SHA1,
- ENCTYPE_DES_CBC_MD5,
- ENCTYPE_DES_CBC_CRC,
- ENCTYPE_DES_CBC_MD4,
-#endif
};
size_t total, i;
char buf[16];
@@ -330,185 +264,6 @@ const struct gss_krb5_enctype *gss_krb5_lookup_enctype(u32 etype)
EXPORT_SYMBOL_IF_KUNIT(gss_krb5_lookup_enctype);
static struct crypto_sync_skcipher *
-gss_krb5_alloc_cipher_v1(struct krb5_ctx *ctx, struct xdr_netobj *key)
-{
- struct crypto_sync_skcipher *tfm;
-
- tfm = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
- if (IS_ERR(tfm))
- return NULL;
- if (crypto_sync_skcipher_setkey(tfm, key->data, key->len)) {
- crypto_free_sync_skcipher(tfm);
- return NULL;
- }
- return tfm;
-}
-
-static inline const void *
-get_key(const void *p, const void *end,
- struct krb5_ctx *ctx, struct crypto_sync_skcipher **res)
-{
- struct crypto_sync_skcipher *tfm;
- struct xdr_netobj key;
- int alg;
-
- p = simple_get_bytes(p, end, &alg, sizeof(alg));
- if (IS_ERR(p))
- goto out_err;
- switch (alg) {
- case ENCTYPE_DES_CBC_CRC:
- case ENCTYPE_DES_CBC_MD4:
- case ENCTYPE_DES_CBC_MD5:
- /* Map all these key types to ENCTYPE_DES_CBC_RAW */
- alg = ENCTYPE_DES_CBC_RAW;
- break;
- }
- if (!gss_krb5_lookup_enctype(alg)) {
- pr_warn("gss_krb5: unsupported enctype: %d\n", alg);
- goto out_err_inval;
- }
-
- p = simple_get_netobj(p, end, &key);
- if (IS_ERR(p))
- goto out_err;
- tfm = gss_krb5_alloc_cipher_v1(ctx, &key);
- kfree(key.data);
- if (!tfm) {
- pr_warn("gss_krb5: failed to initialize cipher '%s'\n",
- ctx->gk5e->encrypt_name);
- goto out_err_inval;
- }
- *res = tfm;
-
- return p;
-
-out_err_inval:
- p = ERR_PTR(-EINVAL);
-out_err:
- return p;
-}
-
-static int
-gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
-{
- u32 seq_send;
- int tmp;
- u32 time32;
-
- p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
- if (IS_ERR(p))
- goto out_err;
-
- /* Old format supports only DES! Any other enctype uses new format */
- ctx->enctype = ENCTYPE_DES_CBC_RAW;
-
- ctx->gk5e = gss_krb5_lookup_enctype(ctx->enctype);
- if (ctx->gk5e == NULL) {
- p = ERR_PTR(-EINVAL);
- goto out_err;
- }
-
- /* The downcall format was designed before we completely understood
- * the uses of the context fields; so it includes some stuff we
- * just give some minimal sanity-checking, and some we ignore
- * completely (like the next twenty bytes): */
- if (unlikely(p + 20 > end || p + 20 < p)) {
- p = ERR_PTR(-EFAULT);
- goto out_err;
- }
- p += 20;
- p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
- if (IS_ERR(p))
- goto out_err;
- if (tmp != SGN_ALG_DES_MAC_MD5) {
- p = ERR_PTR(-ENOSYS);
- goto out_err;
- }
- p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
- if (IS_ERR(p))
- goto out_err;
- if (tmp != SEAL_ALG_DES) {
- p = ERR_PTR(-ENOSYS);
- goto out_err;
- }
- p = simple_get_bytes(p, end, &time32, sizeof(time32));
- if (IS_ERR(p))
- goto out_err;
- /* unsigned 32-bit time overflows in year 2106 */
- ctx->endtime = (time64_t)time32;
- p = simple_get_bytes(p, end, &seq_send, sizeof(seq_send));
- if (IS_ERR(p))
- goto out_err;
- atomic_set(&ctx->seq_send, seq_send);
- p = simple_get_netobj(p, end, &ctx->mech_used);
- if (IS_ERR(p))
- goto out_err;
- p = get_key(p, end, ctx, &ctx->enc);
- if (IS_ERR(p))
- goto out_err_free_mech;
- p = get_key(p, end, ctx, &ctx->seq);
- if (IS_ERR(p))
- goto out_err_free_key1;
- if (p != end) {
- p = ERR_PTR(-EFAULT);
- goto out_err_free_key2;
- }
-
- return 0;
-
-out_err_free_key2:
- crypto_free_sync_skcipher(ctx->seq);
-out_err_free_key1:
- crypto_free_sync_skcipher(ctx->enc);
-out_err_free_mech:
- kfree(ctx->mech_used.data);
-out_err:
- return PTR_ERR(p);
-}
-
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-static int
-gss_krb5_import_ctx_des(struct krb5_ctx *ctx, gfp_t gfp_mask)
-{
- return -EINVAL;
-}
-
-static int
-gss_krb5_import_ctx_v1(struct krb5_ctx *ctx, gfp_t gfp_mask)
-{
- struct xdr_netobj keyin, keyout;
-
- keyin.data = ctx->Ksess;
- keyin.len = ctx->gk5e->keylength;
-
- ctx->seq = gss_krb5_alloc_cipher_v1(ctx, &keyin);
- if (ctx->seq == NULL)
- goto out_err;
- ctx->enc = gss_krb5_alloc_cipher_v1(ctx, &keyin);
- if (ctx->enc == NULL)
- goto out_free_seq;
-
- /* derive cksum */
- keyout.data = ctx->cksum;
- keyout.len = ctx->gk5e->keylength;
- if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_SIGN,
- KEY_USAGE_SEED_CHECKSUM, gfp_mask))
- goto out_free_enc;
-
- return 0;
-
-out_free_enc:
- crypto_free_sync_skcipher(ctx->enc);
-out_free_seq:
- crypto_free_sync_skcipher(ctx->seq);
-out_err:
- return -EINVAL;
-}
-#endif
-
-#if defined(CONFIG_RPCSEC_GSS_KRB5_CRYPTOSYSTEM)
-
-static struct crypto_sync_skcipher *
gss_krb5_alloc_cipher_v2(const char *cname, const struct xdr_netobj *key)
{
struct crypto_sync_skcipher *tfm;
@@ -636,8 +391,6 @@ out_free:
goto out;
}
-#endif
-
static int
gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
gfp_t gfp_mask)
@@ -671,9 +424,6 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype));
if (IS_ERR(p))
goto out_err;
- /* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */
- if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1)
- ctx->enctype = ENCTYPE_DES3_CBC_RAW;
ctx->gk5e = gss_krb5_lookup_enctype(ctx->enctype);
if (ctx->gk5e == NULL) {
dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
@@ -700,7 +450,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
}
ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
- return ctx->gk5e->import_ctx(ctx, gfp_mask);
+ return gss_krb5_import_ctx_v2(ctx, gfp_mask);
out_err:
return PTR_ERR(p);
@@ -718,10 +468,7 @@ gss_krb5_import_sec_context(const void *p, size_t len, struct gss_ctx *ctx_id,
if (ctx == NULL)
return -ENOMEM;
- if (len == 85)
- ret = gss_import_v1_context(p, end, ctx);
- else
- ret = gss_import_v2_context(p, end, ctx, gfp_mask);
+ ret = gss_import_v2_context(p, end, ctx, gfp_mask);
memzero_explicit(&ctx->Ksess, sizeof(ctx->Ksess));
if (ret) {
kfree(ctx);
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 146aa755f07d..ce540df9bce4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -71,75 +71,6 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-
-static void *
-setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
-{
- u16 *ptr;
- void *krb5_hdr;
- int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
-
- token->len = g_token_size(&ctx->mech_used, body_size);
-
- ptr = (u16 *)token->data;
- g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
-
- /* ptr now at start of header described in rfc 1964, section 1.2.1: */
- krb5_hdr = ptr;
- *ptr++ = KG_TOK_MIC_MSG;
- /*
- * signalg is stored as if it were converted from LE to host endian, even
- * though it's an opaque pair of bytes according to the RFC.
- */
- *ptr++ = (__force u16)cpu_to_le16(ctx->gk5e->signalg);
- *ptr++ = SEAL_ALG_NONE;
- *ptr = 0xffff;
-
- return krb5_hdr;
-}
-
-u32
-gss_krb5_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
- struct xdr_netobj *token)
-{
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- void *ptr;
- time64_t now;
- u32 seq_send;
- u8 *cksumkey;
-
- dprintk("RPC: %s\n", __func__);
- BUG_ON(ctx == NULL);
-
- now = ktime_get_real_seconds();
-
- ptr = setup_token(ctx, token);
-
- if (ctx->gk5e->keyed_cksum)
- cksumkey = ctx->cksum;
- else
- cksumkey = NULL;
-
- if (make_checksum(ctx, ptr, 8, text, 0, cksumkey,
- KG_USAGE_SIGN, &md5cksum))
- return GSS_S_FAILURE;
-
- memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
-
- seq_send = atomic_fetch_inc(&ctx->seq_send);
-
- if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
- seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
- return GSS_S_FAILURE;
-
- return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
-}
-
-#endif
-
static void *
setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
deleted file mode 100644
index 1babc3474e10..000000000000
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * linux/net/sunrpc/gss_krb5_seqnum.c
- *
- * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/util_seqnum.c
- *
- * Copyright (c) 2000 The Regents of the University of Michigan.
- * All rights reserved.
- *
- * Andy Adamson <andros@umich.edu>
- */
-
-/*
- * Copyright 1993 by OpenVision Technologies, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software
- * and its documentation for any purpose is hereby granted without fee,
- * provided that the above copyright notice appears in all copies and
- * that both that copyright notice and this permission notice appear in
- * supporting documentation, and that the name of OpenVision not be used
- * in advertising or publicity pertaining to distribution of the software
- * without specific, written prior permission. OpenVision makes no
- * representations about the suitability of this software for any
- * purpose. It is provided "as is" without express or implied warranty.
- *
- * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
- * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
- * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <crypto/skcipher.h>
-#include <linux/types.h>
-#include <linux/sunrpc/gss_krb5.h>
-
-#include "gss_krb5_internal.h"
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-s32
-krb5_make_seq_num(struct krb5_ctx *kctx,
- struct crypto_sync_skcipher *key,
- int direction,
- u32 seqnum,
- unsigned char *cksum, unsigned char *buf)
-{
- unsigned char *plain;
- s32 code;
-
- plain = kmalloc(8, GFP_KERNEL);
- if (!plain)
- return -ENOMEM;
-
- plain[0] = (unsigned char) (seqnum & 0xff);
- plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
- plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
- plain[3] = (unsigned char) ((seqnum >> 24) & 0xff);
-
- plain[4] = direction;
- plain[5] = direction;
- plain[6] = direction;
- plain[7] = direction;
-
- code = krb5_encrypt(key, cksum, plain, buf, 8);
- kfree(plain);
- return code;
-}
-
-s32
-krb5_get_seq_num(struct krb5_ctx *kctx,
- unsigned char *cksum,
- unsigned char *buf,
- int *direction, u32 *seqnum)
-{
- s32 code;
- unsigned char *plain;
- struct crypto_sync_skcipher *key = kctx->seq;
-
- dprintk("RPC: krb5_get_seq_num:\n");
-
- plain = kmalloc(8, GFP_KERNEL);
- if (!plain)
- return -ENOMEM;
-
- if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
- goto out;
-
- if ((plain[4] != plain[5]) || (plain[4] != plain[6]) ||
- (plain[4] != plain[7])) {
- code = (s32)KG_BAD_SEQ;
- goto out;
- }
-
- *direction = plain[4];
-
- *seqnum = ((plain[0]) |
- (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
-
-out:
- kfree(plain);
- return code;
-}
diff --git a/net/sunrpc/auth_gss/gss_krb5_test.c b/net/sunrpc/auth_gss/gss_krb5_test.c
index 95ca783795c5..85625e3f3814 100644
--- a/net/sunrpc/auth_gss/gss_krb5_test.c
+++ b/net/sunrpc/auth_gss/gss_krb5_test.c
@@ -320,208 +320,12 @@ static void rfc3961_nfold_case(struct kunit *test)
"result mismatch");
}
-/*
- * RFC 3961 Appendix A.3. DES3 DR and DK
- *
- * These tests show the derived-random and derived-key values for the
- * des3-hmac-sha1-kd encryption scheme, using the DR and DK functions
- * defined in section 6.3.1. The input keys were randomly generated;
- * the usage values are from this specification.
- *
- * This test material is copyright (C) The Internet Society (2005).
- */
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_155,
- 0x00, 0x00, 0x00, 0x01, 0x55
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_1aa,
- 0x00, 0x00, 0x00, 0x01, 0xaa
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_kerberos,
- 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test1_base_key,
- 0xdc, 0xe0, 0x6b, 0x1f, 0x64, 0xc8, 0x57, 0xa1,
- 0x1c, 0x3d, 0xb5, 0x7c, 0x51, 0x89, 0x9b, 0x2c,
- 0xc1, 0x79, 0x10, 0x08, 0xce, 0x97, 0x3b, 0x92
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test1_derived_key,
- 0x92, 0x51, 0x79, 0xd0, 0x45, 0x91, 0xa7, 0x9b,
- 0x5d, 0x31, 0x92, 0xc4, 0xa7, 0xe9, 0xc2, 0x89,
- 0xb0, 0x49, 0xc7, 0x1f, 0x6e, 0xe6, 0x04, 0xcd
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test2_base_key,
- 0x5e, 0x13, 0xd3, 0x1c, 0x70, 0xef, 0x76, 0x57,
- 0x46, 0x57, 0x85, 0x31, 0xcb, 0x51, 0xc1, 0x5b,
- 0xf1, 0x1c, 0xa8, 0x2c, 0x97, 0xce, 0xe9, 0xf2
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test2_derived_key,
- 0x9e, 0x58, 0xe5, 0xa1, 0x46, 0xd9, 0x94, 0x2a,
- 0x10, 0x1c, 0x46, 0x98, 0x45, 0xd6, 0x7a, 0x20,
- 0xe3, 0xc4, 0x25, 0x9e, 0xd9, 0x13, 0xf2, 0x07
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test3_base_key,
- 0x98, 0xe6, 0xfd, 0x8a, 0x04, 0xa4, 0xb6, 0x85,
- 0x9b, 0x75, 0xa1, 0x76, 0x54, 0x0b, 0x97, 0x52,
- 0xba, 0xd3, 0xec, 0xd6, 0x10, 0xa2, 0x52, 0xbc
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test3_derived_key,
- 0x13, 0xfe, 0xf8, 0x0d, 0x76, 0x3e, 0x94, 0xec,
- 0x6d, 0x13, 0xfd, 0x2c, 0xa1, 0xd0, 0x85, 0x07,
- 0x02, 0x49, 0xda, 0xd3, 0x98, 0x08, 0xea, 0xbf
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test4_base_key,
- 0x62, 0x2a, 0xec, 0x25, 0xa2, 0xfe, 0x2c, 0xad,
- 0x70, 0x94, 0x68, 0x0b, 0x7c, 0x64, 0x94, 0x02,
- 0x80, 0x08, 0x4c, 0x1a, 0x7c, 0xec, 0x92, 0xb5
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test4_derived_key,
- 0xf8, 0xdf, 0xbf, 0x04, 0xb0, 0x97, 0xe6, 0xd9,
- 0xdc, 0x07, 0x02, 0x68, 0x6b, 0xcb, 0x34, 0x89,
- 0xd9, 0x1f, 0xd9, 0xa4, 0x51, 0x6b, 0x70, 0x3e
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test5_base_key,
- 0xd3, 0xf8, 0x29, 0x8c, 0xcb, 0x16, 0x64, 0x38,
- 0xdc, 0xb9, 0xb9, 0x3e, 0xe5, 0xa7, 0x62, 0x92,
- 0x86, 0xa4, 0x91, 0xf8, 0x38, 0xf8, 0x02, 0xfb
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test5_derived_key,
- 0x23, 0x70, 0xda, 0x57, 0x5d, 0x2a, 0x3d, 0xa8,
- 0x64, 0xce, 0xbf, 0xdc, 0x52, 0x04, 0xd5, 0x6d,
- 0xf7, 0x79, 0xa7, 0xdf, 0x43, 0xd9, 0xda, 0x43
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test6_base_key,
- 0xc1, 0x08, 0x16, 0x49, 0xad, 0xa7, 0x43, 0x62,
- 0xe6, 0xa1, 0x45, 0x9d, 0x01, 0xdf, 0xd3, 0x0d,
- 0x67, 0xc2, 0x23, 0x4c, 0x94, 0x07, 0x04, 0xda
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test6_derived_key,
- 0x34, 0x80, 0x57, 0xec, 0x98, 0xfd, 0xc4, 0x80,
- 0x16, 0x16, 0x1c, 0x2a, 0x4c, 0x7a, 0x94, 0x3e,
- 0x92, 0xae, 0x49, 0x2c, 0x98, 0x91, 0x75, 0xf7
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test7_base_key,
- 0x5d, 0x15, 0x4a, 0xf2, 0x38, 0xf4, 0x67, 0x13,
- 0x15, 0x57, 0x19, 0xd5, 0x5e, 0x2f, 0x1f, 0x79,
- 0x0d, 0xd6, 0x61, 0xf2, 0x79, 0xa7, 0x91, 0x7c
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test7_derived_key,
- 0xa8, 0x80, 0x8a, 0xc2, 0x67, 0xda, 0xda, 0x3d,
- 0xcb, 0xe9, 0xa7, 0xc8, 0x46, 0x26, 0xfb, 0xc7,
- 0x61, 0xc2, 0x94, 0xb0, 0x13, 0x15, 0xe5, 0xc1
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test8_base_key,
- 0x79, 0x85, 0x62, 0xe0, 0x49, 0x85, 0x2f, 0x57,
- 0xdc, 0x8c, 0x34, 0x3b, 0xa1, 0x7f, 0x2c, 0xa1,
- 0xd9, 0x73, 0x94, 0xef, 0xc8, 0xad, 0xc4, 0x43
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test8_derived_key,
- 0xc8, 0x13, 0xf8, 0x8a, 0x3b, 0xe3, 0xb3, 0x34,
- 0xf7, 0x54, 0x25, 0xce, 0x91, 0x75, 0xfb, 0xe3,
- 0xc8, 0x49, 0x3b, 0x89, 0xc8, 0x70, 0x3b, 0x49
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test9_base_key,
- 0x26, 0xdc, 0xe3, 0x34, 0xb5, 0x45, 0x29, 0x2f,
- 0x2f, 0xea, 0xb9, 0xa8, 0x70, 0x1a, 0x89, 0xa4,
- 0xb9, 0x9e, 0xb9, 0x94, 0x2c, 0xec, 0xd0, 0x16
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test9_derived_key,
- 0xf4, 0x8f, 0xfd, 0x6e, 0x83, 0xf8, 0x3e, 0x73,
- 0x54, 0xe6, 0x94, 0xfd, 0x25, 0x2c, 0xf8, 0x3b,
- 0xfe, 0x58, 0xf7, 0xd5, 0xba, 0x37, 0xec, 0x5d
-);
-
-static const struct gss_krb5_test_param rfc3961_kdf_test_params[] = {
- {
- .desc = "des3-hmac-sha1 key derivation case 1",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test1_base_key,
- .usage = &des3_dk_usage_155,
- .expected_result = &des3_dk_test1_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 2",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test2_base_key,
- .usage = &des3_dk_usage_1aa,
- .expected_result = &des3_dk_test2_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 3",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test3_base_key,
- .usage = &des3_dk_usage_155,
- .expected_result = &des3_dk_test3_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 4",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test4_base_key,
- .usage = &des3_dk_usage_1aa,
- .expected_result = &des3_dk_test4_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 5",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test5_base_key,
- .usage = &des3_dk_usage_kerberos,
- .expected_result = &des3_dk_test5_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 6",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test6_base_key,
- .usage = &des3_dk_usage_155,
- .expected_result = &des3_dk_test6_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 7",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test7_base_key,
- .usage = &des3_dk_usage_1aa,
- .expected_result = &des3_dk_test7_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 8",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test8_base_key,
- .usage = &des3_dk_usage_155,
- .expected_result = &des3_dk_test8_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 9",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test9_base_key,
- .usage = &des3_dk_usage_1aa,
- .expected_result = &des3_dk_test9_derived_key,
- },
-};
-
-/* Creates the function rfc3961_kdf_gen_params */
-KUNIT_ARRAY_PARAM(rfc3961_kdf, rfc3961_kdf_test_params, gss_krb5_get_desc);
-
static struct kunit_case rfc3961_test_cases[] = {
{
.name = "RFC 3961 n-fold",
.run_case = rfc3961_nfold_case,
.generate_params = rfc3961_nfold_gen_params,
},
- {
- .name = "RFC 3961 key derivation",
- .run_case = kdf_case,
- .generate_params = rfc3961_kdf_gen_params,
- },
{}
};
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 7d6d4ae4a3c9..4fbc50a0a2c4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -69,83 +69,6 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-/* read_token is a mic token, and message_buffer is the data that the mic was
- * supposedly taken over. */
-u32
-gss_krb5_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
- struct xdr_netobj *read_token)
-{
- int signalg;
- int sealalg;
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- s32 now;
- int direction;
- u32 seqnum;
- unsigned char *ptr = (unsigned char *)read_token->data;
- int bodysize;
- u8 *cksumkey;
-
- dprintk("RPC: krb5_read_token\n");
-
- if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr,
- read_token->len))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) ||
- (ptr[1] != (KG_TOK_MIC_MSG & 0xff)))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /* XXX sanity-check bodysize?? */
-
- signalg = ptr[2] + (ptr[3] << 8);
- if (signalg != ctx->gk5e->signalg)
- return GSS_S_DEFECTIVE_TOKEN;
-
- sealalg = ptr[4] + (ptr[5] << 8);
- if (sealalg != SEAL_ALG_NONE)
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if (ctx->gk5e->keyed_cksum)
- cksumkey = ctx->cksum;
- else
- cksumkey = NULL;
-
- if (make_checksum(ctx, ptr, 8, message_buffer, 0,
- cksumkey, KG_USAGE_SIGN, &md5cksum))
- return GSS_S_FAILURE;
-
- if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
- ctx->gk5e->cksumlength))
- return GSS_S_BAD_SIG;
-
- /* it got through unscathed. Make sure the context is unexpired */
-
- now = ktime_get_real_seconds();
-
- if (now > ctx->endtime)
- return GSS_S_CONTEXT_EXPIRED;
-
- /* do sequencing checks */
-
- if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
- &direction, &seqnum))
- return GSS_S_FAILURE;
-
- if ((ctx->initiate && direction != 0xff) ||
- (!ctx->initiate && direction != 0))
- return GSS_S_BAD_SIG;
-
- return GSS_S_COMPLETE;
-}
-#endif
-
u32
gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
struct xdr_netobj *read_token)
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 6d6b082380b2..b3e1738ff6bf 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -40,293 +40,6 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-
-static inline int
-gss_krb5_padding(int blocksize, int length)
-{
- return blocksize - (length % blocksize);
-}
-
-static inline void
-gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize)
-{
- int padding = gss_krb5_padding(blocksize, buf->len - offset);
- char *p;
- struct kvec *iov;
-
- if (buf->page_len || buf->tail[0].iov_len)
- iov = &buf->tail[0];
- else
- iov = &buf->head[0];
- p = iov->iov_base + iov->iov_len;
- iov->iov_len += padding;
- buf->len += padding;
- memset(p, padding, padding);
-}
-
-static inline int
-gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
-{
- u8 *ptr;
- u8 pad;
- size_t len = buf->len;
-
- if (len <= buf->head[0].iov_len) {
- pad = *(u8 *)(buf->head[0].iov_base + len - 1);
- if (pad > buf->head[0].iov_len)
- return -EINVAL;
- buf->head[0].iov_len -= pad;
- goto out;
- } else
- len -= buf->head[0].iov_len;
- if (len <= buf->page_len) {
- unsigned int last = (buf->page_base + len - 1)
- >>PAGE_SHIFT;
- unsigned int offset = (buf->page_base + len - 1)
- & (PAGE_SIZE - 1);
- ptr = kmap_atomic(buf->pages[last]);
- pad = *(ptr + offset);
- kunmap_atomic(ptr);
- goto out;
- } else
- len -= buf->page_len;
- BUG_ON(len > buf->tail[0].iov_len);
- pad = *(u8 *)(buf->tail[0].iov_base + len - 1);
-out:
- /* XXX: NOTE: we do not adjust the page lengths--they represent
- * a range of data in the real filesystem page cache, and we need
- * to know that range so the xdr code can properly place read data.
- * However adjusting the head length, as we do above, is harmless.
- * In the case of a request that fits into a single page, the server
- * also uses length and head length together to determine the original
- * start of the request to copy the request for deferal; so it's
- * easier on the server if we adjust head and tail length in tandem.
- * It's not really a problem that we don't fool with the page and
- * tail lengths, though--at worst badly formed xdr might lead the
- * server to attempt to parse the padding.
- * XXX: Document all these weird requirements for gss mechanism
- * wrap/unwrap functions. */
- if (pad > blocksize)
- return -EINVAL;
- if (buf->len > pad)
- buf->len -= pad;
- else
- return -EINVAL;
- return 0;
-}
-
-/* Assumptions: the head and tail of inbuf are ours to play with.
- * The pages, however, may be real pages in the page cache and we replace
- * them with scratch pages from **pages before writing to them. */
-/* XXX: obviously the above should be documentation of wrap interface,
- * and shouldn't be in this kerberos-specific file. */
-
-/* XXX factor out common code with seal/unseal. */
-
-u32
-gss_krb5_wrap_v1(struct krb5_ctx *kctx, int offset,
- struct xdr_buf *buf, struct page **pages)
-{
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- int blocksize = 0, plainlen;
- unsigned char *ptr, *msg_start;
- time64_t now;
- int headlen;
- struct page **tmp_pages;
- u32 seq_send;
- u8 *cksumkey;
- u32 conflen = crypto_sync_skcipher_blocksize(kctx->enc);
-
- dprintk("RPC: %s\n", __func__);
-
- now = ktime_get_real_seconds();
-
- blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
- gss_krb5_add_padding(buf, offset, blocksize);
- BUG_ON((buf->len - offset) % blocksize);
- plainlen = conflen + buf->len - offset;
-
- headlen = g_token_size(&kctx->mech_used,
- GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) -
- (buf->len - offset);
-
- ptr = buf->head[0].iov_base + offset;
- /* shift data to make room for header. */
- xdr_extend_head(buf, offset, headlen);
-
- /* XXX Would be cleverer to encrypt while copying. */
- BUG_ON((buf->len - offset - headlen) % blocksize);
-
- g_make_token_header(&kctx->mech_used,
- GSS_KRB5_TOK_HDR_LEN +
- kctx->gk5e->cksumlength + plainlen, &ptr);
-
-
- /* ptr now at header described in rfc 1964, section 1.2.1: */
- ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
- ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
-
- msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
-
- /*
- * signalg and sealalg are stored as if they were converted from LE
- * to host endian, even though they're opaque pairs of bytes according
- * to the RFC.
- */
- *(__le16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
- *(__le16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
- ptr[6] = 0xff;
- ptr[7] = 0xff;
-
- krb5_make_confounder(msg_start, conflen);
-
- if (kctx->gk5e->keyed_cksum)
- cksumkey = kctx->cksum;
- else
- cksumkey = NULL;
-
- /* XXXJBF: UGH!: */
- tmp_pages = buf->pages;
- buf->pages = pages;
- if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen,
- cksumkey, KG_USAGE_SEAL, &md5cksum))
- return GSS_S_FAILURE;
- buf->pages = tmp_pages;
-
- memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
-
- seq_send = atomic_fetch_inc(&kctx->seq_send);
-
- /* XXX would probably be more efficient to compute checksum
- * and encrypt at the same time: */
- if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff,
- seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
- return GSS_S_FAILURE;
-
- if (gss_encrypt_xdr_buf(kctx->enc, buf,
- offset + headlen - conflen, pages))
- return GSS_S_FAILURE;
-
- return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
-}
-
-u32
-gss_krb5_unwrap_v1(struct krb5_ctx *kctx, int offset, int len,
- struct xdr_buf *buf, unsigned int *slack,
- unsigned int *align)
-{
- int signalg;
- int sealalg;
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- time64_t now;
- int direction;
- s32 seqnum;
- unsigned char *ptr;
- int bodysize;
- void *data_start, *orig_start;
- int data_len;
- int blocksize;
- u32 conflen = crypto_sync_skcipher_blocksize(kctx->enc);
- int crypt_offset;
- u8 *cksumkey;
- unsigned int saved_len = buf->len;
-
- dprintk("RPC: gss_unwrap_kerberos\n");
-
- ptr = (u8 *)buf->head[0].iov_base + offset;
- if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
- len - offset))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
- (ptr[1] != (KG_TOK_WRAP_MSG & 0xff)))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /* XXX sanity-check bodysize?? */
-
- /* get the sign and seal algorithms */
-
- signalg = ptr[2] + (ptr[3] << 8);
- if (signalg != kctx->gk5e->signalg)
- return GSS_S_DEFECTIVE_TOKEN;
-
- sealalg = ptr[4] + (ptr[5] << 8);
- if (sealalg != kctx->gk5e->sealalg)
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /*
- * Data starts after token header and checksum. ptr points
- * to the beginning of the token header
- */
- crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) -
- (unsigned char *)buf->head[0].iov_base;
-
- buf->len = len;
- if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if (kctx->gk5e->keyed_cksum)
- cksumkey = kctx->cksum;
- else
- cksumkey = NULL;
-
- if (make_checksum(kctx, ptr, 8, buf, crypt_offset,
- cksumkey, KG_USAGE_SEAL, &md5cksum))
- return GSS_S_FAILURE;
-
- if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
- kctx->gk5e->cksumlength))
- return GSS_S_BAD_SIG;
-
- /* it got through unscathed. Make sure the context is unexpired */
-
- now = ktime_get_real_seconds();
-
- if (now > kctx->endtime)
- return GSS_S_CONTEXT_EXPIRED;
-
- /* do sequencing checks */
-
- if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
- ptr + 8, &direction, &seqnum))
- return GSS_S_BAD_SIG;
-
- if ((kctx->initiate && direction != 0xff) ||
- (!kctx->initiate && direction != 0))
- return GSS_S_BAD_SIG;
-
- /* Copy the data back to the right position. XXX: Would probably be
- * better to copy and encrypt at the same time. */
-
- blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
- data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
- conflen;
- orig_start = buf->head[0].iov_base + offset;
- data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
- memmove(orig_start, data_start, data_len);
- buf->head[0].iov_len -= (data_start - orig_start);
- buf->len = len - (data_start - orig_start);
-
- if (gss_krb5_remove_padding(buf, blocksize))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /* slack must include room for krb5 padding */
- *slack = XDR_QUADLEN(saved_len - buf->len);
- /* The GSS blob always precedes the RPC message payload */
- *align = *slack;
- return GSS_S_COMPLETE;
-}
-
-#endif
-
/*
* We can shift data by up to LOCAL_BUF_LEN bytes in a pass. If we need
* to do more than that, we shift repeatedly. Kevin Coffman reports
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index c4a566737085..18734e70c5dd 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -986,7 +986,7 @@ bad_unwrap:
return -EINVAL;
}
-static int
+static enum svc_auth_status
svcauth_gss_set_client(struct svc_rqst *rqstp)
{
struct gss_svc_data *svcdata = rqstp->rq_auth_data;
@@ -1634,7 +1634,7 @@ svcauth_gss_decode_credbody(struct xdr_stream *xdr,
*
* The rqstp->rq_auth_stat field is also set (see RFCs 2203 and 5531).
*/
-static int
+static enum svc_auth_status
svcauth_gss_accept(struct svc_rqst *rqstp)
{
struct gss_svc_data *svcdata = rqstp->rq_auth_data;
@@ -1945,9 +1945,6 @@ bad_wrap:
* %0: the Reply is ready to be sent
* %-ENOMEM: failed to allocate memory
* %-EINVAL: encoding error
- *
- * XXX: These return values do not match the return values documented
- * for the auth_ops ->release method in linux/sunrpc/svcauth.h.
*/
static int
svcauth_gss_release(struct svc_rqst *rqstp)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d7c697af3762..37b0b212b934 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -534,6 +534,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
.servername = args->servername,
.bc_xprt = args->bc_xprt,
.xprtsec = args->xprtsec,
+ .connect_timeout = args->connect_timeout,
+ .reconnect_timeout = args->reconnect_timeout,
};
char servername[48];
struct rpc_clnt *clnt;
@@ -2602,6 +2604,7 @@ out:
case 0:
task->tk_action = rpc_exit_task;
task->tk_status = rpcauth_unwrap_resp(task, &xdr);
+ xdr_finish_decode(&xdr);
return;
case -EAGAIN:
task->tk_status = 0;
@@ -2722,7 +2725,7 @@ out_unparsable:
out_verifier:
trace_rpc_bad_verifier(task);
- goto out_err;
+ goto out_garbage;
out_msg_denied:
error = -EACCES;
@@ -2748,6 +2751,7 @@ out_msg_denied:
case rpc_autherr_rejectedverf:
case rpcsec_gsserr_credproblem:
case rpcsec_gsserr_ctxproblem:
+ rpcauth_invalcred(task);
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
@@ -2904,19 +2908,22 @@ static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
* @clnt: pointer to struct rpc_clnt
* @xps: pointer to struct rpc_xprt_switch,
* @xprt: pointer struct rpc_xprt
- * @dummy: unused
+ * @in_max_connect: pointer to the max_connect value for the passed in xprt transport
*/
int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
- void *dummy)
+ void *in_max_connect)
{
struct rpc_cb_add_xprt_calldata *data;
struct rpc_task *task;
+ int max_connect = clnt->cl_max_connect;
- if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) {
+ if (in_max_connect)
+ max_connect = *(int *)in_max_connect;
+ if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) {
rcu_read_lock();
pr_warn("SUNRPC: reached max allowed number (%d) did not add "
- "transport to server: %s\n", clnt->cl_max_connect,
+ "transport to server: %s\n", max_connect,
rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
rcu_read_unlock();
return -EINVAL;
@@ -3069,6 +3076,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
}
xprt->resvport = resvport;
xprt->reuseport = reuseport;
+
+ if (xprtargs->connect_timeout)
+ connect_timeout = xprtargs->connect_timeout;
+ if (xprtargs->reconnect_timeout)
+ reconnect_timeout = xprtargs->reconnect_timeout;
if (xprt->ops->set_connect_timeout != NULL)
xprt->ops->set_connect_timeout(xprt,
connect_timeout,
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 0b6034fab9ab..f420d8457345 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -472,7 +472,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
return NULL;
inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_fop = &simple_dir_operations;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 587811a002c9..812fda9d45dd 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -513,9 +513,9 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
INIT_LIST_HEAD(&pool->sp_all_threads);
spin_lock_init(&pool->sp_lock);
+ percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL);
- percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL);
}
return serv;
@@ -588,9 +588,9 @@ svc_destroy(struct kref *ref)
for (i = 0; i < serv->sv_nrpools; i++) {
struct svc_pool *pool = &serv->sv_pools[i];
+ percpu_counter_destroy(&pool->sp_messages_arrived);
percpu_counter_destroy(&pool->sp_sockets_queued);
percpu_counter_destroy(&pool->sp_threads_woken);
- percpu_counter_destroy(&pool->sp_threads_timedout);
}
kfree(serv->sv_pools);
kfree(serv);
@@ -689,23 +689,44 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
return rqstp;
}
-/*
- * Choose a pool in which to create a new thread, for svc_set_num_threads
+/**
+ * svc_pool_wake_idle_thread - Awaken an idle thread in @pool
+ * @pool: service thread pool
+ *
+ * Can be called from soft IRQ or process context. Finding an idle
+ * service thread and marking it BUSY is atomic with respect to
+ * other calls to svc_pool_wake_idle_thread().
+ *
*/
-static inline struct svc_pool *
-choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+void svc_pool_wake_idle_thread(struct svc_pool *pool)
{
- if (pool != NULL)
- return pool;
+ struct svc_rqst *rqstp;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
+ if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
+ continue;
+
+ WRITE_ONCE(rqstp->rq_qtime, ktime_get());
+ wake_up_process(rqstp->rq_task);
+ rcu_read_unlock();
+ percpu_counter_inc(&pool->sp_threads_woken);
+ trace_svc_wake_up(rqstp->rq_task->pid);
+ return;
+ }
+ rcu_read_unlock();
- return &serv->sv_pools[(*state)++ % serv->sv_nrpools];
+ set_bit(SP_CONGESTED, &pool->sp_flags);
}
-/*
- * Choose a thread to kill, for svc_set_num_threads
- */
-static inline struct task_struct *
-choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+static struct svc_pool *
+svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+{
+ return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools];
+}
+
+static struct task_struct *
+svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
{
unsigned int i;
struct task_struct *task = NULL;
@@ -713,7 +734,6 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
if (pool != NULL) {
spin_lock_bh(&pool->sp_lock);
} else {
- /* choose a pool in round-robin fashion */
for (i = 0; i < serv->sv_nrpools; i++) {
pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
spin_lock_bh(&pool->sp_lock);
@@ -728,21 +748,15 @@ found_pool:
if (!list_empty(&pool->sp_all_threads)) {
struct svc_rqst *rqstp;
- /*
- * Remove from the pool->sp_all_threads list
- * so we don't try to kill it again.
- */
rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
set_bit(RQ_VICTIM, &rqstp->rq_flags);
list_del_rcu(&rqstp->rq_all);
task = rqstp->rq_task;
}
spin_unlock_bh(&pool->sp_lock);
-
return task;
}
-/* create new threads */
static int
svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
@@ -754,13 +768,12 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
do {
nrservs--;
- chosen_pool = choose_pool(serv, pool, &state);
-
+ chosen_pool = svc_pool_next(serv, pool, &state);
node = svc_pool_map_get_node(chosen_pool->sp_id);
+
rqstp = svc_prepare_thread(serv, chosen_pool, node);
if (IS_ERR(rqstp))
return PTR_ERR(rqstp);
-
task = kthread_create_on_node(serv->sv_threadfn, rqstp,
node, "%s", serv->sv_name);
if (IS_ERR(task)) {
@@ -779,15 +792,6 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
return 0;
}
-/*
- * Create or destroy enough new threads to make the number
- * of threads the given number. If `pool' is non-NULL, applies
- * only to threads in that pool, otherwise round-robins between
- * all pools. Caller must ensure that mutual exclusion between this and
- * server startup or shutdown.
- */
-
-/* destroy old threads */
static int
svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
@@ -795,9 +799,8 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
struct task_struct *task;
unsigned int state = serv->sv_nrthreads-1;
- /* destroy old threads */
do {
- task = choose_victim(serv, pool, &state);
+ task = svc_pool_victim(serv, pool, &state);
if (task == NULL)
break;
rqstp = kthread_data(task);
@@ -809,6 +812,23 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
return 0;
}
+/**
+ * svc_set_num_threads - adjust number of threads per RPC service
+ * @serv: RPC service to adjust
+ * @pool: Specific pool from which to choose threads, or NULL
+ * @nrservs: New number of threads for @serv (0 or less means kill all threads)
+ *
+ * Create or destroy threads to make the number of threads for @serv the
+ * given number. If @pool is non-NULL, change only threads in that pool;
+ * otherwise, round-robin between all pools for @serv. @serv's
+ * sv_nrthreads is adjusted for each thread created or destroyed.
+ *
+ * Caller must ensure mutual exclusion between this and server startup or
+ * shutdown.
+ *
+ * Returns zero on success or a negative errno if an error occurred while
+ * starting a thread.
+ */
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
@@ -1277,8 +1297,9 @@ svc_process_common(struct svc_rqst *rqstp)
const struct svc_procedure *procp = NULL;
struct svc_serv *serv = rqstp->rq_server;
struct svc_process_info process;
- int auth_res, rc;
+ enum svc_auth_status auth_res;
unsigned int aoffset;
+ int rc;
__be32 *p;
/* Will be turned off by GSS integrity and privacy services */
@@ -1333,6 +1354,9 @@ svc_process_common(struct svc_rqst *rqstp)
goto dropit;
case SVC_COMPLETE:
goto sendit;
+ default:
+ pr_warn_once("Unexpected svc_auth_status (%d)\n", auth_res);
+ goto err_system_err;
}
if (progp == NULL)
@@ -1370,6 +1394,8 @@ svc_process_common(struct svc_rqst *rqstp)
rc = process.dispatch(rqstp);
if (procp->pc_release)
procp->pc_release(rqstp);
+ xdr_finish_decode(xdr);
+
if (!rc)
goto dropit;
if (rqstp->rq_auth_stat != rpc_auth_ok)
@@ -1516,7 +1542,6 @@ out_baddir:
out_drop:
svc_drop(rqstp);
}
-EXPORT_SYMBOL_GPL(svc_process);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 62c7919ea610..4cfe9640df48 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -434,6 +434,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
smp_rmb();
xpt_flags = READ_ONCE(xprt->xpt_flags);
+ trace_svc_xprt_enqueue(xprt, xpt_flags);
if (xpt_flags & BIT(XPT_BUSY))
return false;
if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
@@ -456,7 +457,6 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
void svc_xprt_enqueue(struct svc_xprt *xprt)
{
struct svc_pool *pool;
- struct svc_rqst *rqstp = NULL;
if (!svc_xprt_ready(xprt))
return;
@@ -476,21 +476,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
spin_unlock_bh(&pool->sp_lock);
- /* find a thread for this xprt */
- rcu_read_lock();
- list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
- continue;
- percpu_counter_inc(&pool->sp_threads_woken);
- rqstp->rq_qtime = ktime_get();
- wake_up_process(rqstp->rq_task);
- goto out_unlock;
- }
- set_bit(SP_CONGESTED, &pool->sp_flags);
- rqstp = NULL;
-out_unlock:
- rcu_read_unlock();
- trace_svc_xprt_enqueue(xprt, rqstp);
+ svc_pool_wake_idle_thread(pool);
}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
@@ -581,7 +567,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
svc_xprt_put(xprt);
}
-/*
+/**
+ * svc_wake_up - Wake up a service thread for non-transport work
+ * @serv: RPC service
+ *
* Some svc_serv's will have occasional work to do, even when a xprt is not
* waiting to be serviced. This function is there to "kick" a task in one of
* those services so that it can wake up and do that work. Note that we only
@@ -590,27 +579,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
*/
void svc_wake_up(struct svc_serv *serv)
{
- struct svc_rqst *rqstp;
- struct svc_pool *pool;
-
- pool = &serv->sv_pools[0];
+ struct svc_pool *pool = &serv->sv_pools[0];
- rcu_read_lock();
- list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- /* skip any that aren't queued */
- if (test_bit(RQ_BUSY, &rqstp->rq_flags))
- continue;
- rcu_read_unlock();
- wake_up_process(rqstp->rq_task);
- trace_svc_wake_up(rqstp->rq_task->pid);
- return;
- }
- rcu_read_unlock();
-
- /* No free entries available */
set_bit(SP_TASK_PENDING, &pool->sp_flags);
- smp_wmb();
- trace_svc_wake_up(0);
+ svc_pool_wake_idle_thread(pool);
}
EXPORT_SYMBOL_GPL(svc_wake_up);
@@ -679,7 +651,7 @@ static void svc_check_conn_limits(struct svc_serv *serv)
}
}
-static int svc_alloc_arg(struct svc_rqst *rqstp)
+static bool svc_alloc_arg(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
struct xdr_buf *arg = &rqstp->rq_arg;
@@ -701,10 +673,10 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
/* Made progress, don't sleep yet */
continue;
- set_current_state(TASK_INTERRUPTIBLE);
- if (signalled() || kthread_should_stop()) {
+ set_current_state(TASK_IDLE);
+ if (kthread_should_stop()) {
set_current_state(TASK_RUNNING);
- return -EINTR;
+ return false;
}
trace_svc_alloc_arg_err(pages, ret);
memalloc_retry_wait(GFP_KERNEL);
@@ -723,7 +695,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
arg->tail[0].iov_len = 0;
rqstp->rq_xid = xdr_zero;
- return 0;
+ return true;
}
static bool
@@ -732,7 +704,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
struct svc_pool *pool = rqstp->rq_pool;
/* did someone call svc_wake_up? */
- if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags))
+ if (test_bit(SP_TASK_PENDING, &pool->sp_flags))
return false;
/* was a socket queued? */
@@ -740,7 +712,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
return false;
/* are we shutting down? */
- if (signalled() || kthread_should_stop())
+ if (kthread_should_stop())
return false;
/* are we freezing? */
@@ -750,10 +722,9 @@ rqst_should_sleep(struct svc_rqst *rqstp)
return true;
}
-static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
{
struct svc_pool *pool = rqstp->rq_pool;
- long time_left = 0;
/* rq_xprt should be clear on entry */
WARN_ON_ONCE(rqstp->rq_xprt);
@@ -762,18 +733,14 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (rqstp->rq_xprt)
goto out_found;
- /*
- * We have to be able to interrupt this wait
- * to bring down the daemons ...
- */
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(TASK_IDLE);
smp_mb__before_atomic();
clear_bit(SP_CONGESTED, &pool->sp_flags);
clear_bit(RQ_BUSY, &rqstp->rq_flags);
smp_mb__after_atomic();
if (likely(rqst_should_sleep(rqstp)))
- time_left = schedule_timeout(timeout);
+ schedule();
else
__set_current_state(TASK_RUNNING);
@@ -781,17 +748,16 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
set_bit(RQ_BUSY, &rqstp->rq_flags);
smp_mb__after_atomic();
+ clear_bit(SP_TASK_PENDING, &pool->sp_flags);
rqstp->rq_xprt = svc_xprt_dequeue(pool);
if (rqstp->rq_xprt)
goto out_found;
- if (!time_left)
- percpu_counter_inc(&pool->sp_threads_timedout);
-
- if (signalled() || kthread_should_stop())
- return ERR_PTR(-EINTR);
- return ERR_PTR(-EAGAIN);
+ if (kthread_should_stop())
+ return NULL;
+ return NULL;
out_found:
+ clear_bit(SP_TASK_PENDING, &pool->sp_flags);
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
*/
@@ -867,37 +833,35 @@ out:
return len;
}
-/*
- * Receive the next request on any transport. This code is carefully
- * organised not to touch any cachelines in the shared svc_serv
- * structure, only cachelines in the local svc_pool.
+/**
+ * svc_recv - Receive and process the next request on any transport
+ * @rqstp: an idle RPC service thread
+ *
+ * This code is carefully organised not to touch any cachelines in
+ * the shared svc_serv structure, only cachelines in the local
+ * svc_pool.
*/
-int svc_recv(struct svc_rqst *rqstp, long timeout)
+void svc_recv(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = NULL;
struct svc_serv *serv = rqstp->rq_server;
- int len, err;
+ int len;
- err = svc_alloc_arg(rqstp);
- if (err)
+ if (!svc_alloc_arg(rqstp))
goto out;
try_to_freeze();
cond_resched();
- err = -EINTR;
- if (signalled() || kthread_should_stop())
+ if (kthread_should_stop())
goto out;
- xprt = svc_get_next_xprt(rqstp, timeout);
- if (IS_ERR(xprt)) {
- err = PTR_ERR(xprt);
+ xprt = svc_get_next_xprt(rqstp);
+ if (!xprt)
goto out;
- }
len = svc_handle_xprt(rqstp, xprt);
/* No data, incomplete (TCP) read, or accept() */
- err = -EAGAIN;
if (len <= 0)
goto out_release;
@@ -909,13 +873,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
if (serv->sv_stats)
serv->sv_stats->netcnt++;
+ percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
rqstp->rq_stime = ktime_get();
- return len;
+ svc_process(rqstp);
+out:
+ return;
out_release:
rqstp->rq_res.len = 0;
svc_xprt_release(rqstp);
-out:
- return err;
}
EXPORT_SYMBOL_GPL(svc_recv);
@@ -1456,12 +1421,11 @@ static int svc_pool_stats_show(struct seq_file *m, void *p)
return 0;
}
- seq_printf(m, "%u %llu %llu %llu %llu\n",
- pool->sp_id,
- percpu_counter_sum_positive(&pool->sp_sockets_queued),
- percpu_counter_sum_positive(&pool->sp_sockets_queued),
- percpu_counter_sum_positive(&pool->sp_threads_woken),
- percpu_counter_sum_positive(&pool->sp_threads_timedout));
+ seq_printf(m, "%u %llu %llu %llu 0\n",
+ pool->sp_id,
+ percpu_counter_sum_positive(&pool->sp_messages_arrived),
+ percpu_counter_sum_positive(&pool->sp_sockets_queued),
+ percpu_counter_sum_positive(&pool->sp_threads_woken));
return 0;
}
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 67d8245a08af..aa4429d0b810 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -60,8 +60,19 @@ svc_put_auth_ops(struct auth_ops *aops)
module_put(aops->owner);
}
-int
-svc_authenticate(struct svc_rqst *rqstp)
+/**
+ * svc_authenticate - Initialize an outgoing credential
+ * @rqstp: RPC execution context
+ *
+ * Return values:
+ * %SVC_OK: XDR encoding of the result can begin
+ * %SVC_DENIED: Credential or verifier is not valid
+ * %SVC_GARBAGE: Failed to decode credential or verifier
+ * %SVC_COMPLETE: GSS context lifetime event; no further action
+ * %SVC_DROP: Drop this request; no further action
+ * %SVC_CLOSE: Like drop, but also close transport connection
+ */
+enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp)
{
struct auth_ops *aops;
u32 flavor;
@@ -89,16 +100,28 @@ svc_authenticate(struct svc_rqst *rqstp)
}
EXPORT_SYMBOL_GPL(svc_authenticate);
-int svc_set_client(struct svc_rqst *rqstp)
+/**
+ * svc_set_client - Assign an appropriate 'auth_domain' as the client
+ * @rqstp: RPC execution context
+ *
+ * Return values:
+ * %SVC_OK: Client was found and assigned
+ * %SVC_DENY: Client was explicitly denied
+ * %SVC_DROP: Ignore this request
+ * %SVC_CLOSE: Ignore this request and close the connection
+ */
+enum svc_auth_status svc_set_client(struct svc_rqst *rqstp)
{
rqstp->rq_client = NULL;
return rqstp->rq_authop->set_client(rqstp);
}
EXPORT_SYMBOL_GPL(svc_set_client);
-/* A request, which was authenticated, has now executed.
- * Time to finalise the credentials and verifier
- * and release and resources
+/**
+ * svc_authorise - Finalize credentials/verifier and release resources
+ * @rqstp: RPC execution context
+ *
+ * Returns zero on success, or a negative errno.
*/
int svc_authorise(struct svc_rqst *rqstp)
{
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 174783f804fa..04b45588ae6f 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -665,7 +665,7 @@ static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp)
}
}
-int
+enum svc_auth_status
svcauth_unix_set_client(struct svc_rqst *rqstp)
{
struct sockaddr_in *sin;
@@ -736,7 +736,6 @@ out:
rqstp->rq_auth_stat = rpc_auth_ok;
return SVC_OK;
}
-
EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
/**
@@ -751,7 +750,7 @@ EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
*
* rqstp->rq_auth_stat is set as mandated by RFC 5531.
*/
-static int
+static enum svc_auth_status
svcauth_null_accept(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_arg_stream;
@@ -828,7 +827,7 @@ struct auth_ops svcauth_null = {
*
* rqstp->rq_auth_stat is set as mandated by RFC 5531.
*/
-static int
+static enum svc_auth_status
svcauth_tls_accept(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_arg_stream;
@@ -913,7 +912,7 @@ struct auth_ops svcauth_tls = {
*
* rqstp->rq_auth_stat is set as mandated by RFC 5531.
*/
-static int
+static enum svc_auth_status
svcauth_unix_accept(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_arg_stream;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2eb8df44f894..998687421fa6 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -36,6 +36,8 @@
#include <linux/skbuff.h>
#include <linux/file.h>
#include <linux/freezer.h>
+#include <linux/bvec.h>
+
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
@@ -43,7 +45,7 @@
#include <net/udp.h>
#include <net/tcp.h>
#include <net/tcp_states.h>
-#include <net/tls.h>
+#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
@@ -226,27 +228,30 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
}
static int
-svc_tcp_sock_process_cmsg(struct svc_sock *svsk, struct msghdr *msg,
+svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
struct cmsghdr *cmsg, int ret)
{
- if (cmsg->cmsg_level == SOL_TLS &&
- cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
- u8 content_type = *((u8 *)CMSG_DATA(cmsg));
-
- switch (content_type) {
- case TLS_RECORD_TYPE_DATA:
- /* TLS sets EOR at the end of each application data
- * record, even though there might be more frames
- * waiting to be decrypted.
- */
- msg->msg_flags &= ~MSG_EOR;
- break;
- case TLS_RECORD_TYPE_ALERT:
- ret = -ENOTCONN;
- break;
- default:
- ret = -EAGAIN;
- }
+ u8 content_type = tls_get_record_type(sock->sk, cmsg);
+ u8 level, description;
+
+ switch (content_type) {
+ case 0:
+ break;
+ case TLS_RECORD_TYPE_DATA:
+ /* TLS sets EOR at the end of each application data
+ * record, even though there might be more frames
+ * waiting to be decrypted.
+ */
+ msg->msg_flags &= ~MSG_EOR;
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ tls_alert_recv(sock->sk, msg, &level, &description);
+ ret = (level == TLS_ALERT_LEVEL_FATAL) ?
+ -ENOTCONN : -EAGAIN;
+ break;
+ default:
+ /* discard this record type */
+ ret = -EAGAIN;
}
return ret;
}
@@ -258,13 +263,14 @@ svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg)
struct cmsghdr cmsg;
u8 buf[CMSG_SPACE(sizeof(u8))];
} u;
+ struct socket *sock = svsk->sk_sock;
int ret;
msg->msg_control = &u;
msg->msg_controllen = sizeof(u);
- ret = sock_recvmsg(svsk->sk_sock, msg, MSG_DONTWAIT);
+ ret = sock_recvmsg(sock, msg, MSG_DONTWAIT);
if (unlikely(msg->msg_controllen != sizeof(u)))
- ret = svc_tcp_sock_process_cmsg(svsk, msg, &u.cmsg, ret);
+ ret = svc_tcp_sock_process_cmsg(sock, msg, &u.cmsg, ret);
return ret;
}
@@ -691,9 +697,10 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
.msg_name = &rqstp->rq_addr,
.msg_namelen = rqstp->rq_addrlen,
.msg_control = cmh,
+ .msg_flags = MSG_SPLICE_PAGES,
.msg_controllen = sizeof(buffer),
};
- unsigned int sent;
+ unsigned int count;
int err;
svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
@@ -706,22 +713,23 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- err = xdr_alloc_bvec(xdr, GFP_KERNEL);
- if (err < 0)
- goto out_unlock;
+ count = xdr_buf_to_bvec(rqstp->rq_bvec,
+ ARRAY_SIZE(rqstp->rq_bvec), xdr);
- err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ count, 0);
+ err = sock_sendmsg(svsk->sk_sock, &msg);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
- err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ count, 0);
+ err = sock_sendmsg(svsk->sk_sock, &msg);
}
- xdr_free_bvec(xdr);
+
trace_svcsock_udp_send(xprt, err);
-out_unlock:
+
mutex_unlock(&xprt->xpt_mutex);
- if (err < 0)
- return err;
- return sent;
+ return err;
out_notconn:
mutex_unlock(&xprt->xpt_mutex);
@@ -1085,6 +1093,9 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk)
/* If we have more data, signal svc_xprt_enqueue() to try again */
svsk->sk_tcplen = 0;
svsk->sk_marker = xdr_zero;
+
+ smp_wmb();
+ tcp_set_rcvlowat(svsk->sk_sk, 1);
}
/**
@@ -1174,10 +1185,17 @@ err_incomplete:
goto err_delete;
if (len == want)
svc_tcp_fragment_received(svsk);
- else
+ else {
+ /* Avoid more ->sk_data_ready() calls until the rest
+ * of the message has arrived. This reduces service
+ * thread wake-ups on large incoming messages. */
+ tcp_set_rcvlowat(svsk->sk_sk,
+ svc_sock_reclen(svsk) - svsk->sk_tcplen);
+
trace_svcsock_tcp_recv_short(&svsk->sk_xprt,
svc_sock_reclen(svsk),
svsk->sk_tcplen - sizeof(rpc_fraghdr));
+ }
goto err_noclose;
error:
if (len != -EAGAIN)
@@ -1194,75 +1212,51 @@ err_noclose:
return 0; /* record not complete */
}
-static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
- int flags)
-{
- struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES | flags, };
-
- iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 1, vec->iov_len);
- return sock_sendmsg(sock, &msg);
-}
-
/*
* MSG_SPLICE_PAGES is used exclusively to reduce the number of
* copy operations in this path. Therefore the caller must ensure
* that the pages backing @xdr are unchanging.
*
- * In addition, the logic assumes that * .bv_len is never larger
- * than PAGE_SIZE.
+ * Note that the send is non-blocking. The caller has incremented
+ * the reference count on each page backing the RPC message, and
+ * the network layer will "put" these pages when transmission is
+ * complete.
+ *
+ * This is safe for our RPC services because the memory backing
+ * the head and tail components is never kmalloc'd. These always
+ * come from pages in the svc_rqst::rq_pages array.
*/
-static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
+static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
rpc_fraghdr marker, unsigned int *sentp)
{
- const struct kvec *head = xdr->head;
- const struct kvec *tail = xdr->tail;
- struct kvec rm = {
- .iov_base = &marker,
- .iov_len = sizeof(marker),
- };
struct msghdr msg = {
- .msg_flags = 0,
+ .msg_flags = MSG_SPLICE_PAGES,
};
+ unsigned int count;
+ void *buf;
int ret;
*sentp = 0;
- ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
- if (ret < 0)
- return ret;
-
- ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
- if (ret < 0)
- return ret;
- *sentp += ret;
- if (ret != rm.iov_len)
- return -EAGAIN;
- ret = svc_tcp_send_kvec(sock, head, 0);
- if (ret < 0)
- return ret;
- *sentp += ret;
- if (ret != head->iov_len)
- goto out;
-
- if (xdr_buf_pagecount(xdr))
- xdr->bvec[0].bv_offset = offset_in_page(xdr->page_base);
-
- msg.msg_flags = MSG_SPLICE_PAGES;
- iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec,
- xdr_buf_pagecount(xdr), xdr->page_len);
- ret = sock_sendmsg(sock, &msg);
+ /* The stream record marker is copied into a temporary page
+ * fragment buffer so that it can be included in rq_bvec.
+ */
+ buf = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker),
+ GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ memcpy(buf, &marker, sizeof(marker));
+ bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
+
+ count = xdr_buf_to_bvec(rqstp->rq_bvec + 1,
+ ARRAY_SIZE(rqstp->rq_bvec) - 1, &rqstp->rq_res);
+
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ 1 + count, sizeof(marker) + rqstp->rq_res.len);
+ ret = sock_sendmsg(svsk->sk_sock, &msg);
if (ret < 0)
return ret;
*sentp += ret;
-
- if (tail->iov_len) {
- ret = svc_tcp_send_kvec(sock, tail, 0);
- if (ret < 0)
- return ret;
- *sentp += ret;
- }
-
-out:
return 0;
}
@@ -1288,23 +1282,17 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
rqstp->rq_xprt_ctxt = NULL;
- atomic_inc(&svsk->sk_sendqlen);
mutex_lock(&xprt->xpt_mutex);
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- tcp_sock_set_cork(svsk->sk_sk, true);
- err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent);
- xdr_free_bvec(xdr);
+ err = svc_tcp_sendmsg(svsk, rqstp, marker, &sent);
trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
- if (atomic_dec_and_test(&svsk->sk_sendqlen))
- tcp_sock_set_cork(svsk->sk_sk, false);
mutex_unlock(&xprt->xpt_mutex);
return sent;
out_notconn:
- atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -ENOTCONN;
out_close:
@@ -1313,7 +1301,6 @@ out_close:
(err < 0) ? "got error" : "sent",
(err < 0) ? err : sent, xdr->len);
svc_xprt_deferred_close(xprt);
- atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
}
@@ -1624,6 +1611,8 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ tls_handshake_close(svsk->sk_sock);
+
svc_sock_detach(xprt);
if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
@@ -1638,6 +1627,7 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct page_frag_cache *pfc = &svsk->sk_frag_cache;
struct socket *sock = svsk->sk_sock;
trace_svcsock_free(svsk, sock);
@@ -1647,5 +1637,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
sockfd_put(sock);
else
sock_release(sock);
+ if (pfc->va)
+ __page_frag_cache_drain(virt_to_head_page(pfc->va),
+ pfc->pagecnt_bias);
kfree(svsk);
}
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2a22e78af116..62e07c330a66 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -165,6 +165,56 @@ xdr_free_bvec(struct xdr_buf *buf)
}
/**
+ * xdr_buf_to_bvec - Copy components of an xdr_buf into a bio_vec array
+ * @bvec: bio_vec array to populate
+ * @bvec_size: element count of @bio_vec
+ * @xdr: xdr_buf to be copied
+ *
+ * Returns the number of entries consumed in @bvec.
+ */
+unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size,
+ const struct xdr_buf *xdr)
+{
+ const struct kvec *head = xdr->head;
+ const struct kvec *tail = xdr->tail;
+ unsigned int count = 0;
+
+ if (head->iov_len) {
+ bvec_set_virt(bvec++, head->iov_base, head->iov_len);
+ ++count;
+ }
+
+ if (xdr->page_len) {
+ unsigned int offset, len, remaining;
+ struct page **pages = xdr->pages;
+
+ offset = offset_in_page(xdr->page_base);
+ remaining = xdr->page_len;
+ while (remaining > 0) {
+ len = min_t(unsigned int, remaining,
+ PAGE_SIZE - offset);
+ bvec_set_page(bvec++, *pages++, len, offset);
+ remaining -= len;
+ offset = 0;
+ if (unlikely(++count > bvec_size))
+ goto bvec_overflow;
+ }
+ }
+
+ if (tail->iov_len) {
+ bvec_set_virt(bvec, tail->iov_base, tail->iov_len);
+ if (unlikely(++count > bvec_size))
+ goto bvec_overflow;
+ }
+
+ return count;
+
+bvec_overflow:
+ pr_warn_once("%s: bio_vec array overflow\n", __func__);
+ return count - 1;
+}
+
+/**
* xdr_inline_pages - Prepare receive buffer for a large reply
* @xdr: xdr_buf into which reply will be placed
* @offset: expected offset where data payload will start, in bytes
@@ -1288,6 +1338,14 @@ static unsigned int xdr_set_tail_base(struct xdr_stream *xdr,
return xdr_set_iov(xdr, buf->tail, base, len);
}
+static void xdr_stream_unmap_current_page(struct xdr_stream *xdr)
+{
+ if (xdr->page_kaddr) {
+ kunmap_local(xdr->page_kaddr);
+ xdr->page_kaddr = NULL;
+ }
+}
+
static unsigned int xdr_set_page_base(struct xdr_stream *xdr,
unsigned int base, unsigned int len)
{
@@ -1305,12 +1363,18 @@ static unsigned int xdr_set_page_base(struct xdr_stream *xdr,
if (len > maxlen)
len = maxlen;
+ xdr_stream_unmap_current_page(xdr);
xdr_stream_page_set_pos(xdr, base);
base += xdr->buf->page_base;
pgnr = base >> PAGE_SHIFT;
xdr->page_ptr = &xdr->buf->pages[pgnr];
- kaddr = page_address(*xdr->page_ptr);
+
+ if (PageHighMem(*xdr->page_ptr)) {
+ xdr->page_kaddr = kmap_local_page(*xdr->page_ptr);
+ kaddr = xdr->page_kaddr;
+ } else
+ kaddr = page_address(*xdr->page_ptr);
pgoff = base & ~PAGE_MASK;
xdr->p = (__be32*)(kaddr + pgoff);
@@ -1364,6 +1428,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
struct rpc_rqst *rqst)
{
xdr->buf = buf;
+ xdr->page_kaddr = NULL;
xdr_reset_scratch_buffer(xdr);
xdr->nwords = XDR_QUADLEN(buf->len);
if (xdr_set_iov(xdr, buf->head, 0, buf->len) == 0 &&
@@ -1396,6 +1461,16 @@ void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
}
EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
+/**
+ * xdr_finish_decode - Clean up the xdr_stream after decoding data.
+ * @xdr: pointer to xdr_stream struct
+ */
+void xdr_finish_decode(struct xdr_stream *xdr)
+{
+ xdr_stream_unmap_current_page(xdr);
+}
+EXPORT_SYMBOL(xdr_finish_decode);
+
static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
unsigned int nwords = XDR_QUADLEN(nbytes);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index b098fde373ab..28c0771c4e8c 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -935,9 +935,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
if (!rep->rr_rdmabuf)
goto out_free;
- if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
- goto out_free_regbuf;
-
rep->rr_cid.ci_completion_id =
atomic_inc_return(&r_xprt->rx_ep->re_completion_ids);
@@ -956,8 +953,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
spin_unlock(&buf->rb_lock);
return rep;
-out_free_regbuf:
- rpcrdma_regbuf_free(rep->rr_rdmabuf);
out_free:
kfree(rep);
out:
@@ -1363,6 +1358,10 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
rep = rpcrdma_rep_create(r_xprt, temp);
if (!rep)
break;
+ if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {
+ rpcrdma_rep_put(buf, rep);
+ break;
+ }
rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
trace_xprtrdma_post_recv(rep);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 5e5ff6784ef5..da409450dfc0 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -593,7 +593,6 @@ void xprt_rdma_cleanup(void);
int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *);
-int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9f010369100a..71cd916e384f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,7 +47,7 @@
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>
-#include <net/tls.h>
+#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/bvec.h>
@@ -360,24 +360,27 @@ static int
xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
struct cmsghdr *cmsg, int ret)
{
- if (cmsg->cmsg_level == SOL_TLS &&
- cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
- u8 content_type = *((u8 *)CMSG_DATA(cmsg));
-
- switch (content_type) {
- case TLS_RECORD_TYPE_DATA:
- /* TLS sets EOR at the end of each application data
- * record, even though there might be more frames
- * waiting to be decrypted.
- */
- msg->msg_flags &= ~MSG_EOR;
- break;
- case TLS_RECORD_TYPE_ALERT:
- ret = -ENOTCONN;
- break;
- default:
- ret = -EAGAIN;
- }
+ u8 content_type = tls_get_record_type(sock->sk, cmsg);
+ u8 level, description;
+
+ switch (content_type) {
+ case 0:
+ break;
+ case TLS_RECORD_TYPE_DATA:
+ /* TLS sets EOR at the end of each application data
+ * record, even though there might be more frames
+ * waiting to be decrypted.
+ */
+ msg->msg_flags &= ~MSG_EOR;
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ tls_alert_recv(sock->sk, msg, &level, &description);
+ ret = (level == TLS_ALERT_LEVEL_FATAL) ?
+ -EACCES : -EAGAIN;
+ break;
+ default:
+ /* discard this record type */
+ ret = -EAGAIN;
}
return ret;
}
@@ -777,6 +780,8 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
}
if (ret == -ESHUTDOWN)
kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+ else if (ret == -EACCES)
+ xprt_wake_pending_tasks(&transport->xprt, -EACCES);
else
xs_poll_check_readable(transport);
out:
@@ -1292,6 +1297,8 @@ static void xs_close(struct rpc_xprt *xprt)
dprintk("RPC: xs_close xprt %p\n", xprt);
+ if (transport->sock)
+ tls_handshake_close(transport->sock);
xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
}
@@ -2230,9 +2237,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct net *net = sock_net(sock->sk);
+ unsigned long connect_timeout;
+ unsigned long syn_retries;
unsigned int keepidle;
unsigned int keepcnt;
unsigned int timeo;
+ unsigned long t;
spin_lock(&xprt->transport_lock);
keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
@@ -2250,6 +2261,35 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
/* TCP user timeout (see RFC5482) */
tcp_sock_set_user_timeout(sock->sk, timeo);
+
+ /* Connect timeout */
+ connect_timeout = max_t(unsigned long,
+ DIV_ROUND_UP(xprt->connect_timeout, HZ), 1);
+ syn_retries = max_t(unsigned long,
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries), 1);
+ for (t = 0; t <= syn_retries && (1UL << t) < connect_timeout; t++)
+ ;
+ if (t <= syn_retries)
+ tcp_sock_set_syncnt(sock->sk, t - 1);
+}
+
+static void xs_tcp_do_set_connect_timeout(struct rpc_xprt *xprt,
+ unsigned long connect_timeout)
+{
+ struct sock_xprt *transport =
+ container_of(xprt, struct sock_xprt, xprt);
+ struct rpc_timeout to;
+ unsigned long initval;
+
+ memcpy(&to, xprt->timeout, sizeof(to));
+ /* Arbitrary lower limit */
+ initval = max_t(unsigned long, connect_timeout, XS_TCP_INIT_REEST_TO);
+ to.to_initval = initval;
+ to.to_maxval = initval;
+ to.to_retries = 0;
+ memcpy(&transport->tcp_timeout, &to, sizeof(transport->tcp_timeout));
+ xprt->timeout = &transport->tcp_timeout;
+ xprt->connect_timeout = connect_timeout;
}
static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
@@ -2257,25 +2297,12 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
unsigned long reconnect_timeout)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
- struct rpc_timeout to;
- unsigned long initval;
spin_lock(&xprt->transport_lock);
if (reconnect_timeout < xprt->max_reconnect_timeout)
xprt->max_reconnect_timeout = reconnect_timeout;
- if (connect_timeout < xprt->connect_timeout) {
- memcpy(&to, xprt->timeout, sizeof(to));
- initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1);
- /* Arbitrary lower limit */
- if (initval < XS_TCP_INIT_REEST_TO << 1)
- initval = XS_TCP_INIT_REEST_TO << 1;
- to.to_initval = initval;
- to.to_maxval = initval;
- memcpy(&transport->tcp_timeout, &to,
- sizeof(transport->tcp_timeout));
- xprt->timeout = &transport->tcp_timeout;
- xprt->connect_timeout = connect_timeout;
- }
+ if (connect_timeout < xprt->connect_timeout)
+ xs_tcp_do_set_connect_timeout(xprt, connect_timeout);
set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
spin_unlock(&xprt->transport_lock);
}
@@ -3328,8 +3355,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
xprt->timeout = &xs_tcp_default_timeout;
xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+ if (args->reconnect_timeout)
+ xprt->max_reconnect_timeout = args->reconnect_timeout;
+
xprt->connect_timeout = xprt->timeout->to_initval *
(xprt->timeout->to_retries + 1);
+ if (args->connect_timeout)
+ xs_tcp_do_set_connect_timeout(xprt, args->connect_timeout);
INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
INIT_WORK(&transport->error_worker, xs_error_handle);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8cc42aea19c7..5b045284849e 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -862,3 +862,28 @@ void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
NULL);
}
EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
+
+int switchdev_bridge_port_replay(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_brport_info brport_info = {
+ .brport = {
+ .dev = dev,
+ .ctx = ctx,
+ .atomic_nb = atomic_nb,
+ .blocking_nb = blocking_nb,
+ },
+ };
+ int err;
+
+ ASSERT_RTNL();
+
+ err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_REPLAY,
+ brport_dev, &brport_info.info,
+ extack);
+ return notifier_to_errno(err);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_replay);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 4b45ed631eb8..051ed5f6fc93 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -101,7 +101,7 @@ __init int net_sysctl_init(void)
* registering "/proc/sys/net" as an empty directory not in a
* network namespace.
*/
- net_header = register_sysctl("net", empty);
+ net_header = register_sysctl_sz("net", empty, 0);
if (!net_header)
goto out;
ret = register_pernet_subsys(&sysctl_pernet_ops);
@@ -122,12 +122,13 @@ out1:
* allocated.
*/
static void ensure_safe_net_sysctl(struct net *net, const char *path,
- struct ctl_table *table)
+ struct ctl_table *table, size_t table_size)
{
struct ctl_table *ent;
pr_debug("Registering net sysctl (net %p): %s\n", net, path);
- for (ent = table; ent->procname; ent++) {
+ ent = table;
+ for (size_t i = 0; i < table_size && ent->procname; ent++, i++) {
unsigned long addr;
const char *where;
@@ -160,15 +161,24 @@ static void ensure_safe_net_sysctl(struct net *net, const char *path,
}
}
-struct ctl_table_header *register_net_sysctl(struct net *net,
- const char *path, struct ctl_table *table)
+struct ctl_table_header *register_net_sysctl_sz(struct net *net,
+ const char *path,
+ struct ctl_table *table,
+ size_t table_size)
{
+ int count;
+ struct ctl_table *entry;
+
if (!net_eq(net, &init_net))
- ensure_safe_net_sysctl(net, path, table);
+ ensure_safe_net_sysctl(net, path, table, table_size);
+
+ entry = table;
+ for (count = 0 ; count < table_size && entry->procname; entry++, count++)
+ ;
- return __register_sysctl_table(&net->sysctls, path, table);
+ return __register_sysctl_table(&net->sysctls, path, table, count);
}
-EXPORT_SYMBOL_GPL(register_net_sysctl);
+EXPORT_SYMBOL_GPL(register_net_sysctl_sz);
void unregister_net_sysctl_table(struct ctl_table_header *header)
{
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 0772cfadaa0d..93f82398283d 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -131,6 +131,5 @@ bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr);
void tipc_set_node_id(struct net *net, u8 *id);
void tipc_set_node_addr(struct net *net, u32 addr);
char *tipc_nodeid2string(char *str, u8 *id);
-u32 tipc_node_id2hash(u8 *id128);
#endif
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 1ee60649bd17..41eac1ee0c09 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -214,8 +214,6 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
-int tipc_media_set_priority(const char *name, u32 new_value);
-int tipc_media_set_window(const char *name, u32 new_value);
int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
struct nlattr *attrs[]);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 0a3f7a70a50a..7eccd97e0609 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -197,7 +197,7 @@ static inline int less(u16 left, u16 right)
return less_eq(left, right) && (mod(right) != mod(left));
}
-static inline int in_range(u16 val, u16 min, u16 max)
+static inline int tipc_in_range(u16 val, u16 min, u16 max)
{
return !less(val, min) && !more(val, max);
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2eff1c7949cb..e33b4f29f77c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1623,7 +1623,7 @@ next_gap_ack:
last_ga->bgack_cnt);
}
/* Check against the last Gap ACK block */
- if (in_range(seqno, start, end))
+ if (tipc_in_range(seqno, start, end))
continue;
/* Update/release the packet peer is acking */
bc_has_acked = true;
@@ -2251,12 +2251,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
strncpy(if_name, data, TIPC_MAX_IF_NAME);
/* Update own tolerance if peer indicates a non-zero value */
- if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
+ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
l->tolerance = peers_tol;
l->bc_rcvlink->tolerance = peers_tol;
}
/* Update own priority if peer's priority is higher */
- if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
+ if (tipc_in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
l->priority = peers_prio;
/* If peer is going down we want full re-establish cycle */
@@ -2299,13 +2299,13 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
l->rcv_nxt_state = msg_seqno(hdr) + 1;
/* Update own tolerance if peer indicates a non-zero value */
- if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
+ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
l->tolerance = peers_tol;
l->bc_rcvlink->tolerance = peers_tol;
}
/* Update own prio if peer indicates a different value */
if ((peers_prio != l->priority) &&
- in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) {
+ tipc_in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) {
l->priority = peers_prio;
rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
diff --git a/net/tipc/link.h b/net/tipc/link.h
index a16f401fdabd..d80f5649b395 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -148,8 +148,6 @@ int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap,
struct tipc_gap_ack_blks *ga,
struct sk_buff_head *xmitq,
struct sk_buff_head *retrq);
-void tipc_link_build_bc_sync_msg(struct tipc_link *l,
- struct sk_buff_head *xmitq);
void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
struct sk_buff_head *xmitq);
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index e231e6964d61..c677f6f082df 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -67,7 +67,6 @@ struct distr_item {
__be32 key;
};
-void tipc_named_bcast(struct net *net, struct sk_buff *skb);
struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ);
struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities);
diff --git a/net/tipc/net.h b/net/tipc/net.h
index d0c91d2df20a..1cb1e43cf34a 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -43,7 +43,6 @@ extern const struct nla_policy tipc_nl_net_policy[];
int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
void tipc_net_finalize_work(struct work_struct *work);
-void tipc_sched_net_finalize(struct net *net, u32 addr);
void tipc_net_stop(struct net *net);
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 9b47c8409231..5bc076f2fa74 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -208,7 +208,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
goto err_out;
}
- info.attrs = attrbuf;
+ info.info.attrs = attrbuf;
if (nlmsg_len(cb.nlh) > 0) {
err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf,
@@ -1294,7 +1294,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
struct tipc_nl_compat_msg msg;
struct nlmsghdr *req_nlh;
struct nlmsghdr *rep_nlh;
- struct tipc_genlmsghdr *req_userhdr = info->userhdr;
+ struct tipc_genlmsghdr *req_userhdr = genl_info_userhdr(info);
memset(&msg, 0, sizeof(msg));
diff --git a/net/tipc/node.c b/net/tipc/node.c
index a9c5b6594889..3105abe97bb9 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2662,7 +2662,7 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *node;
@@ -2870,7 +2870,7 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
int err;
if (!prev_node) {
- struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
struct nlattr *mon[TIPC_NLA_MON_MAX + 1];
if (!attrs[TIPC_NLA_MON])
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ef8e5139a873..bb1118d02f95 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3791,7 +3791,7 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct tipc_sock *tsk;
if (!tsk_portid) {
- struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];
if (!attrs[TIPC_NLA_SOCK])
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 926232557e77..f892b0903dba 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -465,7 +465,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
int i;
if (!bid && !skip_cnt) {
- struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs;
struct net *net = sock_net(skb->sk);
struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1];
char *bname;
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 86cef1c68e03..28a8c0e80e3c 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -39,6 +39,7 @@
#include <linux/types.h>
#include <linux/skmsg.h>
#include <net/tls.h>
+#include <net/tls_prot.h>
#define TLS_PAGE_ORDER (min_t(unsigned int, PAGE_ALLOC_COSTLY_ORDER, \
TLS_MAX_PAYLOAD_SIZE >> PAGE_SHIFT))
@@ -50,6 +51,59 @@
#define TLS_DEC_STATS(net, field) \
SNMP_DEC_STATS((net)->mib.tls_statistics, field)
+struct tls_cipher_desc {
+ unsigned int nonce;
+ unsigned int iv;
+ unsigned int key;
+ unsigned int salt;
+ unsigned int tag;
+ unsigned int rec_seq;
+ unsigned int iv_offset;
+ unsigned int key_offset;
+ unsigned int salt_offset;
+ unsigned int rec_seq_offset;
+ char *cipher_name;
+ bool offloadable;
+ size_t crypto_info;
+};
+
+#define TLS_CIPHER_MIN TLS_CIPHER_AES_GCM_128
+#define TLS_CIPHER_MAX TLS_CIPHER_ARIA_GCM_256
+extern const struct tls_cipher_desc tls_cipher_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN];
+
+static inline const struct tls_cipher_desc *get_cipher_desc(u16 cipher_type)
+{
+ if (cipher_type < TLS_CIPHER_MIN || cipher_type > TLS_CIPHER_MAX)
+ return NULL;
+
+ return &tls_cipher_desc[cipher_type - TLS_CIPHER_MIN];
+}
+
+static inline char *crypto_info_iv(struct tls_crypto_info *crypto_info,
+ const struct tls_cipher_desc *cipher_desc)
+{
+ return (char *)crypto_info + cipher_desc->iv_offset;
+}
+
+static inline char *crypto_info_key(struct tls_crypto_info *crypto_info,
+ const struct tls_cipher_desc *cipher_desc)
+{
+ return (char *)crypto_info + cipher_desc->key_offset;
+}
+
+static inline char *crypto_info_salt(struct tls_crypto_info *crypto_info,
+ const struct tls_cipher_desc *cipher_desc)
+{
+ return (char *)crypto_info + cipher_desc->salt_offset;
+}
+
+static inline char *crypto_info_rec_seq(struct tls_crypto_info *crypto_info,
+ const struct tls_cipher_desc *cipher_desc)
+{
+ return (char *)crypto_info + cipher_desc->rec_seq_offset;
+}
+
+
/* TLS records are maintained in 'struct tls_rec'. It stores the memory pages
* allocated or mapped for each TLS record. After encryption, the records are
* stores in a linked list.
@@ -86,10 +140,6 @@ void tls_ctx_free(struct sock *sk, struct tls_context *ctx);
void update_sk_prot(struct sock *sk, struct tls_context *ctx);
int wait_on_pending_writer(struct sock *sk, long *timeo);
-int tls_sk_query(struct sock *sk, int optname, char __user *optval,
- int __user *optlen);
-int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
- unsigned int optlen);
void tls_err_abort(struct sock *sk, int err);
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
@@ -110,6 +160,8 @@ bool tls_sw_sock_is_readable(struct sock *sk);
ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
+int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t read_actor);
int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
void tls_device_splice_eof(struct socket *sock);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 529101eb20bd..8c94c926606a 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -440,9 +440,13 @@ static int tls_push_data(struct sock *sk,
long timeo;
if (flags &
- ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SPLICE_PAGES))
+ ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_SPLICE_PAGES | MSG_EOR))
return -EOPNOTSUPP;
+ if ((flags & (MSG_MORE | MSG_EOR)) == (MSG_MORE | MSG_EOR))
+ return -EINVAL;
+
if (unlikely(sk->sk_err))
return -sk->sk_err;
@@ -880,7 +884,7 @@ static int
tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
{
struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx);
- const struct tls_cipher_size_desc *cipher_sz;
+ const struct tls_cipher_desc *cipher_desc;
int err, offset, copy, data_len, pos;
struct sk_buff *skb, *skb_iter;
struct scatterlist sg[1];
@@ -894,10 +898,10 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
default:
return -EINVAL;
}
- cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_recv.info.cipher_type];
+ cipher_desc = get_cipher_desc(tls_ctx->crypto_recv.info.cipher_type);
rxm = strp_msg(tls_strp_msg(sw_ctx));
- orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv,
+ orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv,
sk->sk_allocation);
if (!orig_buf)
return -ENOMEM;
@@ -913,8 +917,8 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
sg_init_table(sg, 1);
sg_set_buf(&sg[0], buf,
- rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv);
- err = skb_copy_bits(skb, offset, buf, TLS_HEADER_SIZE + cipher_sz->iv);
+ rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv);
+ err = skb_copy_bits(skb, offset, buf, TLS_HEADER_SIZE + cipher_desc->iv);
if (err)
goto free_buf;
@@ -925,7 +929,7 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx)
else
err = 0;
- data_len = rxm->full_len - cipher_sz->tag;
+ data_len = rxm->full_len - cipher_desc->tag;
if (skb_pagelen(skb) > offset) {
copy = min_t(int, skb_pagelen(skb) - offset, data_len);
@@ -1042,7 +1046,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
- const struct tls_cipher_size_desc *cipher_sz;
+ const struct tls_cipher_desc *cipher_desc;
struct tls_record_info *start_marker_record;
struct tls_offload_context_tx *offload_ctx;
struct tls_crypto_info *crypto_info;
@@ -1075,46 +1079,32 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
goto release_netdev;
}
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv;
- rec_seq =
- ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq;
- break;
- case TLS_CIPHER_AES_GCM_256:
- iv = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->iv;
- rec_seq =
- ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->rec_seq;
- break;
- default:
+ cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+ if (!cipher_desc || !cipher_desc->offloadable) {
rc = -EINVAL;
goto release_netdev;
}
- cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type];
- /* Sanity-check the rec_seq_size for stack allocations */
- if (cipher_sz->rec_seq > TLS_MAX_REC_SEQ_SIZE) {
- rc = -EINVAL;
- goto release_netdev;
- }
+ iv = crypto_info_iv(crypto_info, cipher_desc);
+ rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc);
prot->version = crypto_info->version;
prot->cipher_type = crypto_info->cipher_type;
- prot->prepend_size = TLS_HEADER_SIZE + cipher_sz->iv;
- prot->tag_size = cipher_sz->tag;
+ prot->prepend_size = TLS_HEADER_SIZE + cipher_desc->iv;
+ prot->tag_size = cipher_desc->tag;
prot->overhead_size = prot->prepend_size + prot->tag_size;
- prot->iv_size = cipher_sz->iv;
- prot->salt_size = cipher_sz->salt;
- ctx->tx.iv = kmalloc(cipher_sz->iv + cipher_sz->salt, GFP_KERNEL);
+ prot->iv_size = cipher_desc->iv;
+ prot->salt_size = cipher_desc->salt;
+ ctx->tx.iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL);
if (!ctx->tx.iv) {
rc = -ENOMEM;
goto release_netdev;
}
- memcpy(ctx->tx.iv + cipher_sz->salt, iv, cipher_sz->iv);
+ memcpy(ctx->tx.iv + cipher_desc->salt, iv, cipher_desc->iv);
- prot->rec_seq_size = cipher_sz->rec_seq;
- ctx->tx.rec_seq = kmemdup(rec_seq, cipher_sz->rec_seq, GFP_KERNEL);
+ prot->rec_seq_size = cipher_desc->rec_seq;
+ ctx->tx.rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL);
if (!ctx->tx.rec_seq) {
rc = -ENOMEM;
goto free_iv;
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index b28c5e296dfd..1d743f310f4f 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -55,7 +55,7 @@ static int tls_enc_record(struct aead_request *aead_req,
struct tls_prot_info *prot)
{
unsigned char buf[TLS_HEADER_SIZE + MAX_IV_SIZE];
- const struct tls_cipher_size_desc *cipher_sz;
+ const struct tls_cipher_desc *cipher_desc;
struct scatterlist sg_in[3];
struct scatterlist sg_out[3];
unsigned int buf_size;
@@ -69,9 +69,9 @@ static int tls_enc_record(struct aead_request *aead_req,
default:
return -EINVAL;
}
- cipher_sz = &tls_cipher_size_desc[prot->cipher_type];
+ cipher_desc = get_cipher_desc(prot->cipher_type);
- buf_size = TLS_HEADER_SIZE + cipher_sz->iv;
+ buf_size = TLS_HEADER_SIZE + cipher_desc->iv;
len = min_t(int, *in_len, buf_size);
scatterwalk_copychunks(buf, in, len, 0);
@@ -85,11 +85,11 @@ static int tls_enc_record(struct aead_request *aead_req,
scatterwalk_pagedone(out, 1, 1);
len = buf[4] | (buf[3] << 8);
- len -= cipher_sz->iv;
+ len -= cipher_desc->iv;
- tls_make_aad(aad, len - cipher_sz->tag, (char *)&rcd_sn, buf[0], prot);
+ tls_make_aad(aad, len - cipher_desc->tag, (char *)&rcd_sn, buf[0], prot);
- memcpy(iv + cipher_sz->salt, buf + TLS_HEADER_SIZE, cipher_sz->iv);
+ memcpy(iv + cipher_desc->salt, buf + TLS_HEADER_SIZE, cipher_desc->iv);
sg_init_table(sg_in, ARRAY_SIZE(sg_in));
sg_init_table(sg_out, ARRAY_SIZE(sg_out));
@@ -100,7 +100,7 @@ static int tls_enc_record(struct aead_request *aead_req,
*in_len -= len;
if (*in_len < 0) {
- *in_len += cipher_sz->tag;
+ *in_len += cipher_desc->tag;
/* the input buffer doesn't contain the entire record.
* trim len accordingly. The resulting authentication tag
* will contain garbage, but we don't care, so we won't
@@ -121,7 +121,7 @@ static int tls_enc_record(struct aead_request *aead_req,
scatterwalk_pagedone(out, 1, 1);
}
- len -= cipher_sz->tag;
+ len -= cipher_desc->tag;
aead_request_set_crypt(aead_req, sg_in, sg_out, len, iv);
rc = crypto_aead_encrypt(aead_req);
@@ -309,14 +309,14 @@ static void fill_sg_out(struct scatterlist sg_out[3], void *buf,
int sync_size,
void *dummy_buf)
{
- const struct tls_cipher_size_desc *cipher_sz =
- &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type];
+ const struct tls_cipher_desc *cipher_desc =
+ get_cipher_desc(tls_ctx->crypto_send.info.cipher_type);
sg_set_buf(&sg_out[0], dummy_buf, sync_size);
sg_set_buf(&sg_out[1], nskb->data + tcp_payload_offset, payload_len);
/* Add room for authentication tag produced by crypto */
dummy_buf += sync_size;
- sg_set_buf(&sg_out[2], dummy_buf, cipher_sz->tag);
+ sg_set_buf(&sg_out[2], dummy_buf, cipher_desc->tag);
}
static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
@@ -328,7 +328,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
int tcp_payload_offset = skb_tcp_all_headers(skb);
int payload_len = skb->len - tcp_payload_offset;
- const struct tls_cipher_size_desc *cipher_sz;
+ const struct tls_cipher_desc *cipher_desc;
void *buf, *iv, *aad, *dummy_buf, *salt;
struct aead_request *aead_req;
struct sk_buff *nskb = NULL;
@@ -348,16 +348,16 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
default:
goto free_req;
}
- cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type];
- buf_len = cipher_sz->salt + cipher_sz->iv + TLS_AAD_SPACE_SIZE +
- sync_size + cipher_sz->tag;
+ cipher_desc = get_cipher_desc(tls_ctx->crypto_send.info.cipher_type);
+ buf_len = cipher_desc->salt + cipher_desc->iv + TLS_AAD_SPACE_SIZE +
+ sync_size + cipher_desc->tag;
buf = kmalloc(buf_len, GFP_ATOMIC);
if (!buf)
goto free_req;
iv = buf;
- memcpy(iv, salt, cipher_sz->salt);
- aad = buf + cipher_sz->salt + cipher_sz->iv;
+ memcpy(iv, salt, cipher_desc->salt);
+ aad = buf + cipher_desc->salt + cipher_desc->iv;
dummy_buf = aad + TLS_AAD_SPACE_SIZE;
nskb = alloc_skb(skb_headroom(skb) + skb->len, GFP_ATOMIC);
@@ -471,12 +471,15 @@ int tls_sw_fallback_init(struct sock *sk,
struct tls_offload_context_tx *offload_ctx,
struct tls_crypto_info *crypto_info)
{
- const struct tls_cipher_size_desc *cipher_sz;
- const u8 *key;
+ const struct tls_cipher_desc *cipher_desc;
int rc;
+ cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+ if (!cipher_desc || !cipher_desc->offloadable)
+ return -EINVAL;
+
offload_ctx->aead_send =
- crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+ crypto_alloc_aead(cipher_desc->cipher_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(offload_ctx->aead_send)) {
rc = PTR_ERR(offload_ctx->aead_send);
pr_err_ratelimited("crypto_alloc_aead failed rc=%d\n", rc);
@@ -484,24 +487,13 @@ int tls_sw_fallback_init(struct sock *sk,
goto err_out;
}
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key;
- break;
- case TLS_CIPHER_AES_GCM_256:
- key = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->key;
- break;
- default:
- rc = -EINVAL;
- goto free_aead;
- }
- cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type];
-
- rc = crypto_aead_setkey(offload_ctx->aead_send, key, cipher_sz->key);
+ rc = crypto_aead_setkey(offload_ctx->aead_send,
+ crypto_info_key(crypto_info, cipher_desc),
+ cipher_desc->key);
if (rc)
goto free_aead;
- rc = crypto_aead_setauthsize(offload_ctx->aead_send, cipher_sz->tag);
+ rc = crypto_aead_setauthsize(offload_ctx->aead_send, cipher_desc->tag);
if (rc)
goto free_aead;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 4a8ee2f6badb..02f583ff9239 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -58,23 +58,66 @@ enum {
TLS_NUM_PROTS,
};
-#define CIPHER_SIZE_DESC(cipher) [cipher] = { \
+#define CHECK_CIPHER_DESC(cipher,ci) \
+ static_assert(cipher ## _IV_SIZE <= MAX_IV_SIZE); \
+ static_assert(cipher ## _REC_SEQ_SIZE <= TLS_MAX_REC_SEQ_SIZE); \
+ static_assert(cipher ## _TAG_SIZE == TLS_TAG_SIZE); \
+ static_assert(sizeof_field(struct ci, iv) == cipher ## _IV_SIZE); \
+ static_assert(sizeof_field(struct ci, key) == cipher ## _KEY_SIZE); \
+ static_assert(sizeof_field(struct ci, salt) == cipher ## _SALT_SIZE); \
+ static_assert(sizeof_field(struct ci, rec_seq) == cipher ## _REC_SEQ_SIZE);
+
+#define __CIPHER_DESC(ci) \
+ .iv_offset = offsetof(struct ci, iv), \
+ .key_offset = offsetof(struct ci, key), \
+ .salt_offset = offsetof(struct ci, salt), \
+ .rec_seq_offset = offsetof(struct ci, rec_seq), \
+ .crypto_info = sizeof(struct ci)
+
+#define CIPHER_DESC(cipher,ci,algname,_offloadable) [cipher - TLS_CIPHER_MIN] = { \
+ .nonce = cipher ## _IV_SIZE, \
.iv = cipher ## _IV_SIZE, \
.key = cipher ## _KEY_SIZE, \
.salt = cipher ## _SALT_SIZE, \
.tag = cipher ## _TAG_SIZE, \
.rec_seq = cipher ## _REC_SEQ_SIZE, \
+ .cipher_name = algname, \
+ .offloadable = _offloadable, \
+ __CIPHER_DESC(ci), \
}
-const struct tls_cipher_size_desc tls_cipher_size_desc[] = {
- CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_128),
- CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_256),
- CIPHER_SIZE_DESC(TLS_CIPHER_AES_CCM_128),
- CIPHER_SIZE_DESC(TLS_CIPHER_CHACHA20_POLY1305),
- CIPHER_SIZE_DESC(TLS_CIPHER_SM4_GCM),
- CIPHER_SIZE_DESC(TLS_CIPHER_SM4_CCM),
+#define CIPHER_DESC_NONCE0(cipher,ci,algname,_offloadable) [cipher - TLS_CIPHER_MIN] = { \
+ .nonce = 0, \
+ .iv = cipher ## _IV_SIZE, \
+ .key = cipher ## _KEY_SIZE, \
+ .salt = cipher ## _SALT_SIZE, \
+ .tag = cipher ## _TAG_SIZE, \
+ .rec_seq = cipher ## _REC_SEQ_SIZE, \
+ .cipher_name = algname, \
+ .offloadable = _offloadable, \
+ __CIPHER_DESC(ci), \
+}
+
+const struct tls_cipher_desc tls_cipher_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN] = {
+ CIPHER_DESC(TLS_CIPHER_AES_GCM_128, tls12_crypto_info_aes_gcm_128, "gcm(aes)", true),
+ CIPHER_DESC(TLS_CIPHER_AES_GCM_256, tls12_crypto_info_aes_gcm_256, "gcm(aes)", true),
+ CIPHER_DESC(TLS_CIPHER_AES_CCM_128, tls12_crypto_info_aes_ccm_128, "ccm(aes)", false),
+ CIPHER_DESC_NONCE0(TLS_CIPHER_CHACHA20_POLY1305, tls12_crypto_info_chacha20_poly1305, "rfc7539(chacha20,poly1305)", false),
+ CIPHER_DESC(TLS_CIPHER_SM4_GCM, tls12_crypto_info_sm4_gcm, "gcm(sm4)", false),
+ CIPHER_DESC(TLS_CIPHER_SM4_CCM, tls12_crypto_info_sm4_ccm, "ccm(sm4)", false),
+ CIPHER_DESC(TLS_CIPHER_ARIA_GCM_128, tls12_crypto_info_aria_gcm_128, "gcm(aria)", false),
+ CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256, tls12_crypto_info_aria_gcm_256, "gcm(aria)", false),
};
+CHECK_CIPHER_DESC(TLS_CIPHER_AES_GCM_128, tls12_crypto_info_aes_gcm_128);
+CHECK_CIPHER_DESC(TLS_CIPHER_AES_GCM_256, tls12_crypto_info_aes_gcm_256);
+CHECK_CIPHER_DESC(TLS_CIPHER_AES_CCM_128, tls12_crypto_info_aes_ccm_128);
+CHECK_CIPHER_DESC(TLS_CIPHER_CHACHA20_POLY1305, tls12_crypto_info_chacha20_poly1305);
+CHECK_CIPHER_DESC(TLS_CIPHER_SM4_GCM, tls12_crypto_info_sm4_gcm);
+CHECK_CIPHER_DESC(TLS_CIPHER_SM4_CCM, tls12_crypto_info_sm4_ccm);
+CHECK_CIPHER_DESC(TLS_CIPHER_ARIA_GCM_128, tls12_crypto_info_aria_gcm_128);
+CHECK_CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256, tls12_crypto_info_aria_gcm_256);
+
static const struct proto *saved_tcpv6_prot;
static DEFINE_MUTEX(tcpv6_prot_mutex);
static const struct proto *saved_tcpv4_prot;
@@ -392,6 +435,7 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
int __user *optlen, int tx)
{
int rc = 0;
+ const struct tls_cipher_desc *cipher_desc;
struct tls_context *ctx = tls_get_ctx(sk);
struct tls_crypto_info *crypto_info;
struct cipher_context *cctx;
@@ -430,172 +474,19 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
goto out;
}
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128: {
- struct tls12_crypto_info_aes_gcm_128 *
- crypto_info_aes_gcm_128 =
- container_of(crypto_info,
- struct tls12_crypto_info_aes_gcm_128,
- info);
-
- if (len != sizeof(*crypto_info_aes_gcm_128)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(crypto_info_aes_gcm_128->iv,
- cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
- TLS_CIPHER_AES_GCM_128_IV_SIZE);
- memcpy(crypto_info_aes_gcm_128->rec_seq, cctx->rec_seq,
- TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
- if (copy_to_user(optval,
- crypto_info_aes_gcm_128,
- sizeof(*crypto_info_aes_gcm_128)))
- rc = -EFAULT;
- break;
- }
- case TLS_CIPHER_AES_GCM_256: {
- struct tls12_crypto_info_aes_gcm_256 *
- crypto_info_aes_gcm_256 =
- container_of(crypto_info,
- struct tls12_crypto_info_aes_gcm_256,
- info);
-
- if (len != sizeof(*crypto_info_aes_gcm_256)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(crypto_info_aes_gcm_256->iv,
- cctx->iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE,
- TLS_CIPHER_AES_GCM_256_IV_SIZE);
- memcpy(crypto_info_aes_gcm_256->rec_seq, cctx->rec_seq,
- TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE);
- if (copy_to_user(optval,
- crypto_info_aes_gcm_256,
- sizeof(*crypto_info_aes_gcm_256)))
- rc = -EFAULT;
- break;
- }
- case TLS_CIPHER_AES_CCM_128: {
- struct tls12_crypto_info_aes_ccm_128 *aes_ccm_128 =
- container_of(crypto_info,
- struct tls12_crypto_info_aes_ccm_128, info);
-
- if (len != sizeof(*aes_ccm_128)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(aes_ccm_128->iv,
- cctx->iv + TLS_CIPHER_AES_CCM_128_SALT_SIZE,
- TLS_CIPHER_AES_CCM_128_IV_SIZE);
- memcpy(aes_ccm_128->rec_seq, cctx->rec_seq,
- TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE);
- if (copy_to_user(optval, aes_ccm_128, sizeof(*aes_ccm_128)))
- rc = -EFAULT;
- break;
- }
- case TLS_CIPHER_CHACHA20_POLY1305: {
- struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305 =
- container_of(crypto_info,
- struct tls12_crypto_info_chacha20_poly1305,
- info);
-
- if (len != sizeof(*chacha20_poly1305)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(chacha20_poly1305->iv,
- cctx->iv + TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE,
- TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE);
- memcpy(chacha20_poly1305->rec_seq, cctx->rec_seq,
- TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE);
- if (copy_to_user(optval, chacha20_poly1305,
- sizeof(*chacha20_poly1305)))
- rc = -EFAULT;
- break;
+ cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+ if (!cipher_desc || len != cipher_desc->crypto_info) {
+ rc = -EINVAL;
+ goto out;
}
- case TLS_CIPHER_SM4_GCM: {
- struct tls12_crypto_info_sm4_gcm *sm4_gcm_info =
- container_of(crypto_info,
- struct tls12_crypto_info_sm4_gcm, info);
- if (len != sizeof(*sm4_gcm_info)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(sm4_gcm_info->iv,
- cctx->iv + TLS_CIPHER_SM4_GCM_SALT_SIZE,
- TLS_CIPHER_SM4_GCM_IV_SIZE);
- memcpy(sm4_gcm_info->rec_seq, cctx->rec_seq,
- TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE);
- if (copy_to_user(optval, sm4_gcm_info, sizeof(*sm4_gcm_info)))
- rc = -EFAULT;
- break;
- }
- case TLS_CIPHER_SM4_CCM: {
- struct tls12_crypto_info_sm4_ccm *sm4_ccm_info =
- container_of(crypto_info,
- struct tls12_crypto_info_sm4_ccm, info);
+ memcpy(crypto_info_iv(crypto_info, cipher_desc),
+ cctx->iv + cipher_desc->salt, cipher_desc->iv);
+ memcpy(crypto_info_rec_seq(crypto_info, cipher_desc),
+ cctx->rec_seq, cipher_desc->rec_seq);
- if (len != sizeof(*sm4_ccm_info)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(sm4_ccm_info->iv,
- cctx->iv + TLS_CIPHER_SM4_CCM_SALT_SIZE,
- TLS_CIPHER_SM4_CCM_IV_SIZE);
- memcpy(sm4_ccm_info->rec_seq, cctx->rec_seq,
- TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE);
- if (copy_to_user(optval, sm4_ccm_info, sizeof(*sm4_ccm_info)))
- rc = -EFAULT;
- break;
- }
- case TLS_CIPHER_ARIA_GCM_128: {
- struct tls12_crypto_info_aria_gcm_128 *
- crypto_info_aria_gcm_128 =
- container_of(crypto_info,
- struct tls12_crypto_info_aria_gcm_128,
- info);
-
- if (len != sizeof(*crypto_info_aria_gcm_128)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(crypto_info_aria_gcm_128->iv,
- cctx->iv + TLS_CIPHER_ARIA_GCM_128_SALT_SIZE,
- TLS_CIPHER_ARIA_GCM_128_IV_SIZE);
- memcpy(crypto_info_aria_gcm_128->rec_seq, cctx->rec_seq,
- TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE);
- if (copy_to_user(optval,
- crypto_info_aria_gcm_128,
- sizeof(*crypto_info_aria_gcm_128)))
- rc = -EFAULT;
- break;
- }
- case TLS_CIPHER_ARIA_GCM_256: {
- struct tls12_crypto_info_aria_gcm_256 *
- crypto_info_aria_gcm_256 =
- container_of(crypto_info,
- struct tls12_crypto_info_aria_gcm_256,
- info);
-
- if (len != sizeof(*crypto_info_aria_gcm_256)) {
- rc = -EINVAL;
- goto out;
- }
- memcpy(crypto_info_aria_gcm_256->iv,
- cctx->iv + TLS_CIPHER_ARIA_GCM_256_SALT_SIZE,
- TLS_CIPHER_ARIA_GCM_256_IV_SIZE);
- memcpy(crypto_info_aria_gcm_256->rec_seq, cctx->rec_seq,
- TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE);
- if (copy_to_user(optval,
- crypto_info_aria_gcm_256,
- sizeof(*crypto_info_aria_gcm_256)))
- rc = -EFAULT;
- break;
- }
- default:
- rc = -EINVAL;
- }
+ if (copy_to_user(optval, crypto_info, cipher_desc->crypto_info))
+ rc = -EFAULT;
out:
return rc;
@@ -696,7 +587,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
struct tls_crypto_info *crypto_info;
struct tls_crypto_info *alt_crypto_info;
struct tls_context *ctx = tls_get_ctx(sk);
- size_t optsize;
+ const struct tls_cipher_desc *cipher_desc;
int rc = 0;
int conf;
@@ -737,46 +628,23 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
}
}
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- optsize = sizeof(struct tls12_crypto_info_aes_gcm_128);
- break;
- case TLS_CIPHER_AES_GCM_256: {
- optsize = sizeof(struct tls12_crypto_info_aes_gcm_256);
- break;
+ cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+ if (!cipher_desc) {
+ rc = -EINVAL;
+ goto err_crypto_info;
}
- case TLS_CIPHER_AES_CCM_128:
- optsize = sizeof(struct tls12_crypto_info_aes_ccm_128);
- break;
- case TLS_CIPHER_CHACHA20_POLY1305:
- optsize = sizeof(struct tls12_crypto_info_chacha20_poly1305);
- break;
- case TLS_CIPHER_SM4_GCM:
- optsize = sizeof(struct tls12_crypto_info_sm4_gcm);
- break;
- case TLS_CIPHER_SM4_CCM:
- optsize = sizeof(struct tls12_crypto_info_sm4_ccm);
- break;
+
+ switch (crypto_info->cipher_type) {
case TLS_CIPHER_ARIA_GCM_128:
- if (crypto_info->version != TLS_1_2_VERSION) {
- rc = -EINVAL;
- goto err_crypto_info;
- }
- optsize = sizeof(struct tls12_crypto_info_aria_gcm_128);
- break;
case TLS_CIPHER_ARIA_GCM_256:
if (crypto_info->version != TLS_1_2_VERSION) {
rc = -EINVAL;
goto err_crypto_info;
}
- optsize = sizeof(struct tls12_crypto_info_aria_gcm_256);
break;
- default:
- rc = -EINVAL;
- goto err_crypto_info;
}
- if (optlen != optsize) {
+ if (optlen != cipher_desc->crypto_info) {
rc = -EINVAL;
goto err_crypto_info;
}
@@ -959,10 +827,12 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE];
ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read;
ops[TLS_BASE][TLS_SW ].poll = tls_sk_poll;
+ ops[TLS_BASE][TLS_SW ].read_sock = tls_sw_read_sock;
ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE];
ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read;
ops[TLS_SW ][TLS_SW ].poll = tls_sk_poll;
+ ops[TLS_SW ][TLS_SW ].read_sock = tls_sw_read_sock;
#ifdef CONFIG_TLS_DEVICE
ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index f37f4a0fcd3c..ca1e0e198ceb 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -369,7 +369,6 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
static int tls_strp_read_copyin(struct tls_strparser *strp)
{
- struct socket *sock = strp->sk->sk_socket;
read_descriptor_t desc;
desc.arg.data = strp;
@@ -377,7 +376,7 @@ static int tls_strp_read_copyin(struct tls_strparser *strp)
desc.count = 1; /* give more than one skb per call */
/* sk should be locked here, so okay to do read_sock */
- sock->ops->read_sock(strp->sk, &desc, tls_strp_copyin);
+ tcp_read_sock(strp->sk, &desc, tls_strp_copyin);
return desc.error;
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 53f944e6d8ef..d1fc295b83b5 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -817,7 +817,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
psock = sk_psock_get(sk);
if (!psock || !policy) {
err = tls_push_record(sk, flags, record_type);
- if (err && sk->sk_err == EBADMSG) {
+ if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
err = -sk->sk_err;
@@ -846,7 +846,7 @@ more_data:
switch (psock->eval) {
case __SK_PASS:
err = tls_push_record(sk, flags, record_type);
- if (err && sk->sk_err == EBADMSG) {
+ if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
err = -sk->sk_err;
@@ -984,6 +984,9 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
int ret = 0;
int pending;
+ if (!eor && (msg->msg_flags & MSG_EOR))
+ return -EINVAL;
+
if (unlikely(msg->msg_controllen)) {
ret = tls_process_cmsg(sk, msg, &record_type);
if (ret) {
@@ -1193,7 +1196,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
int ret;
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
- MSG_CMSG_COMPAT | MSG_SPLICE_PAGES |
+ MSG_CMSG_COMPAT | MSG_SPLICE_PAGES | MSG_EOR |
MSG_SENDPAGE_NOPOLICY))
return -EOPNOTSUPP;
@@ -1845,13 +1848,10 @@ tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot,
return sk_flush_backlog(sk);
}
-static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
- bool nonblock)
+static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx,
+ bool nonblock)
{
long timeo;
- int err;
-
- lock_sock(sk);
timeo = sock_rcvtimeo(sk, nonblock);
@@ -1865,26 +1865,30 @@ static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
!READ_ONCE(ctx->reader_present), &wait);
remove_wait_queue(&ctx->wq, &wait);
- if (timeo <= 0) {
- err = -EAGAIN;
- goto err_unlock;
- }
- if (signal_pending(current)) {
- err = sock_intr_errno(timeo);
- goto err_unlock;
- }
+ if (timeo <= 0)
+ return -EAGAIN;
+ if (signal_pending(current))
+ return sock_intr_errno(timeo);
}
WRITE_ONCE(ctx->reader_present, 1);
return 0;
+}
-err_unlock:
- release_sock(sk);
+static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx,
+ bool nonblock)
+{
+ int err;
+
+ lock_sock(sk);
+ err = tls_rx_reader_acquire(sk, ctx, nonblock);
+ if (err)
+ release_sock(sk);
return err;
}
-static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
+static void tls_rx_reader_release(struct sock *sk, struct tls_sw_context_rx *ctx)
{
if (unlikely(ctx->reader_contended)) {
if (wq_has_sleeper(&ctx->wq))
@@ -1896,6 +1900,11 @@ static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
}
WRITE_ONCE(ctx->reader_present, 0);
+}
+
+static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx)
+{
+ tls_rx_reader_release(sk, ctx);
release_sock(sk);
}
@@ -2193,6 +2202,102 @@ splice_requeue:
goto splice_read_end;
}
+int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t read_actor)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ struct tls_prot_info *prot = &tls_ctx->prot_info;
+ struct strp_msg *rxm = NULL;
+ struct sk_buff *skb = NULL;
+ struct sk_psock *psock;
+ size_t flushed_at = 0;
+ bool released = true;
+ struct tls_msg *tlm;
+ ssize_t copied = 0;
+ ssize_t decrypted;
+ int err, used;
+
+ psock = sk_psock_get(sk);
+ if (psock) {
+ sk_psock_put(sk, psock);
+ return -EINVAL;
+ }
+ err = tls_rx_reader_acquire(sk, ctx, true);
+ if (err < 0)
+ return err;
+
+ /* If crypto failed the connection is broken */
+ err = ctx->async_wait.err;
+ if (err)
+ goto read_sock_end;
+
+ decrypted = 0;
+ do {
+ if (!skb_queue_empty(&ctx->rx_list)) {
+ skb = __skb_dequeue(&ctx->rx_list);
+ rxm = strp_msg(skb);
+ tlm = tls_msg(skb);
+ } else {
+ struct tls_decrypt_arg darg;
+
+ err = tls_rx_rec_wait(sk, NULL, true, released);
+ if (err <= 0)
+ goto read_sock_end;
+
+ memset(&darg.inargs, 0, sizeof(darg.inargs));
+
+ err = tls_rx_one_record(sk, NULL, &darg);
+ if (err < 0) {
+ tls_err_abort(sk, -EBADMSG);
+ goto read_sock_end;
+ }
+
+ released = tls_read_flush_backlog(sk, prot, INT_MAX,
+ 0, decrypted,
+ &flushed_at);
+ skb = darg.skb;
+ rxm = strp_msg(skb);
+ tlm = tls_msg(skb);
+ decrypted += rxm->full_len;
+
+ tls_rx_rec_done(ctx);
+ }
+
+ /* read_sock does not support reading control messages */
+ if (tlm->control != TLS_RECORD_TYPE_DATA) {
+ err = -EINVAL;
+ goto read_sock_requeue;
+ }
+
+ used = read_actor(desc, skb, rxm->offset, rxm->full_len);
+ if (used <= 0) {
+ if (!copied)
+ err = used;
+ goto read_sock_requeue;
+ }
+ copied += used;
+ if (used < rxm->full_len) {
+ rxm->offset += used;
+ rxm->full_len -= used;
+ if (!desc->count)
+ goto read_sock_requeue;
+ } else {
+ consume_skb(skb);
+ if (!desc->count)
+ skb = NULL;
+ }
+ } while (skb);
+
+read_sock_end:
+ tls_rx_reader_release(sk, ctx);
+ return copied ? : err;
+
+read_sock_requeue:
+ __skb_queue_head(&ctx->rx_list, skb);
+ goto read_sock_end;
+}
+
bool tls_sw_sock_is_readable(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -2485,10 +2590,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
struct tls_sw_context_rx *sw_ctx_rx = NULL;
struct cipher_context *cctx;
struct crypto_aead **aead;
- u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size;
struct crypto_tfm *tfm;
- char *iv, *rec_seq, *key, *salt, *cipher_name;
- size_t keysize;
+ char *iv, *rec_seq, *key, *salt;
+ const struct tls_cipher_desc *cipher_desc;
+ u16 nonce_size;
int rc = 0;
if (!ctx) {
@@ -2542,148 +2647,19 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
aead = &sw_ctx_rx->aead_recv;
}
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128: {
- struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
-
- gcm_128_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
- tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE;
- iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
- iv = gcm_128_info->iv;
- rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE;
- rec_seq = gcm_128_info->rec_seq;
- keysize = TLS_CIPHER_AES_GCM_128_KEY_SIZE;
- key = gcm_128_info->key;
- salt = gcm_128_info->salt;
- salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE;
- cipher_name = "gcm(aes)";
- break;
- }
- case TLS_CIPHER_AES_GCM_256: {
- struct tls12_crypto_info_aes_gcm_256 *gcm_256_info;
-
- gcm_256_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_AES_GCM_256_IV_SIZE;
- tag_size = TLS_CIPHER_AES_GCM_256_TAG_SIZE;
- iv_size = TLS_CIPHER_AES_GCM_256_IV_SIZE;
- iv = gcm_256_info->iv;
- rec_seq_size = TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE;
- rec_seq = gcm_256_info->rec_seq;
- keysize = TLS_CIPHER_AES_GCM_256_KEY_SIZE;
- key = gcm_256_info->key;
- salt = gcm_256_info->salt;
- salt_size = TLS_CIPHER_AES_GCM_256_SALT_SIZE;
- cipher_name = "gcm(aes)";
- break;
- }
- case TLS_CIPHER_AES_CCM_128: {
- struct tls12_crypto_info_aes_ccm_128 *ccm_128_info;
-
- ccm_128_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_AES_CCM_128_IV_SIZE;
- tag_size = TLS_CIPHER_AES_CCM_128_TAG_SIZE;
- iv_size = TLS_CIPHER_AES_CCM_128_IV_SIZE;
- iv = ccm_128_info->iv;
- rec_seq_size = TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE;
- rec_seq = ccm_128_info->rec_seq;
- keysize = TLS_CIPHER_AES_CCM_128_KEY_SIZE;
- key = ccm_128_info->key;
- salt = ccm_128_info->salt;
- salt_size = TLS_CIPHER_AES_CCM_128_SALT_SIZE;
- cipher_name = "ccm(aes)";
- break;
- }
- case TLS_CIPHER_CHACHA20_POLY1305: {
- struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305_info;
-
- chacha20_poly1305_info = (void *)crypto_info;
- nonce_size = 0;
- tag_size = TLS_CIPHER_CHACHA20_POLY1305_TAG_SIZE;
- iv_size = TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE;
- iv = chacha20_poly1305_info->iv;
- rec_seq_size = TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE;
- rec_seq = chacha20_poly1305_info->rec_seq;
- keysize = TLS_CIPHER_CHACHA20_POLY1305_KEY_SIZE;
- key = chacha20_poly1305_info->key;
- salt = chacha20_poly1305_info->salt;
- salt_size = TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE;
- cipher_name = "rfc7539(chacha20,poly1305)";
- break;
- }
- case TLS_CIPHER_SM4_GCM: {
- struct tls12_crypto_info_sm4_gcm *sm4_gcm_info;
-
- sm4_gcm_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_SM4_GCM_IV_SIZE;
- tag_size = TLS_CIPHER_SM4_GCM_TAG_SIZE;
- iv_size = TLS_CIPHER_SM4_GCM_IV_SIZE;
- iv = sm4_gcm_info->iv;
- rec_seq_size = TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE;
- rec_seq = sm4_gcm_info->rec_seq;
- keysize = TLS_CIPHER_SM4_GCM_KEY_SIZE;
- key = sm4_gcm_info->key;
- salt = sm4_gcm_info->salt;
- salt_size = TLS_CIPHER_SM4_GCM_SALT_SIZE;
- cipher_name = "gcm(sm4)";
- break;
- }
- case TLS_CIPHER_SM4_CCM: {
- struct tls12_crypto_info_sm4_ccm *sm4_ccm_info;
-
- sm4_ccm_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_SM4_CCM_IV_SIZE;
- tag_size = TLS_CIPHER_SM4_CCM_TAG_SIZE;
- iv_size = TLS_CIPHER_SM4_CCM_IV_SIZE;
- iv = sm4_ccm_info->iv;
- rec_seq_size = TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE;
- rec_seq = sm4_ccm_info->rec_seq;
- keysize = TLS_CIPHER_SM4_CCM_KEY_SIZE;
- key = sm4_ccm_info->key;
- salt = sm4_ccm_info->salt;
- salt_size = TLS_CIPHER_SM4_CCM_SALT_SIZE;
- cipher_name = "ccm(sm4)";
- break;
- }
- case TLS_CIPHER_ARIA_GCM_128: {
- struct tls12_crypto_info_aria_gcm_128 *aria_gcm_128_info;
-
- aria_gcm_128_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_ARIA_GCM_128_IV_SIZE;
- tag_size = TLS_CIPHER_ARIA_GCM_128_TAG_SIZE;
- iv_size = TLS_CIPHER_ARIA_GCM_128_IV_SIZE;
- iv = aria_gcm_128_info->iv;
- rec_seq_size = TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE;
- rec_seq = aria_gcm_128_info->rec_seq;
- keysize = TLS_CIPHER_ARIA_GCM_128_KEY_SIZE;
- key = aria_gcm_128_info->key;
- salt = aria_gcm_128_info->salt;
- salt_size = TLS_CIPHER_ARIA_GCM_128_SALT_SIZE;
- cipher_name = "gcm(aria)";
- break;
- }
- case TLS_CIPHER_ARIA_GCM_256: {
- struct tls12_crypto_info_aria_gcm_256 *gcm_256_info;
-
- gcm_256_info = (void *)crypto_info;
- nonce_size = TLS_CIPHER_ARIA_GCM_256_IV_SIZE;
- tag_size = TLS_CIPHER_ARIA_GCM_256_TAG_SIZE;
- iv_size = TLS_CIPHER_ARIA_GCM_256_IV_SIZE;
- iv = gcm_256_info->iv;
- rec_seq_size = TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE;
- rec_seq = gcm_256_info->rec_seq;
- keysize = TLS_CIPHER_ARIA_GCM_256_KEY_SIZE;
- key = gcm_256_info->key;
- salt = gcm_256_info->salt;
- salt_size = TLS_CIPHER_ARIA_GCM_256_SALT_SIZE;
- cipher_name = "gcm(aria)";
- break;
- }
- default:
+ cipher_desc = get_cipher_desc(crypto_info->cipher_type);
+ if (!cipher_desc) {
rc = -EINVAL;
goto free_priv;
}
+ nonce_size = cipher_desc->nonce;
+
+ iv = crypto_info_iv(crypto_info, cipher_desc);
+ key = crypto_info_key(crypto_info, cipher_desc);
+ salt = crypto_info_salt(crypto_info, cipher_desc);
+ rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc);
+
if (crypto_info->version == TLS_1_3_VERSION) {
nonce_size = 0;
prot->aad_size = TLS_HEADER_SIZE;
@@ -2694,9 +2670,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
}
/* Sanity-check the sizes for stack allocations. */
- if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE ||
- rec_seq_size > TLS_MAX_REC_SEQ_SIZE || tag_size != TLS_TAG_SIZE ||
- prot->aad_size > TLS_MAX_AAD_SIZE) {
+ if (nonce_size > MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) {
rc = -EINVAL;
goto free_priv;
}
@@ -2704,28 +2678,29 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
prot->version = crypto_info->version;
prot->cipher_type = crypto_info->cipher_type;
prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
- prot->tag_size = tag_size;
+ prot->tag_size = cipher_desc->tag;
prot->overhead_size = prot->prepend_size +
prot->tag_size + prot->tail_size;
- prot->iv_size = iv_size;
- prot->salt_size = salt_size;
- cctx->iv = kmalloc(iv_size + salt_size, GFP_KERNEL);
+ prot->iv_size = cipher_desc->iv;
+ prot->salt_size = cipher_desc->salt;
+ cctx->iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL);
if (!cctx->iv) {
rc = -ENOMEM;
goto free_priv;
}
/* Note: 128 & 256 bit salt are the same size */
- prot->rec_seq_size = rec_seq_size;
- memcpy(cctx->iv, salt, salt_size);
- memcpy(cctx->iv + salt_size, iv, iv_size);
- cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
+ prot->rec_seq_size = cipher_desc->rec_seq;
+ memcpy(cctx->iv, salt, cipher_desc->salt);
+ memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv);
+
+ cctx->rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL);
if (!cctx->rec_seq) {
rc = -ENOMEM;
goto free_iv;
}
if (!*aead) {
- *aead = crypto_alloc_aead(cipher_name, 0, 0);
+ *aead = crypto_alloc_aead(cipher_desc->cipher_name, 0, 0);
if (IS_ERR(*aead)) {
rc = PTR_ERR(*aead);
*aead = NULL;
@@ -2735,8 +2710,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
ctx->push_pending_record = tls_sw_push_pending_record;
- rc = crypto_aead_setkey(*aead, key, keysize);
-
+ rc = crypto_aead_setkey(*aead, key, cipher_desc->key);
if (rc)
goto free_aead;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 86930a8ed012..3e8a04a13668 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -680,7 +680,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
* What the above comment does talk about? --ANK(980817)
*/
- if (unix_tot_inflight)
+ if (READ_ONCE(unix_tot_inflight))
unix_gc(); /* Garbage collect fds */
}
diff --git a/net/unix/scm.c b/net/unix/scm.c
index f9152881d77f..6ff628f2349f 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -29,10 +29,11 @@ struct sock *unix_get_socket(struct file *filp)
/* Socket ? */
if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
struct socket *sock = SOCKET_I(inode);
+ const struct proto_ops *ops = READ_ONCE(sock->ops);
struct sock *s = sock->sk;
/* PF_UNIX ? */
- if (s && sock->ops && sock->ops->family == PF_UNIX)
+ if (s && ops && ops->family == PF_UNIX)
u_sock = s;
} else {
/* Could be an io_uring instance */
@@ -63,7 +64,7 @@ void unix_inflight(struct user_struct *user, struct file *fp)
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
}
- user->unix_inflight++;
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
spin_unlock(&unix_gc_lock);
}
@@ -84,7 +85,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
}
- user->unix_inflight--;
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
spin_unlock(&unix_gc_lock);
}
@@ -98,7 +99,7 @@ static inline bool too_many_unix_fds(struct task_struct *p)
{
struct user_struct *user = current_user();
- if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+ if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
return false;
}
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index 500129aa710c..3e84b31c355a 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -36,7 +36,8 @@ int __net_init unix_sysctl_register(struct net *net)
table[0].data = &net->unx.sysctl_max_dgram_qlen;
}
- net->unx.ctl = register_net_sysctl(net, "net/unix", table);
+ net->unx.ctl = register_net_sysctl_sz(net, "net/unix", table,
+ ARRAY_SIZE(unix_table));
if (net->unx.ctl == NULL)
goto err_reg;
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index b769fc258931..352d042b130b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -348,37 +348,34 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk,
size_t len)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- size_t bytes, total = 0, off;
- struct sk_buff *skb, *tmp;
- int err = -EFAULT;
+ struct sk_buff *skb;
+ size_t total = 0;
+ int err;
spin_lock_bh(&vvs->rx_lock);
- skb_queue_walk_safe(&vvs->rx_queue, skb, tmp) {
- off = 0;
+ skb_queue_walk(&vvs->rx_queue, skb) {
+ size_t bytes;
- if (total == len)
- break;
+ bytes = len - total;
+ if (bytes > skb->len)
+ bytes = skb->len;
- while (total < len && off < skb->len) {
- bytes = len - total;
- if (bytes > skb->len - off)
- bytes = skb->len - off;
+ spin_unlock_bh(&vvs->rx_lock);
- /* sk_lock is held by caller so no one else can dequeue.
- * Unlock rx_lock since memcpy_to_msg() may sleep.
- */
- spin_unlock_bh(&vvs->rx_lock);
+ /* sk_lock is held by caller so no one else can dequeue.
+ * Unlock rx_lock since memcpy_to_msg() may sleep.
+ */
+ err = memcpy_to_msg(msg, skb->data, bytes);
+ if (err)
+ goto out;
- err = memcpy_to_msg(msg, skb->data + off, bytes);
- if (err)
- goto out;
+ total += bytes;
- spin_lock_bh(&vvs->rx_lock);
+ spin_lock_bh(&vvs->rx_lock);
- total += bytes;
- off += bytes;
- }
+ if (total == len)
+ break;
}
spin_unlock_bh(&vvs->rx_lock);
@@ -463,6 +460,63 @@ out:
return err;
}
+static ssize_t
+virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk,
+ struct msghdr *msg)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ struct sk_buff *skb;
+ size_t total, len;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ if (!vvs->msg_count) {
+ spin_unlock_bh(&vvs->rx_lock);
+ return 0;
+ }
+
+ total = 0;
+ len = msg_data_left(msg);
+
+ skb_queue_walk(&vvs->rx_queue, skb) {
+ struct virtio_vsock_hdr *hdr;
+
+ if (total < len) {
+ size_t bytes;
+ int err;
+
+ bytes = len - total;
+ if (bytes > skb->len)
+ bytes = skb->len;
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ /* sk_lock is held by caller so no one else can dequeue.
+ * Unlock rx_lock since memcpy_to_msg() may sleep.
+ */
+ err = memcpy_to_msg(msg, skb->data, bytes);
+ if (err)
+ return err;
+
+ spin_lock_bh(&vvs->rx_lock);
+ }
+
+ total += skb->len;
+ hdr = virtio_vsock_hdr(skb);
+
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
+ msg->msg_flags |= MSG_EOR;
+
+ break;
+ }
+ }
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ return total;
+}
+
static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
int flags)
@@ -557,9 +611,9 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
int flags)
{
if (flags & MSG_PEEK)
- return -EOPNOTSUPP;
-
- return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
+ return virtio_transport_seqpacket_do_peek(vsk, msg);
+ else
+ return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index b7b072194282..dbda3ababa14 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -116,9 +116,6 @@ struct vmci_transport {
spinlock_t lock; /* protects sk. */
};
-int vmci_transport_register(void);
-void vmci_transport_unregister(void);
-
int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
struct sockaddr_vm *src);
int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 8a807b609ef7..507d184b8b40 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -298,7 +298,7 @@ struct cfg80211_cqm_config {
u32 rssi_hyst;
s32 last_rssi_event_value;
int n_rssi_thresholds;
- s32 rssi_thresholds[];
+ s32 rssi_thresholds[] __counted_by(n_rssi_thresholds);
};
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index ac059cefbeb3..775cac4d6100 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -281,6 +281,11 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
ether_addr_equal(req->bss->bssid, wdev->u.client.connected_addr))
return -EALREADY;
+ if (ether_addr_equal(req->bss->bssid, dev->dev_addr) ||
+ (req->link_id >= 0 &&
+ ether_addr_equal(req->ap_mld_addr, dev->dev_addr)))
+ return -EINVAL;
+
return rdev_auth(rdev, dev, req);
}
@@ -335,6 +340,9 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
if (req->links[i].bss == req->links[j].bss)
return -EINVAL;
}
+
+ if (ether_addr_equal(req->links[i].bss->bssid, dev->dev_addr))
+ return -EINVAL;
}
if (wdev->connected &&
@@ -342,6 +350,11 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
!ether_addr_equal(wdev->u.client.connected_addr, req->prev_bssid)))
return -EALREADY;
+ if ((req->bss && ether_addr_equal(req->bss->bssid, dev->dev_addr)) ||
+ (req->link_id >= 0 &&
+ ether_addr_equal(req->ap_mld_addr, dev->dev_addr)))
+ return -EINVAL;
+
cfg80211_oper_and_ht_capa(&req->ht_capa_mask,
rdev->wiphy.ht_capa_mod_mask);
cfg80211_oper_and_vht_capa(&req->vht_capa_mask,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8bcf8e293308..de47838aca4f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -323,6 +323,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = {
[NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED] = { .type = NLA_FLAG },
[NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED] = { .type = NLA_FLAG },
[NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK] = { .type = NLA_FLAG },
+ [NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR] = { .type = NLA_U8 },
};
static const struct nla_policy
@@ -4889,13 +4890,12 @@ static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy,
acl = kzalloc(struct_size(acl, mac_addrs, n_entries), GFP_KERNEL);
if (!acl)
return ERR_PTR(-ENOMEM);
+ acl->n_acl_entries = n_entries;
nla_for_each_nested(attr, info->attrs[NL80211_ATTR_MAC_ADDRS], tmp) {
memcpy(acl->mac_addrs[i].addr, nla_data(attr), ETH_ALEN);
i++;
}
-
- acl->n_acl_entries = n_entries;
acl->acl_policy = acl_policy;
return acl;
@@ -5439,13 +5439,13 @@ nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs)
elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL);
if (!elems)
return ERR_PTR(-ENOMEM);
+ elems->cnt = num_elems;
nla_for_each_nested(nl_elems, attrs, rem_elems) {
elems->elem[i].data = nla_data(nl_elems);
elems->elem[i].len = nla_len(nl_elems);
i++;
}
- elems->cnt = num_elems;
return elems;
}
@@ -5471,13 +5471,13 @@ nl80211_parse_rnr_elems(struct wiphy *wiphy, struct nlattr *attrs,
elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL);
if (!elems)
return ERR_PTR(-ENOMEM);
+ elems->cnt = num_elems;
nla_for_each_nested(nl_elems, attrs, rem_elems) {
elems->elem[i].data = nla_data(nl_elems);
elems->elem[i].len = nla_len(nl_elems);
i++;
}
- elems->cnt = num_elems;
return elems;
}
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 0278d817bb02..b4af53f9b227 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -120,6 +120,5 @@ void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev);
/* peer measurement */
int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info);
-int nl80211_pmsr_dump_results(struct sk_buff *skb, struct netlink_callback *cb);
#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c
index 27a1732264f9..29afaf3da54f 100644
--- a/net/wireless/ocb.c
+++ b/net/wireless/ocb.c
@@ -68,6 +68,9 @@ int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev,
if (!rdev->ops->leave_ocb)
return -EOPNOTSUPP;
+ if (!wdev->u.ocb.chandef.chan)
+ return -ENOTCONN;
+
err = rdev_leave_ocb(rdev, dev);
if (!err)
memset(&wdev->u.ocb.chandef, 0, sizeof(wdev->u.ocb.chandef));
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index 77000a264855..9611aa0bd051 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -291,6 +291,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info)
req = kzalloc(struct_size(req, peers, count), GFP_KERNEL);
if (!req)
return -ENOMEM;
+ req->n_peers = count;
if (info->attrs[NL80211_ATTR_TIMEOUT])
req->timeout = nla_get_u32(info->attrs[NL80211_ATTR_TIMEOUT]);
@@ -321,8 +322,6 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info)
goto out_err;
idx++;
}
-
- req->n_peers = count;
req->cookie = cfg80211_assign_cookie(rdev);
req->nl_portid = info->snd_portid;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 10ea85c03147..55f8b9b0e06d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -25,6 +25,7 @@
#include <linux/vmalloc.h>
#include <net/xdp_sock_drv.h>
#include <net/busy_poll.h>
+#include <net/netdev_rx_queue.h>
#include <net/xdp.h>
#include "xsk_queue.h"
@@ -135,14 +136,14 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
return 0;
}
-static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
+ u32 flags)
{
- struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
u64 addr;
int err;
addr = xp_get_handle(xskb);
- err = xskq_prod_reserve_desc(xs->rx, addr, len);
+ err = xskq_prod_reserve_desc(xs->rx, addr, len, flags);
if (err) {
xs->rx_queue_full++;
return err;
@@ -152,48 +153,138 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return 0;
}
-static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
+static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- void *from_buf, *to_buf;
- u32 metalen;
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+ u32 frags = xdp_buff_has_frags(xdp);
+ struct xdp_buff_xsk *pos, *tmp;
+ struct list_head *xskb_list;
+ u32 contd = 0;
+ int err;
- if (unlikely(xdp_data_meta_unsupported(from))) {
- from_buf = from->data;
- to_buf = to->data;
- metalen = 0;
- } else {
- from_buf = from->data_meta;
- metalen = from->data - from->data_meta;
- to_buf = to->data - metalen;
+ if (frags)
+ contd = XDP_PKT_CONTD;
+
+ err = __xsk_rcv_zc(xs, xskb, len, contd);
+ if (err || likely(!frags))
+ goto out;
+
+ xskb_list = &xskb->pool->xskb_list;
+ list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
+ if (list_is_singular(xskb_list))
+ contd = 0;
+ len = pos->xdp.data_end - pos->xdp.data;
+ err = __xsk_rcv_zc(xs, pos, len, contd);
+ if (err)
+ return err;
+ list_del(&pos->xskb_list_node);
}
- memcpy(to_buf, from_buf, len + metalen);
+out:
+ return err;
}
-static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static void *xsk_copy_xdp_start(struct xdp_buff *from)
{
+ if (unlikely(xdp_data_meta_unsupported(from)))
+ return from->data;
+ else
+ return from->data_meta;
+}
+
+static u32 xsk_copy_xdp(void *to, void **from, u32 to_len,
+ u32 *from_len, skb_frag_t **frag, u32 rem)
+{
+ u32 copied = 0;
+
+ while (1) {
+ u32 copy_len = min_t(u32, *from_len, to_len);
+
+ memcpy(to, *from, copy_len);
+ copied += copy_len;
+ if (rem == copied)
+ return copied;
+
+ if (*from_len == copy_len) {
+ *from = skb_frag_address(*frag);
+ *from_len = skb_frag_size((*frag)++);
+ } else {
+ *from += copy_len;
+ *from_len -= copy_len;
+ }
+ if (to_len == copy_len)
+ return copied;
+
+ to_len -= copy_len;
+ to += copy_len;
+ }
+}
+
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+{
+ u32 frame_size = xsk_pool_get_rx_frame_size(xs->pool);
+ void *copy_from = xsk_copy_xdp_start(xdp), *copy_to;
+ u32 from_len, meta_len, rem, num_desc;
+ struct xdp_buff_xsk *xskb;
struct xdp_buff *xsk_xdp;
- int err;
- u32 len;
+ skb_frag_t *frag;
- len = xdp->data_end - xdp->data;
- if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
- xs->rx_dropped++;
- return -ENOSPC;
+ from_len = xdp->data_end - copy_from;
+ meta_len = xdp->data - copy_from;
+ rem = len + meta_len;
+
+ if (len <= frame_size && !xdp_buff_has_frags(xdp)) {
+ int err;
+
+ xsk_xdp = xsk_buff_alloc(xs->pool);
+ if (!xsk_xdp) {
+ xs->rx_dropped++;
+ return -ENOMEM;
+ }
+ memcpy(xsk_xdp->data - meta_len, copy_from, rem);
+ xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
+ err = __xsk_rcv_zc(xs, xskb, len, 0);
+ if (err) {
+ xsk_buff_free(xsk_xdp);
+ return err;
+ }
+
+ return 0;
}
- xsk_xdp = xsk_buff_alloc(xs->pool);
- if (!xsk_xdp) {
+ num_desc = (len - 1) / frame_size + 1;
+
+ if (!xsk_buff_can_alloc(xs->pool, num_desc)) {
xs->rx_dropped++;
return -ENOMEM;
}
+ if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) {
+ xs->rx_queue_full++;
+ return -ENOBUFS;
+ }
- xsk_copy_xdp(xsk_xdp, xdp, len);
- err = __xsk_rcv_zc(xs, xsk_xdp, len);
- if (err) {
- xsk_buff_free(xsk_xdp);
- return err;
+ if (xdp_buff_has_frags(xdp)) {
+ struct skb_shared_info *sinfo;
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ frag = &sinfo->frags[0];
}
+
+ do {
+ u32 to_len = frame_size + meta_len;
+ u32 copied;
+
+ xsk_xdp = xsk_buff_alloc(xs->pool);
+ copy_to = xsk_xdp->data - meta_len;
+
+ copied = xsk_copy_xdp(copy_to, &copy_from, to_len, &from_len, &frag, rem);
+ rem -= copied;
+
+ xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
+ __xsk_rcv_zc(xs, xskb, copied - meta_len, rem ? XDP_PKT_CONTD : 0);
+ meta_len = 0;
+ } while (rem);
+
return 0;
}
@@ -215,7 +306,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
return false;
}
-static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
if (!xsk_is_bound(xs))
return -ENXIO;
@@ -223,6 +314,11 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
+ if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
+ xs->rx_dropped++;
+ return -ENOSPC;
+ }
+
sk_mark_napi_id_once_xdp(&xs->sk, xdp);
return 0;
}
@@ -236,12 +332,13 @@ static void xsk_flush(struct xdp_sock *xs)
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
+ u32 len = xdp_get_buff_len(xdp);
int err;
spin_lock_bh(&xs->rx_lock);
- err = xsk_rcv_check(xs, xdp);
+ err = xsk_rcv_check(xs, xdp, len);
if (!err) {
- err = __xsk_rcv(xs, xdp);
+ err = __xsk_rcv(xs, xdp, len);
xsk_flush(xs);
}
spin_unlock_bh(&xs->rx_lock);
@@ -250,19 +347,19 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
+ u32 len = xdp_get_buff_len(xdp);
int err;
- u32 len;
- err = xsk_rcv_check(xs, xdp);
+ err = xsk_rcv_check(xs, xdp, len);
if (err)
return err;
if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
len = xdp->data_end - xdp->data;
- return __xsk_rcv_zc(xs, xdp, len);
+ return xsk_rcv_zc(xs, xdp, len);
}
- err = __xsk_rcv(xs, xdp);
+ err = __xsk_rcv(xs, xdp, len);
if (!err)
xdp_return_buff(xdp);
return err;
@@ -321,7 +418,8 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
rcu_read_lock();
list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
- xs->tx->queue_empty_descs++;
+ if (xskq_has_descs(xs->tx))
+ xskq_cons_release(xs->tx);
continue;
}
@@ -408,37 +506,91 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
-static void xsk_destruct_skb(struct sk_buff *skb)
+static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ ret = xskq_prod_reserve_addr(xs->pool->cq, addr);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+
+ return ret;
+}
+
+static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ xskq_prod_submit_n(xs->pool->cq, n);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+}
+
+static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n)
{
- u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
- struct xdp_sock *xs = xdp_sk(skb->sk);
unsigned long flags;
spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_submit_addr(xs->pool->cq, addr);
+ xskq_prod_cancel_n(xs->pool->cq, n);
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+}
+static u32 xsk_get_num_desc(struct sk_buff *skb)
+{
+ return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
+}
+
+static void xsk_destruct_skb(struct sk_buff *skb)
+{
+ xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
sock_wfree(skb);
}
+static void xsk_set_destructor_arg(struct sk_buff *skb)
+{
+ long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;
+
+ skb_shinfo(skb)->destructor_arg = (void *)num;
+}
+
+static void xsk_consume_skb(struct sk_buff *skb)
+{
+ struct xdp_sock *xs = xdp_sk(skb->sk);
+
+ skb->destructor = sock_wfree;
+ xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb));
+ /* Free skb without triggering the perf drop trace */
+ consume_skb(skb);
+ xs->skb = NULL;
+}
+
+static void xsk_drop_skb(struct sk_buff *skb)
+{
+ xdp_sk(skb->sk)->tx->invalid_descs += xsk_get_num_desc(skb);
+ xsk_consume_skb(skb);
+}
+
static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
struct xdp_desc *desc)
{
struct xsk_buff_pool *pool = xs->pool;
u32 hr, len, ts, offset, copy, copied;
- struct sk_buff *skb;
+ struct sk_buff *skb = xs->skb;
struct page *page;
void *buffer;
int err, i;
u64 addr;
- hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
+ if (!skb) {
+ hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
- skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
- if (unlikely(!skb))
- return ERR_PTR(err);
+ skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
+ if (unlikely(!skb))
+ return ERR_PTR(err);
- skb_reserve(skb, hr);
+ skb_reserve(skb, hr);
+ }
addr = desc->addr;
len = desc->len;
@@ -448,7 +600,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
offset = offset_in_page(buffer);
addr = buffer - pool->addrs;
- for (copied = 0, i = 0; copied < len; i++) {
+ for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
+ if (unlikely(i >= MAX_SKB_FRAGS))
+ return ERR_PTR(-EOVERFLOW);
+
page = pool->umem->pgs[addr >> PAGE_SHIFT];
get_page(page);
@@ -473,43 +628,81 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
struct xdp_desc *desc)
{
struct net_device *dev = xs->dev;
- struct sk_buff *skb;
+ struct sk_buff *skb = xs->skb;
+ int err;
if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
skb = xsk_build_skb_zerocopy(xs, desc);
- if (IS_ERR(skb))
- return skb;
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ goto free_err;
+ }
} else {
u32 hr, tr, len;
void *buffer;
- int err;
- hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
- tr = dev->needed_tailroom;
+ buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
len = desc->len;
- skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
- if (unlikely(!skb))
- return ERR_PTR(err);
+ if (!skb) {
+ hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
+ tr = dev->needed_tailroom;
+ skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
+ if (unlikely(!skb))
+ goto free_err;
- skb_reserve(skb, hr);
- skb_put(skb, len);
+ skb_reserve(skb, hr);
+ skb_put(skb, len);
- buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
- err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err)) {
- kfree_skb(skb);
- return ERR_PTR(err);
+ err = skb_store_bits(skb, 0, buffer, len);
+ if (unlikely(err)) {
+ kfree_skb(skb);
+ goto free_err;
+ }
+ } else {
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ struct page *page;
+ u8 *vaddr;
+
+ if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
+ err = -EOVERFLOW;
+ goto free_err;
+ }
+
+ page = alloc_page(xs->sk.sk_allocation);
+ if (unlikely(!page)) {
+ err = -EAGAIN;
+ goto free_err;
+ }
+
+ vaddr = kmap_local_page(page);
+ memcpy(vaddr, buffer, len);
+ kunmap_local(vaddr);
+
+ skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
}
}
skb->dev = dev;
skb->priority = xs->sk.sk_priority;
skb->mark = READ_ONCE(xs->sk.sk_mark);
- skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
skb->destructor = xsk_destruct_skb;
+ xsk_set_destructor_arg(skb);
return skb;
+
+free_err:
+ if (err == -EOVERFLOW) {
+ /* Drop the packet */
+ xsk_set_destructor_arg(xs->skb);
+ xsk_drop_skb(xs->skb);
+ xskq_cons_release(xs->tx);
+ } else {
+ /* Let application retry */
+ xsk_cq_cancel_locked(xs, 1);
+ }
+
+ return ERR_PTR(err);
}
static int __xsk_generic_xmit(struct sock *sk)
@@ -519,7 +712,6 @@ static int __xsk_generic_xmit(struct sock *sk)
bool sent_frame = false;
struct xdp_desc desc;
struct sk_buff *skb;
- unsigned long flags;
int err = 0;
mutex_lock(&xs->mutex);
@@ -544,47 +736,51 @@ static int __xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- if (xskq_prod_reserve(xs->pool->cq)) {
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ if (xsk_cq_reserve_addr_locked(xs, desc.addr))
goto out;
- }
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
skb = xsk_build_skb(xs, &desc);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_cancel(xs->pool->cq);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
- goto out;
+ if (err != -EOVERFLOW)
+ goto out;
+ err = 0;
+ continue;
+ }
+
+ xskq_cons_release(xs->tx);
+
+ if (xp_mb_desc(&desc)) {
+ xs->skb = skb;
+ continue;
}
err = __dev_direct_xmit(skb, xs->queue_id);
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */
- skb->destructor = sock_wfree;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_cancel(xs->pool->cq);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
- /* Free skb without triggering the perf drop trace */
- consume_skb(skb);
+ xskq_cons_cancel_n(xs->tx, xsk_get_num_desc(skb));
+ xsk_consume_skb(skb);
err = -EAGAIN;
goto out;
}
- xskq_cons_release(xs->tx);
/* Ignore NET_XMIT_CN as packet might have been sent */
if (err == NET_XMIT_DROP) {
/* SKB completed but not sent */
err = -EBUSY;
+ xs->skb = NULL;
goto out;
}
sent_frame = true;
+ xs->skb = NULL;
}
- xs->tx->queue_empty_descs++;
+ if (xskq_has_descs(xs->tx)) {
+ if (xs->skb)
+ xsk_drop_skb(xs->skb);
+ xskq_cons_release(xs->tx);
+ }
out:
if (sent_frame)
@@ -834,6 +1030,9 @@ static int xsk_release(struct socket *sock)
net = sock_net(sk);
+ if (xs->skb)
+ xsk_drop_skb(xs->skb);
+
mutex_lock(&net->xdp.lock);
sk_del_node_init_rcu(sk);
mutex_unlock(&net->xdp.lock);
@@ -897,7 +1096,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
flags = sxdp->sxdp_flags;
if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
- XDP_USE_NEED_WAKEUP))
+ XDP_USE_NEED_WAKEUP | XDP_USE_SG))
return -EINVAL;
bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
@@ -929,7 +1128,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
struct socket *sock;
if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
- (flags & XDP_USE_NEED_WAKEUP)) {
+ (flags & XDP_USE_NEED_WAKEUP) || (flags & XDP_USE_SG)) {
/* Cannot specify flags for shared sockets. */
err = -EINVAL;
goto out_unlock;
@@ -1029,6 +1228,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->dev = dev;
xs->zc = xs->umem->zc;
+ xs->sg = !!(flags & XDP_USE_SG);
xs->queue_id = qid;
xp_add_xsk(xs->pool, xs);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 26f6d304451e..b3f7b310811e 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -86,6 +86,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
pool->umem = umem;
pool->addrs = umem->addrs;
INIT_LIST_HEAD(&pool->free_list);
+ INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
spin_lock_init(&pool->xsk_tx_list_lock);
spin_lock_init(&pool->cq_lock);
@@ -99,6 +100,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
xskb->pool = pool;
xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
INIT_LIST_HEAD(&xskb->free_list_node);
+ INIT_LIST_HEAD(&xskb->xskb_list_node);
if (pool->unaligned)
pool->free_heads[i] = xskb;
else
@@ -187,6 +189,11 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
goto err_unreg_pool;
}
+ if (netdev->xdp_zc_max_segs == 1 && (flags & XDP_USE_SG)) {
+ err = -EOPNOTSUPP;
+ goto err_unreg_pool;
+ }
+
bpf.command = XDP_SETUP_XSK_POOL;
bpf.xsk.pool = pool;
bpf.xsk.queue_id = queue_id;
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index c014217f5fa7..22b36c8143cf 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -111,6 +111,9 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
sock_diag_save_cookie(sk, msg->xdiag_cookie);
mutex_lock(&xs->mutex);
+ if (READ_ONCE(xs->state) == XSK_UNBOUND)
+ goto out_nlmsg_trim;
+
if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
goto out_nlmsg_trim;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 6d40a77fccbe..13354a1e4280 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -48,6 +48,11 @@ struct xsk_queue {
size_t ring_vmalloc_size;
};
+struct parsed_desc {
+ u32 mb;
+ u32 valid;
+};
+
/* The structure of the shared state of the rings are a simple
* circular buffer, as outlined in
* Documentation/core-api/circular-buffers.rst. For the Rx and
@@ -130,18 +135,26 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
return false;
}
+static inline bool xp_unused_options_set(u32 options)
+{
+ return options & ~XDP_PKT_CONTD;
+}
+
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
u64 offset = desc->addr & (pool->chunk_size - 1);
+ if (!desc->len)
+ return false;
+
if (offset + desc->len > pool->chunk_size)
return false;
if (desc->addr >= pool->addrs_cnt)
return false;
- if (desc->options)
+ if (xp_unused_options_set(desc->options))
return false;
return true;
}
@@ -151,6 +164,9 @@ static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
{
u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ if (!desc->len)
+ return false;
+
if (desc->len > pool->chunk_size)
return false;
@@ -158,7 +174,7 @@ static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
return false;
- if (desc->options)
+ if (xp_unused_options_set(desc->options))
return false;
return true;
}
@@ -170,6 +186,11 @@ static inline bool xp_validate_desc(struct xsk_buff_pool *pool,
xp_aligned_validate_desc(pool, desc);
}
+static inline bool xskq_has_descs(struct xsk_queue *q)
+{
+ return q->cached_cons != q->cached_prod;
+}
+
static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
struct xdp_desc *d,
struct xsk_buff_pool *pool)
@@ -185,17 +206,15 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q,
struct xdp_desc *desc,
struct xsk_buff_pool *pool)
{
- while (q->cached_cons != q->cached_prod) {
+ if (q->cached_cons != q->cached_prod) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx = q->cached_cons & q->ring_mask;
*desc = ring->desc[idx];
- if (xskq_cons_is_valid_desc(q, desc, pool))
- return true;
-
- q->cached_cons++;
+ return xskq_cons_is_valid_desc(q, desc, pool);
}
+ q->queue_empty_descs++;
return false;
}
@@ -204,30 +223,52 @@ static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
q->cached_cons += cnt;
}
-static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
- u32 max)
+static inline void parse_desc(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ struct xdp_desc *desc, struct parsed_desc *parsed)
+{
+ parsed->valid = xskq_cons_is_valid_desc(q, desc, pool);
+ parsed->mb = xp_mb_desc(desc);
+}
+
+static inline
+u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
+ u32 max)
{
u32 cached_cons = q->cached_cons, nb_entries = 0;
struct xdp_desc *descs = pool->tx_descs;
+ u32 total_descs = 0, nr_frags = 0;
+ /* track first entry, if stumble upon *any* invalid descriptor, rewind
+ * current packet that consists of frags and stop the processing
+ */
while (cached_cons != q->cached_prod && nb_entries < max) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx = cached_cons & q->ring_mask;
+ struct parsed_desc parsed;
descs[nb_entries] = ring->desc[idx];
- if (unlikely(!xskq_cons_is_valid_desc(q, &descs[nb_entries], pool))) {
- /* Skip the entry */
- cached_cons++;
- continue;
+ cached_cons++;
+ parse_desc(q, pool, &descs[nb_entries], &parsed);
+ if (unlikely(!parsed.valid))
+ break;
+
+ if (likely(!parsed.mb)) {
+ total_descs += (nr_frags + 1);
+ nr_frags = 0;
+ } else {
+ nr_frags++;
+ if (nr_frags == pool->netdev->xdp_zc_max_segs) {
+ nr_frags = 0;
+ break;
+ }
}
-
nb_entries++;
- cached_cons++;
}
+ cached_cons -= nr_frags;
/* Release valid plus any invalid entries */
xskq_cons_release_n(q, cached_cons - q->cached_cons);
- return nb_entries;
+ return total_descs;
}
/* Functions for consumers */
@@ -292,6 +333,11 @@ static inline void xskq_cons_release(struct xsk_queue *q)
q->cached_cons++;
}
+static inline void xskq_cons_cancel_n(struct xsk_queue *q, u32 cnt)
+{
+ q->cached_cons -= cnt;
+}
+
static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
{
/* No barriers needed since data is not accessed */
@@ -319,9 +365,9 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
return xskq_prod_nb_free(q, 1) ? false : true;
}
-static inline void xskq_prod_cancel(struct xsk_queue *q)
+static inline void xskq_prod_cancel_n(struct xsk_queue *q, u32 cnt)
{
- q->cached_prod--;
+ q->cached_prod -= cnt;
}
static inline int xskq_prod_reserve(struct xsk_queue *q)
@@ -360,7 +406,7 @@ static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_de
}
static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
- u64 addr, u32 len)
+ u64 addr, u32 len, u32 flags)
{
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
u32 idx;
@@ -372,6 +418,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
idx = q->cached_prod++ & q->ring_mask;
ring->desc[idx].addr = addr;
ring->desc[idx].len = len;
+ ring->desc[idx].options = flags;
return 0;
}
@@ -386,16 +433,6 @@ static inline void xskq_prod_submit(struct xsk_queue *q)
__xskq_prod_submit(q, q->cached_prod);
}
-static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr)
-{
- struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- u32 idx = q->ring->producer;
-
- ring->desc[idx++ & q->ring_mask] = addr;
-
- __xskq_prod_submit(q, idx);
-}
-
static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
{
__xskq_prod_submit(q, q->ring->producer + nb_entries);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 533697e2488f..3784534c9185 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -247,12 +247,6 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return -EINVAL;
}
- /* We don't yet support UDP encapsulation and TFC padding. */
- if (x->encap || x->tfcpad) {
- NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded");
- return -EINVAL;
- }
-
if (xuo->flags &
~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND | XFRM_OFFLOAD_PACKET)) {
NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request");
@@ -260,6 +254,13 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
}
is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET;
+
+ /* We don't yet support UDP encapsulation and TFC padding. */
+ if ((!is_packet_offload && x->encap) || x->tfcpad) {
+ NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded");
+ return -EINVAL;
+ }
+
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
index 0c6c5ef65f9d..7fdeafc838a7 100644
--- a/net/xfrm/xfrm_sysctl.c
+++ b/net/xfrm/xfrm_sysctl.c
@@ -44,6 +44,7 @@ static struct ctl_table xfrm_table[] = {
int __net_init xfrm_sysctl_init(struct net *net)
{
struct ctl_table *table;
+ size_t table_size = ARRAY_SIZE(xfrm_table);
__xfrm_sysctl_init(net);
@@ -56,10 +57,13 @@ int __net_init xfrm_sysctl_init(struct net *net)
table[3].data = &net->xfrm.sysctl_acq_expires;
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
+ if (net->user_ns != &init_user_ns) {
table[0].procname = NULL;
+ table_size = 0;
+ }
- net->xfrm.sysctl_hdr = register_net_sysctl(net, "net/core", table);
+ net->xfrm.sysctl_hdr = register_net_sysctl_sz(net, "net/core", table,
+ table_size);
if (!net->xfrm.sysctl_hdr)
goto out_register;
return 0;