diff options
285 files changed, 10817 insertions, 6302 deletions
diff --git a/Documentation/arch/s390/driver-model.rst b/Documentation/arch/s390/driver-model.rst index ad4bc2dbea43..ad18f129fb0b 100644 --- a/Documentation/arch/s390/driver-model.rst +++ b/Documentation/arch/s390/driver-model.rst @@ -244,7 +244,7 @@ information about the interrupt from the irb parameter. -------------------- The ccwgroup mechanism is designed to handle devices consisting of multiple ccw -devices, like lcs or ctc. +devices, like qeth or ctc. The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to this attributes creates a ccwgroup device consisting of these ccw devices (if diff --git a/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml b/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml index 9bcbacb6640d..55d6a8379025 100644 --- a/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml +++ b/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml @@ -44,6 +44,9 @@ properties: phy-mode: enum: - rgmii + - rgmii-id + - rgmii-rxid + - rgmii-txid - rmii phy-handle: true diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index 9660ffb1ed6a..96fa1f1522ed 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -14,9 +14,10 @@ $defs: pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 len-or-limit: - # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. + # literal int, const name, or limit based on fixed-width type + # e.g. u8-min, u16-max, etc. type: [ string, integer ] - pattern: ^[su](8|16|32|64)-(min|max)$ + pattern: ^[0-9A-Za-z_-]+$ minimum: 0 # Schema for specs @@ -160,7 +161,7 @@ properties: type: string type: &attr-type enum: [ unused, pad, flag, binary, - uint, sint, u8, u16, u32, u64, s32, s64, + uint, sint, u8, u16, u32, u64, s8, s16, s32, s64, string, nest, indexed-array, nest-type-value ] doc: description: Documentation of the attribute. diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 16380e12cabe..a8c5b521937d 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -14,9 +14,10 @@ $defs: pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 len-or-limit: - # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. + # literal int, const name, or limit based on fixed-width type + # e.g. u8-min, u16-max, etc. type: [ string, integer ] - pattern: ^[su](8|16|32|64)-(min|max)$ + pattern: ^[0-9A-Za-z_-]+$ minimum: 0 # Schema for specs @@ -151,6 +152,9 @@ properties: the right formatting mechanism when displaying values of this type. enum: [ hex, mac, fddi, ipv4, ipv6, uuid ] + struct: + description: Name of the nested struct type. + type: string # End genetlink-legacy attribute-sets: @@ -203,7 +207,7 @@ properties: type: &attr-type description: The netlink attribute type enum: [ unused, pad, flag, binary, bitfield32, - uint, sint, u8, u16, u32, u64, s32, s64, + uint, sint, u8, u16, u32, u64, s8, s16, s32, s64, string, nest, indexed-array, nest-type-value ] doc: description: Documentation of the attribute. diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index b036227b46f1..40efbbad76ab 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -14,9 +14,10 @@ $defs: pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 len-or-limit: - # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. + # literal int, const name, or limit based on fixed-width type + # e.g. u8-min, u16-max, etc. type: [ string, integer ] - pattern: ^[su](8|16|32|64)-(min|max)$ + pattern: ^[0-9A-Za-z_-]+$ minimum: 0 # Schema for specs @@ -123,7 +124,7 @@ properties: type: string type: &attr-type enum: [ unused, pad, flag, binary, - uint, sint, u8, u16, u32, u64, s32, s64, + uint, sint, u8, u16, u32, u64, s8, s16, s32, s64, string, nest, indexed-array, nest-type-value ] doc: description: Documentation of the attribute. diff --git a/Documentation/netlink/specs/conntrack.yaml b/Documentation/netlink/specs/conntrack.yaml new file mode 100644 index 000000000000..840dc4504216 --- /dev/null +++ b/Documentation/netlink/specs/conntrack.yaml @@ -0,0 +1,643 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: conntrack +protocol: netlink-raw +protonum: 12 + +doc: + Netfilter connection tracking subsystem over nfnetlink + +definitions: + - + name: nfgenmsg + type: struct + members: + - + name: nfgen-family + type: u8 + - + name: version + type: u8 + - + name: res-id + byte-order: big-endian + type: u16 + - + name: nf-ct-tcp-flags-mask + type: struct + members: + - + name: flags + type: u8 + enum: nf-ct-tcp-flags + enum-as-flags: true + - + name: mask + type: u8 + enum: nf-ct-tcp-flags + enum-as-flags: true + - + name: nf-ct-tcp-flags + type: flags + entries: + - window-scale + - sack-perm + - close-init + - be-liberal + - unacked + - maxack + - challenge-ack + - simultaneous-open + - + name: nf-ct-tcp-state + type: enum + entries: + - none + - syn-sent + - syn-recv + - established + - fin-wait + - close-wait + - last-ack + - time-wait + - close + - syn-sent2 + - max + - ignore + - retrans + - unack + - timeout-max + - + name: nf-ct-sctp-state + type: enum + entries: + - none + - cloned + - cookie-wait + - cookie-echoed + - established + - shutdown-sent + - shutdown-received + - shutdown-ack-sent + - shutdown-heartbeat-sent + - + name: nf-ct-status + type: flags + entries: + - expected + - seen-reply + - assured + - confirmed + - src-nat + - dst-nat + - seq-adj + - src-nat-done + - dst-nat-done + - dying + - fixed-timeout + - template + - nat-clash + - helper + - offload + - hw-offload + +attribute-sets: + - + name: counter-attrs + attributes: + - + name: packets + type: u64 + byte-order: big-endian + - + name: bytes + type: u64 + byte-order: big-endian + - + name: packets-old + type: u32 + - + name: bytes-old + type: u32 + - + name: pad + type: pad + - + name: tuple-proto-attrs + attributes: + - + name: proto-num + type: u8 + doc: l4 protocol number + - + name: proto-src-port + type: u16 + byte-order: big-endian + doc: l4 source port + - + name: proto-dst-port + type: u16 + byte-order: big-endian + doc: l4 source port + - + name: proto-icmp-id + type: u16 + byte-order: big-endian + doc: l4 icmp id + - + name: proto-icmp-type + type: u8 + - + name: proto-icmp-code + type: u8 + - + name: proto-icmpv6-id + type: u16 + byte-order: big-endian + doc: l4 icmp id + - + name: proto-icmpv6-type + type: u8 + - + name: proto-icmpv6-code + type: u8 + - + name: tuple-ip-attrs + attributes: + - + name: ip-v4-src + type: u32 + byte-order: big-endian + display-hint: ipv4 + doc: ipv4 source address + - + name: ip-v4-dst + type: u32 + byte-order: big-endian + display-hint: ipv4 + doc: ipv4 destination address + - + name: ip-v6-src + type: binary + checks: + min-len: 16 + byte-order: big-endian + display-hint: ipv6 + doc: ipv6 source address + - + name: ip-v6-dst + type: binary + checks: + min-len: 16 + byte-order: big-endian + display-hint: ipv6 + doc: ipv6 destination address + - + name: tuple-attrs + attributes: + - + name: tuple-ip + type: nest + nested-attributes: tuple-ip-attrs + doc: conntrack l3 information + - + name: tuple-proto + type: nest + nested-attributes: tuple-proto-attrs + doc: conntrack l4 information + - + name: tuple-zone + type: u16 + byte-order: big-endian + doc: conntrack zone id + - + name: protoinfo-tcp-attrs + attributes: + - + name: tcp-state + type: u8 + enum: nf-ct-tcp-state + doc: tcp connection state + - + name: tcp-wscale-original + type: u8 + doc: window scaling factor in original direction + - + name: tcp-wscale-reply + type: u8 + doc: window scaling factor in reply direction + - + name: tcp-flags-original + type: binary + struct: nf-ct-tcp-flags-mask + - + name: tcp-flags-reply + type: binary + struct: nf-ct-tcp-flags-mask + - + name: protoinfo-dccp-attrs + attributes: + - + name: dccp-state + type: u8 + doc: dccp connection state + - + name: dccp-role + type: u8 + - + name: dccp-handshake-seq + type: u64 + byte-order: big-endian + - + name: dccp-pad + type: pad + - + name: protoinfo-sctp-attrs + attributes: + - + name: sctp-state + type: u8 + doc: sctp connection state + enum: nf-ct-sctp-state + - + name: vtag-original + type: u32 + byte-order: big-endian + - + name: vtag-reply + type: u32 + byte-order: big-endian + - + name: protoinfo-attrs + attributes: + - + name: protoinfo-tcp + type: nest + nested-attributes: protoinfo-tcp-attrs + doc: conntrack tcp state information + - + name: protoinfo-dccp + type: nest + nested-attributes: protoinfo-dccp-attrs + doc: conntrack dccp state information + - + name: protoinfo-sctp + type: nest + nested-attributes: protoinfo-sctp-attrs + doc: conntrack sctp state information + - + name: help-attrs + attributes: + - + name: help-name + type: string + doc: helper name + - + name: nat-proto-attrs + attributes: + - + name: nat-port-min + type: u16 + byte-order: big-endian + - + name: nat-port-max + type: u16 + byte-order: big-endian + - + name: nat-attrs + attributes: + - + name: nat-v4-minip + type: u32 + byte-order: big-endian + - + name: nat-v4-maxip + type: u32 + byte-order: big-endian + - + name: nat-v6-minip + type: binary + - + name: nat-v6-maxip + type: binary + - + name: nat-proto + type: nest + nested-attributes: nat-proto-attrs + - + name: seqadj-attrs + attributes: + - + name: correction-pos + type: u32 + byte-order: big-endian + - + name: offset-before + type: u32 + byte-order: big-endian + - + name: offset-after + type: u32 + byte-order: big-endian + - + name: secctx-attrs + attributes: + - + name: secctx-name + type: string + - + name: synproxy-attrs + attributes: + - + name: isn + type: u32 + byte-order: big-endian + - + name: its + type: u32 + byte-order: big-endian + - + name: tsoff + type: u32 + byte-order: big-endian + - + name: conntrack-attrs + attributes: + - + name: tuple-orig + type: nest + nested-attributes: tuple-attrs + doc: conntrack l3+l4 protocol information, original direction + - + name: tuple-reply + type: nest + nested-attributes: tuple-attrs + doc: conntrack l3+l4 protocol information, reply direction + - + name: status + type: u32 + byte-order: big-endian + enum: nf-ct-status + enum-as-flags: true + doc: conntrack flag bits + - + name: protoinfo + type: nest + nested-attributes: protoinfo-attrs + - + name: help + type: nest + nested-attributes: help-attrs + - + name: nat-src + type: nest + nested-attributes: nat-attrs + - + name: timeout + type: u32 + byte-order: big-endian + - + name: mark + type: u32 + byte-order: big-endian + - + name: counters-orig + type: nest + nested-attributes: counter-attrs + - + name: counters-reply + type: nest + nested-attributes: counter-attrs + - + name: use + type: u32 + byte-order: big-endian + - + name: id + type: u32 + byte-order: big-endian + - + name: nat-dst + type: nest + nested-attributes: nat-attrs + - + name: tuple-master + type: nest + nested-attributes: tuple-attrs + - + name: seq-adj-orig + type: nest + nested-attributes: seqadj-attrs + - + name: seq-adj-reply + type: nest + nested-attributes: seqadj-attrs + - + name: secmark + type: binary + doc: obsolete + - + name: zone + type: u16 + byte-order: big-endian + doc: conntrack zone id + - + name: secctx + type: nest + nested-attributes: secctx-attrs + - + name: timestamp + type: u64 + byte-order: big-endian + - + name: mark-mask + type: u32 + byte-order: big-endian + - + name: labels + type: binary + - + name: labels mask + type: binary + - + name: synproxy + type: nest + nested-attributes: synproxy-attrs + - + name: filter + type: nest + nested-attributes: tuple-attrs + - + name: status-mask + type: u32 + byte-order: big-endian + enum: nf-ct-status + enum-as-flags: true + doc: conntrack flag bits to change + - + name: timestamp-event + type: u64 + byte-order: big-endian + - + name: conntrack-stats-attrs + attributes: + - + name: searched + type: u32 + byte-order: big-endian + doc: obsolete + - + name: found + type: u32 + byte-order: big-endian + - + name: new + type: u32 + byte-order: big-endian + doc: obsolete + - + name: invalid + type: u32 + byte-order: big-endian + doc: obsolete + - + name: ignore + type: u32 + byte-order: big-endian + doc: obsolete + - + name: delete + type: u32 + byte-order: big-endian + doc: obsolete + - + name: delete-list + type: u32 + byte-order: big-endian + doc: obsolete + - + name: insert + type: u32 + byte-order: big-endian + - + name: insert-failed + type: u32 + byte-order: big-endian + - + name: drop + type: u32 + byte-order: big-endian + - + name: early-drop + type: u32 + byte-order: big-endian + - + name: error + type: u32 + byte-order: big-endian + - + name: search-restart + type: u32 + byte-order: big-endian + - + name: clash-resolve + type: u32 + byte-order: big-endian + - + name: chain-toolong + type: u32 + byte-order: big-endian + +operations: + enum-model: directional + list: + - + name: get + doc: get / dump entries + attribute-set: conntrack-attrs + fixed-header: nfgenmsg + do: + request: + value: 0x101 + attributes: + - tuple-orig + - tuple-reply + - zone + reply: + value: 0x100 + attributes: + - tuple-orig + - tuple-reply + - status + - protoinfo + - help + - nat-src + - nat-dst + - timeout + - mark + - counter-orig + - counter-reply + - use + - id + - nat-dst + - tuple-master + - seq-adj-orig + - seq-adj-reply + - zone + - secctx + - labels + - synproxy + dump: + request: + value: 0x101 + attributes: + - nfgen-family + - mark + - filter + - status + - zone + reply: + value: 0x100 + attributes: + - tuple-orig + - tuple-reply + - status + - protoinfo + - help + - nat-src + - nat-dst + - timeout + - mark + - counter-orig + - counter-reply + - use + - id + - nat-dst + - tuple-master + - seq-adj-orig + - seq-adj-reply + - zone + - secctx + - labels + - synproxy + - + name: get-stats + doc: dump pcpu conntrack stats + attribute-set: conntrack-stats-attrs + fixed-header: nfgenmsg + dump: + request: + value: 0x104 + reply: + value: 0x104 + attributes: + - searched + - found + - insert + - insert-failed + - drop + - early-drop + - error + - search-restart + - clash-resolve + - chain-toolong diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index cbb544bd6c84..288923e965ae 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -115,6 +115,9 @@ attribute-sets: type: u64 enum: xsk-flags - + name: io-uring-provider-info + attributes: [] + - name: page-pool attributes: - @@ -171,6 +174,11 @@ attribute-sets: name: dmabuf doc: ID of the dmabuf this page-pool is attached to. type: u32 + - + name: io-uring + doc: io-uring memory provider information. + type: nest + nested-attributes: io-uring-provider-info - name: page-pool-info subset-of: page-pool @@ -296,6 +304,11 @@ attribute-sets: name: dmabuf doc: ID of the dmabuf attached to this queue, if any. type: u32 + - + name: io-uring + doc: io_uring memory provider information. + type: nest + nested-attributes: io-uring-provider-info - name: qstats @@ -572,6 +585,7 @@ operations: - inflight-mem - detach-time - dmabuf + - io-uring dump: reply: *pp-reply config-cond: page-pool @@ -637,6 +651,7 @@ operations: - napi-id - ifindex - dmabuf + - io-uring dump: request: attributes: diff --git a/Documentation/netlink/specs/nl80211.yaml b/Documentation/netlink/specs/nl80211.yaml new file mode 100644 index 000000000000..1ec49c3562cd --- /dev/null +++ b/Documentation/netlink/specs/nl80211.yaml @@ -0,0 +1,2000 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: nl80211 +protocol: genetlink-legacy + +doc: + Netlink API for 802.11 wireless devices + +definitions: + - + name: commands + type: enum + entries: + - unspec + - get-wiphy + - set-wiphy + - new-wiphy + - del-wiphy + - get-interface + - set-interface + - new-interface + - del-interface + - get-key + - set-key + - new-key + - del-key + - get-beacon + - set-beacon + - new-beacon + - del-beacon + - get-station + - set-station + - new-station + - del-station + - get-mpath + - set-mpath + - new-mpath + - del-mpath + - set-bss + - set-reg + - req-set-reg + - get-mesh-config + - set-mesh-config + - set-mgmt-extra-ie + - get-reg + - get-scan + - trigger-scan + - new-scan-results + - scan-aborted + - reg-change + - authenticate + - associate + - deauthenticate + - disassociate + - michael-mic-failure + - reg-beacon-hint + - join-ibss + - leave-ibss + - testmode + - connect + - roam + - disconnect + - set-wiphy-netns + - get-survey + - new-survey-results + - set-pmksa + - del-pmksa + - flush-pmksa + - remain-on-channel + - cancel-remain-on-channel + - set-tx-bitrate-mask + - register-action + - action + - action-tx-status + - set-power-save + - get-power-save + - set-cqm + - notify-cqm + - set-channel + - set-wds-peer + - frame-wait-cancel + - join-mesh + - leave-mesh + - unprot-deauthenticate + - unprot-disassociate + - new-peer-candidate + - get-wowlan + - set-wowlan + - start-sched-scan + - stop-sched-scan + - sched-scan-results + - sched-scan-stopped + - set-rekey-offload + - pmksa-candidate + - tdls-oper + - tdls-mgmt + - unexpected-frame + - probe-client + - register-beacons + - unexpected-4-addr-frame + - set-noack-map + - ch-switch-notify + - start-p2p-device + - stop-p2p-device + - conn-failed + - set-mcast-rate + - set-mac-acl + - radar-detect + - get-protocol-features + - update-ft-ies + - ft-event + - crit-protocol-start + - crit-protocol-stop + - get-coalesce + - set-coalesce + - channel-switch + - vendor + - set-qos-map + - add-tx-ts + - del-tx-ts + - get-mpp + - join-ocb + - leave-ocb + - ch-switch-started-notify + - tdls-channel-switch + - tdls-cancel-channel-switch + - wiphy-reg-change + - abort-scan + - start-nan + - stop-nan + - add-nan-function + - del-nan-function + - change-nan-config + - nan-match + - set-multicast-to-unicast + - update-connect-params + - set-pmk + - del-pmk + - port-authorized + - reload-regdb + - external-auth + - sta-opmode-changed + - control-port-frame + - get-ftm-responder-stats + - peer-measurement-start + - peer-measurement-result + - peer-measurement-complete + - notify-radar + - update-owe-info + - probe-mesh-link + - set-tid-config + - unprot-beacon + - control-port-frame-tx-status + - set-sar-specs + - obss-color-collision + - color-change-request + - color-change-started + - color-change-aborted + - color-change-completed + - set-fils-aad + - assoc-comeback + - add-link + - remove-link + - add-link-sta + - modify-link-sta + - remove-link-sta + - set-hw-timestamp + - links-removed + - set-tid-to-link-mapping + - + name: feature-flags + type: flags + entries: + - sk-tx-status + - ht-ibss + - inactivity-timer + - cell-base-reg-hints + - p2p-device-needs-channel + - sae + - low-priority-scan + - scan-flush + - ap-scan + - vif-txpower + - need-obss-scan + - p2p-go-ctwin + - p2p-go-oppps + - reserved + - advertise-chan-limits + - full-ap-client-state + - userspace-mpm + - active-monitor + - ap-mode-chan-width-change + - ds-param-set-ie-in-probes + - wfa-tpc-ie-in-probes + - quiet + - tx-power-insertion + - ackto-estimation + - static-smps + - dynamic-smps + - supports-wmm-admission + - mac-on-create + - tdls-channel-switch + - scan-random-mac-addr + - sched-scan-random-mac-addr + - no-random-mac-addr + - + name: ieee80211-mcs-info + type: struct + members: + - + name: rx-mask + type: binary + len: 10 + - + name: rx-highest + type: u16 + byte-order: little-endian + - + name: tx-params + type: u8 + - + name: reserved + type: binary + len: 3 + - + name: ieee80211-vht-mcs-info + type: struct + members: + - + name: rx-mcs-map + type: u16 + byte-order: little-endian + - + name: rx-highest + type: u16 + byte-order: little-endian + - + name: tx-mcs-map + type: u16 + byte-order: little-endian + - + name: tx-highest + type: u16 + byte-order: little-endian + - + name: ieee80211-ht-cap + type: struct + members: + - + name: cap-info + type: u16 + byte-order: little-endian + - + name: ampdu-params-info + type: u8 + - + name: mcs + type: binary + struct: ieee80211-mcs-info + - + name: extended-ht-cap-info + type: u16 + byte-order: little-endian + - + name: tx-bf-cap-info + type: u32 + byte-order: little-endian + - + name: antenna-selection-info + type: u8 + - + name: channel-type + type: enum + entries: + - no-ht + - ht20 + - ht40minus + - ht40plus + - + name: sta-flag-update + type: struct + members: + - + name: mask + type: u32 + - + name: set + type: u32 + - + name: protocol-features + type: flags + entries: + - split-wiphy-dump + +attribute-sets: + - + name: nl80211-attrs + name-prefix: nl80211-attr- + enum-name: nl80211-attrs + attr-max-name: num-nl80211-attr + attributes: + - + name: wiphy + type: u32 + - + name: wiphy-name + type: string + - + name: ifindex + type: u32 + - + name: ifname + type: string + - + name: iftype + type: u32 + - + name: mac + type: binary + display-hint: mac + - + name: key-data + type: binary + - + name: key-idx + type: u8 + - + name: key-cipher + type: u32 + - + name: key-seq + type: binary + - + name: key-default + type: flag + - + name: beacon-interval + type: u32 + - + name: dtim-period + type: u32 + - + name: beacon-head + type: binary + - + name: beacon-tail + type: binary + - + name: sta-aid + type: u16 + - + name: sta-flags + type: binary # TODO: nest + - + name: sta-listen-interval + type: u16 + - + name: sta-supported-rates + type: binary + - + name: sta-vlan + type: u32 + - + name: sta-info + type: binary # TODO: nest + - + name: wiphy-bands + type: nest + nested-attributes: wiphy-bands + - + name: mntr-flags + type: binary # TODO: nest + - + name: mesh-id + type: binary + - + name: sta-plink-action + type: u8 + - + name: mpath-next-hop + type: binary + display-hint: mac + - + name: mpath-info + type: binary # TODO: nest + - + name: bss-cts-prot + type: u8 + - + name: bss-short-preamble + type: u8 + - + name: bss-short-slot-time + type: u8 + - + name: ht-capability + type: binary + - + name: supported-iftypes + type: nest + nested-attributes: supported-iftypes + - + name: reg-alpha2 + type: binary + - + name: reg-rules + type: binary # TODO: nest + - + name: mesh-config + type: binary # TODO: nest + - + name: bss-basic-rates + type: binary + - + name: wiphy-txq-params + type: binary # TODO: nest + - + name: wiphy-freq + type: u32 + - + name: wiphy-channel-type + type: u32 + enum: channel-type + - + name: key-default-mgmt + type: flag + - + name: mgmt-subtype + type: u8 + - + name: ie + type: binary + - + name: max-num-scan-ssids + type: u8 + - + name: scan-frequencies + type: binary # TODO: nest + - + name: scan-ssids + type: binary # TODO: nest + - + name: generation + type: u32 + - + name: bss + type: binary # TODO: nest + - + name: reg-initiator + type: u8 + - + name: reg-type + type: u8 + - + name: supported-commands + type: indexed-array + sub-type: u32 + enum: commands + - + name: frame + type: binary + - + name: ssid + type: binary + - + name: auth-type + type: u32 + - + name: reason-code + type: u16 + - + name: key-type + type: u32 + - + name: max-scan-ie-len + type: u16 + - + name: cipher-suites + type: binary + sub-type: u32 + display-hint: hex + - + name: freq-before + type: binary # TODO: nest + - + name: freq-after + type: binary # TODO: nest + - + name: freq-fixed + type: flag + - + name: wiphy-retry-short + type: u8 + - + name: wiphy-retry-long + type: u8 + - + name: wiphy-frag-threshold + type: u32 + - + name: wiphy-rts-threshold + type: u32 + - + name: timed-out + type: flag + - + name: use-mfp + type: u32 + - + name: sta-flags2 + type: binary + struct: sta-flag-update + - + name: control-port + type: flag + - + name: testdata + type: binary + - + name: privacy + type: flag + - + name: disconnected-by-ap + type: flag + - + name: status-code + type: u16 + - + name: cipher-suites-pairwise + type: binary + - + name: cipher-suite-group + type: u32 + - + name: wpa-versions + type: u32 + - + name: akm-suites + type: binary + - + name: req-ie + type: binary + - + name: resp-ie + type: binary + - + name: prev-bssid + type: binary + - + name: key + type: binary # TODO: nest + - + name: keys + type: binary # TODO: nest + - + name: pid + type: u32 + - + name: 4addr + type: u8 + - + name: survey-info + type: binary # TODO: nest + - + name: pmkid + type: binary + - + name: max-num-pmkids + type: u8 + - + name: duration + type: u32 + - + name: cookie + type: u64 + - + name: wiphy-coverage-class + type: u8 + - + name: tx-rates + type: binary # TODO: nest + - + name: frame-match + type: binary + - + name: ack + type: flag + - + name: ps-state + type: u32 + - + name: cqm + type: binary # TODO: nest + - + name: local-state-change + type: flag + - + name: ap-isolate + type: u8 + - + name: wiphy-tx-power-setting + type: u32 + - + name: wiphy-tx-power-level + type: u32 + - + name: tx-frame-types + type: nest + nested-attributes: iftype-attrs + - + name: rx-frame-types + type: nest + nested-attributes: iftype-attrs + - + name: frame-type + type: u16 + - + name: control-port-ethertype + type: flag + - + name: control-port-no-encrypt + type: flag + - + name: support-ibss-rsn + type: flag + - + name: wiphy-antenna-tx + type: u32 + - + name: wiphy-antenna-rx + type: u32 + - + name: mcast-rate + type: u32 + - + name: offchannel-tx-ok + type: flag + - + name: bss-ht-opmode + type: u16 + - + name: key-default-types + type: binary # TODO: nest + - + name: max-remain-on-channel-duration + type: u32 + - + name: mesh-setup + type: binary # TODO: nest + - + name: wiphy-antenna-avail-tx + type: u32 + - + name: wiphy-antenna-avail-rx + type: u32 + - + name: support-mesh-auth + type: flag + - + name: sta-plink-state + type: u8 + - + name: wowlan-triggers + type: binary # TODO: nest + - + name: wowlan-triggers-supported + type: nest + nested-attributes: wowlan-triggers-attrs + - + name: sched-scan-interval + type: u32 + - + name: interface-combinations + type: indexed-array + sub-type: nest + nested-attributes: if-combination-attributes + - + name: software-iftypes + type: nest + nested-attributes: supported-iftypes + - + name: rekey-data + type: binary # TODO: nest + - + name: max-num-sched-scan-ssids + type: u8 + - + name: max-sched-scan-ie-len + type: u16 + - + name: scan-supp-rates + type: binary # TODO: nest + - + name: hidden-ssid + type: u32 + - + name: ie-probe-resp + type: binary + - + name: ie-assoc-resp + type: binary + - + name: sta-wme + type: binary # TODO: nest + - + name: support-ap-uapsd + type: flag + - + name: roam-support + type: flag + - + name: sched-scan-match + type: binary # TODO: nest + - + name: max-match-sets + type: u8 + - + name: pmksa-candidate + type: binary # TODO: nest + - + name: tx-no-cck-rate + type: flag + - + name: tdls-action + type: u8 + - + name: tdls-dialog-token + type: u8 + - + name: tdls-operation + type: u8 + - + name: tdls-support + type: flag + - + name: tdls-external-setup + type: flag + - + name: device-ap-sme + type: u32 + - + name: dont-wait-for-ack + type: flag + - + name: feature-flags + type: u32 + enum: feature-flags + enum-as-flags: True + - + name: probe-resp-offload + type: u32 + - + name: probe-resp + type: binary + - + name: dfs-region + type: u8 + - + name: disable-ht + type: flag + - + name: ht-capability-mask + type: binary + struct: ieee80211-ht-cap + - + name: noack-map + type: u16 + - + name: inactivity-timeout + type: u16 + - + name: rx-signal-dbm + type: u32 + - + name: bg-scan-period + type: u16 + - + name: wdev + type: u64 + - + name: user-reg-hint-type + type: u32 + - + name: conn-failed-reason + type: u32 + - + name: auth-data + type: binary + - + name: vht-capability + type: binary + - + name: scan-flags + type: u32 + - + name: channel-width + type: u32 + - + name: center-freq1 + type: u32 + - + name: center-freq2 + type: u32 + - + name: p2p-ctwindow + type: u8 + - + name: p2p-oppps + type: u8 + - + name: local-mesh-power-mode + type: u32 + - + name: acl-policy + type: u32 + - + name: mac-addrs + type: binary # TODO: nest + - + name: mac-acl-max + type: u32 + - + name: radar-event + type: u32 + - + name: ext-capa + type: binary + - + name: ext-capa-mask + type: binary + - + name: sta-capability + type: u16 + - + name: sta-ext-capability + type: binary + - + name: protocol-features + type: u32 + enum: protocol-features + - + name: split-wiphy-dump + type: flag + - + name: disable-vht + type: flag + - + name: vht-capability-mask + type: binary + - + name: mdid + type: u16 + - + name: ie-ric + type: binary + - + name: crit-prot-id + type: u16 + - + name: max-crit-prot-duration + type: u16 + - + name: peer-aid + type: u16 + - + name: coalesce-rule + type: binary # TODO: nest + - + name: ch-switch-count + type: u32 + - + name: ch-switch-block-tx + type: flag + - + name: csa-ies + type: binary # TODO: nest + - + name: cntdwn-offs-beacon + type: binary + - + name: cntdwn-offs-presp + type: binary + - + name: rxmgmt-flags + type: binary + - + name: sta-supported-channels + type: binary + - + name: sta-supported-oper-classes + type: binary + - + name: handle-dfs + type: flag + - + name: support-5-mhz + type: flag + - + name: support-10-mhz + type: flag + - + name: opmode-notif + type: u8 + - + name: vendor-id + type: u32 + - + name: vendor-subcmd + type: u32 + - + name: vendor-data + type: binary + - + name: vendor-events + type: binary + - + name: qos-map + type: binary + - + name: mac-hint + type: binary + display-hint: mac + - + name: wiphy-freq-hint + type: u32 + - + name: max-ap-assoc-sta + type: u32 + - + name: tdls-peer-capability + type: u32 + - + name: socket-owner + type: flag + - + name: csa-c-offsets-tx + type: binary + - + name: max-csa-counters + type: u8 + - + name: tdls-initiator + type: flag + - + name: use-rrm + type: flag + - + name: wiphy-dyn-ack + type: flag + - + name: tsid + type: u8 + - + name: user-prio + type: u8 + - + name: admitted-time + type: u16 + - + name: smps-mode + type: u8 + - + name: oper-class + type: u8 + - + name: mac-mask + type: binary + display-hint: mac + - + name: wiphy-self-managed-reg + type: flag + - + name: ext-features + type: binary + - + name: survey-radio-stats + type: binary + - + name: netns-fd + type: u32 + - + name: sched-scan-delay + type: u32 + - + name: reg-indoor + type: flag + - + name: max-num-sched-scan-plans + type: u32 + - + name: max-scan-plan-interval + type: u32 + - + name: max-scan-plan-iterations + type: u32 + - + name: sched-scan-plans + type: binary # TODO: nest + - + name: pbss + type: flag + - + name: bss-select + type: binary # TODO: nest + - + name: sta-support-p2p-ps + type: u8 + - + name: pad + type: binary + - + name: iftype-ext-capa + type: binary # TODO: nest + - + name: mu-mimo-group-data + type: binary + - + name: mu-mimo-follow-mac-addr + type: binary + display-hint: mac + - + name: scan-start-time-tsf + type: u64 + - + name: scan-start-time-tsf-bssid + type: binary + - + name: measurement-duration + type: u16 + - + name: measurement-duration-mandatory + type: flag + - + name: mesh-peer-aid + type: u16 + - + name: nan-master-pref + type: u8 + - + name: bands + type: u32 + - + name: nan-func + type: binary # TODO: nest + - + name: nan-match + type: binary # TODO: nest + - + name: fils-kek + type: binary + - + name: fils-nonces + type: binary + - + name: multicast-to-unicast-enabled + type: flag + - + name: bssid + type: binary + display-hint: mac + - + name: sched-scan-relative-rssi + type: s8 + - + name: sched-scan-rssi-adjust + type: binary + - + name: timeout-reason + type: u32 + - + name: fils-erp-username + type: binary + - + name: fils-erp-realm + type: binary + - + name: fils-erp-next-seq-num + type: u16 + - + name: fils-erp-rrk + type: binary + - + name: fils-cache-id + type: binary + - + name: pmk + type: binary + - + name: sched-scan-multi + type: flag + - + name: sched-scan-max-reqs + type: u32 + - + name: want-1x-4way-hs + type: flag + - + name: pmkr0-name + type: binary + - + name: port-authorized + type: binary + - + name: external-auth-action + type: u32 + - + name: external-auth-support + type: flag + - + name: nss + type: u8 + - + name: ack-signal + type: s32 + - + name: control-port-over-nl80211 + type: flag + - + name: txq-stats + type: nest + nested-attributes: txq-stats-attrs + - + name: txq-limit + type: u32 + - + name: txq-memory-limit + type: u32 + - + name: txq-quantum + type: u32 + - + name: he-capability + type: binary + - + name: ftm-responder + type: binary # TODO: nest + - + name: ftm-responder-stats + type: binary # TODO: nest + - + name: timeout + type: u32 + - + name: peer-measurements + type: binary # TODO: nest + - + name: airtime-weight + type: u16 + - + name: sta-tx-power-setting + type: u8 + - + name: sta-tx-power + type: s16 + - + name: sae-password + type: binary + - + name: twt-responder + type: flag + - + name: he-obss-pd + type: binary # TODO: nest + - + name: wiphy-edmg-channels + type: u8 + - + name: wiphy-edmg-bw-config + type: u8 + - + name: vlan-id + type: u16 + - + name: he-bss-color + type: binary # TODO: nest + - + name: iftype-akm-suites + type: binary # TODO: nest + - + name: tid-config + type: binary # TODO: nest + - + name: control-port-no-preauth + type: flag + - + name: pmk-lifetime + type: u32 + - + name: pmk-reauth-threshold + type: u8 + - + name: receive-multicast + type: flag + - + name: wiphy-freq-offset + type: u32 + - + name: center-freq1-offset + type: u32 + - + name: scan-freq-khz + type: binary # TODO: nest + - + name: he-6ghz-capability + type: binary + - + name: fils-discovery + type: binary # TOOD: nest + - + name: unsol-bcast-probe-resp + type: binary # TOOD: nest + - + name: s1g-capability + type: binary + - + name: s1g-capability-mask + type: binary + - + name: sae-pwe + type: u8 + - + name: reconnect-requested + type: binary + - + name: sar-spec + type: nest + nested-attributes: sar-attributes + - + name: disable-he + type: flag + - + name: obss-color-bitmap + type: u64 + - + name: color-change-count + type: u8 + - + name: color-change-color + type: u8 + - + name: color-change-elems + type: binary # TODO: nest + - + name: mbssid-config + type: binary # TODO: nest + - + name: mbssid-elems + type: binary # TODO: nest + - + name: radar-background + type: flag + - + name: ap-settings-flags + type: u32 + - + name: eht-capability + type: binary + - + name: disable-eht + type: flag + - + name: mlo-links + type: binary # TODO: nest + - + name: mlo-link-id + type: u8 + - + name: mld-addr + type: binary + display-hint: mac + - + name: mlo-support + type: flag + - + name: max-num-akm-suites + type: binary + - + name: eml-capability + type: u16 + - + name: mld-capa-and-ops + type: u16 + - + name: tx-hw-timestamp + type: u64 + - + name: rx-hw-timestamp + type: u64 + - + name: td-bitmap + type: binary + - + name: punct-bitmap + type: u32 + - + name: max-hw-timestamp-peers + type: u16 + - + name: hw-timestamp-enabled + type: flag + - + name: ema-rnr-elems + type: binary # TODO: nest + - + name: mlo-link-disabled + type: flag + - + name: bss-dump-include-use-data + type: flag + - + name: mlo-ttlm-dlink + type: u16 + - + name: mlo-ttlm-ulink + type: u16 + - + name: assoc-spp-amsdu + type: flag + - + name: wiphy-radios + type: binary # TODO: nest + - + name: wiphy-interface-combinations + type: binary # TODO: nest + - + name: vif-radio-mask + type: u32 + - + name: frame-type-attrs + subset-of: nl80211-attrs + attributes: + - + name: frame-type + - + name: wiphy-bands + name-prefix: nl80211-band- + attr-max-name: num-nl80211-bands + attributes: + - + name: 2ghz + doc: 2.4 GHz ISM band + value: 0 + type: nest + nested-attributes: band-attrs + - + name: 5ghz + doc: around 5 GHz band (4.9 - 5.7 GHz) + type: nest + nested-attributes: band-attrs + - + name: 60ghz + doc: around 60 GHz band (58.32 - 69.12 GHz) + type: nest + nested-attributes: band-attrs + - + name: 6ghz + type: nest + nested-attributes: band-attrs + - + name: s1ghz + type: nest + nested-attributes: band-attrs + - + name: lc + type: nest + nested-attributes: band-attrs + - + name: band-attrs + enum-name: nl80211-band-attr + name-prefix: nl80211-band-attr- + attributes: + - + name: freqs + type: indexed-array + sub-type: nest + nested-attributes: frequency-attrs + - + name: rates + type: indexed-array + sub-type: nest + nested-attributes: bitrate-attrs + - + name: ht-mcs-set + type: binary + struct: ieee80211-mcs-info + - + name: ht-capa + type: u16 + - + name: ht-ampdu-factor + type: u8 + - + name: ht-ampdu-density + type: u8 + - + name: vht-mcs-set + type: binary + struct: ieee80211-vht-mcs-info + - + name: vht-capa + type: u32 + - + name: iftype-data + type: indexed-array + sub-type: nest + nested-attributes: iftype-data-attrs + - + name: edmg-channels + type: binary + - + name: edmg-bw-config + type: binary + - + name: s1g-mcs-nss-set + type: binary + - + name: s1g-capa + type: binary + - + name: bitrate-attrs + name-prefix: nl80211-bitrate-attr- + attributes: + - + name: rate + type: u32 + - + name: 2ghz-shortpreamble + type: flag + - + name: frequency-attrs + name-prefix: nl80211-frequency-attr- + attributes: + - + name: freq + type: u32 + - + name: disabled + type: flag + - + name: no-ir + type: flag + - + name: no-ibss + name-prefix: __nl80211-frequency-attr- + type: flag + - + name: radar + type: flag + - + name: max-tx-power + type: u32 + - + name: dfs-state + type: u32 + - + name: dfs-time + type: binary + - + name: no-ht40-minus + type: binary + - + name: no-ht40-plus + type: binary + - + name: no-80mhz + type: binary + - + name: no-160mhz + type: binary + - + name: dfs-cac-time + type: binary + - + name: indoor-only + type: binary + - + name: ir-concurrent + type: binary + - + name: no-20mhz + type: binary + - + name: no-10mhz + type: binary + - + name: wmm + type: indexed-array + sub-type: nest + nested-attributes: wmm-attrs + - + name: no-he + type: binary + - + name: offset + type: u32 + - + name: 1mhz + type: binary + - + name: 2mhz + type: binary + - + name: 4mhz + type: binary + - + name: 8mhz + type: binary + - + name: 16mhz + type: binary + - + name: no-320mhz + type: binary + - + name: no-eht + type: binary + - + name: psd + type: binary + - + name: dfs-concurrent + type: binary + - + name: no-6ghz-vlp-client + type: binary + - + name: no-6ghz-afc-client + type: binary + - + name: can-monitor + type: binary + - + name: allow-6ghz-vlp-ap + type: binary + - + name: if-combination-attributes + enum-name: nl80211-if-combination-attrs + name-prefix: nl80211-iface-comb- + attr-max-name: max-nl80211-iface-comb + attributes: + - + name: limits + type: indexed-array + sub-type: nest + nested-attributes: iface-limit-attributes + - + name: maxnum + type: u32 + - + name: sta-ap-bi-match + type: flag + - + name: num-channels + type: u32 + - + name: radar-detect-widths + type: u32 + - + name: radar-detect-regions + type: u32 + - + name: bi-min-gcd + type: u32 + - + name: iface-limit-attributes + enum-name: nl80211-iface-limit-attrs + name-prefix: nl80211-iface-limit- + attr-max-name: max-nl80211-iface-limit + attributes: + - + name: max + type: u32 + - + name: types + type: nest + nested-attributes: supported-iftypes + - + name: iftype-data-attrs + name-prefix: nl80211-band-iftype-attr- + attributes: + - + name: iftypes + type: binary + - + name: he-cap-mac + type: binary + - + name: he-cap-phy + type: binary + - + name: he-cap-mcs-set + type: binary + - + name: he-cap-ppe + type: binary + - + name: he-6ghz-capa + type: binary + - + name: vendor-elems + type: binary + - + name: eht-cap-mac + type: binary + - + name: eht-cap-phy + type: binary + - + name: eht-cap-mcs-set + type: binary + - + name: eht-cap-ppe + type: binary + - + name: iftype-attrs + enum-name: nl80211-iftype + name-prefix: nl80211-iftype- + attributes: + - + name: unspecified + type: nest + value: 0 + nested-attributes: frame-type-attrs + - + name: adhoc + type: nest + nested-attributes: frame-type-attrs + - + name: station + type: nest + nested-attributes: frame-type-attrs + - + name: ap + type: nest + nested-attributes: frame-type-attrs + - + name: ap-vlan + type: nest + nested-attributes: frame-type-attrs + - + name: wds + type: nest + nested-attributes: frame-type-attrs + - + name: monitor + type: nest + nested-attributes: frame-type-attrs + - + name: mesh-point + type: nest + nested-attributes: frame-type-attrs + - + name: p2p-client + type: nest + nested-attributes: frame-type-attrs + - + name: p2p-go + type: nest + nested-attributes: frame-type-attrs + - + name: p2p-device + type: nest + nested-attributes: frame-type-attrs + - + name: ocb + type: nest + nested-attributes: frame-type-attrs + - + name: nan + type: nest + nested-attributes: frame-type-attrs + - + name: sar-attributes + enum-name: nl80211-sar-attrs + name-prefix: nl80211-sar-attr- + attributes: + - + name: type + type: u32 + - + name: specs + type: indexed-array + sub-type: nest + nested-attributes: sar-specs + - + name: sar-specs + enum-name: nl80211-sar-specs-attrs + name-prefix: nl80211-sar-attr-specs- + attributes: + - + name: power + type: s32 + - + name: range-index + type: u32 + - + name: start-freq + type: u32 + - + name: end-freq + type: u32 + - + name: supported-iftypes + enum-name: nl80211-iftype + name-prefix: nl80211-iftype- + attributes: + - + name: adhoc + type: flag + - + name: station + type: flag + - + name: ap + type: flag + - + name: ap-vlan + type: flag + - + name: wds + type: flag + - + name: monitor + type: flag + - + name: mesh-point + type: flag + - + name: p2p-client + type: flag + - + name: p2p-go + type: flag + - + name: p2p-device + type: flag + - + name: ocb + type: flag + - + name: nan + type: flag + - + name: txq-stats-attrs + name-prefix: nl80211-txq-stats- + attributes: + - + name: backlog-bytes + type: u32 + - + name: backlog-packets + type: u32 + - + name: flows + type: u32 + - + name: drops + type: u32 + - + name: ecn-marks + type: u32 + - + name: overlimit + type: u32 + - + name: overmemory + type: u32 + - + name: collisions + type: u32 + - + name: tx-bytes + type: u32 + - + name: tx-packets + type: u32 + - + name: max-flows + type: u32 + - + name: wmm-attrs + enum-name: nl80211-wmm-rule + name-prefix: nl80211-wmmr- + attributes: + - + name: cw-min + type: u16 + - + name: cw-max + type: u16 + - + name: aifsn + type: u8 + - + name: txop + type: u16 + - + name: wowlan-triggers-attrs + enum-name: nl80211-wowlan-triggers + name-prefix: nl80211-wowlan-trig- + attr-max-name: max-nl80211-wowlan-trig + attributes: + - + name: any + type: flag + - + name: disconnect + type: flag + - + name: magic-pkt + type: flag + - + name: pkt-pattern + type: flag + - + name: gtk-rekey-supported + type: flag + - + name: gtk-rekey-failure + type: flag + - + name: eap-ident-request + type: flag + - + name: 4way-handshake + type: flag + - + name: rfkill-release + type: flag + - + name: wakeup-pkt-80211 + type: flag + - + name: wakeup-pkt-80211-len + type: flag + - + name: wakeup-pkt-8023 + type: flag + - + name: wakeup-pkt-8023-len + type: flag + - + name: tcp-connection + type: flag + - + name: wakeup-tcp-match + type: flag + - + name: wakeup-tcp-connlost + type: flag + - + name: wakeup-tcp-nomoretokens + type: flag + - + name: net-detect + type: flag + - + name: net-detect-results + type: flag + - + name: unprotected-deauth-disassoc + type: flag + +operations: + enum-model: directional + list: + - + name: get-wiphy + doc: | + Get information about a wiphy or dump a list of all wiphys. Requests to dump get-wiphy + should unconditionally include the split-wiphy-dump flag in the request. + attribute-set: nl80211-attrs + do: + request: + value: 1 + attributes: + - wiphy + - wdev + - ifindex + reply: + value: 3 + attributes: &wiphy-reply-attrs + - bands + - cipher-suites + - control-port-ethertype + - ext-capa + - ext-capa-mask + - ext-features + - feature-flags + - generation + - ht-capability-mask + - interface-combinations + - mac + - max-csa-counters + - max-match-sets + - max-num-akm-suites + - max-num-pmkids + - max-num-scan-ssids + - max-num-sched-scan-plans + - max-num-sched-scan-ssids + - max-remain-on-channel-duration + - max-scan-ie-len + - max-scan-plan-interval + - max-scan-plan-iterations + - max-sched-scan-ie-len + - offchannel-tx-ok + - rx-frame-types + - sar-spec + - sched-scan-max-reqs + - software-iftypes + - support-ap-uapsd + - supported-commands + - supported-iftypes + - tdls-external-setup + - tdls-support + - tx-frame-types + - txq-limit + - txq-memory-limit + - txq-quantum + - txq-stats + - vht-capability-mask + - wiphy + - wiphy-antenna-avail-rx + - wiphy-antenna-avail-tx + - wiphy-antenna-rx + - wiphy-antenna-tx + - wiphy-bands + - wiphy-coverage-class + - wiphy-frag-threshold + - wiphy-name + - wiphy-retry-long + - wiphy-retry-short + - wiphy-rts-threshold + - wowlan-triggers-supported + dump: + request: + attributes: + - wiphy + - wdev + - ifindex + - split-wiphy-dump + reply: + attributes: *wiphy-reply-attrs + - + name: get-interface + doc: Get information about an interface or dump a list of all interfaces + attribute-set: nl80211-attrs + do: + request: + value: 5 + attributes: + - ifname + reply: + value: 7 + attributes: &interface-reply-attrs + - ifname + - iftype + - ifindex + - wiphy + - wdev + - mac + - generation + - txq-stats + - 4addr + dump: + request: + attributes: + - ifname + reply: + attributes: *interface-reply-attrs + - + name: get-protocol-features + doc: Get information about supported protocol features + attribute-set: nl80211-attrs + do: + request: + value: 95 + attributes: + - protocol-features + reply: + value: 95 + attributes: + - protocol-features + +mcast-groups: + list: + - + name: config + - + name: scan + - + name: regulatory + - + name: mlme + - + name: vendor + - + name: nan + - + name: testmode diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt_addr.yaml index cbee1cedb177..5dd5469044c7 100644 --- a/Documentation/netlink/specs/rt_addr.yaml +++ b/Documentation/netlink/specs/rt_addr.yaml @@ -168,6 +168,29 @@ operations: reply: value: 20 attributes: *ifaddr-all + - + name: getmaddrs + doc: Get / dump IPv4/IPv6 multicast addresses. + attribute-set: addr-attrs + fixed-header: ifaddrmsg + do: + request: + value: 58 + attributes: + - ifa-family + - ifa-index + reply: + value: 58 + attributes: &mcaddr-attrs + - ifa-multicast + - ifa-cacheinfo + dump: + request: + value: 58 + - ifa-family + reply: + value: 58 + attributes: *mcaddr-attrs mcast-groups: list: diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst index 8bf411b857d4..5f3885e56f58 100644 --- a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst +++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst @@ -70,7 +70,7 @@ the DPSW object that it will probe: Besides the configuration of the actual DPSW object, the dpaa2-switch driver will need the following DPAA2 objects: - * 1 DPMCP - A Management Command Portal object is needed for any interraction + * 1 DPMCP - A Management Command Portal object is needed for any interaction with the MC firmware. * 1 DPBP - A Buffer Pool is used for seeding buffers intended for the Rx path diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst index e3972d03cea0..792e9f8c846a 100644 --- a/Documentation/networking/devlink/ice.rst +++ b/Documentation/networking/devlink/ice.rst @@ -69,6 +69,17 @@ Parameters To verify that value has been set: $ devlink dev param show pci/0000:16:00.0 name tx_scheduling_layers + * - ``msix_vec_per_pf_max`` + - driverinit + - Set the max MSI-X that can be used by the PF, rest can be utilized for + SRIOV. The range is from min value set in msix_vec_per_pf_min to + 2k/number of ports. + * - ``msix_vec_per_pf_min`` + - driverinit + - Set the min MSI-X that will be used by the PF. This value inform how many + MSI-X will be allocated statically. The range is from 2 to value set + in msix_vec_per_pf_max. + .. list-table:: Driver specific parameters implemented :widths: 5 5 90 diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index 41618538fc70..7febe0aecd53 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -280,6 +280,10 @@ Description of the vnic counters: number of packets handled by the VNIC experiencing unexpected steering failure (at any point in steering flow owned by the VNIC, including the FDB for the eswitch owner). +- icm_consumption + amount of Interconnect Host Memory (ICM) consumed by the vnic in + granularity of 4KB. ICM is host memory allocated by SW upon HCA request + and is used for storing data structures that control HCA operation. User commands examples: diff --git a/Documentation/networking/devlink/sfc.rst b/Documentation/networking/devlink/sfc.rst index db64a1bd9733..0398d59ea184 100644 --- a/Documentation/networking/devlink/sfc.rst +++ b/Documentation/networking/devlink/sfc.rst @@ -5,7 +5,7 @@ sfc devlink support =================== This document describes the devlink features implemented by the ``sfc`` -device driver for the ef100 device. +device driver for the ef10 and ef100 devices. Info versions ============= @@ -18,6 +18,10 @@ The ``sfc`` driver reports the following versions * - Name - Type - Description + * - ``fw.bundle_id`` + - stored + - Version of the firmware "bundle" image that was last used to update + multiple components. * - ``fw.mgmt.suc`` - running - For boards where the management function is split between multiple @@ -55,3 +59,13 @@ The ``sfc`` driver reports the following versions * - ``fw.uefi`` - running - UEFI driver version (No UNDI support). + +Flash Update +============ + +The ``sfc`` driver implements support for flash update using the +``devlink-flash`` interface. It supports updating the device flash using a +combined flash image ("bundle") that contains multiple components (on ef10, +typically ``fw.mgmt``, ``fw.app``, ``fw.exprom`` and ``fw.uefi``). + +The driver does not support any overwrite mask flags. diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 363b4950d542..054561f8dcae 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -705,6 +705,8 @@ tcp_retries2 - INTEGER seconds and is a lower bound for the effective timeout. TCP will effectively time out at the first RTO which exceeds the hypothetical timeout. + If tcp_rto_max_ms is decreased, it is recommended to also + change tcp_retries2. RFC 1122 recommends at least 100 seconds for the timeout, which corresponds to a value of at least 8. @@ -1237,6 +1239,17 @@ tcp_rto_min_us - INTEGER Default: 200000 +tcp_rto_max_ms - INTEGER + Maximal TCP retransmission timeout (in ms). + Note that TCP_RTO_MAX_MS socket option has higher precedence. + + When changing tcp_rto_max_ms, it is important to understand + that tcp_retries2 might need a change. + + Possible Values: 1000 - 120,000 + + Default: 120,000 + UDP variables ============= diff --git a/Documentation/networking/net_cachelines/inet_connection_sock.rst b/Documentation/networking/net_cachelines/inet_connection_sock.rst index 4a15627fc93b..b2401aa7c450 100644 --- a/Documentation/networking/net_cachelines/inet_connection_sock.rst +++ b/Documentation/networking/net_cachelines/inet_connection_sock.rst @@ -17,6 +17,7 @@ struct timer_list icsk_retransmit_timer read_mostly struct timer_list icsk_delack_timer read_mostly inet_csk_reset_xmit_timer,tcp_connect u32 icsk_rto read_write tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one u32 icsk_rto_min +u32 icsk_rto_max read_mostly tcp_reset_xmit_timer u32 icsk_delack_max u32 icsk_pmtu_cookie read_write tcp_sync_mss,tcp_current_mss,tcp_send_syn_data,tcp_connect_init,tcp_connect struct tcp_congestion_ops icsk_ca_ops read_write tcp_cwnd_validate,tcp_tso_segs,tcp_ca_dst_init,tcp_connect_init,tcp_connect,tcp_write_xmit diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst index de0263302f16..6e7b20afd2d4 100644 --- a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst +++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst @@ -86,6 +86,7 @@ u8 sysctl_tcp_sack u8 sysctl_tcp_window_scaling tcp_syn_options,tcp_parse_options u8 sysctl_tcp_timestamps u8 sysctl_tcp_early_retrans read_mostly tcp_schedule_loss_probe(tcp_write_xmit) +u32 sysctl_tcp_rto_max_ms u8 sysctl_tcp_recovery tcp_fastretrans_alert u8 sysctl_tcp_thin_linear_timeouts tcp_retrans_timer(on_thin_streams) u8 sysctl_tcp_slow_start_after_idle unlikely(tcp_cwnd_validate-network-not-starved) diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst index 94c4680fdf3e..84803c59968a 100644 --- a/Documentation/networking/netconsole.rst +++ b/Documentation/networking/netconsole.rst @@ -17,6 +17,8 @@ Release prepend support by Breno Leitao <leitao@debian.org>, Jul 7 2023 Userdata append support by Matthew Wood <thepacketgeek@gmail.com>, Jan 22 2024 +Sysdata append support by Breno Leitao <leitao@debian.org>, Jan 15 2025 + Please send bug reports to Matt Mackall <mpm@selenic.com> Satyam Sharma <satyam.sharma@gmail.com>, and Cong Wang <xiyou.wangcong@gmail.com> @@ -238,6 +240,49 @@ Delete `userdata` entries with `rmdir`:: It is recommended to not write user data values with newlines. +CPU number auto population in userdata +-------------------------------------- + +Inside the netconsole configfs hierarchy, there is a file called +`cpu_nr` under the `userdata` directory. This file is used to enable or disable +the automatic CPU number population feature. This feature automatically +populates the CPU number that is sending the message. + +To enable the CPU number auto-population:: + + echo 1 > /sys/kernel/config/netconsole/target1/userdata/cpu_nr + +When this option is enabled, the netconsole messages will include an additional +line in the userdata field with the format `cpu=<cpu_number>`. This allows the +receiver of the netconsole messages to easily differentiate and demultiplex +messages originating from different CPUs, which is particularly useful when +dealing with parallel log output. + +Example:: + + echo "This is a message" > /dev/kmsg + 12,607,22085407756,-;This is a message + cpu=42 + +In this example, the message was sent by CPU 42. + +.. note:: + + If the user has set a conflicting `cpu` key in the userdata dictionary, + both keys will be reported, with the kernel-populated entry appearing after + the user one. For example:: + + # User-defined CPU entry + mkdir -p /sys/kernel/config/netconsole/target1/userdata/cpu + echo "1" > /sys/kernel/config/netconsole/target1/userdata/cpu/value + + Output might look like:: + + 12,607,22085407756,-;This is a message + cpu=1 + cpu=42 # kernel-populated value + + Extended console: ================= diff --git a/MAINTAINERS b/MAINTAINERS index 10893c91b1c1..de81a3d68396 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24150,7 +24150,7 @@ W: http://vtun.sourceforge.net/tun F: Documentation/networking/tuntap.rst F: arch/um/os-Linux/drivers/ F: drivers/net/tap.c -F: drivers/net/tun.c +F: drivers/net/tun* TURBOCHANNEL SUBSYSTEM M: "Maciej W. Rozycki" <macro@orcam.me.uk> diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index d9e705f4a697..bde6a496df5f 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -54,7 +54,6 @@ enum interruption_class { IRQIO_C70, IRQIO_TAP, IRQIO_VMR, - IRQIO_LCS, IRQIO_CTC, IRQIO_ADM, IRQIO_CSC, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ef7be599e1f7..7ca157ffab30 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -84,7 +84,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, - {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index ad50b77282f8..69ce1862eabe 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -498,8 +498,6 @@ static int irdma_save_msix_info(struct irdma_pci_f *rf) iw_qvlist->num_vectors = rf->msix_count; if (rf->msix_count <= num_online_cpus()) rf->msix_shared = true; - else if (rf->msix_count > num_online_cpus() + 1) - rf->msix_count = num_online_cpus() + 1; pmsix = rf->msix_entries; for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) { diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 3f13200ff71b..1ee8969595d3 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -206,6 +206,43 @@ static void irdma_lan_unregister_qset(struct irdma_sc_vsi *vsi, ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n"); } +static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) +{ + int i; + + rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX; + rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries), + GFP_KERNEL); + if (!rf->msix_entries) + return -ENOMEM; + + for (i = 0; i < rf->msix_count; i++) + if (ice_alloc_rdma_qvector(pf, &rf->msix_entries[i])) + break; + + if (i < IRDMA_MIN_MSIX) { + for (; i > 0; i--) + ice_free_rdma_qvector(pf, &rf->msix_entries[i]); + + kfree(rf->msix_entries); + return -ENOMEM; + } + + rf->msix_count = i; + + return 0; +} + +static void irdma_deinit_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) +{ + int i; + + for (i = 0; i < rf->msix_count; i++) + ice_free_rdma_qvector(pf, &rf->msix_entries[i]); + + kfree(rf->msix_entries); +} + static void irdma_remove(struct auxiliary_device *aux_dev) { struct iidc_auxiliary_dev *iidc_adev = container_of(aux_dev, @@ -216,6 +253,7 @@ static void irdma_remove(struct auxiliary_device *aux_dev) irdma_ib_unregister_device(iwdev); ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false); + irdma_deinit_interrupts(iwdev->rf, pf); pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn)); } @@ -230,9 +268,7 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf rf->gen_ops.unregister_qset = irdma_lan_unregister_qset; rf->hw.hw_addr = pf->hw.hw_addr; rf->pcidev = pf->pdev; - rf->msix_count = pf->num_rdma_msix; rf->pf_id = pf->hw.pf_id; - rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector]; rf->default_vsi.vsi_idx = vsi->vsi_num; rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY; @@ -281,6 +317,10 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ irdma_fill_device_info(iwdev, pf, vsi); rf = iwdev->rf; + err = irdma_init_interrupts(rf, pf); + if (err) + goto err_init_interrupts; + err = irdma_ctrl_init_hw(rf); if (err) goto err_ctrl_init; @@ -311,6 +351,8 @@ err_ibreg: err_rt_init: irdma_ctrl_deinit_hw(rf); err_ctrl_init: + irdma_deinit_interrupts(rf, pf); +err_init_interrupts: kfree(iwdev->rf); ib_dealloc_device(&iwdev->ibdev); diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 9f0ed6e84471..ef9a9b79d711 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -117,6 +117,9 @@ extern struct auxiliary_driver i40iw_auxiliary_drv; #define IRDMA_IRQ_NAME_STR_LEN (64) +#define IRDMA_NUM_AEQ_MSIX 1 +#define IRDMA_MIN_MSIX 2 + enum init_completion_state { INVALID_STATE = 0, INITIAL_STATE, diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e45bba240cbc..f6d0628a36d9 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -432,9 +432,6 @@ static struct net_device *bond_ipsec_dev(struct xfrm_state *xs) struct bonding *bond; struct slave *slave; - if (!bond_dev) - return NULL; - bond = netdev_priv(bond_dev); if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) return NULL; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 1c83af805209..9fd44e55d519 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2957,28 +2957,61 @@ static void mt753x_phylink_mac_link_up(struct phylink_config *config, mcr |= PMCR_FORCE_RX_FC_EN; } - if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) { - switch (speed) { - case SPEED_1000: - case SPEED_2500: - mcr |= PMCR_FORCE_EEE1G; - break; - case SPEED_100: - mcr |= PMCR_FORCE_EEE100; - break; - } - } - mt7530_set(priv, MT753X_PMCR_P(dp->index), mcr); } +static void mt753x_phylink_mac_disable_tx_lpi(struct phylink_config *config) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct mt7530_priv *priv = dp->ds->priv; + + mt7530_clear(priv, MT753X_PMCR_P(dp->index), + PMCR_FORCE_EEE1G | PMCR_FORCE_EEE100); +} + +static int mt753x_phylink_mac_enable_tx_lpi(struct phylink_config *config, + u32 timer, bool tx_clock_stop) +{ + struct dsa_port *dp = dsa_phylink_to_port(config); + struct mt7530_priv *priv = dp->ds->priv; + u32 val; + + /* If the timer is zero, then set LPI_MODE_EN, which allows the + * system to enter LPI mode immediately rather than waiting for + * the LPI threshold. + */ + if (!timer) + val = LPI_MODE_EN; + else if (FIELD_FIT(LPI_THRESH_MASK, timer)) + val = FIELD_PREP(LPI_THRESH_MASK, timer); + else + val = LPI_THRESH_MASK; + + mt7530_rmw(priv, MT753X_PMEEECR_P(dp->index), + LPI_THRESH_MASK | LPI_MODE_EN, val); + + mt7530_set(priv, MT753X_PMCR_P(dp->index), + PMCR_FORCE_EEE1G | PMCR_FORCE_EEE100); + + return 0; +} + static void mt753x_phylink_get_caps(struct dsa_switch *ds, int port, struct phylink_config *config) { struct mt7530_priv *priv = ds->priv; + u32 eeecr; config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE; + config->lpi_capabilities = MAC_100FD | MAC_1000FD | MAC_2500FD; + + eeecr = mt7530_read(priv, MT753X_PMEEECR_P(port)); + /* tx_lpi_timer should be in microseconds. The time units for + * LPI threshold are unspecified. + */ + config->lpi_timer_default = FIELD_GET(LPI_THRESH_MASK, eeecr); + priv->info->mac_port_get_caps(ds, port, config); } @@ -3088,18 +3121,9 @@ mt753x_setup(struct dsa_switch *ds) static int mt753x_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) { - struct mt7530_priv *priv = ds->priv; - u32 set, mask = LPI_THRESH_MASK | LPI_MODE_EN; - if (e->tx_lpi_timer > 0xFFF) return -EINVAL; - set = LPI_THRESH_SET(e->tx_lpi_timer); - if (!e->tx_lpi_enabled) - /* Force LPI Mode without a delay */ - set |= LPI_MODE_EN; - mt7530_rmw(priv, MT753X_PMEEECR_P(port), mask, set); - return 0; } @@ -3238,6 +3262,8 @@ static const struct phylink_mac_ops mt753x_phylink_mac_ops = { .mac_config = mt753x_phylink_mac_config, .mac_link_down = mt753x_phylink_mac_link_down, .mac_link_up = mt753x_phylink_mac_link_up, + .mac_disable_tx_lpi = mt753x_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = mt753x_phylink_mac_enable_tx_lpi, }; const struct mt753x_info mt753x_table[] = { diff --git a/drivers/net/dsa/rzn1_a5psw.c b/drivers/net/dsa/rzn1_a5psw.c index 66974379334a..31ea8130a495 100644 --- a/drivers/net/dsa/rzn1_a5psw.c +++ b/drivers/net/dsa/rzn1_a5psw.c @@ -1248,18 +1248,16 @@ static int a5psw_probe(struct platform_device *pdev) if (ret) goto clk_disable; - mdio = of_get_child_by_name(dev->of_node, "mdio"); - if (of_device_is_available(mdio)) { + mdio = of_get_available_child_by_name(dev->of_node, "mdio"); + if (mdio) { ret = a5psw_probe_mdio(a5psw, mdio); + of_node_put(mdio); if (ret) { - of_node_put(mdio); dev_err(dev, "Failed to register MDIO: %d\n", ret); goto hclk_disable; } } - of_node_put(mdio); - ds = &a5psw->ds; ds->dev = dev; ds->num_ports = A5PSW_PORTS_NUM; diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c index 84b7169f2974..8d535c033cef 100644 --- a/drivers/net/dsa/sja1105/sja1105_mdio.c +++ b/drivers/net/dsa/sja1105/sja1105_mdio.c @@ -468,13 +468,10 @@ int sja1105_mdiobus_register(struct dsa_switch *ds) if (rc) return rc; - mdio_node = of_get_child_by_name(switch_node, "mdios"); + mdio_node = of_get_available_child_by_name(switch_node, "mdios"); if (!mdio_node) return 0; - if (!of_device_is_available(mdio_node)) - goto out_put_mdio_node; - if (regs->mdio_100base_tx != SJA1105_RSV_ADDR) { rc = sja1105_mdiobus_base_tx_register(priv, mdio_node); if (rc) @@ -487,7 +484,6 @@ int sja1105_mdiobus_register(struct dsa_switch *ds) goto err_free_base_tx_mdiobus; } -out_put_mdio_node: of_node_put(mdio_node); return 0; diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c index 115f48b3342c..0a08da799255 100644 --- a/drivers/net/ethernet/actions/owl-emac.c +++ b/drivers/net/ethernet/actions/owl-emac.c @@ -1325,15 +1325,10 @@ static int owl_emac_mdio_init(struct net_device *netdev) struct device_node *mdio_node; int ret; - mdio_node = of_get_child_by_name(dev->of_node, "mdio"); + mdio_node = of_get_available_child_by_name(dev->of_node, "mdio"); if (!mdio_node) return -ENODEV; - if (!of_device_is_available(mdio_node)) { - ret = -ENODEV; - goto err_put_node; - } - priv->mii = devm_mdiobus_alloc(dev); if (!priv->mii) { ret = -ENOMEM; diff --git a/drivers/net/ethernet/apm/xgene-v2/mdio.c b/drivers/net/ethernet/apm/xgene-v2/mdio.c index eba06831aec2..6a17045a5f62 100644 --- a/drivers/net/ethernet/apm/xgene-v2/mdio.c +++ b/drivers/net/ethernet/apm/xgene-v2/mdio.c @@ -97,7 +97,6 @@ void xge_mdio_remove(struct net_device *ndev) int xge_mdio_config(struct net_device *ndev) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct xge_pdata *pdata = netdev_priv(ndev); struct device *dev = &pdata->pdev->dev; struct mii_bus *mdio_bus; @@ -137,17 +136,12 @@ int xge_mdio_config(struct net_device *ndev) goto err; } - linkmode_set_bit_array(phy_10_100_features_array, - ARRAY_SIZE(phy_10_100_features_array), - mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_AUI_BIT, mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_BNC_BIT, mask); - - linkmode_andnot(phydev->supported, phydev->supported, mask); - linkmode_copy(phydev->advertising, phydev->supported); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Full_BIT); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Full_BIT); + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT); + pdata->phy_speed = SPEED_UNKNOWN; return 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c index 414b2e448d59..787ea91802e7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c @@ -113,19 +113,9 @@ static const struct hwmon_ops aq_hwmon_ops = { .read_string = aq_hwmon_read_string, }; -static u32 aq_hwmon_temp_config[] = { - HWMON_T_INPUT | HWMON_T_LABEL, - HWMON_T_INPUT | HWMON_T_LABEL, - 0, -}; - -static const struct hwmon_channel_info aq_hwmon_temp = { - .type = hwmon_temp, - .config = aq_hwmon_temp_config, -}; - static const struct hwmon_channel_info * const aq_hwmon_info[] = { - &aq_hwmon_temp, + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_LABEL, + HWMON_T_INPUT | HWMON_T_LABEL), NULL, }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h index f5901f8e3907..f6b990b7f5b4 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h @@ -226,7 +226,6 @@ struct __packed offload_info { struct offload_port_info ports; struct offload_ka_info kas; struct offload_rr_info rrs; - u8 buf[]; }; struct __packed hw_atl_utils_fw_rpc { diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 6b6cb73482d7..1753bb87dfbd 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -1433,22 +1433,6 @@ int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout) } EXPORT_SYMBOL_GPL(octeon_wait_for_ddr_init); -/* Get the octeon id assigned to the octeon device passed as argument. - * This function is exported to other modules. - * @param dev - octeon device pointer passed as a void *. - * @return octeon device id - */ -int lio_get_device_id(void *dev) -{ - struct octeon_device *octeon_dev = (struct octeon_device *)dev; - u32 i; - - for (i = 0; i < MAX_OCTEON_DEVICES; i++) - if (octeon_device[i] == octeon_dev) - return octeon_dev->octeon_id; - return -1; -} - void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq) { u64 instr_cnt; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index d26364c2ac81..19344b21f8fb 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -705,13 +705,6 @@ octeon_get_dispatch(struct octeon_device *octeon_dev, u16 opcode, */ struct octeon_device *lio_get_device(u32 octeon_id); -/** Get the octeon id assigned to the octeon device passed as argument. - * This function is exported to other modules. - * @param dev - octeon device pointer passed as a void *. - * @return octeon device id - */ -int lio_get_device_id(void *dev); - /** Read windowed register. * @param oct - pointer to the Octeon device. * @param addr - Address of the register to read. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index c7c2c15a1815..95e6f015a6af 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1211,9 +1211,6 @@ struct adapter { struct timer_list flower_stats_timer; struct work_struct flower_stats_work; - /* Ethtool Dump */ - struct ethtool_dump eth_dump; - /* HMA */ struct hma_data hma; @@ -1233,6 +1230,10 @@ struct adapter { /* Ethtool n-tuple */ struct cxgb4_ethtool_filter *ethtool_filters; + + /* Ethtool Dump */ + /* Must be last - ends in a flex-array member. */ + struct ethtool_dump eth_dump; }; /* Support for "sched-class" command to allow a TX Scheduling Class to be diff --git a/drivers/net/ethernet/cisco/enic/Makefile b/drivers/net/ethernet/cisco/enic/Makefile index c3b6febfdbe4..b3b5196b2dfc 100644 --- a/drivers/net/ethernet/cisco/enic/Makefile +++ b/drivers/net/ethernet/cisco/enic/Makefile @@ -3,5 +3,5 @@ obj-$(CONFIG_ENIC) := enic.o enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \ enic_res.o enic_dev.o enic_pp.o vnic_dev.o vnic_rq.o vnic_vic.o \ - enic_ethtool.o enic_api.o enic_clsf.o + enic_ethtool.o enic_api.o enic_clsf.o enic_rq.o diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 10b7e02ba4d0..305ed12aa031 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -17,6 +17,7 @@ #include "vnic_nic.h" #include "vnic_rss.h" #include <linux/irq.h> +#include <net/page_pool/helpers.h> #define DRV_NAME "enic" #define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver" @@ -158,6 +159,7 @@ struct enic_rq_stats { u64 pkt_truncated; /* truncated pkts */ u64 no_skb; /* out of skbs */ u64 desc_skip; /* Rx pkt went into later buffer */ + u64 pp_alloc_fail; /* page pool alloc failure */ }; struct enic_wq { @@ -169,6 +171,7 @@ struct enic_wq { struct enic_rq { struct vnic_rq vrq; struct enic_rq_stats stats; + struct page_pool *pool; } ____cacheline_aligned; /* Per-instance private data structure */ @@ -223,7 +226,6 @@ struct enic { unsigned int cq_avail; unsigned int cq_count; struct enic_rfs_flw_tbl rfs_h; - u32 rx_copybreak; u8 rss_key[ENIC_RSS_LEN]; struct vnic_gen_stats gen_stats; }; diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index d607b4f0542c..18b929fc2879 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -608,43 +608,6 @@ static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, return ret; } -static int enic_get_tunable(struct net_device *dev, - const struct ethtool_tunable *tuna, void *data) -{ - struct enic *enic = netdev_priv(dev); - int ret = 0; - - switch (tuna->id) { - case ETHTOOL_RX_COPYBREAK: - *(u32 *)data = enic->rx_copybreak; - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -static int enic_set_tunable(struct net_device *dev, - const struct ethtool_tunable *tuna, - const void *data) -{ - struct enic *enic = netdev_priv(dev); - int ret = 0; - - switch (tuna->id) { - case ETHTOOL_RX_COPYBREAK: - enic->rx_copybreak = *(u32 *)data; - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - static u32 enic_get_rxfh_key_size(struct net_device *netdev) { return ENIC_RSS_LEN; @@ -727,8 +690,6 @@ static const struct ethtool_ops enic_ethtool_ops = { .get_coalesce = enic_get_coalesce, .set_coalesce = enic_set_coalesce, .get_rxnfc = enic_get_rxnfc, - .get_tunable = enic_get_tunable, - .set_tunable = enic_set_tunable, .get_rxfh_key_size = enic_get_rxfh_key_size, .get_rxfh = enic_get_rxfh, .set_rxfh = enic_set_rxfh, diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 49f6cab01ed5..f24fd29ea207 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -58,6 +58,7 @@ #include "enic_dev.h" #include "enic_pp.h" #include "enic_clsf.h" +#include "enic_rq.h" #define ENIC_NOTIFY_TIMER_PERIOD (2 * HZ) #define WQ_ENET_MAX_DESC_LEN (1 << WQ_ENET_LEN_BITS) @@ -68,8 +69,6 @@ #define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN 0x0044 /* enet dynamic vnic */ #define PCI_DEVICE_ID_CISCO_VIC_ENET_VF 0x0071 /* enet SRIOV VF */ -#define RX_COPYBREAK_DEFAULT 256 - /* Supported devices */ static const struct pci_device_id enic_id_table[] = { { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) }, @@ -1313,243 +1312,6 @@ nla_put_failure: return -EMSGSIZE; } -static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf) -{ - struct enic *enic = vnic_dev_priv(rq->vdev); - - if (!buf->os_buf) - return; - - dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, - DMA_FROM_DEVICE); - dev_kfree_skb_any(buf->os_buf); - buf->os_buf = NULL; -} - -static int enic_rq_alloc_buf(struct vnic_rq *rq) -{ - struct enic *enic = vnic_dev_priv(rq->vdev); - struct net_device *netdev = enic->netdev; - struct sk_buff *skb; - unsigned int len = netdev->mtu + VLAN_ETH_HLEN; - unsigned int os_buf_index = 0; - dma_addr_t dma_addr; - struct vnic_rq_buf *buf = rq->to_use; - - if (buf->os_buf) { - enic_queue_rq_desc(rq, buf->os_buf, os_buf_index, buf->dma_addr, - buf->len); - - return 0; - } - skb = netdev_alloc_skb_ip_align(netdev, len); - if (!skb) { - enic->rq[rq->index].stats.no_skb++; - return -ENOMEM; - } - - dma_addr = dma_map_single(&enic->pdev->dev, skb->data, len, - DMA_FROM_DEVICE); - if (unlikely(enic_dma_map_check(enic, dma_addr))) { - dev_kfree_skb(skb); - return -ENOMEM; - } - - enic_queue_rq_desc(rq, skb, os_buf_index, - dma_addr, len); - - return 0; -} - -static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size, - u32 pkt_len) -{ - if (ENIC_LARGE_PKT_THRESHOLD <= pkt_len) - pkt_size->large_pkt_bytes_cnt += pkt_len; - else - pkt_size->small_pkt_bytes_cnt += pkt_len; -} - -static bool enic_rxcopybreak(struct net_device *netdev, struct sk_buff **skb, - struct vnic_rq_buf *buf, u16 len) -{ - struct enic *enic = netdev_priv(netdev); - struct sk_buff *new_skb; - - if (len > enic->rx_copybreak) - return false; - new_skb = netdev_alloc_skb_ip_align(netdev, len); - if (!new_skb) - return false; - dma_sync_single_for_cpu(&enic->pdev->dev, buf->dma_addr, len, - DMA_FROM_DEVICE); - memcpy(new_skb->data, (*skb)->data, len); - *skb = new_skb; - - return true; -} - -static void enic_rq_indicate_buf(struct vnic_rq *rq, - struct cq_desc *cq_desc, struct vnic_rq_buf *buf, - int skipped, void *opaque) -{ - struct enic *enic = vnic_dev_priv(rq->vdev); - struct net_device *netdev = enic->netdev; - struct sk_buff *skb; - struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)]; - struct enic_rq_stats *rqstats = &enic->rq[rq->index].stats; - - u8 type, color, eop, sop, ingress_port, vlan_stripped; - u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof; - u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok; - u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc; - u8 packet_error; - u16 q_number, completed_index, bytes_written, vlan_tci, checksum; - u32 rss_hash; - bool outer_csum_ok = true, encap = false; - - rqstats->packets++; - if (skipped) { - rqstats->desc_skip++; - return; - } - - skb = buf->os_buf; - - cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc, - &type, &color, &q_number, &completed_index, - &ingress_port, &fcoe, &eop, &sop, &rss_type, - &csum_not_calc, &rss_hash, &bytes_written, - &packet_error, &vlan_stripped, &vlan_tci, &checksum, - &fcoe_sof, &fcoe_fc_crc_ok, &fcoe_enc_error, - &fcoe_eof, &tcp_udp_csum_ok, &udp, &tcp, - &ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment, - &fcs_ok); - - if (packet_error) { - - if (!fcs_ok) { - if (bytes_written > 0) - rqstats->bad_fcs++; - else if (bytes_written == 0) - rqstats->pkt_truncated++; - } - - dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, - DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - buf->os_buf = NULL; - - return; - } - - if (eop && bytes_written > 0) { - - /* Good receive - */ - rqstats->bytes += bytes_written; - if (!enic_rxcopybreak(netdev, &skb, buf, bytes_written)) { - buf->os_buf = NULL; - dma_unmap_single(&enic->pdev->dev, buf->dma_addr, - buf->len, DMA_FROM_DEVICE); - } - prefetch(skb->data - NET_IP_ALIGN); - - skb_put(skb, bytes_written); - skb->protocol = eth_type_trans(skb, netdev); - skb_record_rx_queue(skb, q_number); - if ((netdev->features & NETIF_F_RXHASH) && rss_hash && - (type == 3)) { - switch (rss_type) { - case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4: - case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6: - case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX: - skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4); - rqstats->l4_rss_hash++; - break; - case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4: - case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6: - case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX: - skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3); - rqstats->l3_rss_hash++; - break; - } - } - if (enic->vxlan.vxlan_udp_port_number) { - switch (enic->vxlan.patch_level) { - case 0: - if (fcoe) { - encap = true; - outer_csum_ok = fcoe_fc_crc_ok; - } - break; - case 2: - if ((type == 7) && - (rss_hash & BIT(0))) { - encap = true; - outer_csum_ok = (rss_hash & BIT(1)) && - (rss_hash & BIT(2)); - } - break; - } - } - - /* Hardware does not provide whole packet checksum. It only - * provides pseudo checksum. Since hw validates the packet - * checksum but not provide us the checksum value. use - * CHECSUM_UNNECESSARY. - * - * In case of encap pkt tcp_udp_csum_ok/tcp_udp_csum_ok is - * inner csum_ok. outer_csum_ok is set by hw when outer udp - * csum is correct or is zero. - */ - if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc && - tcp_udp_csum_ok && outer_csum_ok && - (ipv4_csum_ok || ipv6)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = encap; - if (encap) - rqstats->csum_unnecessary_encap++; - else - rqstats->csum_unnecessary++; - } - - if (vlan_stripped) { - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); - rqstats->vlan_stripped++; - } - skb_mark_napi_id(skb, &enic->napi[rq->index]); - if (!(netdev->features & NETIF_F_GRO)) - netif_receive_skb(skb); - else - napi_gro_receive(&enic->napi[q_number], skb); - if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce) - enic_intr_update_pkt_size(&cq->pkt_size_counter, - bytes_written); - } else { - - /* Buffer overflow - */ - rqstats->pkt_truncated++; - dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, - DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - buf->os_buf = NULL; - } -} - -static int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, - u8 type, u16 q_number, u16 completed_index, void *opaque) -{ - struct enic *enic = vnic_dev_priv(vdev); - - vnic_rq_service(&enic->rq[q_number].vrq, cq_desc, - completed_index, VNIC_RQ_RETURN_DESC, - enic_rq_indicate_buf, opaque); - - return 0; -} - static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq) { unsigned int intr = enic_msix_rq_intr(enic, rq->index); @@ -1972,6 +1734,17 @@ static int enic_open(struct net_device *netdev) struct enic *enic = netdev_priv(netdev); unsigned int i; int err, ret; + unsigned int max_pkt_len = netdev->mtu + VLAN_ETH_HLEN; + struct page_pool_params pp_params = { + .order = get_order(max_pkt_len), + .pool_size = enic->config.rq_desc_count, + .nid = dev_to_node(&enic->pdev->dev), + .dev = &enic->pdev->dev, + .dma_dir = DMA_FROM_DEVICE, + .max_len = (max_pkt_len > PAGE_SIZE) ? max_pkt_len : PAGE_SIZE, + .netdev = netdev, + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + }; err = enic_request_intr(enic); if (err) { @@ -1989,6 +1762,16 @@ static int enic_open(struct net_device *netdev) } for (i = 0; i < enic->rq_count; i++) { + /* create a page pool for each RQ */ + pp_params.napi = &enic->napi[i]; + pp_params.queue_idx = i; + enic->rq[i].pool = page_pool_create(&pp_params); + if (IS_ERR(enic->rq[i].pool)) { + err = PTR_ERR(enic->rq[i].pool); + enic->rq[i].pool = NULL; + goto err_out_free_rq; + } + /* enable rq before updating rq desc */ vnic_rq_enable(&enic->rq[i].vrq); vnic_rq_fill(&enic->rq[i].vrq, enic_rq_alloc_buf); @@ -2029,8 +1812,11 @@ static int enic_open(struct net_device *netdev) err_out_free_rq: for (i = 0; i < enic->rq_count; i++) { ret = vnic_rq_disable(&enic->rq[i].vrq); - if (!ret) + if (!ret) { vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf); + page_pool_destroy(enic->rq[i].pool); + enic->rq[i].pool = NULL; + } } enic_dev_notify_unset(enic); err_out_free_intr: @@ -2088,8 +1874,11 @@ static int enic_stop(struct net_device *netdev) for (i = 0; i < enic->wq_count; i++) vnic_wq_clean(&enic->wq[i].vwq, enic_free_wq_buf); - for (i = 0; i < enic->rq_count; i++) + for (i = 0; i < enic->rq_count; i++) { vnic_rq_clean(&enic->rq[i].vrq, enic_free_rq_buf); + page_pool_destroy(enic->rq[i].pool); + enic->rq[i].pool = NULL; + } for (i = 0; i < enic->cq_count; i++) vnic_cq_clean(&enic->cq[i]); for (i = 0; i < enic->intr_count; i++) @@ -2599,6 +2388,7 @@ static void enic_get_queue_stats_rx(struct net_device *dev, int idx, rxs->hw_drop_overruns = rqstats->pkt_truncated; rxs->csum_unnecessary = rqstats->csum_unnecessary + rqstats->csum_unnecessary_encap; + rxs->alloc_fail = rqstats->pp_alloc_fail; } static void enic_get_queue_stats_tx(struct net_device *dev, int idx, @@ -2626,6 +2416,7 @@ static void enic_get_base_stats(struct net_device *dev, rxs->hw_drops = 0; rxs->hw_drop_overruns = 0; rxs->csum_unnecessary = 0; + rxs->alloc_fail = 0; txs->bytes = 0; txs->packets = 0; txs->csum_none = 0; @@ -3179,7 +2970,6 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(dev, "Cannot register net device, aborting\n"); goto err_out_dev_deinit; } - enic->rx_copybreak = RX_COPYBREAK_DEFAULT; return 0; diff --git a/drivers/net/ethernet/cisco/enic/enic_rq.c b/drivers/net/ethernet/cisco/enic/enic_rq.c new file mode 100644 index 000000000000..e3228ef7988a --- /dev/null +++ b/drivers/net/ethernet/cisco/enic/enic_rq.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright 2024 Cisco Systems, Inc. All rights reserved. + +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <net/busy_poll.h> +#include "enic.h" +#include "enic_res.h" +#include "enic_rq.h" +#include "vnic_rq.h" +#include "cq_enet_desc.h" + +#define ENIC_LARGE_PKT_THRESHOLD 1000 + +static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size, + u32 pkt_len) +{ + if (pkt_len > ENIC_LARGE_PKT_THRESHOLD) + pkt_size->large_pkt_bytes_cnt += pkt_len; + else + pkt_size->small_pkt_bytes_cnt += pkt_len; +} + +int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, u8 type, + u16 q_number, u16 completed_index, void *opaque) +{ + struct enic *enic = vnic_dev_priv(vdev); + + vnic_rq_service(&enic->rq[q_number].vrq, cq_desc, completed_index, + VNIC_RQ_RETURN_DESC, enic_rq_indicate_buf, opaque); + return 0; +} + +static void enic_rq_set_skb_flags(struct vnic_rq *vrq, u8 type, u32 rss_hash, + u8 rss_type, u8 fcoe, u8 fcoe_fc_crc_ok, + u8 vlan_stripped, u8 csum_not_calc, + u8 tcp_udp_csum_ok, u8 ipv6, u8 ipv4_csum_ok, + u16 vlan_tci, struct sk_buff *skb) +{ + struct enic *enic = vnic_dev_priv(vrq->vdev); + struct net_device *netdev = enic->netdev; + struct enic_rq_stats *rqstats = &enic->rq[vrq->index].stats; + bool outer_csum_ok = true, encap = false; + + if ((netdev->features & NETIF_F_RXHASH) && rss_hash && type == 3) { + switch (rss_type) { + case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4: + case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6: + case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX: + skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4); + rqstats->l4_rss_hash++; + break; + case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4: + case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6: + case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX: + skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3); + rqstats->l3_rss_hash++; + break; + } + } + if (enic->vxlan.vxlan_udp_port_number) { + switch (enic->vxlan.patch_level) { + case 0: + if (fcoe) { + encap = true; + outer_csum_ok = fcoe_fc_crc_ok; + } + break; + case 2: + if (type == 7 && (rss_hash & BIT(0))) { + encap = true; + outer_csum_ok = (rss_hash & BIT(1)) && + (rss_hash & BIT(2)); + } + break; + } + } + + /* Hardware does not provide whole packet checksum. It only + * provides pseudo checksum. Since hw validates the packet + * checksum but not provide us the checksum value. use + * CHECSUM_UNNECESSARY. + * + * In case of encap pkt tcp_udp_csum_ok/tcp_udp_csum_ok is + * inner csum_ok. outer_csum_ok is set by hw when outer udp + * csum is correct or is zero. + */ + if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc && + tcp_udp_csum_ok && outer_csum_ok && (ipv4_csum_ok || ipv6)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = encap; + if (encap) + rqstats->csum_unnecessary_encap++; + else + rqstats->csum_unnecessary++; + } + + if (vlan_stripped) { + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + rqstats->vlan_stripped++; + } +} + +static bool enic_rq_pkt_error(struct vnic_rq *vrq, u8 packet_error, u8 fcs_ok, + u16 bytes_written) +{ + struct enic *enic = vnic_dev_priv(vrq->vdev); + struct enic_rq_stats *rqstats = &enic->rq[vrq->index].stats; + + if (packet_error) { + if (!fcs_ok) { + if (bytes_written > 0) + rqstats->bad_fcs++; + else if (bytes_written == 0) + rqstats->pkt_truncated++; + } + return true; + } + return false; +} + +int enic_rq_alloc_buf(struct vnic_rq *rq) +{ + struct enic *enic = vnic_dev_priv(rq->vdev); + struct net_device *netdev = enic->netdev; + struct enic_rq *erq = &enic->rq[rq->index]; + struct enic_rq_stats *rqstats = &erq->stats; + unsigned int offset = 0; + unsigned int len = netdev->mtu + VLAN_ETH_HLEN; + unsigned int os_buf_index = 0; + dma_addr_t dma_addr; + struct vnic_rq_buf *buf = rq->to_use; + struct page *page; + unsigned int truesize = len; + + if (buf->os_buf) { + enic_queue_rq_desc(rq, buf->os_buf, os_buf_index, buf->dma_addr, + buf->len); + + return 0; + } + + page = page_pool_dev_alloc(erq->pool, &offset, &truesize); + if (unlikely(!page)) { + rqstats->pp_alloc_fail++; + return -ENOMEM; + } + buf->offset = offset; + buf->truesize = truesize; + dma_addr = page_pool_get_dma_addr(page) + offset; + enic_queue_rq_desc(rq, (void *)page, os_buf_index, dma_addr, len); + + return 0; +} + +void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf) +{ + struct enic *enic = vnic_dev_priv(rq->vdev); + struct enic_rq *erq = &enic->rq[rq->index]; + + if (!buf->os_buf) + return; + + page_pool_put_full_page(erq->pool, (struct page *)buf->os_buf, true); + buf->os_buf = NULL; +} + +void enic_rq_indicate_buf(struct vnic_rq *rq, struct cq_desc *cq_desc, + struct vnic_rq_buf *buf, int skipped, void *opaque) +{ + struct enic *enic = vnic_dev_priv(rq->vdev); + struct sk_buff *skb; + struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)]; + struct enic_rq_stats *rqstats = &enic->rq[rq->index].stats; + struct napi_struct *napi; + + u8 type, color, eop, sop, ingress_port, vlan_stripped; + u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof; + u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok; + u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc; + u8 packet_error; + u16 q_number, completed_index, bytes_written, vlan_tci, checksum; + u32 rss_hash; + + rqstats->packets++; + if (skipped) { + rqstats->desc_skip++; + return; + } + + cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc, &type, &color, + &q_number, &completed_index, &ingress_port, &fcoe, + &eop, &sop, &rss_type, &csum_not_calc, &rss_hash, + &bytes_written, &packet_error, &vlan_stripped, + &vlan_tci, &checksum, &fcoe_sof, &fcoe_fc_crc_ok, + &fcoe_enc_error, &fcoe_eof, &tcp_udp_csum_ok, &udp, + &tcp, &ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment, + &fcs_ok); + + if (enic_rq_pkt_error(rq, packet_error, fcs_ok, bytes_written)) + return; + + if (eop && bytes_written > 0) { + /* Good receive + */ + rqstats->bytes += bytes_written; + napi = &enic->napi[rq->index]; + skb = napi_get_frags(napi); + if (unlikely(!skb)) { + net_warn_ratelimited("%s: skb alloc error rq[%d], desc[%d]\n", + enic->netdev->name, rq->index, + completed_index); + rqstats->no_skb++; + return; + } + + prefetch(skb->data - NET_IP_ALIGN); + + dma_sync_single_for_cpu(&enic->pdev->dev, buf->dma_addr, + bytes_written, DMA_FROM_DEVICE); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + (struct page *)buf->os_buf, buf->offset, + bytes_written, buf->truesize); + skb_record_rx_queue(skb, q_number); + enic_rq_set_skb_flags(rq, type, rss_hash, rss_type, fcoe, + fcoe_fc_crc_ok, vlan_stripped, + csum_not_calc, tcp_udp_csum_ok, ipv6, + ipv4_csum_ok, vlan_tci, skb); + skb_mark_for_recycle(skb); + napi_gro_frags(napi); + if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce) + enic_intr_update_pkt_size(&cq->pkt_size_counter, + bytes_written); + buf->os_buf = NULL; + buf->dma_addr = 0; + buf = buf->next; + } else { + /* Buffer overflow + */ + rqstats->pkt_truncated++; + } +} diff --git a/drivers/net/ethernet/cisco/enic/enic_rq.h b/drivers/net/ethernet/cisco/enic/enic_rq.h new file mode 100644 index 000000000000..a75d07562686 --- /dev/null +++ b/drivers/net/ethernet/cisco/enic/enic_rq.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright 2024 Cisco Systems, Inc. All rights reserved. + */ + +int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc, u8 type, + u16 q_number, u16 completed_index, void *opaque); +void enic_rq_indicate_buf(struct vnic_rq *rq, struct cq_desc *cq_desc, + struct vnic_rq_buf *buf, int skipped, void *opaque); +int enic_rq_alloc_buf(struct vnic_rq *rq); +void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf); diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.h b/drivers/net/ethernet/cisco/enic/vnic_rq.h index 0bc595abc03b..2ee4be2b9a34 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.h +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.h @@ -61,6 +61,8 @@ struct vnic_rq_buf { unsigned int index; void *desc; uint64_t wr_id; + unsigned int offset; + unsigned int truesize; }; enum enic_poll_state { diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f7c4ce8e9a26..a86cfebedaa8 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1093,6 +1093,29 @@ static void fec_enet_enable_ring(struct net_device *ndev) } } +/* Whack a reset. We should wait for this. + * For i.MX6SX SOC, enet use AXI bus, we use disable MAC + * instead of reset MAC itself. + */ +static void fec_ctrl_reset(struct fec_enet_private *fep, bool allow_wol) +{ + u32 val; + + if (!allow_wol || !(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) { + if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES || + ((fep->quirks & FEC_QUIRK_NO_HARD_RESET) && fep->link)) { + writel(0, fep->hwp + FEC_ECNTRL); + } else { + writel(FEC_ECR_RESET, fep->hwp + FEC_ECNTRL); + udelay(10); + } + } else { + val = readl(fep->hwp + FEC_ECNTRL); + val |= (FEC_ECR_MAGICEN | FEC_ECR_SLEEP); + writel(val, fep->hwp + FEC_ECNTRL); + } +} + /* * This function is called to start or restart the FEC during a link * change, transmit timeout, or to reconfigure the FEC. The network @@ -1109,17 +1132,7 @@ fec_restart(struct net_device *ndev) if (fep->bufdesc_ex) fec_ptp_save_state(fep); - /* Whack a reset. We should wait for this. - * For i.MX6SX SOC, enet use AXI bus, we use disable MAC - * instead of reset MAC itself. - */ - if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES || - ((fep->quirks & FEC_QUIRK_NO_HARD_RESET) && fep->link)) { - writel(0, fep->hwp + FEC_ECNTRL); - } else { - writel(1, fep->hwp + FEC_ECNTRL); - udelay(10); - } + fec_ctrl_reset(fep, false); /* * enet-mac reset will reset mac address registers too, @@ -1373,22 +1386,7 @@ fec_stop(struct net_device *ndev) if (fep->bufdesc_ex) fec_ptp_save_state(fep); - /* Whack a reset. We should wait for this. - * For i.MX6SX SOC, enet use AXI bus, we use disable MAC - * instead of reset MAC itself. - */ - if (!(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) { - if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES) { - writel(0, fep->hwp + FEC_ECNTRL); - } else { - writel(FEC_ECR_RESET, fep->hwp + FEC_ECNTRL); - udelay(10); - } - } else { - val = readl(fep->hwp + FEC_ECNTRL); - val |= (FEC_ECR_MAGICEN | FEC_ECR_SLEEP); - writel(val, fep->hwp + FEC_ECNTRL); - } + fec_ctrl_reset(fep, true); writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 435138f4699d..deb35b38c976 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1647,20 +1647,11 @@ static void gfar_configure_serdes(struct net_device *dev) */ static int init_phy(struct net_device *dev) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct gfar_private *priv = netdev_priv(dev); phy_interface_t interface = priv->interface; struct phy_device *phydev; struct ethtool_keee edata; - linkmode_set_bit_array(phy_10_100_features_array, - ARRAY_SIZE(phy_10_100_features_array), - mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, mask); - linkmode_set_bit(ETHTOOL_LINK_MODE_MII_BIT, mask); - if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT) - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mask); - priv->oldlink = 0; priv->oldspeed = 0; priv->oldduplex = -1; @@ -1675,9 +1666,8 @@ static int init_phy(struct net_device *dev) if (interface == PHY_INTERFACE_MODE_SGMII) gfar_configure_serdes(dev); - /* Remove any features not supported by the controller */ - linkmode_and(phydev->supported, phydev->supported, mask); - linkmode_copy(phydev->advertising, phydev->supported); + if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT)) + phy_set_max_speed(phydev, SPEED_100); /* Add support for flow control */ phy_support_asym_pause(phydev); diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 38789faae706..84f92f6384e7 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -890,8 +890,6 @@ struct ucc_geth_hardware_statistics { addresses */ #define TX_TIMEOUT (1*HZ) -#define PHY_INIT_TIMEOUT 100000 -#define PHY_CHANGE_TIME 2 /* Fast Ethernet (10/100 Mbps) */ #define UCC_GETH_URFS_INIT 512 /* Rx virtual FIFO size diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 25b8a3556004..417dfa18daae 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2554,17 +2554,12 @@ static int emac_dt_mdio_probe(struct emac_instance *dev) struct mii_bus *bus; int res; - mii_np = of_get_child_by_name(dev->ofdev->dev.of_node, "mdio"); + mii_np = of_get_available_child_by_name(dev->ofdev->dev.of_node, "mdio"); if (!mii_np) { dev_err(&dev->ofdev->dev, "no mdio definition found."); return -ENODEV; } - if (!of_device_is_available(mii_np)) { - res = -ENODEV; - goto put_node; - } - bus = devm_mdiobus_alloc(&dev->ofdev->dev); if (!bus) { res = -ENOMEM; diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 24ec9a4f1ffa..26b9938bc38d 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -336,7 +336,7 @@ config ICE_SWITCHDEV config ICE_HWTS bool "Support HW cross-timestamp on platforms with PTM support" default y - depends on ICE && X86 + depends on ICE && X86 && PCIE_PTM help Say Y to enable hardware supported cross-timestamping on platforms with PCIe PTM support. The cross-timestamp is available through diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index d7df2a0ed629..44249dd91bd6 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -331,8 +331,21 @@ void e1000e_update_mc_addr_list_generic(struct e1000_hw *hw, } /* replace the entire MTA table */ - for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) + for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) { E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]); + + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* + * Do not queue up too many posted writes to prevent + * increased latency for other devices on the + * interconnect. Flush after each 8th posted write, + * to keep additional execution time low while still + * preventing increased latency. + */ + if (!(i % 8) && i) + e1e_flush(); + } + } e1e_flush(); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index e28f1905a4a0..9f47388eaba5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -2,6 +2,7 @@ /* Copyright(c) 2018 Intel Corporation. */ #include <linux/bpf_trace.h> +#include <linux/unroll.h> #include <net/xdp_sock_drv.h> #include "i40e_txrx_common.h" #include "i40e_xsk.h" @@ -529,7 +530,8 @@ static void i40e_xmit_pkt_batch(struct i40e_ring *xdp_ring, struct xdp_desc *des dma_addr_t dma; u32 i; - loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { + unrolled_count(PKTS_PER_BATCH) + for (i = 0; i < PKTS_PER_BATCH; i++) { u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(&desc[i]); dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc[i].addr); diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h index ef156fad52f2..dd16351a7af8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h @@ -6,7 +6,7 @@ #include <linux/types.h> -/* This value should match the pragma in the loop_unrolled_for +/* This value should match the pragma in the unrolled_count() * macro. Why 4? It is strictly empirical. It seems to be a good * compromise between the advantage of having simultaneous outstanding * reads to the DMA array that can hide each others latency and the @@ -14,14 +14,6 @@ */ #define PKTS_PER_BATCH 4 -#ifdef __clang__ -#define loop_unrolled_for _Pragma("clang loop unroll_count(4)") for -#elif __GNUC__ >= 8 -#define loop_unrolled_for _Pragma("GCC unroll 4") for -#else -#define loop_unrolled_for for -#endif - struct i40e_ring; struct i40e_vsi; struct net_device; diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index dbdb83567364..fcb199efbea5 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -1205,6 +1205,25 @@ static int ice_devlink_set_parent(struct devlink_rate *devlink_rate, return status; } +static void ice_set_min_max_msix(struct ice_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, + &val); + if (!err) + pf->msix.min = val.vu32; + + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, + &val); + if (!err) + pf->msix.max = val.vu32; +} + /** * ice_devlink_reinit_up - do reinit of the given PF * @pf: pointer to the PF struct @@ -1220,6 +1239,9 @@ static int ice_devlink_reinit_up(struct ice_pf *pf) return err; } + /* load MSI-X values */ + ice_set_min_max_msix(pf); + err = ice_init_dev(pf); if (err) goto unroll_hw_init; @@ -1533,6 +1555,43 @@ static int ice_devlink_local_fwd_validate(struct devlink *devlink, u32 id, return 0; } +static int +ice_devlink_msix_max_pf_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + + if (val.vu32 > pf->hw.func_caps.common_cap.num_msix_vectors) + return -EINVAL; + + return 0; +} + +static int +ice_devlink_msix_min_pf_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if (val.vu32 < ICE_MIN_MSIX) + return -EINVAL; + + return 0; +} + +static int ice_devlink_enable_rdma_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + bool new_state = val.vbool; + + if (new_state && !test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + return -EOPNOTSUPP; + + return 0; +} + enum ice_param_id { ICE_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS, @@ -1548,6 +1607,17 @@ static const struct devlink_param ice_dvl_rdma_params[] = { ice_devlink_enable_iw_get, ice_devlink_enable_iw_set, ice_devlink_enable_iw_validate), + DEVLINK_PARAM_GENERIC(ENABLE_RDMA, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, ice_devlink_enable_rdma_validate), +}; + +static const struct devlink_param ice_dvl_msix_params[] = { + DEVLINK_PARAM_GENERIC(MSIX_VEC_PER_PF_MAX, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, ice_devlink_msix_max_pf_validate), + DEVLINK_PARAM_GENERIC(MSIX_VEC_PER_PF_MIN, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, ice_devlink_msix_min_pf_validate), }; static const struct devlink_param ice_dvl_sched_params[] = { @@ -1651,6 +1721,7 @@ void ice_devlink_unregister(struct ice_pf *pf) int ice_devlink_register_params(struct ice_pf *pf) { struct devlink *devlink = priv_to_devlink(pf); + union devlink_param_value value; struct ice_hw *hw = &pf->hw; int status; @@ -1659,10 +1730,39 @@ int ice_devlink_register_params(struct ice_pf *pf) if (status) return status; + status = devl_params_register(devlink, ice_dvl_msix_params, + ARRAY_SIZE(ice_dvl_msix_params)); + if (status) + goto unregister_rdma_params; + if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en) status = devl_params_register(devlink, ice_dvl_sched_params, ARRAY_SIZE(ice_dvl_sched_params)); + if (status) + goto unregister_msix_params; + + value.vu32 = pf->msix.max; + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, + value); + value.vu32 = pf->msix.min; + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, + value); + + value.vbool = test_bit(ICE_FLAG_RDMA_ENA, pf->flags); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + value); + + return 0; +unregister_msix_params: + devl_params_unregister(devlink, ice_dvl_msix_params, + ARRAY_SIZE(ice_dvl_msix_params)); +unregister_rdma_params: + devl_params_unregister(devlink, ice_dvl_rdma_params, + ARRAY_SIZE(ice_dvl_rdma_params)); return status; } @@ -1673,6 +1773,8 @@ void ice_devlink_unregister_params(struct ice_pf *pf) devl_params_unregister(devlink, ice_dvl_rdma_params, ARRAY_SIZE(ice_dvl_rdma_params)); + devl_params_unregister(devlink, ice_dvl_msix_params, + ARRAY_SIZE(ice_dvl_msix_params)); if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en) devl_params_unregister(devlink, ice_dvl_sched_params, diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 71e05d30f0fd..c9104b13e1d2 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -97,9 +97,6 @@ #define ICE_MIN_LAN_OICR_MSIX 1 #define ICE_MIN_MSIX (ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX) #define ICE_FDIR_MSIX 2 -#define ICE_RDMA_NUM_AEQ_MSIX 4 -#define ICE_MIN_RDMA_MSIX 2 -#define ICE_ESWITCH_MSIX 1 #define ICE_NO_VSI 0xffff #define ICE_VSI_MAP_CONTIG 0 #define ICE_VSI_MAP_SCATTER 1 @@ -542,6 +539,14 @@ struct ice_agg_node { u8 valid; }; +struct ice_pf_msix { + u32 cur; + u32 min; + u32 max; + u32 total; + u32 rest; +}; + struct ice_pf { struct pci_dev *pdev; struct ice_adapter *adapter; @@ -556,13 +561,7 @@ struct ice_pf { /* OS reserved IRQ details */ struct msix_entry *msix_entries; struct ice_irq_tracker irq_tracker; - /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the - * number of MSIX vectors needed for all SR-IOV VFs from the number of - * MSIX vectors allowed on this PF. - */ - u16 sriov_base_vector; - unsigned long *sriov_irq_bm; /* bitmap to track irq usage */ - u16 sriov_irq_size; /* size of the irq_bm bitmap */ + struct ice_virt_irq_tracker virt_irq_tracker; u16 ctrl_vsi_idx; /* control VSI index in pf->vsi array */ @@ -612,7 +611,7 @@ struct ice_pf { struct msi_map ll_ts_irq; /* LL_TS interrupt MSIX vector */ u16 max_pf_txqs; /* Total Tx queues PF wide */ u16 max_pf_rxqs; /* Total Rx queues PF wide */ - u16 num_lan_msix; /* Total MSIX vectors for base driver */ + struct ice_pf_msix msix; u16 num_lan_tx; /* num LAN Tx queues setup */ u16 num_lan_rx; /* num LAN Rx queues setup */ u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! */ @@ -1047,10 +1046,5 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf) clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); } -static inline enum ice_phy_model ice_get_phy_model(const struct ice_hw *hw) -{ - return hw->ptp.phy_model; -} - extern const struct xdp_metadata_ops ice_xdp_md_ops; #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index b2af8e3586f7..0e862f20427a 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -801,13 +801,11 @@ int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi) return 0; err_out: - while (v_idx--) - ice_free_q_vector(vsi, v_idx); - dev_err(dev, "Failed to allocate %d q_vector for VSI %d, ret=%d\n", - vsi->num_q_vectors, vsi->vsi_num, err); - vsi->num_q_vectors = 0; - return err; + dev_info(dev, "Failed to allocate %d q_vectors for VSI %d, new value %d", + vsi->num_q_vectors, vsi->vsi_num, v_idx); + vsi->num_q_vectors = v_idx; + return v_idx ? 0 : err; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 7a2a2e8da8fa..aaa592ffd2d8 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -186,7 +186,7 @@ static int ice_set_mac_type(struct ice_hw *hw) * ice_is_generic_mac - check if device's mac_type is generic * @hw: pointer to the hardware structure * - * Return: true if mac_type is generic (with SBQ support), false if not + * Return: true if mac_type is ICE_MAC_GENERIC*, false otherwise. */ bool ice_is_generic_mac(struct ice_hw *hw) { @@ -195,120 +195,6 @@ bool ice_is_generic_mac(struct ice_hw *hw) } /** - * ice_is_e810 - * @hw: pointer to the hardware structure - * - * returns true if the device is E810 based, false if not. - */ -bool ice_is_e810(struct ice_hw *hw) -{ - return hw->mac_type == ICE_MAC_E810; -} - -/** - * ice_is_e810t - * @hw: pointer to the hardware structure - * - * returns true if the device is E810T based, false if not. - */ -bool ice_is_e810t(struct ice_hw *hw) -{ - switch (hw->device_id) { - case ICE_DEV_ID_E810C_SFP: - switch (hw->subsystem_device_id) { - case ICE_SUBDEV_ID_E810T: - case ICE_SUBDEV_ID_E810T2: - case ICE_SUBDEV_ID_E810T3: - case ICE_SUBDEV_ID_E810T4: - case ICE_SUBDEV_ID_E810T6: - case ICE_SUBDEV_ID_E810T7: - return true; - } - break; - case ICE_DEV_ID_E810C_QSFP: - switch (hw->subsystem_device_id) { - case ICE_SUBDEV_ID_E810T2: - case ICE_SUBDEV_ID_E810T3: - case ICE_SUBDEV_ID_E810T5: - return true; - } - break; - default: - break; - } - - return false; -} - -/** - * ice_is_e822 - Check if a device is E822 family device - * @hw: pointer to the hardware structure - * - * Return: true if the device is E822 based, false if not. - */ -bool ice_is_e822(struct ice_hw *hw) -{ - switch (hw->device_id) { - case ICE_DEV_ID_E822C_BACKPLANE: - case ICE_DEV_ID_E822C_QSFP: - case ICE_DEV_ID_E822C_SFP: - case ICE_DEV_ID_E822C_10G_BASE_T: - case ICE_DEV_ID_E822C_SGMII: - case ICE_DEV_ID_E822L_BACKPLANE: - case ICE_DEV_ID_E822L_SFP: - case ICE_DEV_ID_E822L_10G_BASE_T: - case ICE_DEV_ID_E822L_SGMII: - return true; - default: - return false; - } -} - -/** - * ice_is_e823 - * @hw: pointer to the hardware structure - * - * returns true if the device is E823-L or E823-C based, false if not. - */ -bool ice_is_e823(struct ice_hw *hw) -{ - switch (hw->device_id) { - case ICE_DEV_ID_E823L_BACKPLANE: - case ICE_DEV_ID_E823L_SFP: - case ICE_DEV_ID_E823L_10G_BASE_T: - case ICE_DEV_ID_E823L_1GBE: - case ICE_DEV_ID_E823L_QSFP: - case ICE_DEV_ID_E823C_BACKPLANE: - case ICE_DEV_ID_E823C_QSFP: - case ICE_DEV_ID_E823C_SFP: - case ICE_DEV_ID_E823C_10G_BASE_T: - case ICE_DEV_ID_E823C_SGMII: - return true; - default: - return false; - } -} - -/** - * ice_is_e825c - Check if a device is E825C family device - * @hw: pointer to the hardware structure - * - * Return: true if the device is E825-C based, false if not. - */ -bool ice_is_e825c(struct ice_hw *hw) -{ - switch (hw->device_id) { - case ICE_DEV_ID_E825C_BACKPLANE: - case ICE_DEV_ID_E825C_QSFP: - case ICE_DEV_ID_E825C_SFP: - case ICE_DEV_ID_E825C_SGMII: - return true; - default: - return false; - } -} - -/** * ice_is_pf_c827 - check if pf contains c827 phy * @hw: pointer to the hw struct * @@ -2408,7 +2294,7 @@ ice_parse_1588_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, info->tmr_index_owned = ((number & ICE_TS_TMR_IDX_OWND_M) != 0); info->tmr_index_assoc = ((number & ICE_TS_TMR_IDX_ASSOC_M) != 0); - if (!ice_is_e825c(hw)) { + if (hw->mac_type != ICE_MAC_GENERIC_3K_E825) { info->clk_freq = FIELD_GET(ICE_TS_CLK_FREQ_M, number); info->clk_src = ((number & ICE_TS_CLK_SRC_M) != 0); } else { @@ -5765,6 +5651,96 @@ ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, } /** + * ice_get_pca9575_handle - find and return the PCA9575 controller + * @hw: pointer to the hw struct + * @pca9575_handle: GPIO controller's handle + * + * Find and return the GPIO controller's handle in the netlist. + * When found - the value will be cached in the hw structure and following calls + * will return cached value. + * + * Return: 0 on success, -ENXIO when there's no PCA9575 present. + */ +int ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle) +{ + struct ice_aqc_get_link_topo *cmd; + struct ice_aq_desc desc; + int err; + u8 idx; + + /* If handle was read previously return cached value */ + if (hw->io_expander_handle) { + *pca9575_handle = hw->io_expander_handle; + return 0; + } + +#define SW_PCA9575_SFP_TOPO_IDX 2 +#define SW_PCA9575_QSFP_TOPO_IDX 1 + + /* Check if the SW IO expander controlling SMA exists in the netlist. */ + if (hw->device_id == ICE_DEV_ID_E810C_SFP) + idx = SW_PCA9575_SFP_TOPO_IDX; + else if (hw->device_id == ICE_DEV_ID_E810C_QSFP) + idx = SW_PCA9575_QSFP_TOPO_IDX; + else + return -ENXIO; + + /* If handle was not detected read it from the netlist */ + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo); + cmd = &desc.params.get_link_topo; + cmd->addr.topo_params.node_type_ctx = + ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL; + cmd->addr.topo_params.index = idx; + + err = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + if (err) + return -ENXIO; + + /* Verify if we found the right IO expander type */ + if (desc.params.get_link_topo.node_part_num != + ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575) + return -ENXIO; + + /* If present save the handle and return it */ + hw->io_expander_handle = + le16_to_cpu(desc.params.get_link_topo.addr.handle); + *pca9575_handle = hw->io_expander_handle; + + return 0; +} + +/** + * ice_read_pca9575_reg - read the register from the PCA9575 controller + * @hw: pointer to the hw struct + * @offset: GPIO controller register offset + * @data: pointer to data to be read from the GPIO controller + * + * Return: 0 on success, negative error code otherwise. + */ +int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data) +{ + struct ice_aqc_link_topo_addr link_topo; + __le16 addr; + u16 handle; + int err; + + memset(&link_topo, 0, sizeof(link_topo)); + + err = ice_get_pca9575_handle(hw, &handle); + if (err) + return err; + + link_topo.handle = cpu_to_le16(handle); + link_topo.topo_params.node_type_ctx = + FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, + ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED); + + addr = cpu_to_le16((u16)offset); + + return ice_aq_read_i2c(hw, link_topo, 0, addr, 1, data, NULL); +} + +/** * ice_aq_set_gpio * @hw: pointer to the hw struct * @gpio_ctrl_handle: GPIO controller node handle diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 15ba38543738..9b00aa0ddf10 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -131,7 +131,6 @@ int ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags, struct ice_sq_cd *cd); bool ice_is_generic_mac(struct ice_hw *hw); -bool ice_is_e810(struct ice_hw *hw); int ice_clear_pf_cfg(struct ice_hw *hw); int ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi, @@ -276,10 +275,6 @@ ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); -bool ice_is_e810t(struct ice_hw *hw); -bool ice_is_e822(struct ice_hw *hw); -bool ice_is_e823(struct ice_hw *hw); -bool ice_is_e825c(struct ice_hw *hw); int ice_sched_query_elem(struct ice_hw *hw, u32 node_teid, struct ice_aqc_txsched_elem_data *buf); @@ -306,5 +301,7 @@ int ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, u16 bus_addr, __le16 addr, u8 params, const u8 *data, struct ice_sq_cd *cd); +int ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle); +int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data); bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index 03988be03729..69d5b1a28491 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -2345,14 +2345,14 @@ ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size, cmd->set_flags |= ICE_AQC_TX_TOPO_FLAGS_SRC_RAM | ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW; - if (ice_is_e825c(hw)) + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); } else { ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_tx_topo); cmd->get_flags = ICE_AQC_TX_TOPO_GET_RAM; } - if (!ice_is_e825c(hw)) + if (hw->mac_type != ICE_MAC_GENERIC_3K_E825) desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index f241493a6ac8..b0805704834d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3788,8 +3788,7 @@ ice_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) */ static int ice_get_max_txq(struct ice_pf *pf) { - return min3(pf->num_lan_msix, (u16)num_online_cpus(), - (u16)pf->hw.func_caps.common_cap.num_txq); + return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_txq); } /** @@ -3798,8 +3797,7 @@ static int ice_get_max_txq(struct ice_pf *pf) */ static int ice_get_max_rxq(struct ice_pf *pf) { - return min3(pf->num_lan_msix, (u16)num_online_cpus(), - (u16)pf->hw.func_caps.common_cap.num_rxq); + return min(num_online_cpus(), pf->hw.func_caps.common_cap.num_rxq); } /** @@ -3817,8 +3815,7 @@ static u32 ice_get_combined_cnt(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; - if (q_vector->rx.rx_ring && q_vector->tx.tx_ring) - combined++; + combined += min(q_vector->num_ring_tx, q_vector->num_ring_rx); } return combined; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index ee9862ddfe15..1d118171de37 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1605,22 +1605,19 @@ void ice_fdir_replay_fltrs(struct ice_pf *pf) */ int ice_fdir_create_dflt_rules(struct ice_pf *pf) { + const enum ice_fltr_ptype dflt_rules[] = { + ICE_FLTR_PTYPE_NONF_IPV4_TCP, ICE_FLTR_PTYPE_NONF_IPV4_UDP, + ICE_FLTR_PTYPE_NONF_IPV6_TCP, ICE_FLTR_PTYPE_NONF_IPV6_UDP, + }; int err; /* Create perfect TCP and UDP rules in hardware. */ - err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_TCP); - if (err) - return err; - - err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_UDP); - if (err) - return err; + for (int i = 0; i < ARRAY_SIZE(dflt_rules); i++) { + err = ice_create_init_fdir_rule(pf, dflt_rules[i]); - err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_TCP); - if (err) - return err; - - err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_UDP); + if (err) + break; + } return err; } diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index b2148dbe49b2..6b26290452d4 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -381,32 +381,23 @@ void ice_gnss_exit(struct ice_pf *pf) } /** - * ice_gnss_is_gps_present - Check if GPS HW is present + * ice_gnss_is_module_present - Check if GNSS HW is present * @hw: pointer to HW struct + * + * Return: true when GNSS is present, false otherwise. */ -bool ice_gnss_is_gps_present(struct ice_hw *hw) +bool ice_gnss_is_module_present(struct ice_hw *hw) { - if (!hw->func_caps.ts_func_info.src_tmr_owned) - return false; + int err; + u8 data; - if (!ice_is_gps_in_netlist(hw)) + if (!hw->func_caps.ts_func_info.src_tmr_owned || + !ice_is_gps_in_netlist(hw)) return false; -#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) - if (ice_is_e810t(hw)) { - int err; - u8 data; - - err = ice_read_pca9575_reg(hw, ICE_PCA9575_P0_IN, &data); - if (err || !!(data & ICE_P0_GNSS_PRSNT_N)) - return false; - } else { - return false; - } -#else - if (!ice_is_e810t(hw)) + err = ice_read_pca9575_reg(hw, ICE_PCA9575_P0_IN, &data); + if (err || !!(data & ICE_P0_GNSS_PRSNT_N)) return false; -#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ return true; } diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.h b/drivers/net/ethernet/intel/ice/ice_gnss.h index 75e567ad7059..15daf603ed7b 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.h +++ b/drivers/net/ethernet/intel/ice/ice_gnss.h @@ -37,11 +37,11 @@ struct gnss_serial { #if IS_ENABLED(CONFIG_GNSS) void ice_gnss_init(struct ice_pf *pf); void ice_gnss_exit(struct ice_pf *pf); -bool ice_gnss_is_gps_present(struct ice_hw *hw); +bool ice_gnss_is_module_present(struct ice_hw *hw); #else static inline void ice_gnss_init(struct ice_pf *pf) { } static inline void ice_gnss_exit(struct ice_pf *pf) { } -static inline bool ice_gnss_is_gps_present(struct ice_hw *hw) +static inline bool ice_gnss_is_module_present(struct ice_hw *hw) { return false; } diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index dc88aea9f473..aa4bfbcf85d2 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -541,10 +541,22 @@ #define PFPM_WUS_MAG_M BIT(1) #define PFPM_WUS_MNG_M BIT(3) #define PFPM_WUS_FW_RST_WK_M BIT(31) +#define E830_PRTMAC_TS_TX_MEM_VALID_H 0x001E2020 +#define E830_PRTMAC_TS_TX_MEM_VALID_L 0x001E2000 #define E830_PRTMAC_CL01_PS_QNT 0x001E32A0 #define E830_PRTMAC_CL01_PS_QNT_CL0_M GENMASK(15, 0) #define E830_PRTMAC_CL01_QNT_THR 0x001E3320 #define E830_PRTMAC_CL01_QNT_THR_CL0_M GENMASK(15, 0) +#define E830_PRTTSYN_TXTIME_H(_i) (0x001E5800 + ((_i) * 32)) +#define E830_PRTTSYN_TXTIME_L(_i) (0x001E5000 + ((_i) * 32)) +#define E830_GLPTM_ART_CTL 0x00088B50 +#define E830_GLPTM_ART_CTL_ACTIVE_M BIT(0) +#define E830_GLPTM_ART_TIME_H 0x00088B54 +#define E830_GLPTM_ART_TIME_L 0x00088B58 +#define E830_GLTSYN_PTMTIME_H(_i) (0x00088B48 + ((_i) * 4)) +#define E830_GLTSYN_PTMTIME_L(_i) (0x00088B40 + ((_i) * 4)) +#define E830_PFPTM_SEM 0x00088B00 +#define E830_PFPTM_SEM_BUSY_M BIT(0) #define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4)) #define VFINT_DYN_CTLN_CLEARPBA_M BIT(1) #define E830_MBX_PF_IN_FLIGHT_VF_MSGS_THRESH 0x00234000 diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index 145b27f2a4ce..bab3e81cad5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -228,61 +228,34 @@ void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos) } EXPORT_SYMBOL_GPL(ice_get_qos_params); -/** - * ice_alloc_rdma_qvectors - Allocate vector resources for RDMA driver - * @pf: board private structure to initialize - */ -static int ice_alloc_rdma_qvectors(struct ice_pf *pf) +int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry) { - if (ice_is_rdma_ena(pf)) { - int i; - - pf->msix_entries = kcalloc(pf->num_rdma_msix, - sizeof(*pf->msix_entries), - GFP_KERNEL); - if (!pf->msix_entries) - return -ENOMEM; + struct msi_map map = ice_alloc_irq(pf, true); - /* RDMA is the only user of pf->msix_entries array */ - pf->rdma_base_vector = 0; - - for (i = 0; i < pf->num_rdma_msix; i++) { - struct msix_entry *entry = &pf->msix_entries[i]; - struct msi_map map; + if (map.index < 0) + return -ENOMEM; - map = ice_alloc_irq(pf, false); - if (map.index < 0) - break; + entry->entry = map.index; + entry->vector = map.virq; - entry->entry = map.index; - entry->vector = map.virq; - } - } return 0; } +EXPORT_SYMBOL_GPL(ice_alloc_rdma_qvector); /** * ice_free_rdma_qvector - free vector resources reserved for RDMA driver * @pf: board private structure to initialize + * @entry: MSI-X entry to be removed */ -static void ice_free_rdma_qvector(struct ice_pf *pf) +void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry) { - int i; - - if (!pf->msix_entries) - return; - - for (i = 0; i < pf->num_rdma_msix; i++) { - struct msi_map map; + struct msi_map map; - map.index = pf->msix_entries[i].entry; - map.virq = pf->msix_entries[i].vector; - ice_free_irq(pf, map); - } - - kfree(pf->msix_entries); - pf->msix_entries = NULL; + map.index = entry->entry; + map.virq = entry->vector; + ice_free_irq(pf, map); } +EXPORT_SYMBOL_GPL(ice_free_rdma_qvector); /** * ice_adev_release - function to be mapped to AUX dev's release op @@ -382,12 +355,6 @@ int ice_init_rdma(struct ice_pf *pf) return -ENOMEM; } - /* Reserve vector resources */ - ret = ice_alloc_rdma_qvectors(pf); - if (ret < 0) { - dev_err(dev, "failed to reserve vectors for RDMA\n"); - goto err_reserve_rdma_qvector; - } pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2; ret = ice_plug_aux_dev(pf); if (ret) @@ -395,8 +362,6 @@ int ice_init_rdma(struct ice_pf *pf) return 0; err_plug_aux_dev: - ice_free_rdma_qvector(pf); -err_reserve_rdma_qvector: pf->adev = NULL; xa_erase(&ice_aux_id, pf->aux_idx); return ret; @@ -412,6 +377,5 @@ void ice_deinit_rdma(struct ice_pf *pf) return; ice_unplug_aux_dev(pf); - ice_free_rdma_qvector(pf); xa_erase(&ice_aux_id, pf->aux_idx); } diff --git a/drivers/net/ethernet/intel/ice/ice_irq.c b/drivers/net/ethernet/intel/ice/ice_irq.c index ad82ff7d1995..cbae3d81f0f1 100644 --- a/drivers/net/ethernet/intel/ice/ice_irq.c +++ b/drivers/net/ethernet/intel/ice/ice_irq.c @@ -20,6 +20,19 @@ ice_init_irq_tracker(struct ice_pf *pf, unsigned int max_vectors, xa_init_flags(&pf->irq_tracker.entries, XA_FLAGS_ALLOC); } +static int +ice_init_virt_irq_tracker(struct ice_pf *pf, u32 base, u32 num_entries) +{ + pf->virt_irq_tracker.bm = bitmap_zalloc(num_entries, GFP_KERNEL); + if (!pf->virt_irq_tracker.bm) + return -ENOMEM; + + pf->virt_irq_tracker.num_entries = num_entries; + pf->virt_irq_tracker.base = base; + + return 0; +} + /** * ice_deinit_irq_tracker - free xarray tracker * @pf: board private structure @@ -29,6 +42,11 @@ static void ice_deinit_irq_tracker(struct ice_pf *pf) xa_destroy(&pf->irq_tracker.entries); } +static void ice_deinit_virt_irq_tracker(struct ice_pf *pf) +{ + bitmap_free(pf->virt_irq_tracker.bm); +} + /** * ice_free_irq_res - free a block of resources * @pf: board private structure @@ -45,7 +63,7 @@ static void ice_free_irq_res(struct ice_pf *pf, u16 index) /** * ice_get_irq_res - get an interrupt resource * @pf: board private structure - * @dyn_only: force entry to be dynamically allocated + * @dyn_allowed: allow entry to be dynamically allocated * * Allocate new irq entry in the free slot of the tracker. Since xarray * is used, always allocate new entry at the lowest possible index. Set @@ -53,11 +71,12 @@ static void ice_free_irq_res(struct ice_pf *pf, u16 index) * * Returns allocated irq entry or NULL on failure. */ -static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only) +static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, + bool dyn_allowed) { - struct xa_limit limit = { .max = pf->irq_tracker.num_entries, + struct xa_limit limit = { .max = pf->irq_tracker.num_entries - 1, .min = 0 }; - unsigned int num_static = pf->irq_tracker.num_static; + unsigned int num_static = pf->irq_tracker.num_static - 1; struct ice_irq_entry *entry; unsigned int index; int ret; @@ -66,9 +85,9 @@ static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only) if (!entry) return NULL; - /* skip preallocated entries if the caller says so */ - if (dyn_only) - limit.min = num_static; + /* only already allocated if the caller says so */ + if (!dyn_allowed) + limit.max = num_static; ret = xa_alloc(&pf->irq_tracker.entries, &index, entry, limit, GFP_KERNEL); @@ -78,161 +97,18 @@ static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only) entry = NULL; } else { entry->index = index; - entry->dynamic = index >= num_static; + entry->dynamic = index > num_static; } return entry; } -/** - * ice_reduce_msix_usage - Reduce usage of MSI-X vectors - * @pf: board private structure - * @v_remain: number of remaining MSI-X vectors to be distributed - * - * Reduce the usage of MSI-X vectors when entire request cannot be fulfilled. - * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of - * remaining vectors. - */ -static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain) +#define ICE_RDMA_AEQ_MSIX 1 +static int ice_get_default_msix_amount(struct ice_pf *pf) { - int v_rdma; - - if (!ice_is_rdma_ena(pf)) { - pf->num_lan_msix = v_remain; - return; - } - - /* RDMA needs at least 1 interrupt in addition to AEQ MSIX */ - v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; - - if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) { - dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n"); - clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); - - pf->num_rdma_msix = 0; - pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; - } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || - (v_remain - v_rdma < v_rdma)) { - /* Support minimum RDMA and give remaining vectors to LAN MSIX - */ - pf->num_rdma_msix = ICE_MIN_RDMA_MSIX; - pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX; - } else { - /* Split remaining MSIX with RDMA after accounting for AEQ MSIX - */ - pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + - ICE_RDMA_NUM_AEQ_MSIX; - pf->num_lan_msix = v_remain - pf->num_rdma_msix; - } -} - -/** - * ice_ena_msix_range - Request a range of MSIX vectors from the OS - * @pf: board private structure - * - * Compute the number of MSIX vectors wanted and request from the OS. Adjust - * device usage if there are not enough vectors. Return the number of vectors - * reserved or negative on failure. - */ -static int ice_ena_msix_range(struct ice_pf *pf) -{ - int num_cpus, hw_num_msix, v_other, v_wanted, v_actual; - struct device *dev = ice_pf_to_dev(pf); - int err; - - hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors; - num_cpus = num_online_cpus(); - - /* LAN miscellaneous handler */ - v_other = ICE_MIN_LAN_OICR_MSIX; - - /* Flow Director */ - if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) - v_other += ICE_FDIR_MSIX; - - /* switchdev */ - v_other += ICE_ESWITCH_MSIX; - - v_wanted = v_other; - - /* LAN traffic */ - pf->num_lan_msix = num_cpus; - v_wanted += pf->num_lan_msix; - - /* RDMA auxiliary driver */ - if (ice_is_rdma_ena(pf)) { - pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; - v_wanted += pf->num_rdma_msix; - } - - if (v_wanted > hw_num_msix) { - int v_remain; - - dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n", - v_wanted, hw_num_msix); - - if (hw_num_msix < ICE_MIN_MSIX) { - err = -ERANGE; - goto exit_err; - } - - v_remain = hw_num_msix - v_other; - if (v_remain < ICE_MIN_LAN_TXRX_MSIX) { - v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX; - v_remain = ICE_MIN_LAN_TXRX_MSIX; - } - - ice_reduce_msix_usage(pf, v_remain); - v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other; - - dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n", - pf->num_lan_msix); - if (ice_is_rdma_ena(pf)) - dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n", - pf->num_rdma_msix); - } - - /* actually reserve the vectors */ - v_actual = pci_alloc_irq_vectors(pf->pdev, ICE_MIN_MSIX, v_wanted, - PCI_IRQ_MSIX); - if (v_actual < 0) { - dev_err(dev, "unable to reserve MSI-X vectors\n"); - err = v_actual; - goto exit_err; - } - - if (v_actual < v_wanted) { - dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n", - v_wanted, v_actual); - - if (v_actual < ICE_MIN_MSIX) { - /* error if we can't get minimum vectors */ - pci_free_irq_vectors(pf->pdev); - err = -ERANGE; - goto exit_err; - } else { - int v_remain = v_actual - v_other; - - if (v_remain < ICE_MIN_LAN_TXRX_MSIX) - v_remain = ICE_MIN_LAN_TXRX_MSIX; - - ice_reduce_msix_usage(pf, v_remain); - - dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", - pf->num_lan_msix); - - if (ice_is_rdma_ena(pf)) - dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n", - pf->num_rdma_msix); - } - } - - return v_actual; - -exit_err: - pf->num_rdma_msix = 0; - pf->num_lan_msix = 0; - return err; + return ICE_MIN_LAN_OICR_MSIX + num_online_cpus() + + (test_bit(ICE_FLAG_FD_ENA, pf->flags) ? ICE_FDIR_MSIX : 0) + + (ice_is_rdma_ena(pf) ? num_online_cpus() + ICE_RDMA_AEQ_MSIX : 0); } /** @@ -243,6 +119,7 @@ void ice_clear_interrupt_scheme(struct ice_pf *pf) { pci_free_irq_vectors(pf->pdev); ice_deinit_irq_tracker(pf); + ice_deinit_virt_irq_tracker(pf); } /** @@ -252,27 +129,38 @@ void ice_clear_interrupt_scheme(struct ice_pf *pf) int ice_init_interrupt_scheme(struct ice_pf *pf) { int total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; - int vectors, max_vectors; + int vectors; - vectors = ice_ena_msix_range(pf); + /* load default PF MSI-X range */ + if (!pf->msix.min) + pf->msix.min = ICE_MIN_MSIX; - if (vectors < 0) - return -ENOMEM; + if (!pf->msix.max) + pf->msix.max = min(total_vectors, + ice_get_default_msix_amount(pf)); + + pf->msix.total = total_vectors; + pf->msix.rest = total_vectors - pf->msix.max; if (pci_msix_can_alloc_dyn(pf->pdev)) - max_vectors = total_vectors; + vectors = pf->msix.min; else - max_vectors = vectors; + vectors = pf->msix.max; + + vectors = pci_alloc_irq_vectors(pf->pdev, pf->msix.min, vectors, + PCI_IRQ_MSIX); + if (vectors < pf->msix.min) + return -ENOMEM; - ice_init_irq_tracker(pf, max_vectors, vectors); + ice_init_irq_tracker(pf, pf->msix.max, vectors); - return 0; + return ice_init_virt_irq_tracker(pf, pf->msix.max, pf->msix.rest); } /** * ice_alloc_irq - Allocate new interrupt vector * @pf: board private structure - * @dyn_only: force dynamic allocation of the interrupt + * @dyn_allowed: allow dynamic allocation of the interrupt * * Allocate new interrupt vector for a given owner id. * return struct msi_map with interrupt details and track @@ -285,27 +173,22 @@ int ice_init_interrupt_scheme(struct ice_pf *pf) * interrupt will be allocated with pci_msix_alloc_irq_at. * * Some callers may only support dynamically allocated interrupts. - * This is indicated with dyn_only flag. + * This is indicated with dyn_allowed flag. * * On failure, return map with negative .index. The caller * is expected to check returned map index. * */ -struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only) +struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_allowed) { - int sriov_base_vector = pf->sriov_base_vector; struct msi_map map = { .index = -ENOENT }; struct device *dev = ice_pf_to_dev(pf); struct ice_irq_entry *entry; - entry = ice_get_irq_res(pf, dyn_only); + entry = ice_get_irq_res(pf, dyn_allowed); if (!entry) return map; - /* fail if we're about to violate SRIOV vectors space */ - if (sriov_base_vector && entry->index >= sriov_base_vector) - goto exit_free_res; - if (pci_msix_can_alloc_dyn(pf->pdev) && entry->dynamic) { map = pci_msix_alloc_irq_at(pf->pdev, entry->index, NULL); if (map.index < 0) @@ -353,26 +236,40 @@ void ice_free_irq(struct ice_pf *pf, struct msi_map map) } /** - * ice_get_max_used_msix_vector - Get the max used interrupt vector - * @pf: board private structure + * ice_virt_get_irqs - get irqs for SR-IOV usacase + * @pf: pointer to PF structure + * @needed: number of irqs to get * - * Return index of maximum used interrupt vectors with respect to the - * beginning of the MSIX table. Take into account that some interrupts - * may have been dynamically allocated after MSIX was initially enabled. + * This returns the first MSI-X vector index in PF space that is used by this + * VF. This index is used when accessing PF relative registers such as + * GLINT_VECT2FUNC and GLINT_DYN_CTL. + * This will always be the OICR index in the AVF driver so any functionality + * using vf->first_vector_idx for queue configuration_id: id of VF which will + * use this irqs */ -int ice_get_max_used_msix_vector(struct ice_pf *pf) +int ice_virt_get_irqs(struct ice_pf *pf, u32 needed) { - unsigned long start, index, max_idx; - void *entry; + int res = bitmap_find_next_zero_area(pf->virt_irq_tracker.bm, + pf->virt_irq_tracker.num_entries, + 0, needed, 0); - /* Treat all preallocated interrupts as used */ - start = pf->irq_tracker.num_static; - max_idx = start - 1; + if (res >= pf->virt_irq_tracker.num_entries) + return -ENOENT; - xa_for_each_start(&pf->irq_tracker.entries, index, entry, start) { - if (index > max_idx) - max_idx = index; - } + bitmap_set(pf->virt_irq_tracker.bm, res, needed); + + /* conversion from number in bitmap to global irq index */ + return res + pf->virt_irq_tracker.base; +} - return max_idx; +/** + * ice_virt_free_irqs - free irqs used by the VF + * @pf: pointer to PF structure + * @index: first index to be free + * @irqs: number of irqs to free + */ +void ice_virt_free_irqs(struct ice_pf *pf, u32 index, u32 irqs) +{ + bitmap_clear(pf->virt_irq_tracker.bm, index - pf->virt_irq_tracker.base, + irqs); } diff --git a/drivers/net/ethernet/intel/ice/ice_irq.h b/drivers/net/ethernet/intel/ice/ice_irq.h index f35efc08575e..b2f9dbafd57e 100644 --- a/drivers/net/ethernet/intel/ice/ice_irq.h +++ b/drivers/net/ethernet/intel/ice/ice_irq.h @@ -15,11 +15,22 @@ struct ice_irq_tracker { u16 num_static; /* preallocated entries */ }; +struct ice_virt_irq_tracker { + unsigned long *bm; /* bitmap to track irq usage */ + u32 num_entries; + /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the + * number of MSIX vectors needed for all SR-IOV VFs from the number of + * MSIX vectors allowed on this PF. + */ + u32 base; +}; + int ice_init_interrupt_scheme(struct ice_pf *pf); void ice_clear_interrupt_scheme(struct ice_pf *pf); struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only); void ice_free_irq(struct ice_pf *pf, struct msi_map map); -int ice_get_max_used_msix_vector(struct ice_pf *pf); +int ice_virt_get_irqs(struct ice_pf *pf, u32 needed); +void ice_virt_free_irqs(struct ice_pf *pf, u32 index, u32 irqs); #endif diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 38a1c8372180..978f308d3b1b 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -157,6 +157,16 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) } } +static u16 ice_get_rxq_count(struct ice_pf *pf) +{ + return min(ice_get_avail_rxq_count(pf), num_online_cpus()); +} + +static u16 ice_get_txq_count(struct ice_pf *pf) +{ + return min(ice_get_avail_txq_count(pf), num_online_cpus()); +} + /** * ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI * @vsi: the VSI being configured @@ -178,9 +188,7 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi) vsi->alloc_txq = vsi->req_txq; vsi->num_txq = vsi->req_txq; } else { - vsi->alloc_txq = min3(pf->num_lan_msix, - ice_get_avail_txq_count(pf), - (u16)num_online_cpus()); + vsi->alloc_txq = ice_get_txq_count(pf); } pf->num_lan_tx = vsi->alloc_txq; @@ -193,17 +201,13 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi) vsi->alloc_rxq = vsi->req_rxq; vsi->num_rxq = vsi->req_rxq; } else { - vsi->alloc_rxq = min3(pf->num_lan_msix, - ice_get_avail_rxq_count(pf), - (u16)num_online_cpus()); + vsi->alloc_rxq = ice_get_rxq_count(pf); } } pf->num_lan_rx = vsi->alloc_rxq; - vsi->num_q_vectors = min_t(int, pf->num_lan_msix, - max_t(int, vsi->alloc_rxq, - vsi->alloc_txq)); + vsi->num_q_vectors = max(vsi->alloc_rxq, vsi->alloc_txq); break; case ICE_VSI_SF: vsi->alloc_txq = 1; @@ -567,6 +571,8 @@ ice_vsi_alloc_def(struct ice_vsi *vsi, struct ice_channel *ch) return -ENOMEM; } + vsi->irq_dyn_alloc = pci_msix_can_alloc_dyn(vsi->back->pdev); + switch (vsi->type) { case ICE_VSI_PF: case ICE_VSI_SF: @@ -827,7 +833,13 @@ bool ice_is_safe_mode(struct ice_pf *pf) */ bool ice_is_rdma_ena(struct ice_pf *pf) { - return test_bit(ICE_FLAG_RDMA_ENA, pf->flags); + union devlink_param_value value; + int err; + + err = devl_param_driverinit_value_get(priv_to_devlink(pf), + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + &value); + return err ? test_bit(ICE_FLAG_RDMA_ENA, pf->flags) : value.vbool; } /** @@ -1173,12 +1185,11 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) static void ice_chnl_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) { - struct ice_pf *pf = vsi->back; u16 qcount, qmap; u8 offset = 0; int pow; - qcount = min_t(int, vsi->num_rxq, pf->num_lan_msix); + qcount = vsi->num_rxq; pow = order_base_2(qcount); qmap = FIELD_PREP(ICE_AQ_VSI_TC_Q_OFFSET_M, offset); @@ -3882,7 +3893,7 @@ void ice_init_feature_support(struct ice_pf *pf) ice_set_feature_support(pf, ICE_F_CGU); if (ice_is_clock_mux_in_netlist(&pf->hw)) ice_set_feature_support(pf, ICE_F_SMA_CTRL); - if (ice_gnss_is_gps_present(&pf->hw)) + if (ice_gnss_is_module_present(&pf->hw)) ice_set_feature_support(pf, ICE_F_GNSS); break; default: diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c3a0fb97c5ee..b084839eb811 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3304,22 +3304,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_TSYN_TX_M) { ena_mask &= ~PFINT_OICR_TSYN_TX_M; - if (ice_pf_state_is_nominal(pf) && - pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) { - struct ice_ptp_tx *tx = &pf->ptp.port.tx; - unsigned long flags; - u8 idx; - - spin_lock_irqsave(&tx->lock, flags); - idx = find_next_bit_wrap(tx->in_use, tx->len, - tx->last_ll_ts_idx_read + 1); - if (idx != tx->len) - ice_ptp_req_tx_single_tstamp(tx, idx); - spin_unlock_irqrestore(&tx->lock, flags); - } else if (ice_ptp_pf_handles_tx_interrupt(pf)) { - set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread); - ret = IRQ_WAKE_THREAD; - } + + ret = ice_ptp_ts_irq(pf); } if (oicr & PFINT_OICR_TSYN_EVNT_M) { @@ -4066,8 +4052,7 @@ static void ice_set_pf_caps(struct ice_pf *pf) } clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); - if (func_caps->common_cap.ieee_1588 && - !(pf->hw.mac_type == ICE_MAC_E830)) + if (func_caps->common_cap.ieee_1588) set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); pf->max_pf_txqs = func_caps->common_cap.num_txq; @@ -5087,6 +5072,12 @@ static int ice_init(struct ice_pf *pf) if (err) return err; + if (pf->hw.mac_type == ICE_MAC_E830) { + err = pci_enable_ptm(pf->pdev, NULL); + if (err) + dev_dbg(ice_pf_to_dev(pf), "PCIe PTM not supported by PCIe bus/controller\n"); + } + err = ice_alloc_vsis(pf); if (err) goto err_alloc_vsis; @@ -5186,11 +5177,12 @@ int ice_load(struct ice_pf *pf) ice_napi_add(vsi); + ice_init_features(pf); + err = ice_init_rdma(pf); if (err) goto err_init_rdma; - ice_init_features(pf); ice_service_task_restart(pf); clear_bit(ICE_DOWN, pf->state); @@ -5198,6 +5190,7 @@ int ice_load(struct ice_pf *pf) return 0; err_init_rdma: + ice_deinit_features(pf); ice_tc_indir_block_unregister(vsi); err_tc_indir_block_register: ice_unregister_netdev(vsi); @@ -5221,8 +5214,8 @@ void ice_unload(struct ice_pf *pf) devl_assert_locked(priv_to_devlink(pf)); - ice_deinit_features(pf); ice_deinit_rdma(pf); + ice_deinit_features(pf); ice_tc_indir_block_unregister(vsi); ice_unregister_netdev(vsi); ice_devlink_destroy_pf_port(pf); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index e26320ce52ca..b65114ff487b 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -310,6 +310,15 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts) /* Read the system timestamp pre PHC read */ ptp_read_system_prets(sts); + if (hw->mac_type == ICE_MAC_E830) { + u64 clk_time = rd64(hw, E830_GLTSYN_TIME_L(tmr_idx)); + + /* Read the system timestamp post PHC read */ + ptp_read_system_postts(sts); + + return clk_time; + } + lo = rd32(hw, GLTSYN_TIME_L(tmr_idx)); /* Read the system timestamp post PHC read */ @@ -972,28 +981,6 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) } /** - * ice_ptp_init_tx_eth56g - Initialize tracking for Tx timestamps - * @pf: Board private structure - * @tx: the Tx tracking structure to initialize - * @port: the port this structure tracks - * - * Initialize the Tx timestamp tracker for this port. ETH56G PHYs - * have independent memory blocks for all ports. - * - * Return: 0 for success, -ENOMEM when failed to allocate Tx tracker - */ -static int ice_ptp_init_tx_eth56g(struct ice_pf *pf, struct ice_ptp_tx *tx, - u8 port) -{ - tx->block = port; - tx->offset = 0; - tx->len = INDEX_PER_PORT_ETH56G; - tx->has_ready_bitmap = 1; - - return ice_ptp_alloc_tx_tracker(tx); -} - -/** * ice_ptp_init_tx_e82x - Initialize tracking for Tx timestamps * @pf: Board private structure * @tx: the Tx tracking structure to initialize @@ -1003,9 +990,11 @@ static int ice_ptp_init_tx_eth56g(struct ice_pf *pf, struct ice_ptp_tx *tx, * the timestamp block is shared for all ports in the same quad. To avoid * ports using the same timestamp index, logically break the block of * registers into chunks based on the port number. + * + * Return: 0 on success, -ENOMEM when out of memory */ -static int -ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) +static int ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, + u8 port) { tx->block = ICE_GET_QUAD_NUM(port); tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E82X; @@ -1016,24 +1005,27 @@ ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) } /** - * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps + * ice_ptp_init_tx - Initialize tracking for Tx timestamps * @pf: Board private structure * @tx: the Tx tracking structure to initialize + * @port: the port this structure tracks + * + * Initialize the Tx timestamp tracker for this PF. For all PHYs except E82X, + * each port has its own block of timestamps, independent of the other ports. * - * Initialize the Tx timestamp tracker for this PF. For E810 devices, each - * port has its own block of timestamps, independent of the other ports. + * Return: 0 on success, -ENOMEM when out of memory */ -static int -ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) +static int ice_ptp_init_tx(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) { - tx->block = pf->hw.port_info->lport; + tx->block = port; tx->offset = 0; - tx->len = INDEX_PER_PORT_E810; + tx->len = INDEX_PER_PORT; + /* The E810 PHY does not provide a timestamp ready bitmap. Instead, * verify new timestamps against cached copy of the last read * timestamp. */ - tx->has_ready_bitmap = 0; + tx->has_ready_bitmap = pf->hw.mac_type != ICE_MAC_E810; return ice_ptp_alloc_tx_tracker(tx); } @@ -1318,20 +1310,21 @@ ice_ptp_port_phy_stop(struct ice_ptp_port *ptp_port) struct ice_hw *hw = &pf->hw; int err; - if (ice_is_e810(hw)) - return 0; - mutex_lock(&ptp_port->ps_lock); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_stop_phy_timer_eth56g(hw, port, true); + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + err = 0; break; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: kthread_cancel_delayed_work_sync(&ptp_port->ov_work); err = ice_stop_phy_timer_e82x(hw, port, true); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_stop_phy_timer_eth56g(hw, port, true); + break; default: err = -ENODEV; } @@ -1361,19 +1354,17 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) unsigned long flags; int err; - if (ice_is_e810(hw)) - return 0; - if (!ptp_port->link_up) return ice_ptp_port_phy_stop(ptp_port); mutex_lock(&ptp_port->ps_lock); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_start_phy_timer_eth56g(hw, port); + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + err = 0; break; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: /* Start the PHY timer in Vernier mode */ kthread_cancel_delayed_work_sync(&ptp_port->ov_work); @@ -1398,6 +1389,9 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work, 0); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_start_phy_timer_eth56g(hw, port); + break; default: err = -ENODEV; } @@ -1432,12 +1426,14 @@ void ice_ptp_link_change(struct ice_pf *pf, bool linkup) /* Skip HW writes if reset is in progress */ if (pf->hw.reset_ongoing) return; - switch (ice_get_phy_model(hw)) { - case ICE_PHY_E810: - /* Do not reconfigure E810 PHY */ + + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + /* Do not reconfigure E810 or E830 PHY */ return; - case ICE_PHY_ETH56G: - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: + case ICE_MAC_GENERIC_3K_E825: ice_ptp_port_phy_restart(ptp_port); return; default: @@ -1465,46 +1461,45 @@ static int ice_ptp_cfg_phy_interrupt(struct ice_pf *pf, bool ena, u32 threshold) ice_ptp_reset_ts_memory(hw); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: { - int port; + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + return 0; + case ICE_MAC_GENERIC: { + int quad; - for (port = 0; port < hw->ptp.num_lports; port++) { + for (quad = 0; quad < ICE_GET_QUAD_NUM(hw->ptp.num_lports); + quad++) { int err; - err = ice_phy_cfg_intr_eth56g(hw, port, ena, threshold); + err = ice_phy_cfg_intr_e82x(hw, quad, ena, threshold); if (err) { - dev_err(dev, "Failed to configure PHY interrupt for port %d, err %d\n", - port, err); + dev_err(dev, "Failed to configure PHY interrupt for quad %d, err %d\n", + quad, err); return err; } } return 0; } - case ICE_PHY_E82X: { - int quad; + case ICE_MAC_GENERIC_3K_E825: { + int port; - for (quad = 0; quad < ICE_GET_QUAD_NUM(hw->ptp.num_lports); - quad++) { + for (port = 0; port < hw->ptp.num_lports; port++) { int err; - err = ice_phy_cfg_intr_e82x(hw, quad, ena, threshold); + err = ice_phy_cfg_intr_eth56g(hw, port, ena, threshold); if (err) { - dev_err(dev, "Failed to configure PHY interrupt for quad %d, err %d\n", - quad, err); + dev_err(dev, "Failed to configure PHY interrupt for port %d, err %d\n", + port, err); return err; } } return 0; } - case ICE_PHY_E810: - return 0; - case ICE_PHY_UNSUP: + case ICE_MAC_UNKNOWN: default: - dev_warn(dev, "%s: Unexpected PHY model %d\n", __func__, - ice_get_phy_model(hw)); return -EOPNOTSUPP; } } @@ -1740,7 +1735,7 @@ static int ice_ptp_write_perout(struct ice_hw *hw, unsigned int chan, /* 0. Reset mode & out_en in AUX_OUT */ wr32(hw, GLTSYN_AUX_OUT(chan, tmr_idx), 0); - if (ice_is_e825c(hw)) { + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) { int err; /* Enable/disable CGU 1PPS output for E825C */ @@ -1824,7 +1819,7 @@ static int ice_ptp_cfg_perout(struct ice_pf *pf, struct ptp_perout_request *rq, return ice_ptp_write_perout(hw, rq->index, gpio_pin, 0, 0); if (strncmp(pf->ptp.pin_desc[pin_desc_idx].name, "1PPS", 64) == 0 && - period != NSEC_PER_SEC && hw->ptp.phy_model == ICE_PHY_E82X) { + period != NSEC_PER_SEC && hw->mac_type == ICE_MAC_GENERIC) { dev_err(ice_pf_to_dev(pf), "1PPS pin supports only 1 s period\n"); return -EOPNOTSUPP; } @@ -2078,7 +2073,7 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts) /* For Vernier mode on E82X, we need to recalibrate after new settime. * Start with marking timestamps as invalid. */ - if (ice_get_phy_model(hw) == ICE_PHY_E82X) { + if (hw->mac_type == ICE_MAC_GENERIC) { err = ice_ptp_clear_phy_offset_ready_e82x(hw); if (err) dev_warn(ice_pf_to_dev(pf), "Failed to mark timestamps as invalid before settime\n"); @@ -2102,7 +2097,7 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts) ice_ptp_enable_all_perout(pf); /* Recalibrate and re-enable timestamp blocks for E822/E823 */ - if (ice_get_phy_model(hw) == ICE_PHY_E82X) + if (hw->mac_type == ICE_MAC_GENERIC) ice_ptp_restart_all_phy(pf); exit: if (err) { @@ -2180,93 +2175,157 @@ static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta) return 0; } +/** + * struct ice_crosststamp_cfg - Device cross timestamp configuration + * @lock_reg: The hardware semaphore lock to use + * @lock_busy: Bit in the semaphore lock indicating the lock is busy + * @ctl_reg: The hardware register to request cross timestamp + * @ctl_active: Bit in the control register to request cross timestamp + * @art_time_l: Lower 32-bits of ART system time + * @art_time_h: Upper 32-bits of ART system time + * @dev_time_l: Lower 32-bits of device time (per timer index) + * @dev_time_h: Upper 32-bits of device time (per timer index) + */ +struct ice_crosststamp_cfg { + /* HW semaphore lock register */ + u32 lock_reg; + u32 lock_busy; + + /* Capture control register */ + u32 ctl_reg; + u32 ctl_active; + + /* Time storage */ + u32 art_time_l; + u32 art_time_h; + u32 dev_time_l[2]; + u32 dev_time_h[2]; +}; + +static const struct ice_crosststamp_cfg ice_crosststamp_cfg_e82x = { + .lock_reg = PFHH_SEM, + .lock_busy = PFHH_SEM_BUSY_M, + .ctl_reg = GLHH_ART_CTL, + .ctl_active = GLHH_ART_CTL_ACTIVE_M, + .art_time_l = GLHH_ART_TIME_L, + .art_time_h = GLHH_ART_TIME_H, + .dev_time_l[0] = GLTSYN_HHTIME_L(0), + .dev_time_h[0] = GLTSYN_HHTIME_H(0), + .dev_time_l[1] = GLTSYN_HHTIME_L(1), + .dev_time_h[1] = GLTSYN_HHTIME_H(1), +}; + #ifdef CONFIG_ICE_HWTS +static const struct ice_crosststamp_cfg ice_crosststamp_cfg_e830 = { + .lock_reg = E830_PFPTM_SEM, + .lock_busy = E830_PFPTM_SEM_BUSY_M, + .ctl_reg = E830_GLPTM_ART_CTL, + .ctl_active = E830_GLPTM_ART_CTL_ACTIVE_M, + .art_time_l = E830_GLPTM_ART_TIME_L, + .art_time_h = E830_GLPTM_ART_TIME_H, + .dev_time_l[0] = E830_GLTSYN_PTMTIME_L(0), + .dev_time_h[0] = E830_GLTSYN_PTMTIME_H(0), + .dev_time_l[1] = E830_GLTSYN_PTMTIME_L(1), + .dev_time_h[1] = E830_GLTSYN_PTMTIME_H(1), +}; + +#endif /* CONFIG_ICE_HWTS */ +/** + * struct ice_crosststamp_ctx - Device cross timestamp context + * @snapshot: snapshot of system clocks for historic interpolation + * @pf: pointer to the PF private structure + * @cfg: pointer to hardware configuration for cross timestamp + */ +struct ice_crosststamp_ctx { + struct system_time_snapshot snapshot; + struct ice_pf *pf; + const struct ice_crosststamp_cfg *cfg; +}; + /** - * ice_ptp_get_syncdevicetime - Get the cross time stamp info + * ice_capture_crosststamp - Capture a device/system cross timestamp * @device: Current device time * @system: System counter value read synchronously with device time - * @ctx: Context provided by timekeeping code + * @__ctx: Context passed from ice_ptp_getcrosststamp * * Read device and system (ART) clock simultaneously and return the corrected * clock values in ns. + * + * Return: zero on success, or a negative error code on failure. */ -static int -ice_ptp_get_syncdevicetime(ktime_t *device, - struct system_counterval_t *system, - void *ctx) +static int ice_capture_crosststamp(ktime_t *device, + struct system_counterval_t *system, + void *__ctx) { - struct ice_pf *pf = (struct ice_pf *)ctx; - struct ice_hw *hw = &pf->hw; - u32 hh_lock, hh_art_ctl; - int i; + struct ice_crosststamp_ctx *ctx = __ctx; + const struct ice_crosststamp_cfg *cfg; + u32 lock, ctl, ts_lo, ts_hi, tmr_idx; + struct ice_pf *pf; + struct ice_hw *hw; + int err; + u64 ts; -#define MAX_HH_HW_LOCK_TRIES 5 -#define MAX_HH_CTL_LOCK_TRIES 100 + cfg = ctx->cfg; + pf = ctx->pf; + hw = &pf->hw; - for (i = 0; i < MAX_HH_HW_LOCK_TRIES; i++) { - /* Get the HW lock */ - hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); - if (hh_lock & PFHH_SEM_BUSY_M) { - usleep_range(10000, 15000); - continue; - } - break; - } - if (hh_lock & PFHH_SEM_BUSY_M) { - dev_err(ice_pf_to_dev(pf), "PTP failed to get hh lock\n"); + tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc; + if (tmr_idx > 1) + return -EINVAL; + + /* Poll until we obtain the cross-timestamp hardware semaphore */ + err = rd32_poll_timeout(hw, cfg->lock_reg, lock, + !(lock & cfg->lock_busy), + 10 * USEC_PER_MSEC, 50 * USEC_PER_MSEC); + if (err) { + dev_err(ice_pf_to_dev(pf), "PTP failed to get cross timestamp lock\n"); return -EBUSY; } + /* Snapshot system time for historic interpolation */ + ktime_get_snapshot(&ctx->snapshot); + /* Program cmd to master timer */ ice_ptp_src_cmd(hw, ICE_PTP_READ_TIME); /* Start the ART and device clock sync sequence */ - hh_art_ctl = rd32(hw, GLHH_ART_CTL); - hh_art_ctl = hh_art_ctl | GLHH_ART_CTL_ACTIVE_M; - wr32(hw, GLHH_ART_CTL, hh_art_ctl); - - for (i = 0; i < MAX_HH_CTL_LOCK_TRIES; i++) { - /* Wait for sync to complete */ - hh_art_ctl = rd32(hw, GLHH_ART_CTL); - if (hh_art_ctl & GLHH_ART_CTL_ACTIVE_M) { - udelay(1); - continue; - } else { - u32 hh_ts_lo, hh_ts_hi, tmr_idx; - u64 hh_ts; - - tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc; - /* Read ART time */ - hh_ts_lo = rd32(hw, GLHH_ART_TIME_L); - hh_ts_hi = rd32(hw, GLHH_ART_TIME_H); - hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo; - system->cycles = hh_ts; - system->cs_id = CSID_X86_ART; - /* Read Device source clock time */ - hh_ts_lo = rd32(hw, GLTSYN_HHTIME_L(tmr_idx)); - hh_ts_hi = rd32(hw, GLTSYN_HHTIME_H(tmr_idx)); - hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo; - *device = ns_to_ktime(hh_ts); - break; - } - } + ctl = rd32(hw, cfg->ctl_reg); + ctl |= cfg->ctl_active; + wr32(hw, cfg->ctl_reg, ctl); + /* Poll until hardware completes the capture */ + err = rd32_poll_timeout(hw, cfg->ctl_reg, ctl, !(ctl & cfg->ctl_active), + 5, 20 * USEC_PER_MSEC); + if (err) + goto err_timeout; + + /* Read ART system time */ + ts_lo = rd32(hw, cfg->art_time_l); + ts_hi = rd32(hw, cfg->art_time_h); + ts = ((u64)ts_hi << 32) | ts_lo; + system->cycles = ts; + system->cs_id = CSID_X86_ART; + + /* Read Device source clock time */ + ts_lo = rd32(hw, cfg->dev_time_l[tmr_idx]); + ts_hi = rd32(hw, cfg->dev_time_h[tmr_idx]); + ts = ((u64)ts_hi << 32) | ts_lo; + *device = ns_to_ktime(ts); + +err_timeout: /* Clear the master timer */ ice_ptp_src_cmd(hw, ICE_PTP_NOP); /* Release HW lock */ - hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); - hh_lock = hh_lock & ~PFHH_SEM_BUSY_M; - wr32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), hh_lock); - - if (i == MAX_HH_CTL_LOCK_TRIES) - return -ETIMEDOUT; + lock = rd32(hw, cfg->lock_reg); + lock &= ~cfg->lock_busy; + wr32(hw, cfg->lock_reg, lock); - return 0; + return err; } /** - * ice_ptp_getcrosststamp_e82x - Capture a device cross timestamp + * ice_ptp_getcrosststamp - Capture a device cross timestamp * @info: the driver's PTP info structure * @cts: The memory to fill the cross timestamp info * @@ -2274,22 +2333,36 @@ ice_ptp_get_syncdevicetime(ktime_t *device, * clock. Fill the cross timestamp information and report it back to the * caller. * - * This is only valid for E822 and E823 devices which have support for - * generating the cross timestamp via PCIe PTM. - * * In order to correctly correlate the ART timestamp back to the TSC time, the * CPU must have X86_FEATURE_TSC_KNOWN_FREQ. + * + * Return: zero on success, or a negative error code on failure. */ -static int -ice_ptp_getcrosststamp_e82x(struct ptp_clock_info *info, - struct system_device_crosststamp *cts) +static int ice_ptp_getcrosststamp(struct ptp_clock_info *info, + struct system_device_crosststamp *cts) { struct ice_pf *pf = ptp_info_to_pf(info); + struct ice_crosststamp_ctx ctx = { + .pf = pf, + }; + + switch (pf->hw.mac_type) { + case ICE_MAC_GENERIC: + case ICE_MAC_GENERIC_3K_E825: + ctx.cfg = &ice_crosststamp_cfg_e82x; + break; +#ifdef CONFIG_ICE_HWTS + case ICE_MAC_E830: + ctx.cfg = &ice_crosststamp_cfg_e830; + break; +#endif /* CONFIG_ICE_HWTS */ + default: + return -EOPNOTSUPP; + } - return get_device_system_crosststamp(ice_ptp_get_syncdevicetime, - pf, NULL, cts); + return get_device_system_crosststamp(ice_capture_crosststamp, &ctx, + &ctx.snapshot, cts); } -#endif /* CONFIG_ICE_HWTS */ /** * ice_ptp_get_ts_config - ioctl interface to read the timestamping config @@ -2550,13 +2623,9 @@ static int ice_ptp_parse_sdp_entries(struct ice_pf *pf, __le16 *entries, */ static void ice_ptp_set_funcs_e82x(struct ice_pf *pf) { -#ifdef CONFIG_ICE_HWTS - if (boot_cpu_has(X86_FEATURE_ART) && - boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) - pf->ptp.info.getcrosststamp = ice_ptp_getcrosststamp_e82x; + pf->ptp.info.getcrosststamp = ice_ptp_getcrosststamp; -#endif /* CONFIG_ICE_HWTS */ - if (ice_is_e825c(&pf->hw)) { + if (pf->hw.mac_type == ICE_MAC_GENERIC_3K_E825) { pf->ptp.ice_pin_desc = ice_pin_desc_e825c; pf->ptp.info.n_pins = ICE_PIN_DESC_ARR_LEN(ice_pin_desc_e825c); } else { @@ -2623,6 +2692,28 @@ err: } /** + * ice_ptp_set_funcs_e830 - Set specialized functions for E830 support + * @pf: Board private structure + * + * Assign functions to the PTP capabiltiies structure for E830 devices. + * Functions which operate across all device families should be set directly + * in ice_ptp_set_caps. Only add functions here which are distinct for E830 + * devices. + */ +static void ice_ptp_set_funcs_e830(struct ice_pf *pf) +{ +#ifdef CONFIG_ICE_HWTS + if (pcie_ptm_enabled(pf->pdev) && boot_cpu_has(X86_FEATURE_ART)) + pf->ptp.info.getcrosststamp = ice_ptp_getcrosststamp; + +#endif /* CONFIG_ICE_HWTS */ + /* Rest of the config is the same as base E810 */ + pf->ptp.ice_pin_desc = ice_pin_desc_e810; + pf->ptp.info.n_pins = ICE_PIN_DESC_ARR_LEN(ice_pin_desc_e810); + ice_ptp_setup_pin_cfg(pf); +} + +/** * ice_ptp_set_caps - Set PTP capabilities * @pf: Board private structure */ @@ -2644,10 +2735,20 @@ static void ice_ptp_set_caps(struct ice_pf *pf) info->enable = ice_ptp_gpio_enable; info->verify = ice_verify_pin; - if (ice_is_e810(&pf->hw)) + switch (pf->hw.mac_type) { + case ICE_MAC_E810: ice_ptp_set_funcs_e810(pf); - else + return; + case ICE_MAC_E830: + ice_ptp_set_funcs_e830(pf); + return; + case ICE_MAC_GENERIC: + case ICE_MAC_GENERIC_3K_E825: ice_ptp_set_funcs_e82x(pf); + return; + default: + return; + } } /** @@ -2758,6 +2859,65 @@ enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf) } /** + * ice_ptp_ts_irq - Process the PTP Tx timestamps in IRQ context + * @pf: Board private structure + * + * Return: IRQ_WAKE_THREAD if Tx timestamp read has to be handled in the bottom + * half of the interrupt and IRQ_HANDLED otherwise. + */ +irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + + switch (hw->mac_type) { + case ICE_MAC_E810: + /* E810 capable of low latency timestamping with interrupt can + * request a single timestamp in the top half and wait for + * a second LL TS interrupt from the FW when it's ready. + */ + if (hw->dev_caps.ts_dev_info.ts_ll_int_read) { + struct ice_ptp_tx *tx = &pf->ptp.port.tx; + u8 idx; + + if (!ice_pf_state_is_nominal(pf)) + return IRQ_HANDLED; + + spin_lock(&tx->lock); + idx = find_next_bit_wrap(tx->in_use, tx->len, + tx->last_ll_ts_idx_read + 1); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + spin_unlock(&tx->lock); + + return IRQ_HANDLED; + } + fallthrough; /* non-LL_TS E810 */ + case ICE_MAC_GENERIC: + case ICE_MAC_GENERIC_3K_E825: + /* All other devices process timestamps in the bottom half due + * to sleeping or polling. + */ + if (!ice_ptp_pf_handles_tx_interrupt(pf)) + return IRQ_HANDLED; + + set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread); + return IRQ_WAKE_THREAD; + case ICE_MAC_E830: + /* E830 can read timestamps in the top half using rd32() */ + if (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) { + /* Process outstanding Tx timestamps. If there + * is more work, re-arm the interrupt to trigger again. + */ + wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M); + ice_flush(hw); + } + return IRQ_HANDLED; + default: + return IRQ_HANDLED; + } +} + +/** * ice_ptp_maybe_trigger_tx_interrupt - Trigger Tx timstamp interrupt * @pf: Board private structure * @@ -2777,7 +2937,7 @@ static void ice_ptp_maybe_trigger_tx_interrupt(struct ice_pf *pf) bool trigger_oicr = false; unsigned int i; - if (ice_is_e810(hw)) + if (!pf->ptp.port.tx.has_ready_bitmap) return; if (!ice_pf_src_tmr_owned(pf)) @@ -2912,14 +3072,12 @@ static int ice_ptp_rebuild_owner(struct ice_pf *pf) */ ice_ptp_flush_all_tx_tracker(pf); - if (!ice_is_e810(hw)) { - /* Enable quad interrupts */ - err = ice_ptp_cfg_phy_interrupt(pf, true, 1); - if (err) - return err; + /* Enable quad interrupts */ + err = ice_ptp_cfg_phy_interrupt(pf, true, 1); + if (err) + return err; - ice_ptp_restart_all_phy(pf); - } + ice_ptp_restart_all_phy(pf); /* Re-enable all periodic outputs and external timestamp events */ ice_ptp_enable_all_perout(pf); @@ -2971,8 +3129,9 @@ err: static bool ice_is_primary(struct ice_hw *hw) { - return ice_is_e825c(hw) && ice_is_dual(hw) ? - !!(hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_PRIMARY_M) : true; + return hw->mac_type == ICE_MAC_GENERIC_3K_E825 && ice_is_dual(hw) ? + !!(hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_PRIMARY_M) : + true; } static int ice_ptp_setup_adapter(struct ice_pf *pf) @@ -2990,7 +3149,7 @@ static int ice_ptp_setup_pf(struct ice_pf *pf) struct ice_ptp *ctrl_ptp = ice_get_ctrl_ptp(pf); struct ice_ptp *ptp = &pf->ptp; - if (WARN_ON(!ctrl_ptp) || ice_get_phy_model(&pf->hw) == ICE_PHY_UNSUP) + if (WARN_ON(!ctrl_ptp) || pf->hw.mac_type == ICE_MAC_UNKNOWN) return -ENODEV; INIT_LIST_HEAD(&ptp->port.list_node); @@ -3007,7 +3166,7 @@ static void ice_ptp_cleanup_pf(struct ice_pf *pf) { struct ice_ptp *ptp = &pf->ptp; - if (ice_get_phy_model(&pf->hw) != ICE_PHY_UNSUP) { + if (pf->hw.mac_type != ICE_MAC_UNKNOWN) { mutex_lock(&pf->adapter->ports.lock); list_del(&ptp->port.list_node); mutex_unlock(&pf->adapter->ports.lock); @@ -3127,6 +3286,8 @@ static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp) * ice_ptp_init_port - Initialize PTP port structure * @pf: Board private structure * @ptp_port: PTP port structure + * + * Return: 0 on success, -ENODEV on invalid MAC type, -ENOMEM on failed alloc. */ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) { @@ -3134,16 +3295,14 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) mutex_init(&ptp_port->ps_lock); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_ptp_init_tx_eth56g(pf, &ptp_port->tx, - ptp_port->port_num); - case ICE_PHY_E810: - return ice_ptp_init_tx_e810(pf, &ptp_port->tx); - case ICE_PHY_E82X: + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + case ICE_MAC_GENERIC_3K_E825: + return ice_ptp_init_tx(pf, &ptp_port->tx, ptp_port->port_num); + case ICE_MAC_GENERIC: kthread_init_delayed_work(&ptp_port->ov_work, ice_ptp_wait_for_offsets); - return ice_ptp_init_tx_e82x(pf, &ptp_port->tx, ptp_port->port_num); default: @@ -3162,8 +3321,8 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) */ static void ice_ptp_init_tx_interrupt_mode(struct ice_pf *pf) { - switch (ice_get_phy_model(&pf->hw)) { - case ICE_PHY_E82X: + switch (pf->hw.mac_type) { + case ICE_MAC_GENERIC: /* E822 based PHY has the clock owner process the interrupt * for all ports. */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index a1d0e988c084..783139de7f74 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -128,8 +128,7 @@ struct ice_ptp_tx { /* Quad and port information for initializing timestamp blocks */ #define INDEX_PER_QUAD 64 #define INDEX_PER_PORT_E82X 16 -#define INDEX_PER_PORT_E810 64 -#define INDEX_PER_PORT_ETH56G 64 +#define INDEX_PER_PORT 64 /** * struct ice_ptp_port - data used to initialize an external port for PTP @@ -304,6 +303,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); void ice_ptp_req_tx_single_tstamp(struct ice_ptp_tx *tx, u8 idx); void ice_ptp_complete_tx_single_tstamp(struct ice_ptp_tx *tx); enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf); +irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf); u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc, const struct ice_pkt_ctx *pkt_ctx); @@ -342,6 +342,11 @@ static inline bool ice_ptp_process_ts(struct ice_pf *pf) return true; } +static inline irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf) +{ + return IRQ_HANDLED; +} + static inline u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc, const struct ice_pkt_ctx *pkt_ctx) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index ec91822e9280..3e824f7b30c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -746,7 +746,7 @@ static int ice_init_cgu_e82x(struct ice_hw *hw) int err; /* Disable sticky lock detection so lock err reported is accurate */ - if (ice_is_e825c(hw)) + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) err = ice_cfg_cgu_pll_dis_sticky_bits_e825c(hw); else err = ice_cfg_cgu_pll_dis_sticky_bits_e82x(hw); @@ -756,7 +756,7 @@ static int ice_init_cgu_e82x(struct ice_hw *hw) /* Configure the CGU PLL using the parameters from the function * capabilities. */ - if (ice_is_e825c(hw)) + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) err = ice_cfg_cgu_pll_e825c(hw, ts_info->time_ref, (enum ice_clk_src)ts_info->clk_src); else @@ -827,8 +827,9 @@ static u32 ice_ptp_tmr_cmd_to_port_reg(struct ice_hw *hw, /* Certain hardware families share the same register values for the * port register and source timer register. */ - switch (ice_get_phy_model(hw)) { - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: return ice_ptp_tmr_cmd_to_src_reg(hw, cmd) & TS_CMD_MASK_E810; default: break; @@ -895,6 +896,17 @@ static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw) ice_flush(hw); } +/** + * ice_ptp_cfg_sync_delay - Configure PHC to PHY synchronization delay + * @hw: pointer to HW struct + * @delay: delay between PHC and PHY SYNC command execution in nanoseconds + */ +static void ice_ptp_cfg_sync_delay(const struct ice_hw *hw, u32 delay) +{ + wr32(hw, GLTSYN_SYNC_DLAY, delay); + ice_flush(hw); +} + /* 56G PHY device functions * * The following functions operate on devices with the ETH 56G PHY. @@ -1576,9 +1588,8 @@ static int ice_read_ptp_tstamp_eth56g(struct ice_hw *hw, u8 port, u8 idx, * lower 8 bits in the low register, and the upper 32 bits in the high * register. */ - *tstamp = FIELD_PREP(TS_PHY_HIGH_M, hi) | - FIELD_PREP(TS_PHY_LOW_M, lo); - + *tstamp = FIELD_PREP(PHY_40B_HIGH_M, hi) | + FIELD_PREP(PHY_40B_LOW_M, lo); return 0; } @@ -2729,10 +2740,7 @@ static void ice_ptp_init_phy_e825(struct ice_hw *hw) { struct ice_ptp_hw *ptp = &hw->ptp; struct ice_eth56g_params *params; - u32 phy_rev; - int err; - ptp->phy_model = ICE_PHY_ETH56G; params = &ptp->phy.eth56g; params->onestep_ena = false; params->peer_delay = 0; @@ -2742,9 +2750,6 @@ static void ice_ptp_init_phy_e825(struct ice_hw *hw) ptp->num_lports = params->num_phys * ptp->ports_per_phy; ice_sb_access_ena_eth56g(hw, true); - err = ice_read_phy_eth56g(hw, hw->pf_id, PHY_REG_REVISION, &phy_rev); - if (err || phy_rev != PHY_REVISION_ETH56G) - ptp->phy_model = ICE_PHY_UNSUP; } /* E822 family functions @@ -3219,7 +3224,8 @@ ice_read_phy_tstamp_e82x(struct ice_hw *hw, u8 quad, u8 idx, u64 *tstamp) * lower 8 bits in the low register, and the upper 32 bits in the high * register. */ - *tstamp = FIELD_PREP(TS_PHY_HIGH_M, hi) | FIELD_PREP(TS_PHY_LOW_M, lo); + *tstamp = FIELD_PREP(PHY_40B_HIGH_M, hi) | + FIELD_PREP(PHY_40B_LOW_M, lo); return 0; } @@ -4792,7 +4798,6 @@ int ice_phy_cfg_intr_e82x(struct ice_hw *hw, u8 quad, bool ena, u8 threshold) */ static void ice_ptp_init_phy_e82x(struct ice_ptp_hw *ptp) { - ptp->phy_model = ICE_PHY_E82X; ptp->num_lports = 8; ptp->ports_per_phy = 8; } @@ -4986,7 +4991,8 @@ ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp) /* For E810 devices, the timestamp is reported with the lower 32 bits * in the low register, and the upper 8 bits in the high register. */ - *tstamp = ((u64)hi) << TS_HIGH_S | ((u64)lo & TS_LOW_M); + *tstamp = FIELD_PREP(PHY_EXT_40B_HIGH_M, hi) | + FIELD_PREP(PHY_EXT_40B_LOW_M, lo); return 0; } @@ -5049,8 +5055,7 @@ static int ice_ptp_init_phc_e810(struct ice_hw *hw) u8 tmr_idx; int err; - /* Ensure synchronization delay is zero */ - wr32(hw, GLTSYN_SYNC_DLAY, 0); + ice_ptp_cfg_sync_delay(hw, ICE_E810_E830_SYNC_DELAY); tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_ENA(tmr_idx), @@ -5316,68 +5321,6 @@ ice_get_phy_tx_tstamp_ready_e810(struct ice_hw *hw, u8 port, u64 *tstamp_ready) */ /** - * ice_get_pca9575_handle - * @hw: pointer to the hw struct - * @pca9575_handle: GPIO controller's handle - * - * Find and return the GPIO controller's handle in the netlist. - * When found - the value will be cached in the hw structure and following calls - * will return cached value - */ -static int -ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle) -{ - struct ice_aqc_get_link_topo *cmd; - struct ice_aq_desc desc; - int status; - u8 idx; - - /* If handle was read previously return cached value */ - if (hw->io_expander_handle) { - *pca9575_handle = hw->io_expander_handle; - return 0; - } - - /* If handle was not detected read it from the netlist */ - cmd = &desc.params.get_link_topo; - ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo); - - /* Set node type to GPIO controller */ - cmd->addr.topo_params.node_type_ctx = - (ICE_AQC_LINK_TOPO_NODE_TYPE_M & - ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL); - -#define SW_PCA9575_SFP_TOPO_IDX 2 -#define SW_PCA9575_QSFP_TOPO_IDX 1 - - /* Check if the SW IO expander controlling SMA exists in the netlist. */ - if (hw->device_id == ICE_DEV_ID_E810C_SFP) - idx = SW_PCA9575_SFP_TOPO_IDX; - else if (hw->device_id == ICE_DEV_ID_E810C_QSFP) - idx = SW_PCA9575_QSFP_TOPO_IDX; - else - return -EOPNOTSUPP; - - cmd->addr.topo_params.index = idx; - - status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); - if (status) - return -EOPNOTSUPP; - - /* Verify if we found the right IO expander type */ - if (desc.params.get_link_topo.node_part_num != - ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575) - return -EOPNOTSUPP; - - /* If present save the handle and return it */ - hw->io_expander_handle = - le16_to_cpu(desc.params.get_link_topo.addr.handle); - *pca9575_handle = hw->io_expander_handle; - - return 0; -} - -/** * ice_read_sma_ctrl * @hw: pointer to the hw struct * @data: pointer to data to be read from the GPIO controller @@ -5442,37 +5385,6 @@ int ice_write_sma_ctrl(struct ice_hw *hw, u8 data) } /** - * ice_read_pca9575_reg - * @hw: pointer to the hw struct - * @offset: GPIO controller register offset - * @data: pointer to data to be read from the GPIO controller - * - * Read the register from the GPIO controller - */ -int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data) -{ - struct ice_aqc_link_topo_addr link_topo; - __le16 addr; - u16 handle; - int err; - - memset(&link_topo, 0, sizeof(link_topo)); - - err = ice_get_pca9575_handle(hw, &handle); - if (err) - return err; - - link_topo.handle = cpu_to_le16(handle); - link_topo.topo_params.node_type_ctx = - FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, - ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED); - - addr = cpu_to_le16((u16)offset); - - return ice_aq_read_i2c(hw, link_topo, 0, addr, 1, data, NULL); -} - -/** * ice_ptp_read_sdp_ac - read SDP available connections section from NVM * @hw: pointer to the HW struct * @entries: returns the SDP available connections section from NVM @@ -5538,18 +5450,138 @@ exit: */ static void ice_ptp_init_phy_e810(struct ice_ptp_hw *ptp) { - ptp->phy_model = ICE_PHY_E810; ptp->num_lports = 8; ptp->ports_per_phy = 4; init_waitqueue_head(&ptp->phy.e810.atqbal_wq); } +/* E830 functions + * + * The following functions operate on the E830 series devices. + * + */ + +/** + * ice_ptp_init_phc_e830 - Perform E830 specific PHC initialization + * @hw: pointer to HW struct + * + * Perform E830-specific PTP hardware clock initialization steps. + */ +static void ice_ptp_init_phc_e830(const struct ice_hw *hw) +{ + ice_ptp_cfg_sync_delay(hw, ICE_E810_E830_SYNC_DELAY); +} + +/** + * ice_ptp_write_direct_incval_e830 - Prep PHY port increment value change + * @hw: pointer to HW struct + * @incval: The new 40bit increment value to prepare + * + * Prepare the PHY port for a new increment value by programming the PHC + * GLTSYN_INCVAL_L and GLTSYN_INCVAL_H registers. The actual change is + * completed by FW automatically. + */ +static void ice_ptp_write_direct_incval_e830(const struct ice_hw *hw, + u64 incval) +{ + u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + wr32(hw, GLTSYN_INCVAL_L(tmr_idx), lower_32_bits(incval)); + wr32(hw, GLTSYN_INCVAL_H(tmr_idx), upper_32_bits(incval)); +} + +/** + * ice_ptp_write_direct_phc_time_e830 - Prepare PHY port with initial time + * @hw: Board private structure + * @time: Time to initialize the PHY port clock to + * + * Program the PHY port ETH_GLTSYN_SHTIME registers in preparation setting the + * initial clock time. The time will not actually be programmed until the + * driver issues an ICE_PTP_INIT_TIME command. + * + * The time value is the upper 32 bits of the PHY timer, usually in units of + * nominal nanoseconds. + */ +static void ice_ptp_write_direct_phc_time_e830(const struct ice_hw *hw, + u64 time) +{ + u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + wr32(hw, GLTSYN_TIME_0(tmr_idx), 0); + wr32(hw, GLTSYN_TIME_L(tmr_idx), lower_32_bits(time)); + wr32(hw, GLTSYN_TIME_H(tmr_idx), upper_32_bits(time)); +} + +/** + * ice_ptp_port_cmd_e830 - Prepare all external PHYs for a timer command + * @hw: pointer to HW struct + * @cmd: Command to be sent to the port + * + * Prepare the external PHYs connected to this device for a timer sync + * command. + * + * Return: 0 on success, negative error code when PHY write failed + */ +static int ice_ptp_port_cmd_e830(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) +{ + u32 val = ice_ptp_tmr_cmd_to_port_reg(hw, cmd); + + return ice_write_phy_reg_e810(hw, E830_ETH_GLTSYN_CMD, val); +} + +/** + * ice_read_phy_tstamp_e830 - Read a PHY timestamp out of the external PHY + * @hw: pointer to the HW struct + * @idx: the timestamp index to read + * @tstamp: on return, the 40bit timestamp value + * + * Read a 40bit timestamp value out of the timestamp block of the external PHY + * on the E830 device. + */ +static void ice_read_phy_tstamp_e830(const struct ice_hw *hw, u8 idx, + u64 *tstamp) +{ + u32 hi, lo; + + hi = rd32(hw, E830_PRTTSYN_TXTIME_H(idx)); + lo = rd32(hw, E830_PRTTSYN_TXTIME_L(idx)); + + /* For E830 devices, the timestamp is reported with the lower 32 bits + * in the low register, and the upper 8 bits in the high register. + */ + *tstamp = FIELD_PREP(PHY_EXT_40B_HIGH_M, hi) | + FIELD_PREP(PHY_EXT_40B_LOW_M, lo); +} + +/** + * ice_get_phy_tx_tstamp_ready_e830 - Read Tx memory status register + * @hw: pointer to the HW struct + * @port: the PHY port to read + * @tstamp_ready: contents of the Tx memory status register + */ +static void ice_get_phy_tx_tstamp_ready_e830(const struct ice_hw *hw, u8 port, + u64 *tstamp_ready) +{ + *tstamp_ready = rd32(hw, E830_PRTMAC_TS_TX_MEM_VALID_H); + *tstamp_ready <<= 32; + *tstamp_ready |= rd32(hw, E830_PRTMAC_TS_TX_MEM_VALID_L); +} + +/** + * ice_ptp_init_phy_e830 - initialize PHY parameters + * @ptp: pointer to the PTP HW struct + */ +static void ice_ptp_init_phy_e830(struct ice_ptp_hw *ptp) +{ + ptp->num_lports = 8; + ptp->ports_per_phy = 4; +} + /* Device agnostic functions * - * The following functions implement shared behavior common to both E822 and - * E810 devices, possibly calling a device specific implementation where - * necessary. + * The following functions implement shared behavior common to all devices, + * possibly calling a device specific implementation where necessary. */ /** @@ -5612,14 +5644,22 @@ void ice_ptp_init_hw(struct ice_hw *hw) { struct ice_ptp_hw *ptp = &hw->ptp; - if (ice_is_e822(hw) || ice_is_e823(hw)) - ice_ptp_init_phy_e82x(ptp); - else if (ice_is_e810(hw)) + switch (hw->mac_type) { + case ICE_MAC_E810: ice_ptp_init_phy_e810(ptp); - else if (ice_is_e825c(hw)) + break; + case ICE_MAC_E830: + ice_ptp_init_phy_e830(ptp); + break; + case ICE_MAC_GENERIC: + ice_ptp_init_phy_e82x(ptp); + break; + case ICE_MAC_GENERIC_3K_E825: ice_ptp_init_phy_e825(hw); - else - ptp->phy_model = ICE_PHY_UNSUP; + break; + default: + return; + } } /** @@ -5640,11 +5680,11 @@ void ice_ptp_init_hw(struct ice_hw *hw) static int ice_ptp_write_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_ptp_write_port_cmd_eth56g(hw, port, cmd); - case ICE_PHY_E82X: + switch (hw->mac_type) { + case ICE_MAC_GENERIC: return ice_ptp_write_port_cmd_e82x(hw, port, cmd); + case ICE_MAC_GENERIC_3K_E825: + return ice_ptp_write_port_cmd_eth56g(hw, port, cmd); default: return -EOPNOTSUPP; } @@ -5705,9 +5745,11 @@ static int ice_ptp_port_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) u32 port; /* PHY models which can program all ports simultaneously */ - switch (ice_get_phy_model(hw)) { - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_ptp_port_cmd_e810(hw, cmd); + case ICE_MAC_E830: + return ice_ptp_port_cmd_e830(hw, cmd); default: break; } @@ -5778,23 +5820,29 @@ int ice_ptp_init_time(struct ice_hw *hw, u64 time) tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; /* Source timers */ + /* For E830 we don't need to use shadow registers, its automatic */ + if (hw->mac_type == ICE_MAC_E830) { + ice_ptp_write_direct_phc_time_e830(hw, time); + return 0; + } + wr32(hw, GLTSYN_SHTIME_L(tmr_idx), lower_32_bits(time)); wr32(hw, GLTSYN_SHTIME_H(tmr_idx), upper_32_bits(time)); wr32(hw, GLTSYN_SHTIME_0(tmr_idx), 0); /* PHY timers */ /* Fill Rx and Tx ports and send msg to PHY */ - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_ptp_prep_phy_time_eth56g(hw, - (u32)(time & 0xFFFFFFFF)); - break; - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: err = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF); break; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: err = ice_ptp_prep_phy_time_e82x(hw, time & 0xFFFFFFFF); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_ptp_prep_phy_time_eth56g(hw, + (u32)(time & 0xFFFFFFFF)); + break; default: err = -EOPNOTSUPP; } @@ -5826,20 +5874,26 @@ int ice_ptp_write_incval(struct ice_hw *hw, u64 incval) tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + /* For E830 we don't need to use shadow registers, its automatic */ + if (hw->mac_type == ICE_MAC_E830) { + ice_ptp_write_direct_incval_e830(hw, incval); + return 0; + } + /* Shadow Adjust */ wr32(hw, GLTSYN_SHADJ_L(tmr_idx), lower_32_bits(incval)); wr32(hw, GLTSYN_SHADJ_H(tmr_idx), upper_32_bits(incval)); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_ptp_prep_phy_incval_eth56g(hw, incval); - break; - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: err = ice_ptp_prep_phy_incval_e810(hw, incval); break; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: err = ice_ptp_prep_phy_incval_e82x(hw, incval); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_ptp_prep_phy_incval_eth56g(hw, incval); + break; default: err = -EOPNOTSUPP; } @@ -5899,16 +5953,19 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) wr32(hw, GLTSYN_SHADJ_L(tmr_idx), 0); wr32(hw, GLTSYN_SHADJ_H(tmr_idx), adj); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_ptp_prep_phy_adj_eth56g(hw, adj); - break; - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: err = ice_ptp_prep_phy_adj_e810(hw, adj); break; - case ICE_PHY_E82X: + case ICE_MAC_E830: + /* E830 sync PHYs automatically after setting GLTSYN_SHADJ */ + return 0; + case ICE_MAC_GENERIC: err = ice_ptp_prep_phy_adj_e82x(hw, adj); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_ptp_prep_phy_adj_eth56g(hw, adj); + break; default: err = -EOPNOTSUPP; } @@ -5932,13 +5989,16 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) */ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_read_ptp_tstamp_eth56g(hw, block, idx, tstamp); - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_read_phy_tstamp_e810(hw, block, idx, tstamp); - case ICE_PHY_E82X: + case ICE_MAC_E830: + ice_read_phy_tstamp_e830(hw, idx, tstamp); + return 0; + case ICE_MAC_GENERIC: return ice_read_phy_tstamp_e82x(hw, block, idx, tstamp); + case ICE_MAC_GENERIC_3K_E825: + return ice_read_ptp_tstamp_eth56g(hw, block, idx, tstamp); default: return -EOPNOTSUPP; } @@ -5962,13 +6022,13 @@ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp) */ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_clear_ptp_tstamp_eth56g(hw, block, idx); - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_clear_phy_tstamp_e810(hw, block, idx); - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: return ice_clear_phy_tstamp_e82x(hw, block, idx); + case ICE_MAC_GENERIC_3K_E825: + return ice_clear_ptp_tstamp_eth56g(hw, block, idx); default: return -EOPNOTSUPP; } @@ -6025,14 +6085,14 @@ static int ice_get_pf_c827_idx(struct ice_hw *hw, u8 *idx) */ void ice_ptp_reset_ts_memory(struct ice_hw *hw) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - ice_ptp_reset_ts_memory_eth56g(hw); - break; - case ICE_PHY_E82X: + switch (hw->mac_type) { + case ICE_MAC_GENERIC: ice_ptp_reset_ts_memory_e82x(hw); break; - case ICE_PHY_E810: + case ICE_MAC_GENERIC_3K_E825: + ice_ptp_reset_ts_memory_eth56g(hw); + break; + case ICE_MAC_E810: default: return; } @@ -6054,13 +6114,16 @@ int ice_ptp_init_phc(struct ice_hw *hw) /* Clear event err indications for auxiliary pins */ (void)rd32(hw, GLTSYN_STAT(src_idx)); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_ptp_init_phc_eth56g(hw); - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_ptp_init_phc_e810(hw); - case ICE_PHY_E82X: + case ICE_MAC_E830: + ice_ptp_init_phc_e830(hw); + return 0; + case ICE_MAC_GENERIC: return ice_ptp_init_phc_e82x(hw); + case ICE_MAC_GENERIC_3K_E825: + return ice_ptp_init_phc_eth56g(hw); default: return -EOPNOTSUPP; } @@ -6079,17 +6142,19 @@ int ice_ptp_init_phc(struct ice_hw *hw) */ int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_get_phy_tx_tstamp_ready_eth56g(hw, block, - tstamp_ready); - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_get_phy_tx_tstamp_ready_e810(hw, block, tstamp_ready); - case ICE_PHY_E82X: + case ICE_MAC_E830: + ice_get_phy_tx_tstamp_ready_e830(hw, block, tstamp_ready); + return 0; + case ICE_MAC_GENERIC: return ice_get_phy_tx_tstamp_ready_e82x(hw, block, tstamp_ready); - break; + case ICE_MAC_GENERIC_3K_E825: + return ice_get_phy_tx_tstamp_ready_eth56g(hw, block, + tstamp_ready); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 6779ce120515..8442d1d60351 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -324,6 +324,7 @@ extern const struct ice_vernier_info_e82x e822_vernier[NUM_ICE_PTP_LNK_SPD]; */ #define ICE_E810_PLL_FREQ 812500000 #define ICE_PTP_NOMINAL_INCVAL_E810 0x13b13b13bULL +#define ICE_E810_E830_SYNC_DELAY 0 /* Device agnostic functions */ u8 ice_get_ptp_src_clock_index(struct ice_hw *hw); @@ -395,7 +396,6 @@ int ice_phy_cfg_intr_e82x(struct ice_hw *hw, u8 quad, bool ena, u8 threshold); /* E810 family functions */ int ice_read_sma_ctrl(struct ice_hw *hw, u8 *data); int ice_write_sma_ctrl(struct ice_hw *hw, u8 data); -int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data); int ice_ptp_read_sdp_ac(struct ice_hw *hw, __le16 *entries, uint *num_entries); int ice_cgu_get_num_pins(struct ice_hw *hw, bool input); enum dpll_pin_type ice_cgu_get_pin_type(struct ice_hw *hw, u8 pin, bool input); @@ -431,13 +431,14 @@ int ice_phy_cfg_ptp_1step_eth56g(struct ice_hw *hw, u8 port); */ static inline u64 ice_get_base_incval(struct ice_hw *hw) { - switch (hw->ptp.phy_model) { - case ICE_PHY_ETH56G: - return ICE_ETH56G_NOMINAL_INCVAL; - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: return ICE_PTP_NOMINAL_INCVAL_E810; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: return ice_e82x_nominal_incval(ice_e82x_time_ref(hw)); + case ICE_MAC_GENERIC_3K_E825: + return ICE_ETH56G_NOMINAL_INCVAL; default: return 0; } @@ -650,18 +651,25 @@ static inline bool ice_is_dual(struct ice_hw *hw) /* E810 timer command register */ #define E810_ETH_GLTSYN_CMD 0x03000344 +/* E830 timer command register */ +#define E830_ETH_GLTSYN_CMD 0x00088814 + +/* E810 PHC time register */ +#define E830_GLTSYN_TIME_L(_tmr_idx) (0x0008A000 + 0x1000 * (_tmr_idx)) + /* Source timer incval macros */ #define INCVAL_HIGH_M 0xFF -/* Timestamp block macros */ +/* PHY 40b registers macros */ +#define PHY_EXT_40B_LOW_M GENMASK(31, 0) +#define PHY_EXT_40B_HIGH_M GENMASK_ULL(39, 32) +#define PHY_40B_LOW_M GENMASK(7, 0) +#define PHY_40B_HIGH_M GENMASK_ULL(39, 8) #define TS_VALID BIT(0) #define TS_LOW_M 0xFFFFFFFF #define TS_HIGH_M 0xFF #define TS_HIGH_S 32 -#define TS_PHY_LOW_M GENMASK(7, 0) -#define TS_PHY_HIGH_M GENMASK_ULL(39, 8) - #define BYTES_PER_IDX_ADDR_L_U 8 #define BYTES_PER_IDX_ADDR_L 4 diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index b83f99c01d91..33eac29b6a50 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -123,27 +123,6 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) } /** - * ice_sriov_free_msix_res - Reset/free any used MSIX resources - * @pf: pointer to the PF structure - * - * Since no MSIX entries are taken from the pf->irq_tracker then just clear - * the pf->sriov_base_vector. - * - * Returns 0 on success, and -EINVAL on error. - */ -static int ice_sriov_free_msix_res(struct ice_pf *pf) -{ - if (!pf) - return -EINVAL; - - bitmap_free(pf->sriov_irq_bm); - pf->sriov_irq_size = 0; - pf->sriov_base_vector = 0; - - return 0; -} - -/** * ice_free_vfs - Free all VFs * @pf: pointer to the PF structure */ @@ -177,6 +156,7 @@ void ice_free_vfs(struct ice_pf *pf) ice_eswitch_detach_vf(pf, vf); ice_dis_vf_qs(vf); + ice_virt_free_irqs(pf, vf->first_vector_idx, vf->num_msix); if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { /* disable VF qp mappings and set VF disable state */ @@ -200,9 +180,6 @@ void ice_free_vfs(struct ice_pf *pf) mutex_unlock(&vf->cfg_lock); } - if (ice_sriov_free_msix_res(pf)) - dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n"); - vfs->num_qps_per = 0; ice_free_vf_entries(pf); @@ -372,40 +349,6 @@ void ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) } /** - * ice_sriov_set_msix_res - Set any used MSIX resources - * @pf: pointer to PF structure - * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs - * - * This function allows SR-IOV resources to be taken from the end of the PF's - * allowed HW MSIX vectors so that the irq_tracker will not be affected. We - * just set the pf->sriov_base_vector and return success. - * - * If there are not enough resources available, return an error. This should - * always be caught by ice_set_per_vf_res(). - * - * Return 0 on success, and -EINVAL when there are not enough MSIX vectors - * in the PF's space available for SR-IOV. - */ -static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) -{ - u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; - int vectors_used = ice_get_max_used_msix_vector(pf); - int sriov_base_vector; - - sriov_base_vector = total_vectors - num_msix_needed; - - /* make sure we only grab irq_tracker entries from the list end and - * that we have enough available MSIX vectors - */ - if (sriov_base_vector < vectors_used) - return -EINVAL; - - pf->sriov_base_vector = sriov_base_vector; - - return 0; -} - -/** * ice_set_per_vf_res - check if vectors and queues are available * @pf: pointer to the PF structure * @num_vfs: the number of SR-IOV VFs being configured @@ -429,11 +372,9 @@ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) */ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) { - int vectors_used = ice_get_max_used_msix_vector(pf); u16 num_msix_per_vf, num_txq, num_rxq, avail_qs; int msix_avail_per_vf, msix_avail_for_sriov; struct device *dev = ice_pf_to_dev(pf); - int err; lockdep_assert_held(&pf->vfs.table_lock); @@ -441,8 +382,7 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) return -EINVAL; /* determine MSI-X resources per VF */ - msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors - - vectors_used; + msix_avail_for_sriov = pf->virt_irq_tracker.num_entries; msix_avail_per_vf = msix_avail_for_sriov / num_vfs; if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) { num_msix_per_vf = ICE_NUM_VF_MSIX_MED; @@ -481,13 +421,6 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) return -ENOSPC; } - err = ice_sriov_set_msix_res(pf, num_msix_per_vf * num_vfs); - if (err) { - dev_err(dev, "Unable to set MSI-X resources for %d VFs, err %d\n", - num_vfs, err); - return err; - } - /* only allow equal Tx/Rx queue count (i.e. queue pairs) */ pf->vfs.num_qps_per = min_t(int, num_txq, num_rxq); pf->vfs.num_msix_per = num_msix_per_vf; @@ -498,52 +431,6 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) } /** - * ice_sriov_get_irqs - get irqs for SR-IOV usacase - * @pf: pointer to PF structure - * @needed: number of irqs to get - * - * This returns the first MSI-X vector index in PF space that is used by this - * VF. This index is used when accessing PF relative registers such as - * GLINT_VECT2FUNC and GLINT_DYN_CTL. - * This will always be the OICR index in the AVF driver so any functionality - * using vf->first_vector_idx for queue configuration_id: id of VF which will - * use this irqs - * - * Only SRIOV specific vectors are tracked in sriov_irq_bm. SRIOV vectors are - * allocated from the end of global irq index. First bit in sriov_irq_bm means - * last irq index etc. It simplifies extension of SRIOV vectors. - * They will be always located from sriov_base_vector to the last irq - * index. While increasing/decreasing sriov_base_vector can be moved. - */ -static int ice_sriov_get_irqs(struct ice_pf *pf, u16 needed) -{ - int res = bitmap_find_next_zero_area(pf->sriov_irq_bm, - pf->sriov_irq_size, 0, needed, 0); - /* conversion from number in bitmap to global irq index */ - int index = pf->sriov_irq_size - res - needed; - - if (res >= pf->sriov_irq_size || index < pf->sriov_base_vector) - return -ENOENT; - - bitmap_set(pf->sriov_irq_bm, res, needed); - return index; -} - -/** - * ice_sriov_free_irqs - free irqs used by the VF - * @pf: pointer to PF structure - * @vf: pointer to VF structure - */ -static void ice_sriov_free_irqs(struct ice_pf *pf, struct ice_vf *vf) -{ - /* Move back from first vector index to first index in bitmap */ - int bm_i = pf->sriov_irq_size - vf->first_vector_idx - vf->num_msix; - - bitmap_clear(pf->sriov_irq_bm, bm_i, vf->num_msix); - vf->first_vector_idx = 0; -} - -/** * ice_init_vf_vsi_res - initialize/setup VF VSI resources * @vf: VF to initialize/setup the VSI for * @@ -556,7 +443,7 @@ static int ice_init_vf_vsi_res(struct ice_vf *vf) struct ice_vsi *vsi; int err; - vf->first_vector_idx = ice_sriov_get_irqs(pf, vf->num_msix); + vf->first_vector_idx = ice_virt_get_irqs(pf, vf->num_msix); if (vf->first_vector_idx < 0) return -ENOMEM; @@ -856,16 +743,10 @@ err_free_entries: */ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) { - int total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; int ret; - pf->sriov_irq_bm = bitmap_zalloc(total_vectors, GFP_KERNEL); - if (!pf->sriov_irq_bm) - return -ENOMEM; - pf->sriov_irq_size = total_vectors; - /* Disable global interrupt 0 so we don't try to handle the VFLR. */ wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index), ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); @@ -918,7 +799,6 @@ err_unroll_intr: /* rearm interrupts here */ ice_irq_dynamic_ena(hw, NULL, NULL); clear_bit(ICE_OICR_INTR_DIS, pf->state); - bitmap_free(pf->sriov_irq_bm); return ret; } @@ -992,16 +872,7 @@ u32 ice_sriov_get_vf_total_msix(struct pci_dev *pdev) { struct ice_pf *pf = pci_get_drvdata(pdev); - return pf->sriov_irq_size - ice_get_max_used_msix_vector(pf); -} - -static int ice_sriov_move_base_vector(struct ice_pf *pf, int move) -{ - if (pf->sriov_base_vector - move < ice_get_max_used_msix_vector(pf)) - return -ENOMEM; - - pf->sriov_base_vector -= move; - return 0; + return pf->virt_irq_tracker.num_entries; } static void ice_sriov_remap_vectors(struct ice_pf *pf, u16 restricted_id) @@ -1020,7 +891,8 @@ static void ice_sriov_remap_vectors(struct ice_pf *pf, u16 restricted_id) continue; ice_dis_vf_mappings(tmp_vf); - ice_sriov_free_irqs(pf, tmp_vf); + ice_virt_free_irqs(pf, tmp_vf->first_vector_idx, + tmp_vf->num_msix); vf_ids[to_remap] = tmp_vf->vf_id; to_remap += 1; @@ -1032,7 +904,7 @@ static void ice_sriov_remap_vectors(struct ice_pf *pf, u16 restricted_id) continue; tmp_vf->first_vector_idx = - ice_sriov_get_irqs(pf, tmp_vf->num_msix); + ice_virt_get_irqs(pf, tmp_vf->num_msix); /* there is no need to rebuild VSI as we are only changing the * vector indexes not amount of MSI-X or queues */ @@ -1105,20 +977,15 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) prev_msix = vf->num_msix; prev_queues = vf->num_vf_qs; - if (ice_sriov_move_base_vector(pf, msix_vec_count - prev_msix)) { - ice_put_vf(vf); - return -ENOSPC; - } - ice_dis_vf_mappings(vf); - ice_sriov_free_irqs(pf, vf); + ice_virt_free_irqs(pf, vf->first_vector_idx, vf->num_msix); /* Remap all VFs beside the one is now configured */ ice_sriov_remap_vectors(pf, vf->vf_id); vf->num_msix = msix_vec_count; vf->num_vf_qs = queues; - vf->first_vector_idx = ice_sriov_get_irqs(pf, vf->num_msix); + vf->first_vector_idx = ice_virt_get_irqs(pf, vf->num_msix); if (vf->first_vector_idx < 0) goto unroll; @@ -1147,7 +1014,8 @@ unroll: vf->num_msix = prev_msix; vf->num_vf_qs = prev_queues; - vf->first_vector_idx = ice_sriov_get_irqs(pf, vf->num_msix); + + vf->first_vector_idx = ice_virt_get_irqs(pf, vf->num_msix); if (vf->first_vector_idx < 0) { ice_put_vf(vf); return -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 33a1a5934c0d..0aab21113cc4 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -871,14 +871,6 @@ union ice_phy_params { struct ice_eth56g_params eth56g; }; -/* PHY model */ -enum ice_phy_model { - ICE_PHY_UNSUP = -1, - ICE_PHY_E810 = 1, - ICE_PHY_E82X, - ICE_PHY_ETH56G, -}; - /* Global Link Topology */ enum ice_global_link_topo { ICE_LINK_TOPO_UP_TO_2_LINKS, @@ -888,7 +880,6 @@ enum ice_global_link_topo { }; struct ice_ptp_hw { - enum ice_phy_model phy_model; union ice_phy_params phy; u8 num_lports; u8 ports_per_phy; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 8975d2971bc3..a3a4eaa17739 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019, Intel Corporation. */ #include <linux/bpf_trace.h> +#include <linux/unroll.h> #include <net/xdp_sock_drv.h> #include <net/xdp.h> #include "ice.h" @@ -989,7 +990,8 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct ice_tx_desc *tx_desc; u32 i; - loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { + unrolled_count(PKTS_PER_BATCH) + for (i = 0; i < PKTS_PER_BATCH; i++) { dma_addr_t dma; dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 45adeb513253..8dc5d55e26c5 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -7,14 +7,6 @@ #define PKTS_PER_BATCH 8 -#ifdef __clang__ -#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for -#elif __GNUC__ >= 8 -#define loop_unrolled_for _Pragma("GCC unroll 8") for -#else -#define loop_unrolled_for for -#endif - struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c index 13bbd3346e01..c538e6b18aad 100644 --- a/drivers/net/ethernet/intel/igc/igc_xdp.c +++ b/drivers/net/ethernet/intel/igc/igc_xdp.c @@ -14,6 +14,7 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, bool if_running = netif_running(dev); struct bpf_prog *old_prog; bool need_update; + unsigned int i; if (dev->mtu > ETH_DATA_LEN) { /* For now, the driver doesn't support XDP functionality with @@ -24,8 +25,13 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, } need_update = !!adapter->xdp_prog != !!prog; - if (if_running && need_update) - igc_close(dev); + if (if_running && need_update) { + for (i = 0; i < adapter->num_rx_queues; i++) { + igc_disable_rx_ring(adapter->rx_ring[i]); + igc_disable_tx_ring(adapter->tx_ring[i]); + napi_disable(&adapter->rx_ring[i]->q_vector->napi); + } + } old_prog = xchg(&adapter->xdp_prog, prog); if (old_prog) @@ -36,8 +42,13 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, else xdp_features_clear_redirect_target(dev); - if (if_running && need_update) - igc_open(dev); + if (if_running && need_update) { + for (i = 0; i < adapter->num_rx_queues; i++) { + napi_enable(&adapter->rx_ring[i]->q_vector->napi); + igc_enable_tx_ring(adapter->tx_ring[i]); + igc_enable_rx_ring(adapter->rx_ring[i]); + } + } return 0; } diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 53485142938c..0ad965ced5ef 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -830,17 +830,12 @@ static int mtk_mdio_init(struct mtk_eth *eth) int ret; u32 val; - mii_np = of_get_child_by_name(eth->dev->of_node, "mdio-bus"); + mii_np = of_get_available_child_by_name(eth->dev->of_node, "mdio-bus"); if (!mii_np) { dev_err(eth->dev, "no %s child node found", "mdio-bus"); return -ENODEV; } - if (!of_device_is_available(mii_np)) { - ret = -ENODEV; - goto err_put_node; - } - eth->mii_bus = devm_mdiobus_alloc(eth->dev); if (!eth->mii_bus) { ret = -ENOMEM; diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 25989c79c92e..76f202d7f055 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1427,15 +1427,10 @@ static int mtk_star_mdio_init(struct net_device *ndev) of_node = dev->of_node; - mdio_node = of_get_child_by_name(of_node, "mdio"); + mdio_node = of_get_available_child_by_name(of_node, "mdio"); if (!mdio_node) return -ENODEV; - if (!of_device_is_available(mdio_node)) { - ret = -ENODEV; - goto out_put_node; - } - priv->mii = devm_mdiobus_alloc(dev); if (!priv->mii) { ret = -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index b330020dc0d6..598df63518c5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -526,28 +526,6 @@ out: return res; } -u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, u32 uid, u32 obj, u32 count) -{ - struct mlx4_zone_entry *zone; - int res = 0; - - spin_lock(&zones->lock); - - zone = __mlx4_find_zone_by_uid(zones, uid); - - if (NULL == zone) { - res = -1; - goto out; - } - - __mlx4_free_from_zone(zone, obj, count); - -out: - spin_unlock(&zones->lock); - - return res; -} - u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count) { struct mlx4_zone_entry *zone; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index d7d856d1758a..b213094ea30f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1478,12 +1478,6 @@ void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc); u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count, int align, u32 skip_mask, u32 *puid); -/* Free <count> objects, start from <obj> of the uid <uid> from zone_allocator - * <zones>. - */ -u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, - u32 uid, u32 obj, u32 count); - /* If <zones> was allocated with MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP, instead of * specifying the uid when freeing an object, zone allocator could figure it by * itself. Other parameters are similar to mlx4_zone_free. diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 4e43f4a7d246..e3d0b13c1610 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -147,26 +147,6 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port, return err; } -int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx) -{ - struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; - struct mlx4_mac_table *table = &info->mac_table; - int i; - - for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { - if (!table->refs[i]) - continue; - - if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) { - *idx = i; - return 0; - } - } - - return -ENOENT; -} -EXPORT_SYMBOL_GPL(mlx4_find_cached_mac); - static bool mlx4_need_mf_bond(struct mlx4_dev *dev) { int i, num_eth_ports = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c index c7216e84ef8c..86253a89c24c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -13,6 +13,50 @@ struct mlx5_vnic_diag_stats { __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; }; +static void mlx5_reporter_vnic_diagnose_counter_icm(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg, + u16 vport_num, bool other_vport) +{ + u32 out_icm_reg[MLX5_ST_SZ_DW(vhca_icm_ctrl_reg)] = {}; + u32 in_icm_reg[MLX5_ST_SZ_DW(vhca_icm_ctrl_reg)] = {}; + u32 out_reg[MLX5_ST_SZ_DW(nic_cap_reg)] = {}; + u32 in_reg[MLX5_ST_SZ_DW(nic_cap_reg)] = {}; + u32 cur_alloc_icm; + int vhca_icm_ctrl; + u16 vhca_id; + int err; + + err = mlx5_core_access_reg(dev, in_reg, sizeof(in_reg), out_reg, + sizeof(out_reg), MLX5_REG_NIC_CAP, 0, 0); + if (err) { + mlx5_core_warn(dev, "Reading nic_cap_reg failed. err = %d\n", err); + return; + } + vhca_icm_ctrl = MLX5_GET(nic_cap_reg, out_reg, vhca_icm_ctrl); + if (!vhca_icm_ctrl) + return; + + MLX5_SET(vhca_icm_ctrl_reg, in_icm_reg, vhca_id_valid, other_vport); + if (other_vport) { + err = mlx5_vport_get_vhca_id(dev, vport_num, &vhca_id); + if (err) { + mlx5_core_warn(dev, "vport to vhca_id failed. vport_num = %d, err = %d\n", + vport_num, err); + return; + } + MLX5_SET(vhca_icm_ctrl_reg, in_icm_reg, vhca_id, vhca_id); + } + err = mlx5_core_access_reg(dev, in_icm_reg, sizeof(in_icm_reg), + out_icm_reg, sizeof(out_icm_reg), + MLX5_REG_VHCA_ICM_CTRL, 0, 0); + if (err) { + mlx5_core_warn(dev, "Reading vhca_icm_ctrl failed. err = %d\n", err); + return; + } + cur_alloc_icm = MLX5_GET(vhca_icm_ctrl_reg, out_icm_reg, cur_alloc_icm); + devlink_fmsg_u32_pair_put(fmsg, "icm_consumption", cur_alloc_icm); +} + void mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, struct devlink_fmsg *fmsg, u16 vport_num, bool other_vport) @@ -59,6 +103,8 @@ void mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail", VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail)); } + if (MLX5_CAP_GEN(dev, nic_cap_reg)) + mlx5_reporter_vnic_diagnose_counter_icm(dev, fmsg, vport_num, other_vport); devlink_fmsg_obj_nest_end(fmsg); devlink_fmsg_pair_nest_end(fmsg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 979fc56205e1..769e683f2488 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -95,8 +95,6 @@ struct page_pool; #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \ MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD)) -#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18 - /* Keep in sync with mlx5e_mpwrq_log_wqe_sz. * These are theoretical maximums, which can be further restricted by * capabilities. These values are used for static resource allocations and @@ -386,7 +384,6 @@ enum { MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, MLX5E_SQ_STATE_PENDING_XSK_TX, MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, - MLX5E_SQ_STATE_XDP_MULTIBUF, MLX5E_NUM_SQ_STATES, /* Must be kept last */ }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 64b62ed17b07..aa36670d9a36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -10,6 +10,9 @@ #include <net/page_pool/types.h> #include <net/xdp_sock_drv.h> +#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18 +#define MLX5_REP_MPWRQ_MAX_LOG_WQE_SZ 17 + static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) { u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size); @@ -103,18 +106,22 @@ u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode) { u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); - u8 max_pages_per_wqe, max_log_mpwqe_size; + u8 max_pages_per_wqe, max_log_wqe_size_calc; + u8 max_log_wqe_size_cap; u16 max_wqe_size; /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */ max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB; max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe), MLX5_UMR_FLEX_ALIGNMENT) / umr_entry_size; - max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift; + max_log_wqe_size_calc = ilog2(max_pages_per_wqe) + page_shift; + + WARN_ON_ONCE(max_log_wqe_size_calc < MLX5E_ORDER2_MAX_PACKET_MTU); - WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU); + max_log_wqe_size_cap = mlx5_core_is_ecpf(mdev) ? + MLX5_REP_MPWRQ_MAX_LOG_WQE_SZ : MLX5_MPWRQ_MAX_LOG_WQE_SZ; - return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ); + return min_t(u8, max_log_wqe_size_calc, max_log_wqe_size_cap); } u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, @@ -1240,7 +1247,6 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); - param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk); mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 3f8986f9d862..bd5877acc5b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -33,7 +33,6 @@ struct mlx5e_sq_param { struct mlx5_wq_param wq; bool is_mpw; bool is_tls; - bool is_xdp_mb; u16 stop_room; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 5f6a0605e4ae..f62fbfb67a1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -296,11 +296,16 @@ enum mlx5e_fec_supported_link_mode { MLX5E_FEC_SUPPORTED_LINK_MODE_200G_2X, MLX5E_FEC_SUPPORTED_LINK_MODE_400G_4X, MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X, + MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X, + MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X, + MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X, + MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X, MLX5E_MAX_FEC_SUPPORTED_LINK_MODE, }; #define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X #define MLX5E_FEC_FIRST_100G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_100G_1X +#define MLX5E_FEC_FIRST_200G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X #define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \ do { \ @@ -320,8 +325,10 @@ static bool mlx5e_is_fec_supported_link_mode(struct mlx5_core_dev *dev, return link_mode < MLX5E_FEC_FIRST_50G_PER_LANE_MODE || (link_mode < MLX5E_FEC_FIRST_100G_PER_LANE_MODE && MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm)) || - (link_mode >= MLX5E_FEC_FIRST_100G_PER_LANE_MODE && - MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm)); + (link_mode < MLX5E_FEC_FIRST_200G_PER_LANE_MODE && + MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm)) || + (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE && + MLX5_CAP_PCAM_FEATURE(dev, fec_200G_per_lane_in_pplm)); } /* get/set FEC admin field for a given speed */ @@ -368,6 +375,18 @@ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X: MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_8x); break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 1600g_8x); + break; default: return -EINVAL; } @@ -421,6 +440,18 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap, case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X: *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_8x); break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 1600g_8x); + break; default: return -EINVAL; } @@ -494,6 +525,26 @@ out: return 0; } +static u16 mlx5e_remap_fec_conf_mode(enum mlx5e_fec_supported_link_mode link_mode, + u16 conf_fec) +{ + /* RS fec in ethtool is originally mapped to MLX5E_FEC_RS_528_514. + * For link modes up to 25G per lane, the value is kept. + * For 50G or 100G per lane, it's remapped to MLX5E_FEC_RS_544_514. + * For 200G per lane, remapped to MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD. + */ + if (conf_fec != BIT(MLX5E_FEC_RS_528_514)) + return conf_fec; + + if (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE) + return BIT(MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD); + + if (link_mode >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) + return BIT(MLX5E_FEC_RS_544_514); + + return conf_fec; +} + int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) { bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); @@ -530,14 +581,7 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) if (!mlx5e_is_fec_supported_link_mode(dev, i)) break; - /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514 - * to link modes up to 25G per lane and to - * MLX5E_FEC_RS_544_514 in the new link modes based on - * 50G or 100G per lane - */ - if (conf_fec == (1 << MLX5E_FEC_RS_528_514) && - i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) - conf_fec = (1 << MLX5E_FEC_RS_544_514); + conf_fec = mlx5e_remap_fec_conf_mode(i, conf_fec); mlx5e_get_fec_cap_field(out, &fec_caps, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index d1da225f35da..fa2283dd383b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -61,6 +61,7 @@ enum { MLX5E_FEC_NOFEC, MLX5E_FEC_FIRECODE, MLX5E_FEC_RS_528_514, + MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD = 4, MLX5E_FEC_RS_544_514 = 7, MLX5E_FEC_LLRS_272_257_1 = 9, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index afd654583b6b..131ed97ca997 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -326,7 +326,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, int node; sq->pdev = c->pdev; - sq->clock = &mdev->clock; + sq->clock = mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; sq->priv = c->priv; @@ -696,7 +696,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, rq->pdev = c->pdev; rq->netdev = priv->netdev; rq->priv = priv; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->tstamp = &priv->tstamp; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 25d751eba99b..e75759533ae0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -317,10 +317,8 @@ mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx } static void -mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) +mlx5e_rx_reporter_diagnose_common_config(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { - struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq; struct mlx5e_ptp *ptp_ch = priv->channels.ptp; @@ -340,20 +338,100 @@ static void mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq, devlink_fmsg_obj_nest_end(fmsg); } -static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, - struct netlink_ext_ack *extack) +static void mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(struct mlx5e_rx_res *rx_res, + struct devlink_fmsg *fmsg) { - struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + unsigned int max_nch = mlx5e_rx_res_get_max_nch(rx_res); int i; - mutex_lock(&priv->state_lock); + devlink_fmsg_arr_pair_nest_start(fmsg, "Direct TIRs"); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto unlock; + for (i = 0; i < max_nch; i++) { + devlink_fmsg_obj_nest_start(fmsg); + + devlink_fmsg_u32_pair_put(fmsg, "ix", i); + devlink_fmsg_u32_pair_put(fmsg, "tirn", mlx5e_rx_res_get_tirn_direct(rx_res, i)); + devlink_fmsg_u32_pair_put(fmsg, "rqtn", mlx5e_rx_res_get_rqtn_direct(rx_res, i)); + + devlink_fmsg_obj_nest_end(fmsg); + } + + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(struct mlx5e_rss *rss, bool inner, + struct devlink_fmsg *fmsg) +{ + bool found_valid_tir = false; + int tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + if (!mlx5e_rss_valid_tir(rss, tt, inner)) + continue; + + if (!found_valid_tir) { + char *tir_msg = inner ? "Inner TIRs Numbers" : "TIRs Numbers"; + + found_valid_tir = true; + devlink_fmsg_arr_pair_nest_start(fmsg, tir_msg); + } + + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_string_pair_put(fmsg, "tt", mlx5_ttc_get_name(tt)); + devlink_fmsg_u32_pair_put(fmsg, "tirn", mlx5e_rss_get_tirn(rss, tt, inner)); + devlink_fmsg_obj_nest_end(fmsg); + } + + if (found_valid_tir) + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res_rss_ix(struct mlx5e_rx_res *rx_res, u32 rss_idx, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_rss *rss = mlx5e_rx_res_rss_get(rx_res, rss_idx); + + if (!rss) + return; + + devlink_fmsg_obj_nest_start(fmsg); + + devlink_fmsg_u32_pair_put(fmsg, "Index", rss_idx); + devlink_fmsg_u32_pair_put(fmsg, "rqtn", mlx5e_rss_get_rqtn(rss)); + mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(rss, false, fmsg); + if (mlx5e_rss_get_inner_ft_support(rss)) + mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(rss, true, fmsg); + + devlink_fmsg_obj_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res_rss(struct mlx5e_rx_res *rx_res, + struct devlink_fmsg *fmsg) +{ + int rss_ix; + + devlink_fmsg_arr_pair_nest_start(fmsg, "RSS"); + for (rss_ix = 0; rss_ix < MLX5E_MAX_NUM_RSS; rss_ix++) + mlx5e_rx_reporter_diagnose_rx_res_rss_ix(rx_res, rss_ix, fmsg); + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_rx_res *rx_res = priv->rx_res; + + mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX resources"); + mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(rx_res, fmsg); + mlx5e_rx_reporter_diagnose_rx_res_rss(rx_res, fmsg); + mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) +{ + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + int i; - mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg); devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); for (i = 0; i < priv->channels.num; i++) { @@ -367,7 +445,24 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, } if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg); + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + mlx5e_rx_reporter_diagnose_common_config(priv, fmsg); + mlx5e_rx_reporter_diagnose_rqs(priv, fmsg); + mlx5e_rx_reporter_diagnose_rx_res(priv, fmsg); unlock: mutex_unlock(&priv->state_lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 09433b91be17..532c7fa94d17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -16,7 +16,6 @@ static const char * const sq_sw_state_type_name[] = { [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline", [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx", [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync", - [MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf", }; static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index 5f742f896600..0d8ccc7b6c11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -81,6 +81,11 @@ struct mlx5e_rss { refcount_t refcnt; }; +bool mlx5e_rss_get_inner_ft_support(struct mlx5e_rss *rss) +{ + return rss->inner_ft_support; +} + void mlx5e_rss_params_indir_modify_actual_size(struct mlx5e_rss *rss, u32 num_channels) { rss->indir.actual_table_size = mlx5e_rqt_size(rss->mdev, num_channels); @@ -449,6 +454,16 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, return mlx5e_tir_get_tirn(tir); } +u32 mlx5e_rss_get_rqtn(struct mlx5e_rss *rss) +{ + return mlx5e_rqt_get_rqtn(&rss->rqt); +} + +bool mlx5e_rss_valid_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner) +{ + return !!rss_get_tir(rss, tt, inner); +} + /* Fill the "tirn" output parameter. * Create the requested TIR if it's its first usage. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h index d0df98963c8d..72089f5f473c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -32,8 +32,11 @@ void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss); void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss); unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss); +bool mlx5e_rss_get_inner_ft_support(struct mlx5e_rss *rss); u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner); +bool mlx5e_rss_valid_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner); +u32 mlx5e_rss_get_rqtn(struct mlx5e_rss *rss); int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, const struct mlx5e_packet_merge_param *init_pkt_merge_param, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index a86eade9a9e0..9d8b2f5f6c96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -5,8 +5,6 @@ #include "channels.h" #include "params.h" -#define MLX5E_MAX_NUM_RSS 16 - struct mlx5e_rx_res { struct mlx5_core_dev *mdev; /* primary */ enum mlx5e_rx_res_features features; @@ -497,6 +495,11 @@ void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res) mlx5e_rx_res_free(res); } +unsigned int mlx5e_rx_res_get_max_nch(struct mlx5e_rx_res *res) +{ + return res->max_nch; +} + u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix) { return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir); @@ -522,7 +525,7 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) return mlx5e_tir_get_tirn(&res->ptp.tir); } -static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) { return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 7b1a9f0f1874..05b438043bcb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -10,6 +10,8 @@ #include "fs.h" #include "rss.h" +#define MLX5E_MAX_NUM_RSS 16 + struct mlx5e_rx_res; struct mlx5e_channels; @@ -34,6 +36,9 @@ u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix); u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix); +unsigned int mlx5e_rx_res_get_max_nch(struct mlx5e_rx_res *res); +bool mlx5_rx_res_rss_inner_ft_support(struct mlx5e_rx_res *res); /* Activate/deactivate API */ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index d6c12d0ea55b..2e528b2c34d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -73,11 +73,6 @@ struct mlx5e_tc_act { bool is_terminating_action; }; -struct mlx5e_tc_flow_action { - unsigned int num_entries; - struct flow_action_entry **entries; -}; - extern struct mlx5e_tc_act mlx5e_tc_act_drop; extern struct mlx5e_tc_act mlx5e_tc_act_trap; extern struct mlx5e_tc_act mlx5e_tc_act_accept; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 53ca16cb9c41..140606fcd23b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -46,7 +46,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params rq->pdev = t->pdev; rq->netdev = priv->netdev; rq->priv = priv; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->tstamp = &priv->tstamp; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 94b291662087..6f3094a479e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -289,9 +289,9 @@ static u64 mlx5e_xsk_fill_timestamp(void *_priv) ts = get_cqe_ts(priv->cqe); if (mlx5_is_real_time_rq(priv->cq->mdev) || mlx5_is_real_time_sq(priv->cq->mdev)) - return mlx5_real_time_cyc2time(&priv->cq->mdev->clock, ts); + return mlx5_real_time_cyc2time(priv->cq->mdev->clock, ts); - return mlx5_timecounter_cyc2time(&priv->cq->mdev->clock, ts); + return mlx5_timecounter_cyc2time(priv->cq->mdev->clock, ts); } static void mlx5e_xsk_request_checksum(u16 csum_start, u16 csum_offset, void *priv) @@ -546,6 +546,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, bool inline_ok; bool linear; u16 pi; + int i; struct mlx5e_xdpsq_stats *stats = sq->stats; @@ -612,41 +613,33 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { - int i; - - memset(&cseg->trailer, 0, sizeof(cseg->trailer)); - memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); - - eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + memset(&cseg->trailer, 0, sizeof(cseg->trailer)); + memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); - for (i = 0; i < num_frags; i++) { - skb_frag_t *frag = &xdptxdf->sinfo->frags[i]; - dma_addr_t addr; + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] : - page_pool_get_dma_addr(skb_frag_page(frag)) + - skb_frag_off(frag); + for (i = 0; i < num_frags; i++) { + skb_frag_t *frag = &xdptxdf->sinfo->frags[i]; + dma_addr_t addr; - dseg->addr = cpu_to_be64(addr); - dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); - dseg->lkey = sq->mkey_be; - dseg++; - } + addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] : + page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + dseg->addr = cpu_to_be64(addr); + dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); + dseg->lkey = sq->mkey_be; + dseg++; + } - sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { - .num_wqebbs = num_wqebbs, - .num_pkts = 1, - }; + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->pc += num_wqebbs; - } else { - cseg->fm_ce_se = 0; + sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { + .num_wqebbs = num_wqebbs, + .num_pkts = 1, + }; - sq->pc++; - } + sq->pc += num_wqebbs; xsk_tx_metadata_request(meta, &mlx5e_xsk_tx_metadata_ops, eseg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 9240cfe25d10..d743e823362a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -72,7 +72,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, rq->netdev = c->netdev; rq->priv = c->priv; rq->tstamp = c->tstamp; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; rq->channel = c; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index cae39198b4db..f9113cb13a0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -237,6 +237,27 @@ void mlx5e_build_ptys2ethtool_map(void) ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT, ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT, ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_1_200GBASE_CR1_KR1, ext, + ETHTOOL_LINK_MODE_200000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseVR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_2_400GBASE_CR2_KR2, ext, + ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_800GAUI_4_800GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT, + ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT); } static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, @@ -931,6 +952,7 @@ static const u32 pplm_fec_2_ethtool[] = { [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS, [MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS, [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS, + [MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD] = ETHTOOL_FEC_RS, }; static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a814b63ed97e..5d5e7b19c396 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -737,7 +737,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param rq->netdev = c->netdev; rq->priv = c->priv; rq->tstamp = c->tstamp; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; rq->channel = c; @@ -1614,7 +1614,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int err; sq->pdev = c->pdev; - sq->clock = &mdev->clock; + sq->clock = mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; sq->mdev = c->mdev; @@ -2023,41 +2023,12 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.min_inline_mode = sq->min_inline_mode; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - if (param->is_xdp_mb) - set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state); - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_xdpsq; mlx5e_set_xmit_fp(sq, param->is_mpw); - if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { - unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1; - unsigned int inline_hdr_sz = 0; - int i; - - if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - inline_hdr_sz = MLX5E_XDP_MIN_INLINE; - ds_cnt++; - } - - /* Pre initialize fixed WQE fields */ - for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - - sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) { - .num_wqebbs = 1, - .num_pkts = 1, - }; - - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - } - } - return 0; err_free_xdpsq: @@ -3816,8 +3787,11 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, /* MQPRIO is another toplevel qdisc that can't be attached * simultaneously with the offloaded HTB. */ - if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq))) - return -EINVAL; + if (mlx5e_selq_is_htb_enabled(&priv->selq)) { + NL_SET_ERR_MSG_MOD(mqprio->extack, + "MQPRIO cannot be configured when HTB offload is enabled."); + return -EOPNOTSUPP; + } switch (mqprio->mode) { case TC_MQPRIO_MODE_DCB: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index fdff9fd8a89e..07f38f472a27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -65,6 +65,7 @@ #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) #define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1 +#define MLX5E_REP_PARAMS_DEF_LOG_RQ_SIZE 0x8 static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -855,6 +856,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev) /* RQ */ mlx5e_build_rq_params(mdev, params); + if (!mlx5e_is_uplink_rep(priv) && mlx5_core_is_ecpf(mdev)) + params->log_rq_mtu_frames = MLX5E_REP_PARAMS_DEF_LOG_RQ_SIZE; /* If netdev is already registered (e.g. move from nic profile to uplink, * RTNL lock must be held before triggering netdev notifiers. @@ -886,6 +889,8 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev, netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; netdev->watchdog_timeo = 15 * HZ; + if (mlx5_core_is_ecpf(mdev)) + netdev->tx_queue_len = 1 << MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) netdev->hw_features |= NETIF_F_HW_TC; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 1d60465cc2ca..2f7a543feca6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -166,6 +166,9 @@ mlx5e_test_loopback_validate(struct sk_buff *skb, struct udphdr *udph; struct iphdr *iph; + if (skb_linearize(skb)) + goto out; + /* We are only going to peek, no need to clone the SKB */ if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb)) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 20cc01ceee8a..0fa0333106a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -4157,37 +4157,12 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); -static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id) -{ - int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - void *query_ctx; - void *hca_caps; - int err; - - *vhca_id = 0; - - query_ctx = kzalloc(query_out_sz, GFP_KERNEL); - if (!query_ctx) - return -ENOMEM; - - err = mlx5_vport_get_other_func_general_cap(esw->dev, vport_num, query_ctx); - if (err) - goto out_free; - - hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); - -out_free: - kfree(query_ctx); - return err; -} - int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num) { u16 *old_entry, *vhca_map_entry, vhca_id; int err; - err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id); if (err) { esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n", vport_num, err); @@ -4213,7 +4188,7 @@ void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num) u16 *vhca_map_entry, vhca_id; int err; - err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id); if (err) esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n", vport_num, err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index bde79cac33a9..d832a12ffec0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -97,7 +97,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, mlx5_del_flow_rules(lag_definer->rules[idx]); } j = ldev->buckets; - }; + } goto destroy_fg; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index d61a1a9297c9..65a94e46edcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -43,6 +43,8 @@ #include <linux/cpufeature.h> #endif /* CONFIG_X86 */ +#define MLX5_RT_CLOCK_IDENTITY_SIZE MLX5_FLD_SZ_BYTES(mrtcq_reg, rt_clock_identity) + enum { MLX5_PIN_MODE_IN = 0x0, MLX5_PIN_MODE_OUT = 0x1, @@ -77,6 +79,56 @@ enum { MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX = 200000, }; +struct mlx5_clock_dev_state { + struct mlx5_core_dev *mdev; + struct mlx5_devcom_comp_dev *compdev; + struct mlx5_nb pps_nb; + struct work_struct out_work; +}; + +struct mlx5_clock_priv { + struct mlx5_clock clock; + struct mlx5_core_dev *mdev; + struct mutex lock; /* protect mdev and used in PTP callbacks */ + struct mlx5_core_dev *event_mdev; +}; + +static struct mlx5_clock_priv *clock_priv(struct mlx5_clock *clock) +{ + return container_of(clock, struct mlx5_clock_priv, clock); +} + +static void mlx5_clock_lockdep_assert(struct mlx5_clock *clock) +{ + if (!clock->shared) + return; + + lockdep_assert(lockdep_is_held(&clock_priv(clock)->lock)); +} + +static struct mlx5_core_dev *mlx5_clock_mdev_get(struct mlx5_clock *clock) +{ + mlx5_clock_lockdep_assert(clock); + + return clock_priv(clock)->mdev; +} + +static void mlx5_clock_lock(struct mlx5_clock *clock) +{ + if (!clock->shared) + return; + + mutex_lock(&clock_priv(clock)->lock); +} + +static void mlx5_clock_unlock(struct mlx5_clock *clock) +{ + if (!clock->shared) + return; + + mutex_unlock(&clock_priv(clock)->lock); +} + static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev) { return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev)); @@ -94,6 +146,22 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev) return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify); } +static int mlx5_clock_identity_get(struct mlx5_core_dev *mdev, + u8 identify[MLX5_RT_CLOCK_IDENTITY_SIZE]) +{ + u32 out[MLX5_ST_SZ_DW(mrtcq_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mrtcq_reg)] = {}; + int err; + + err = mlx5_core_access_reg(mdev, in, sizeof(in), + out, sizeof(out), MLX5_REG_MRTCQ, 0, 0); + if (!err) + memcpy(identify, MLX5_ADDR_OF(mrtcq_reg, out, rt_clock_identity), + MLX5_RT_CLOCK_IDENTITY_SIZE); + + return err; +} + static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz) { /* Optimal shift constant leads to corrections above just 1 scaled ppm. @@ -119,21 +187,30 @@ static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz) ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz)); } +static s32 mlx5_clock_getmaxphase(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ? + MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX : + MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; +} + static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_core_dev *mdev; + s32 ret; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + ret = mlx5_clock_getmaxphase(mdev); + mlx5_clock_unlock(clock); - return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ? - MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX : - MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; + return ret; } static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta) { - s64 max = mlx5_ptp_getmaxphase(&mdev->clock.ptp_info); + s64 max = mlx5_clock_getmaxphase(mdev); if (delta < -max || delta > max) return false; @@ -209,7 +286,7 @@ static int mlx5_mtctr_syncdevicetime(ktime_t *device_time, if (real_time_mode) *device_time = ns_to_ktime(REAL_TIME_TO_NS(device >> 32, device & U32_MAX)); else - *device_time = mlx5_timecounter_cyc2time(&mdev->clock, device); + *device_time = mlx5_timecounter_cyc2time(mdev->clock, device); return 0; } @@ -220,16 +297,23 @@ static int mlx5_ptp_getcrosststamp(struct ptp_clock_info *ptp, struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct system_time_snapshot history_begin = {0}; struct mlx5_core_dev *mdev; + int err; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); - if (!mlx5_is_ptm_source_time_available(mdev)) - return -EBUSY; + if (!mlx5_is_ptm_source_time_available(mdev)) { + err = -EBUSY; + goto unlock; + } ktime_get_snapshot(&history_begin); - return get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev, - &history_begin, cts); + err = get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev, + &history_begin, cts); +unlock: + mlx5_clock_unlock(clock); + return err; } #endif /* CONFIG_X86 */ @@ -263,8 +347,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc) { struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles); struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer); - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, - clock); + struct mlx5_core_dev *mdev = mlx5_clock_mdev_get(clock); return mlx5_read_time(mdev, NULL, false) & cc->mask; } @@ -272,7 +355,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc) static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) { struct mlx5_ib_clock_info *clock_info = mdev->clock_info; - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; struct mlx5_timer *timer; u32 sign; @@ -295,12 +378,10 @@ static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) static void mlx5_pps_out(struct work_struct *work) { - struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps, - out_work); - struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock, - pps_info); - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, - clock); + struct mlx5_clock_dev_state *clock_state = container_of(work, struct mlx5_clock_dev_state, + out_work); + struct mlx5_core_dev *mdev = clock_state->mdev; + struct mlx5_clock *clock = mdev->clock; u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; unsigned long flags; int i; @@ -330,7 +411,8 @@ static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info) unsigned long flags; clock = container_of(ptp_info, struct mlx5_clock, ptp_info); - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); timer = &clock->timer; if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) @@ -342,6 +424,7 @@ static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info) write_sequnlock_irqrestore(&clock->lock, flags); out: + mlx5_clock_unlock(clock); return timer->overflow_period; } @@ -361,15 +444,12 @@ static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev, return mlx5_set_mtutc(mdev, in, sizeof(in)); } -static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) +static int mlx5_clock_settime(struct mlx5_core_dev *mdev, struct mlx5_clock *clock, + const struct timespec64 *ts) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_timer *timer = &clock->timer; - struct mlx5_core_dev *mdev; unsigned long flags; - mdev = container_of(clock, struct mlx5_core_dev, clock); - if (mlx5_modify_mtutc_allowed(mdev)) { int err = mlx5_ptp_settime_real_time(mdev, ts); @@ -385,6 +465,20 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 return 0; } +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; + int err; + + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + err = mlx5_clock_settime(mdev, clock, ts); + mlx5_clock_unlock(clock); + + return err; +} + static struct timespec64 mlx5_ptp_gettimex_real_time(struct mlx5_core_dev *mdev, struct ptp_system_timestamp *sts) @@ -404,7 +498,8 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, struct mlx5_core_dev *mdev; u64 cycles, ns; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); if (mlx5_real_time_mode(mdev)) { *ts = mlx5_ptp_gettimex_real_time(mdev, sts); goto out; @@ -414,6 +509,7 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, ns = mlx5_timecounter_cyc2time(clock, cycles); *ts = ns_to_timespec64(ns); out: + mlx5_clock_unlock(clock); return 0; } @@ -444,14 +540,16 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) struct mlx5_timer *timer = &clock->timer; struct mlx5_core_dev *mdev; unsigned long flags; + int err = 0; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); if (mlx5_modify_mtutc_allowed(mdev)) { - int err = mlx5_ptp_adjtime_real_time(mdev, delta); + err = mlx5_ptp_adjtime_real_time(mdev, delta); if (err) - return err; + goto unlock; } write_seqlock_irqsave(&clock->lock, flags); @@ -459,17 +557,23 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); - return 0; +unlock: + mlx5_clock_unlock(clock); + return err; } static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_core_dev *mdev; + int err; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + err = mlx5_ptp_adjtime_real_time(mdev, delta); + mlx5_clock_unlock(clock); - return mlx5_ptp_adjtime_real_time(mdev, delta); + return err; } static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm) @@ -498,15 +602,17 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) struct mlx5_timer *timer = &clock->timer; struct mlx5_core_dev *mdev; unsigned long flags; + int err = 0; u32 mult; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); if (mlx5_modify_mtutc_allowed(mdev)) { - int err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); + err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); if (err) - return err; + goto unlock; } mult = (u32)adjust_by_scaled_ppm(timer->nominal_c_mult, scaled_ppm); @@ -518,7 +624,9 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) write_sequnlock_irqrestore(&clock->lock, flags); ptp_schedule_worker(clock->ptp, timer->overflow_period); - return 0; +unlock: + mlx5_clock_unlock(clock); + return err; } static int mlx5_extts_configure(struct ptp_clock_info *ptp, @@ -527,18 +635,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - struct mlx5_core_dev *mdev = - container_of(clock, struct mlx5_core_dev, clock); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + struct mlx5_core_dev *mdev; u32 field_select = 0; u8 pin_mode = 0; u8 pattern = 0; int pin = -1; int err = 0; - if (!MLX5_PPS_CAP(mdev)) - return -EOPNOTSUPP; - /* Reject requests with unsupported flags */ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | PTP_RISING_EDGE | @@ -569,6 +673,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, field_select = MLX5_MTPPS_FS_ENABLE; } + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + + if (!MLX5_PPS_CAP(mdev)) { + err = -EOPNOTSUPP; + goto unlock; + } + MLX5_SET(mtpps_reg, in, pin, pin); MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); MLX5_SET(mtpps_reg, in, pattern, pattern); @@ -577,15 +689,23 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, err = mlx5_set_mtpps(mdev, in, sizeof(in)); if (err) - return err; + goto unlock; + + err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on); + if (err) + goto unlock; + + clock->pps_info.pin_armed[pin] = on; + clock_priv(clock)->event_mdev = mdev; - return mlx5_set_mtppse(mdev, pin, 0, - MLX5_EVENT_MODE_REPETETIVE & on); +unlock: + mlx5_clock_unlock(clock); + return err; } static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; u64 cycles_now, cycles_delta; u64 nsec_now, nsec_delta; struct mlx5_timer *timer; @@ -644,7 +764,7 @@ static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq, u32 *out_pulse_duration_ns) { - struct mlx5_pps *pps_info = &mdev->clock.pps_info; + struct mlx5_pps *pps_info = &mdev->clock->pps_info; u32 out_pulse_duration; struct timespec64 ts; @@ -677,7 +797,7 @@ static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clo u32 *field_select, u32 *out_pulse_duration_ns, u64 *period, u64 *time_stamp) { - struct mlx5_pps *pps_info = &mdev->clock.pps_info; + struct mlx5_pps *pps_info = &mdev->clock->pps_info; struct ptp_clock_time *time = &rq->perout.start; struct timespec64 ts; @@ -712,26 +832,18 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - struct mlx5_core_dev *mdev = - container_of(clock, struct mlx5_core_dev, clock); - bool rt_mode = mlx5_real_time_mode(mdev); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; u32 out_pulse_duration_ns = 0; + struct mlx5_core_dev *mdev; u32 field_select = 0; u64 npps_period = 0; u64 time_stamp = 0; u8 pin_mode = 0; u8 pattern = 0; + bool rt_mode; int pin = -1; int err = 0; - if (!MLX5_PPS_CAP(mdev)) - return -EOPNOTSUPP; - - /* Reject requests with unsupported flags */ - if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) - return -EOPNOTSUPP; - if (rq->perout.index >= clock->ptp_info.n_pins) return -EINVAL; @@ -740,14 +852,29 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, if (pin < 0) return -EBUSY; - if (on) { - bool rt_mode = mlx5_real_time_mode(mdev); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + rt_mode = mlx5_real_time_mode(mdev); + + if (!MLX5_PPS_CAP(mdev)) { + err = -EOPNOTSUPP; + goto unlock; + } + + /* Reject requests with unsupported flags */ + if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) { + err = -EOPNOTSUPP; + goto unlock; + } + if (on) { pin_mode = MLX5_PIN_MODE_OUT; pattern = MLX5_OUT_PATTERN_PERIODIC; - if (rt_mode && rq->perout.start.sec > U32_MAX) - return -EINVAL; + if (rt_mode && rq->perout.start.sec > U32_MAX) { + err = -EINVAL; + goto unlock; + } field_select |= MLX5_MTPPS_FS_PIN_MODE | MLX5_MTPPS_FS_PATTERN | @@ -760,7 +887,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, else err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode); if (err) - return err; + goto unlock; } MLX5_SET(mtpps_reg, in, pin, pin); @@ -773,13 +900,16 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns); err = mlx5_set_mtpps(mdev, in, sizeof(in)); if (err) - return err; + goto unlock; if (rt_mode) - return 0; + goto unlock; + + err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on); - return mlx5_set_mtppse(mdev, pin, 0, - MLX5_EVENT_MODE_REPETETIVE & on); +unlock: + mlx5_clock_unlock(clock); + return err; } static int mlx5_pps_configure(struct ptp_clock_info *ptp, @@ -866,10 +996,8 @@ static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin, mtpps_size, MLX5_REG_MTPPS, 0, 0); } -static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) +static int mlx5_get_pps_pin_mode(struct mlx5_core_dev *mdev, u8 pin) { - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); - u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {}; u8 mode; int err; @@ -888,8 +1016,9 @@ static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) return PTP_PF_NONE; } -static void mlx5_init_pin_config(struct mlx5_clock *clock) +static void mlx5_init_pin_config(struct mlx5_core_dev *mdev) { + struct mlx5_clock *clock = mdev->clock; int i; if (!clock->ptp_info.n_pins) @@ -910,15 +1039,15 @@ static void mlx5_init_pin_config(struct mlx5_clock *clock) sizeof(clock->ptp_info.pin_config[i].name), "mlx5_pps%d", i); clock->ptp_info.pin_config[i].index = i; - clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i); + clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(mdev, i); clock->ptp_info.pin_config[i].chan = 0; } } static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + struct mlx5_clock *clock = mdev->clock; mlx5_query_mtpps(mdev, out, sizeof(out)); @@ -968,16 +1097,16 @@ static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev, static int mlx5_pps_event(struct notifier_block *nb, unsigned long type, void *data) { - struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); + struct mlx5_clock_dev_state *clock_state = mlx5_nb_cof(nb, struct mlx5_clock_dev_state, + pps_nb); + struct mlx5_core_dev *mdev = clock_state->mdev; + struct mlx5_clock *clock = mdev->clock; struct ptp_clock_event ptp_event; struct mlx5_eqe *eqe = data; int pin = eqe->data.pps.pin; - struct mlx5_core_dev *mdev; unsigned long flags; u64 ns; - mdev = container_of(clock, struct mlx5_core_dev, clock); - switch (clock->ptp_info.pin_config[pin].func) { case PTP_PF_EXTTS: ptp_event.index = pin; @@ -997,11 +1126,15 @@ static int mlx5_pps_event(struct notifier_block *nb, ptp_clock_event(clock->ptp, &ptp_event); break; case PTP_PF_PEROUT: + if (clock->shared) { + mlx5_core_warn(mdev, " Received unexpected PPS out event\n"); + break; + } ns = perout_conf_next_event_timer(mdev, clock); write_seqlock_irqsave(&clock->lock, flags); clock->pps_info.start[pin] = ns; write_sequnlock_irqrestore(&clock->lock, flags); - schedule_work(&clock->pps_info.out_work); + schedule_work(&clock_state->out_work); break; default: mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", @@ -1013,7 +1146,7 @@ static int mlx5_pps_event(struct notifier_block *nb, static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; struct mlx5_timer *timer = &clock->timer; u32 dev_freq; @@ -1029,10 +1162,10 @@ static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) ktime_to_ns(ktime_get_real())); } -static void mlx5_init_overflow_period(struct mlx5_clock *clock) +static void mlx5_init_overflow_period(struct mlx5_core_dev *mdev) { - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); struct mlx5_ib_clock_info *clock_info = mdev->clock_info; + struct mlx5_clock *clock = mdev->clock; struct mlx5_timer *timer = &clock->timer; u64 overflow_cycles; u64 frac = 0; @@ -1065,7 +1198,7 @@ static void mlx5_init_overflow_period(struct mlx5_clock *clock) static void mlx5_init_clock_info(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; struct mlx5_ib_clock_info *info; struct mlx5_timer *timer; @@ -1088,7 +1221,7 @@ static void mlx5_init_clock_info(struct mlx5_core_dev *mdev) static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {}; u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; u8 log_max_freq_adjustment = 0; @@ -1107,7 +1240,7 @@ static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev) static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; /* Configure the PHC */ clock->ptp_info = mlx5_ptp_clock_info; @@ -1123,38 +1256,30 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) mlx5_timecounter_init(mdev); mlx5_init_clock_info(mdev); - mlx5_init_overflow_period(clock); + mlx5_init_overflow_period(mdev); if (mlx5_real_time_mode(mdev)) { struct timespec64 ts; ktime_get_real_ts64(&ts); - mlx5_ptp_settime(&clock->ptp_info, &ts); + mlx5_clock_settime(mdev, clock, &ts); } } static void mlx5_init_pps(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; - if (!MLX5_PPS_CAP(mdev)) return; mlx5_get_pps_caps(mdev); - mlx5_init_pin_config(clock); + mlx5_init_pin_config(mdev); } -void mlx5_init_clock(struct mlx5_core_dev *mdev) +static void mlx5_init_clock_dev(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; - - if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) { - mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); - return; - } + struct mlx5_clock *clock = mdev->clock; seqlock_init(&clock->lock); - INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); /* Initialize the device clock */ mlx5_init_timer_clock(mdev); @@ -1163,35 +1288,27 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) mlx5_init_pps(mdev); clock->ptp = ptp_clock_register(&clock->ptp_info, - &mdev->pdev->dev); + clock->shared ? NULL : &mdev->pdev->dev); if (IS_ERR(clock->ptp)) { - mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n", + mlx5_core_warn(mdev, "%sptp_clock_register failed %ld\n", + clock->shared ? "shared clock " : "", PTR_ERR(clock->ptp)); clock->ptp = NULL; } - MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT); - mlx5_eq_notifier_register(mdev, &clock->pps_nb); - if (clock->ptp) ptp_schedule_worker(clock->ptp, 0); } -void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +static void mlx5_destroy_clock_dev(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; - if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) - return; - - mlx5_eq_notifier_unregister(mdev, &clock->pps_nb); if (clock->ptp) { ptp_clock_unregister(clock->ptp); clock->ptp = NULL; } - cancel_work_sync(&clock->pps_info.out_work); - if (mdev->clock_info) { free_page((unsigned long)mdev->clock_info); mdev->clock_info = NULL; @@ -1199,3 +1316,248 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) kfree(clock->ptp_info.pin_config); } + +static void mlx5_clock_free(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock_priv *cpriv = clock_priv(mdev->clock); + + mlx5_destroy_clock_dev(mdev); + mutex_destroy(&cpriv->lock); + kfree(cpriv); + mdev->clock = NULL; +} + +static int mlx5_clock_alloc(struct mlx5_core_dev *mdev, bool shared) +{ + struct mlx5_clock_priv *cpriv; + struct mlx5_clock *clock; + + cpriv = kzalloc(sizeof(*cpriv), GFP_KERNEL); + if (!cpriv) + return -ENOMEM; + + mutex_init(&cpriv->lock); + cpriv->mdev = mdev; + clock = &cpriv->clock; + clock->shared = shared; + mdev->clock = clock; + mlx5_clock_lock(clock); + mlx5_init_clock_dev(mdev); + mlx5_clock_unlock(clock); + + if (!clock->shared) + return 0; + + if (!clock->ptp) { + mlx5_core_warn(mdev, "failed to create ptp dev shared by multiple functions"); + mlx5_clock_free(mdev); + return -EINVAL; + } + + return 0; +} + +static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, u64 key) +{ + struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_devcom_comp_dev *pos; + + mdev->clock_state->compdev = mlx5_devcom_register_component(mdev->priv.devc, + MLX5_DEVCOM_SHARED_CLOCK, + key, NULL, mdev); + if (IS_ERR(mdev->clock_state->compdev)) + return; + + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { + if (peer_dev->clock) { + next = peer_dev; + break; + } + } + + if (next) { + mdev->clock = next->clock; + /* clock info is shared among all the functions using the same clock */ + mdev->clock_info = next->clock_info; + } else { + mlx5_clock_alloc(mdev, true); + } + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); + + if (!mdev->clock) { + mlx5_devcom_unregister_component(mdev->clock_state->compdev); + mdev->clock_state->compdev = NULL; + } +} + +static void mlx5_shared_clock_unregister(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_clock *clock = mdev->clock; + struct mlx5_devcom_comp_dev *pos; + + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { + if (peer_dev->clock && peer_dev != mdev) { + next = peer_dev; + break; + } + } + + if (next) { + struct mlx5_clock_priv *cpriv = clock_priv(clock); + + mlx5_clock_lock(clock); + if (mdev == cpriv->mdev) + cpriv->mdev = next; + mlx5_clock_unlock(clock); + } else { + mlx5_clock_free(mdev); + } + + mdev->clock = NULL; + mdev->clock_info = NULL; + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); + + mlx5_devcom_unregister_component(mdev->clock_state->compdev); +} + +static void mlx5_clock_arm_pps_in_event(struct mlx5_clock *clock, + struct mlx5_core_dev *new_mdev, + struct mlx5_core_dev *old_mdev) +{ + struct ptp_clock_info *ptp_info = &clock->ptp_info; + struct mlx5_clock_priv *cpriv = clock_priv(clock); + int i; + + for (i = 0; i < ptp_info->n_pins; i++) { + if (ptp_info->pin_config[i].func != PTP_PF_EXTTS || + !clock->pps_info.pin_armed[i]) + continue; + + if (new_mdev) { + mlx5_set_mtppse(new_mdev, i, 0, MLX5_EVENT_MODE_REPETETIVE); + cpriv->event_mdev = new_mdev; + } else { + cpriv->event_mdev = NULL; + } + + if (old_mdev) + mlx5_set_mtppse(old_mdev, i, 0, MLX5_EVENT_MODE_DISABLE); + } +} + +void mlx5_clock_load(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = mdev->clock; + struct mlx5_clock_priv *cpriv; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + INIT_WORK(&mdev->clock_state->out_work, mlx5_pps_out); + MLX5_NB_INIT(&mdev->clock_state->pps_nb, mlx5_pps_event, PPS_EVENT); + mlx5_eq_notifier_register(mdev, &mdev->clock_state->pps_nb); + + if (!clock->shared) { + mlx5_clock_arm_pps_in_event(clock, mdev, NULL); + return; + } + + cpriv = clock_priv(clock); + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_clock_lock(clock); + if (mdev == cpriv->mdev && mdev != cpriv->event_mdev) + mlx5_clock_arm_pps_in_event(clock, mdev, cpriv->event_mdev); + mlx5_clock_unlock(clock); + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); +} + +void mlx5_clock_unload(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_clock *clock = mdev->clock; + struct mlx5_devcom_comp_dev *pos; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + if (!clock->shared) { + mlx5_clock_arm_pps_in_event(clock, NULL, mdev); + goto out; + } + + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { + if (peer_dev->clock && peer_dev != mdev) { + next = peer_dev; + break; + } + } + + mlx5_clock_lock(clock); + if (mdev == clock_priv(clock)->event_mdev) + mlx5_clock_arm_pps_in_event(clock, next, mdev); + mlx5_clock_unlock(clock); + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); + +out: + mlx5_eq_notifier_unregister(mdev, &mdev->clock_state->pps_nb); + cancel_work_sync(&mdev->clock_state->out_work); +} + +static struct mlx5_clock null_clock; + +int mlx5_init_clock(struct mlx5_core_dev *mdev) +{ + u8 identity[MLX5_RT_CLOCK_IDENTITY_SIZE]; + struct mlx5_clock_dev_state *clock_state; + u64 key; + int err; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) { + mdev->clock = &null_clock; + mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); + return 0; + } + + clock_state = kzalloc(sizeof(*clock_state), GFP_KERNEL); + if (!clock_state) + return -ENOMEM; + clock_state->mdev = mdev; + mdev->clock_state = clock_state; + + if (MLX5_CAP_MCAM_REG3(mdev, mrtcq) && mlx5_real_time_mode(mdev)) { + if (mlx5_clock_identity_get(mdev, identity)) { + mlx5_core_warn(mdev, "failed to get rt clock identity, create ptp dev per function\n"); + } else { + memcpy(&key, &identity, sizeof(key)); + mlx5_shared_clock_register(mdev, key); + } + } + + if (!mdev->clock) { + err = mlx5_clock_alloc(mdev, false); + if (err) { + kfree(clock_state); + mdev->clock_state = NULL; + return err; + } + } + + return 0; +} + +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + if (mdev->clock->shared) + mlx5_shared_clock_unregister(mdev); + else + mlx5_clock_free(mdev); + kfree(mdev->clock_state); + mdev->clock_state = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index bd95b9f8d143..c18a652c0faa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -33,6 +33,35 @@ #ifndef __LIB_CLOCK_H__ #define __LIB_CLOCK_H__ +#include <linux/ptp_clock_kernel.h> + +#define MAX_PIN_NUM 8 +struct mlx5_pps { + u8 pin_caps[MAX_PIN_NUM]; + u64 start[MAX_PIN_NUM]; + u8 enabled; + u64 min_npps_period; + u64 min_out_pulse_duration_ns; + bool pin_armed[MAX_PIN_NUM]; +}; + +struct mlx5_timer { + struct cyclecounter cycles; + struct timecounter tc; + u32 nominal_c_mult; + unsigned long overflow_period; +}; + +struct mlx5_clock { + seqlock_t lock; + struct hwtstamp_config hwtstamp_config; + struct ptp_clock *ptp; + struct ptp_clock_info ptp_info; + struct mlx5_pps pps_info; + struct mlx5_timer timer; + bool shared; +}; + static inline bool mlx5_is_real_time_rq(struct mlx5_core_dev *mdev) { u8 rq_ts_format_cap = MLX5_CAP_GEN(mdev, rq_ts_format); @@ -54,12 +83,14 @@ static inline bool mlx5_is_real_time_sq(struct mlx5_core_dev *mdev) typedef ktime_t (*cqe_ts_to_ns)(struct mlx5_clock *, u64); #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) -void mlx5_init_clock(struct mlx5_core_dev *mdev); +int mlx5_init_clock(struct mlx5_core_dev *mdev); void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); +void mlx5_clock_load(struct mlx5_core_dev *mdev); +void mlx5_clock_unload(struct mlx5_core_dev *mdev); static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { - return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1; + return mdev->clock->ptp ? ptp_clock_index(mdev->clock->ptp) : -1; } static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, @@ -87,8 +118,10 @@ static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock, return ns_to_ktime(time); } #else -static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {} +static inline int mlx5_init_clock(struct mlx5_core_dev *mdev) { return 0; } static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {} +static inline void mlx5_clock_load(struct mlx5_core_dev *mdev) {} +static inline void mlx5_clock_unload(struct mlx5_core_dev *mdev) {} static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { return -1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index d58032dd0df7..c79699b94a02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -11,6 +11,7 @@ enum mlx5_devcom_component { MLX5_DEVCOM_MPV, MLX5_DEVCOM_HCA_PORTS, MLX5_DEVCOM_SD_GROUP, + MLX5_DEVCOM_SHARED_CLOCK, MLX5_DEVCOM_NUM_COMPONENTS, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index 9f13cea16446..eb3bd9c7f66e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -61,6 +61,25 @@ static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc) } } +static const char *mlx5_traffic_types_names[MLX5_NUM_TT] = { + [MLX5_TT_IPV4_TCP] = "TT_IPV4_TCP", + [MLX5_TT_IPV6_TCP] = "TT_IPV6_TCP", + [MLX5_TT_IPV4_UDP] = "TT_IPV4_UDP", + [MLX5_TT_IPV6_UDP] = "TT_IPV6_UDP", + [MLX5_TT_IPV4_IPSEC_AH] = "TT_IPV4_IPSEC_AH", + [MLX5_TT_IPV6_IPSEC_AH] = "TT_IPV6_IPSEC_AH", + [MLX5_TT_IPV4_IPSEC_ESP] = "TT_IPV4_IPSEC_ESP", + [MLX5_TT_IPV6_IPSEC_ESP] = "TT_IPV6_IPSEC_ESP", + [MLX5_TT_IPV4] = "TT_IPV4", + [MLX5_TT_IPV6] = "TT_IPV6", + [MLX5_TT_ANY] = "TT_ANY" +}; + +const char *mlx5_ttc_get_name(enum mlx5_traffic_types tt) +{ + return mlx5_traffic_types_names[tt]; +} + struct mlx5_etype_proto { u16 etype; u8 proto; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h index 92eea6bea310..ab9434fe3ae6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h @@ -49,6 +49,7 @@ struct ttc_params { struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT]; }; +const char *mlx5_ttc_get_name(enum mlx5_traffic_types tt); struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc); struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ec956c4bcebd..710633d5fdbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1038,7 +1038,11 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) mlx5_init_reserved_gids(dev); - mlx5_init_clock(dev); + err = mlx5_init_clock(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize hardware clock\n"); + goto err_tables_cleanup; + } dev->vxlan = mlx5_vxlan_create(dev); dev->geneve = mlx5_geneve_create(dev); @@ -1046,7 +1050,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) err = mlx5_init_rl_table(dev); if (err) { mlx5_core_err(dev, "Failed to init rate limiting\n"); - goto err_tables_cleanup; + goto err_clock_cleanup; } err = mlx5_mpfs_init(dev); @@ -1123,10 +1127,11 @@ err_mpfs_cleanup: mlx5_mpfs_cleanup(dev); err_rl_cleanup: mlx5_cleanup_rl_table(dev); -err_tables_cleanup: +err_clock_cleanup: mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_clock(dev); +err_tables_cleanup: mlx5_cleanup_reserved_gids(dev); mlx5_cq_debugfs_cleanup(dev); mlx5_fw_reset_cleanup(dev); @@ -1359,6 +1364,8 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_eq_table; } + mlx5_clock_load(dev); + err = mlx5_fw_tracer_init(dev->tracer); if (err) { mlx5_core_err(dev, "Failed to init FW tracer %d\n", err); @@ -1442,6 +1449,7 @@ err_fpga_start: mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_reset_events_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_clock_unload(dev); mlx5_eq_table_destroy(dev); err_eq_table: mlx5_irq_table_destroy(dev); @@ -1468,6 +1476,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_reset_events_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_clock_unload(dev); mlx5_eq_table_destroy(dev); mlx5_irq_table_destroy(dev); mlx5_pagealloc_stop(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 99de67c3aa74..6fef1005c469 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -346,6 +346,8 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap #define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) +int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id); + static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 50931584132b..3995df064101 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -1105,6 +1105,9 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, [MLX5E_800GAUI_8_800GBASE_CR8_KR8] = 800000, + [MLX5E_200GAUI_1_200GBASE_CR1_KR1] = 200000, + [MLX5E_400GAUI_2_400GBASE_CR2_KR2] = 400000, + [MLX5E_800GAUI_4_800GBASE_CR4_KR4] = 800000, }; int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c index 60cb4527588a..65740bb68b09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c @@ -516,30 +516,6 @@ def_xa_destroy: return NULL; } -/* Assure synchronization of the device steering tables with updates made by SW - * insertion. - */ -int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) -{ - int ret = 0; - - if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) { - mlx5dr_domain_lock(dmn); - ret = mlx5dr_send_ring_force_drain(dmn); - mlx5dr_domain_unlock(dmn); - if (ret) { - mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n", - flags, ret); - return ret; - } - } - - if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW) - ret = mlx5dr_cmd_sync_steering(dmn->mdev); - - return ret; -} - int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) { if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c index f57c84e5128b..4fd4e8483382 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c @@ -1331,36 +1331,3 @@ void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, kfree(send_ring->sync_buff); kfree(send_ring); } - -int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) -{ - struct mlx5dr_send_ring *send_ring = dmn->send_ring; - struct postsend_info send_info = {}; - u8 data[DR_STE_SIZE]; - int num_of_sends_req; - int ret; - int i; - - /* Sending this amount of requests makes sure we will get drain */ - num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2; - - /* Send fake requests forcing the last to be signaled */ - send_info.write.addr = (uintptr_t)data; - send_info.write.length = DR_STE_SIZE; - send_info.write.lkey = 0; - /* Using the sync_mr in order to write/read */ - send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr; - send_info.rkey = send_ring->sync_mr->mkey; - - for (i = 0; i < num_of_sends_req; i++) { - ret = dr_postsend_icm_data(dmn, &send_info); - if (ret) - return ret; - } - - spin_lock(&send_ring->lock); - ret = dr_handle_pending_wc(dmn, send_ring); - spin_unlock(&send_ring->lock); - - return ret; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h index 7618c6147f86..cc328292bf84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h @@ -1473,7 +1473,6 @@ struct mlx5dr_send_ring { int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn); void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, struct mlx5dr_send_ring *send_ring); -int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn); int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, u8 *data, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h index 0bb3724c10c2..fc8a2169d1a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h @@ -45,8 +45,6 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type); int mlx5dr_domain_destroy(struct mlx5dr_domain *domain); -int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); - void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn, u16 peer_vhca_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 0d5f750faa45..d10d4c396040 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1199,6 +1199,31 @@ int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *ou } EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap); +int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id) +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + *vhca_id = 0; + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_vport_get_other_func_general_cap(dev, vport, query_ctx); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); + +out_free: + kfree(query_ctx); + return err; +} + int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 5b44c931b660..058dcabfaa2e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -2214,6 +2214,8 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) mlxsw_pci_wqe_byte_count_set(wqe, i, 0); + mlxsw_pci_wqe_ipcs_set(wqe, skb->ip_summed == CHECKSUM_PARTIAL); + /* Everything is set up, ring producer doorbell to get HW going */ q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 6bed495dcf0f..7fa94e5828de 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -90,6 +90,11 @@ MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1); */ MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4); +/* pci_wqe_ipcs + * Calculate IPv4 and TCP / UDP checksums. + */ +MLXSW_ITEM32(pci, wqe, ipcs, 0x00, 14, 1); + /* pci_wqe_byte_count * Size of i-th scatter/gather entry, 0 if entry is unused. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d714311fd884..1f8362788c75 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1574,8 +1574,10 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, netif_carrier_off(dev); dev->features |= NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_HW_TC; - dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK; + NETIF_F_HW_TC | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK | + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + dev->vlan_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; dev->lltx = true; dev->netns_local = true; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index b10f80fc651b..fa7082ee5183 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -754,9 +754,6 @@ void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, struct net_device *dev); -bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev); -u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, enum mlxsw_sp_l3proto ul_proto, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 7d6d859cef3f..464821dd492d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8184,41 +8184,6 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, return NULL; } -bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev) -{ - struct mlxsw_sp_rif *rif; - - mutex_lock(&mlxsw_sp->router->lock); - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - mutex_unlock(&mlxsw_sp->router->lock); - - return rif; -} - -u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) -{ - struct mlxsw_sp_rif *rif; - u16 vid = 0; - - mutex_lock(&mlxsw_sp->router->lock); - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) - goto out; - - /* We only return the VID for VLAN RIFs. Otherwise we return an - * invalid value (0). - */ - if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN) - goto out; - - vid = mlxsw_sp_fid_8021q_vid(rif->fid); - -out: - mutex_unlock(&mlxsw_sp->router->lock); - return vid; -} - static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) { char ritr_pl[MLXSW_REG_RITR_LEN]; @@ -8417,19 +8382,6 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) return lb_rif->common.rif_index; } -u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) -{ - struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common); - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev); - struct mlxsw_sp_vr *ul_vr; - - ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL); - if (WARN_ON(IS_ERR(ul_vr))) - return 0; - - return ul_vr->id; -} - u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) { return lb_rif->ul_rif_id; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 0432c7cc6b07..313efab5c324 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -90,7 +90,6 @@ struct mlxsw_sp_ipip_entry; struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); -u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif); u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/meta/fbnic/Makefile b/drivers/net/ethernet/meta/fbnic/Makefile index 239b2258ec65..0dbc634adb4b 100644 --- a/drivers/net/ethernet/meta/fbnic/Makefile +++ b/drivers/net/ethernet/meta/fbnic/Makefile @@ -20,6 +20,7 @@ fbnic-y := fbnic_csr.o \ fbnic_pci.o \ fbnic_phylink.o \ fbnic_rpc.o \ + fbnic_time.o \ fbnic_tlv.o \ fbnic_txrx.o \ - fbnic_time.o +# End of objects diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 14751f16e125..37f81db1fc30 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -60,6 +60,12 @@ struct fbnic_dev { u8 mac_addr_boundary; u8 tce_tcam_last; + /* IP TCAM */ + struct fbnic_ip_addr ip_src[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES]; + struct fbnic_ip_addr ip_dst[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES]; + struct fbnic_ip_addr ipo_src[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES]; + struct fbnic_ip_addr ipo_dst[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES]; + /* Number of TCQs/RCQs available on hardware */ u16 max_num_queues; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index 02bb81b3c506..6f24c5f2e175 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -605,8 +605,11 @@ enum { FBNIC_RPC_ACT_TBL0_DEST_EI = 4, }; +#define FBNIC_RPC_ACT_TBL0_Q_SEL CSR_BIT(4) +#define FBNIC_RPC_ACT_TBL0_Q_ID CSR_GENMASK(15, 8) #define FBNIC_RPC_ACT_TBL0_DMA_HINT CSR_GENMASK(24, 16) #define FBNIC_RPC_ACT_TBL0_TS_ENA CSR_BIT(28) +#define FBNIC_RPC_ACT_TBL0_ACT_TBL_IDX CSR_BIT(29) #define FBNIC_RPC_ACT_TBL0_RSS_CTXT_ID CSR_BIT(30) #define FBNIC_RPC_ACT_TBL1_DEFAULT 0x0840b /* 0x2102c */ @@ -677,6 +680,9 @@ enum { #define FBNIC_RPC_TCAM_OUTER_IPSRC(m, n)\ (0x08c00 + 0x08 * (n) + (m)) /* 0x023000 + 32*n + 4*m */ +#define FBNIC_RPC_TCAM_IP_ADDR_VALUE CSR_GENMASK(15, 0) +#define FBNIC_RPC_TCAM_IP_ADDR_MASK CSR_GENMASK(31, 16) + #define FBNIC_RPC_TCAM_OUTER_IPDST(m, n)\ (0x08c48 + 0x08 * (n) + (m)) /* 0x023120 + 32*n + 4*m */ #define FBNIC_RPC_TCAM_IPSRC(m, n)\ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c index 59951b5abdb7..e8f2d7f2d962 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c @@ -10,6 +10,166 @@ static struct dentry *fbnic_dbg_root; +static void fbnic_dbg_desc_break(struct seq_file *s, int i) +{ + while (i--) + seq_putc(s, '-'); + + seq_putc(s, '\n'); +} + +static int fbnic_dbg_mac_addr_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + char hdr[80]; + int i; + + /* Generate Header */ + snprintf(hdr, sizeof(hdr), "%3s %s %-17s %s\n", + "Idx", "S", "TCAM Bitmap", "Addr/Mask"); + seq_puts(s, hdr); + fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr))); + + for (i = 0; i < FBNIC_RPC_TCAM_MACDA_NUM_ENTRIES; i++) { + struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i]; + + seq_printf(s, "%02d %d %64pb %pm\n", + i, mac_addr->state, mac_addr->act_tcam, + mac_addr->value.addr8); + seq_printf(s, " %pm\n", + mac_addr->mask.addr8); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_mac_addr); + +static int fbnic_dbg_tce_tcam_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + int i, tcam_idx = 0; + char hdr[80]; + + /* Generate Header */ + snprintf(hdr, sizeof(hdr), "%3s %s %-17s %s\n", + "Idx", "S", "TCAM Bitmap", "Addr/Mask"); + seq_puts(s, hdr); + fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr))); + + for (i = 0; i < ARRAY_SIZE(fbd->mac_addr); i++) { + struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i]; + + /* Verify BMC bit is set */ + if (!test_bit(FBNIC_MAC_ADDR_T_BMC, mac_addr->act_tcam)) + continue; + + if (tcam_idx == FBNIC_TCE_TCAM_NUM_ENTRIES) + break; + + seq_printf(s, "%02d %d %64pb %pm\n", + tcam_idx, mac_addr->state, mac_addr->act_tcam, + mac_addr->value.addr8); + seq_printf(s, " %pm\n", + mac_addr->mask.addr8); + tcam_idx++; + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_tce_tcam); + +static int fbnic_dbg_act_tcam_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + char hdr[80]; + int i; + + /* Generate Header */ + snprintf(hdr, sizeof(hdr), "%3s %s %-55s %-4s %s\n", + "Idx", "S", "Value/Mask", "RSS", "Dest"); + seq_puts(s, hdr); + fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr))); + + for (i = 0; i < FBNIC_RPC_TCAM_ACT_NUM_ENTRIES; i++) { + struct fbnic_act_tcam *act_tcam = &fbd->act_tcam[i]; + + seq_printf(s, "%02d %d %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x %08x\n", + i, act_tcam->state, + act_tcam->value.tcam[10], act_tcam->value.tcam[9], + act_tcam->value.tcam[8], act_tcam->value.tcam[7], + act_tcam->value.tcam[6], act_tcam->value.tcam[5], + act_tcam->value.tcam[4], act_tcam->value.tcam[3], + act_tcam->value.tcam[2], act_tcam->value.tcam[1], + act_tcam->value.tcam[0], act_tcam->rss_en_mask, + act_tcam->dest); + seq_printf(s, " %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x %04x\n", + act_tcam->mask.tcam[10], act_tcam->mask.tcam[9], + act_tcam->mask.tcam[8], act_tcam->mask.tcam[7], + act_tcam->mask.tcam[6], act_tcam->mask.tcam[5], + act_tcam->mask.tcam[4], act_tcam->mask.tcam[3], + act_tcam->mask.tcam[2], act_tcam->mask.tcam[1], + act_tcam->mask.tcam[0]); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_act_tcam); + +static int fbnic_dbg_ip_addr_show(struct seq_file *s, + struct fbnic_ip_addr *ip_addr) +{ + char hdr[80]; + int i; + + /* Generate Header */ + snprintf(hdr, sizeof(hdr), "%3s %s %-17s %s %s\n", + "Idx", "S", "TCAM Bitmap", "V", "Addr/Mask"); + seq_puts(s, hdr); + fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr))); + + for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES; i++, ip_addr++) { + seq_printf(s, "%02d %d %64pb %d %pi6\n", + i, ip_addr->state, ip_addr->act_tcam, + ip_addr->version, &ip_addr->value); + seq_printf(s, " %pi6\n", + &ip_addr->mask); + } + + return 0; +} + +static int fbnic_dbg_ip_src_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + + return fbnic_dbg_ip_addr_show(s, fbd->ip_src); +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ip_src); + +static int fbnic_dbg_ip_dst_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + + return fbnic_dbg_ip_addr_show(s, fbd->ip_dst); +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ip_dst); + +static int fbnic_dbg_ipo_src_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + + return fbnic_dbg_ip_addr_show(s, fbd->ipo_src); +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ipo_src); + +static int fbnic_dbg_ipo_dst_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + + return fbnic_dbg_ip_addr_show(s, fbd->ipo_dst); +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_ipo_dst); + static int fbnic_dbg_pcie_stats_show(struct seq_file *s, void *v) { struct fbnic_dev *fbd = s->private; @@ -48,6 +208,20 @@ void fbnic_dbg_fbd_init(struct fbnic_dev *fbd) fbd->dbg_fbd = debugfs_create_dir(name, fbnic_dbg_root); debugfs_create_file("pcie_stats", 0400, fbd->dbg_fbd, fbd, &fbnic_dbg_pcie_stats_fops); + debugfs_create_file("mac_addr", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_mac_addr_fops); + debugfs_create_file("tce_tcam", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_tce_tcam_fops); + debugfs_create_file("act_tcam", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_act_tcam_fops); + debugfs_create_file("ip_src", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_ip_src_fops); + debugfs_create_file("ip_dst", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_ip_dst_fops); + debugfs_create_file("ipo_src", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_ipo_src_fops); + debugfs_create_file("ipo_dst", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_ipo_dst_fops); } void fbnic_dbg_fbd_exit(struct fbnic_dev *fbd) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index 20cd9f5f89e2..fb7139a1da46 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -4,6 +4,7 @@ #include <linux/ethtool.h> #include <linux/netdevice.h> #include <linux/pci.h> +#include <net/ipv6.h> #include "fbnic.h" #include "fbnic_netdev.h" @@ -218,11 +219,234 @@ fbnic_get_rss_hash_opts(struct fbnic_net *fbn, struct ethtool_rxnfc *cmd) return 0; } +static int fbnic_get_cls_rule_all(struct fbnic_net *fbn, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct fbnic_dev *fbd = fbn->fbd; + int i, cnt = 0; + + /* Report maximum rule count */ + cmd->data = FBNIC_RPC_ACT_TBL_NFC_ENTRIES; + + for (i = 0; i < FBNIC_RPC_ACT_TBL_NFC_ENTRIES; i++) { + int idx = i + FBNIC_RPC_ACT_TBL_NFC_OFFSET; + struct fbnic_act_tcam *act_tcam; + + act_tcam = &fbd->act_tcam[idx]; + if (act_tcam->state != FBNIC_TCAM_S_VALID) + continue; + + if (rule_locs) { + if (cnt == cmd->rule_cnt) + return -EMSGSIZE; + + rule_locs[cnt] = i; + } + + cnt++; + } + + return cnt; +} + +static int fbnic_get_cls_rule(struct fbnic_net *fbn, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp; + struct fbnic_dev *fbd = fbn->fbd; + struct fbnic_act_tcam *act_tcam; + int idx; + + fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + + if (fsp->location >= FBNIC_RPC_ACT_TBL_NFC_ENTRIES) + return -EINVAL; + + idx = fsp->location + FBNIC_RPC_ACT_TBL_NFC_OFFSET; + act_tcam = &fbd->act_tcam[idx]; + + if (act_tcam->state != FBNIC_TCAM_S_VALID) + return -EINVAL; + + /* Report maximum rule count */ + cmd->data = FBNIC_RPC_ACT_TBL_NFC_ENTRIES; + + /* Set flow type field */ + if (!(act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_IP_VALID)) { + fsp->flow_type = ETHER_FLOW; + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX, + act_tcam->mask.tcam[1])) { + struct fbnic_mac_addr *mac_addr; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX, + act_tcam->value.tcam[1]); + mac_addr = &fbd->mac_addr[idx]; + + ether_addr_copy(fsp->h_u.ether_spec.h_dest, + mac_addr->value.addr8); + eth_broadcast_addr(fsp->m_u.ether_spec.h_dest); + } + } else if (act_tcam->value.tcam[1] & + FBNIC_RPC_TCAM_ACT1_OUTER_IP_VALID) { + fsp->flow_type = IPV6_USER_FLOW; + fsp->h_u.usr_ip6_spec.l4_proto = IPPROTO_IPV6; + fsp->m_u.usr_ip6_spec.l4_proto = 0xff; + + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX, + act_tcam->mask.tcam[0])) { + struct fbnic_ip_addr *ip_addr; + int i; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX, + act_tcam->value.tcam[0]); + ip_addr = &fbd->ipo_src[idx]; + + for (i = 0; i < 4; i++) { + fsp->h_u.usr_ip6_spec.ip6src[i] = + ip_addr->value.s6_addr32[i]; + fsp->m_u.usr_ip6_spec.ip6src[i] = + ~ip_addr->mask.s6_addr32[i]; + } + } + + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX, + act_tcam->mask.tcam[0])) { + struct fbnic_ip_addr *ip_addr; + int i; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX, + act_tcam->value.tcam[0]); + ip_addr = &fbd->ipo_dst[idx]; + + for (i = 0; i < 4; i++) { + fsp->h_u.usr_ip6_spec.ip6dst[i] = + ip_addr->value.s6_addr32[i]; + fsp->m_u.usr_ip6_spec.ip6dst[i] = + ~ip_addr->mask.s6_addr32[i]; + } + } + } else if ((act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_IP_IS_V6)) { + if (act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_L4_VALID) { + if (act_tcam->value.tcam[1] & + FBNIC_RPC_TCAM_ACT1_L4_IS_UDP) + fsp->flow_type = UDP_V6_FLOW; + else + fsp->flow_type = TCP_V6_FLOW; + fsp->h_u.tcp_ip6_spec.psrc = + cpu_to_be16(act_tcam->value.tcam[3]); + fsp->m_u.tcp_ip6_spec.psrc = + cpu_to_be16(~act_tcam->mask.tcam[3]); + fsp->h_u.tcp_ip6_spec.pdst = + cpu_to_be16(act_tcam->value.tcam[4]); + fsp->m_u.tcp_ip6_spec.pdst = + cpu_to_be16(~act_tcam->mask.tcam[4]); + } else { + fsp->flow_type = IPV6_USER_FLOW; + } + + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX, + act_tcam->mask.tcam[0])) { + struct fbnic_ip_addr *ip_addr; + int i; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX, + act_tcam->value.tcam[0]); + ip_addr = &fbd->ip_src[idx]; + + for (i = 0; i < 4; i++) { + fsp->h_u.usr_ip6_spec.ip6src[i] = + ip_addr->value.s6_addr32[i]; + fsp->m_u.usr_ip6_spec.ip6src[i] = + ~ip_addr->mask.s6_addr32[i]; + } + } + + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPDST_IDX, + act_tcam->mask.tcam[0])) { + struct fbnic_ip_addr *ip_addr; + int i; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPDST_IDX, + act_tcam->value.tcam[0]); + ip_addr = &fbd->ip_dst[idx]; + + for (i = 0; i < 4; i++) { + fsp->h_u.usr_ip6_spec.ip6dst[i] = + ip_addr->value.s6_addr32[i]; + fsp->m_u.usr_ip6_spec.ip6dst[i] = + ~ip_addr->mask.s6_addr32[i]; + } + } + } else { + if (act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_L4_VALID) { + if (act_tcam->value.tcam[1] & + FBNIC_RPC_TCAM_ACT1_L4_IS_UDP) + fsp->flow_type = UDP_V4_FLOW; + else + fsp->flow_type = TCP_V4_FLOW; + fsp->h_u.tcp_ip4_spec.psrc = + cpu_to_be16(act_tcam->value.tcam[3]); + fsp->m_u.tcp_ip4_spec.psrc = + cpu_to_be16(~act_tcam->mask.tcam[3]); + fsp->h_u.tcp_ip4_spec.pdst = + cpu_to_be16(act_tcam->value.tcam[4]); + fsp->m_u.tcp_ip4_spec.pdst = + cpu_to_be16(~act_tcam->mask.tcam[4]); + } else { + fsp->flow_type = IPV4_USER_FLOW; + fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4; + } + + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX, + act_tcam->mask.tcam[0])) { + struct fbnic_ip_addr *ip_addr; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX, + act_tcam->value.tcam[0]); + ip_addr = &fbd->ip_src[idx]; + + fsp->h_u.usr_ip4_spec.ip4src = + ip_addr->value.s6_addr32[3]; + fsp->m_u.usr_ip4_spec.ip4src = + ~ip_addr->mask.s6_addr32[3]; + } + + if (!FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPDST_IDX, + act_tcam->mask.tcam[0])) { + struct fbnic_ip_addr *ip_addr; + + idx = FIELD_GET(FBNIC_RPC_TCAM_ACT0_IPDST_IDX, + act_tcam->value.tcam[0]); + ip_addr = &fbd->ip_dst[idx]; + + fsp->h_u.usr_ip4_spec.ip4dst = + ip_addr->value.s6_addr32[3]; + fsp->m_u.usr_ip4_spec.ip4dst = + ~ip_addr->mask.s6_addr32[3]; + } + } + + /* Record action */ + if (act_tcam->dest & FBNIC_RPC_ACT_TBL0_DROP) + fsp->ring_cookie = RX_CLS_FLOW_DISC; + else if (act_tcam->dest & FBNIC_RPC_ACT_TBL0_Q_SEL) + fsp->ring_cookie = FIELD_GET(FBNIC_RPC_ACT_TBL0_Q_ID, + act_tcam->dest); + else + fsp->flow_type |= FLOW_RSS; + + cmd->rss_context = FIELD_GET(FBNIC_RPC_ACT_TBL0_RSS_CTXT_ID, + act_tcam->dest); + + return 0; +} + static int fbnic_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { struct fbnic_net *fbn = netdev_priv(netdev); int ret = -EOPNOTSUPP; + u32 special = 0; switch (cmd->cmd) { case ETHTOOL_GRXRINGS: @@ -232,6 +456,22 @@ static int fbnic_get_rxnfc(struct net_device *netdev, case ETHTOOL_GRXFH: ret = fbnic_get_rss_hash_opts(fbn, cmd); break; + case ETHTOOL_GRXCLSRULE: + ret = fbnic_get_cls_rule(fbn, cmd); + break; + case ETHTOOL_GRXCLSRLCNT: + rule_locs = NULL; + special = RX_CLS_LOC_SPECIAL; + fallthrough; + case ETHTOOL_GRXCLSRLALL: + ret = fbnic_get_cls_rule_all(fbn, cmd, rule_locs); + if (ret < 0) + break; + + cmd->data |= special; + cmd->rule_cnt = ret; + ret = 0; + break; } return ret; @@ -272,6 +512,406 @@ fbnic_set_rss_hash_opts(struct fbnic_net *fbn, const struct ethtool_rxnfc *cmd) return 0; } +static int fbnic_cls_rule_any_loc(struct fbnic_dev *fbd) +{ + int i; + + for (i = FBNIC_RPC_ACT_TBL_NFC_ENTRIES; i--;) { + int idx = i + FBNIC_RPC_ACT_TBL_NFC_OFFSET; + + if (fbd->act_tcam[idx].state != FBNIC_TCAM_S_VALID) + return i; + } + + return -ENOSPC; +} + +static int fbnic_set_cls_rule_ins(struct fbnic_net *fbn, + const struct ethtool_rxnfc *cmd) +{ + u16 flow_value = 0, flow_mask = 0xffff, ip_value = 0, ip_mask = 0xffff; + u16 sport = 0, sport_mask = ~0, dport = 0, dport_mask = ~0; + u16 misc = 0, misc_mask = ~0; + u32 dest = FIELD_PREP(FBNIC_RPC_ACT_TBL0_DEST_MASK, + FBNIC_RPC_ACT_TBL0_DEST_HOST); + struct fbnic_ip_addr *ip_src = NULL, *ip_dst = NULL; + struct fbnic_mac_addr *mac_addr = NULL; + struct ethtool_rx_flow_spec *fsp; + struct fbnic_dev *fbd = fbn->fbd; + struct fbnic_act_tcam *act_tcam; + struct in6_addr *addr6, *mask6; + struct in_addr *addr4, *mask4; + int hash_idx, location; + u32 flow_type; + int idx, j; + + fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + + if (fsp->location != RX_CLS_LOC_ANY) + return -EINVAL; + location = fbnic_cls_rule_any_loc(fbd); + if (location < 0) + return location; + + if (fsp->ring_cookie == RX_CLS_FLOW_DISC) { + dest = FBNIC_RPC_ACT_TBL0_DROP; + } else if (fsp->flow_type & FLOW_RSS) { + if (cmd->rss_context == 1) + dest |= FBNIC_RPC_ACT_TBL0_RSS_CTXT_ID; + } else { + u32 ring_idx = ethtool_get_flow_spec_ring(fsp->ring_cookie); + + if (ring_idx >= fbn->num_rx_queues) + return -EINVAL; + + dest |= FBNIC_RPC_ACT_TBL0_Q_SEL | + FIELD_PREP(FBNIC_RPC_ACT_TBL0_Q_ID, ring_idx); + } + + idx = location + FBNIC_RPC_ACT_TBL_NFC_OFFSET; + act_tcam = &fbd->act_tcam[idx]; + + /* Do not allow overwriting for now. + * To support overwriting rules we will need to add logic to free + * any IP or MACDA TCAMs that may be associated with the old rule. + */ + if (act_tcam->state != FBNIC_TCAM_S_DISABLED) + return -EBUSY; + + flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_RSS); + hash_idx = fbnic_get_rss_hash_idx(flow_type); + + switch (flow_type) { + case UDP_V4_FLOW: +udp4_flow: + flow_value |= FBNIC_RPC_TCAM_ACT1_L4_IS_UDP; + fallthrough; + case TCP_V4_FLOW: +tcp4_flow: + flow_value |= FBNIC_RPC_TCAM_ACT1_L4_VALID; + flow_mask &= ~(FBNIC_RPC_TCAM_ACT1_L4_IS_UDP | + FBNIC_RPC_TCAM_ACT1_L4_VALID); + + sport = be16_to_cpu(fsp->h_u.tcp_ip4_spec.psrc); + sport_mask = ~be16_to_cpu(fsp->m_u.tcp_ip4_spec.psrc); + dport = be16_to_cpu(fsp->h_u.tcp_ip4_spec.pdst); + dport_mask = ~be16_to_cpu(fsp->m_u.tcp_ip4_spec.pdst); + goto ip4_flow; + case IP_USER_FLOW: + if (!fsp->m_u.usr_ip4_spec.proto) + goto ip4_flow; + if (fsp->m_u.usr_ip4_spec.proto != 0xff) + return -EINVAL; + if (fsp->h_u.usr_ip4_spec.proto == IPPROTO_UDP) + goto udp4_flow; + if (fsp->h_u.usr_ip4_spec.proto == IPPROTO_TCP) + goto tcp4_flow; + return -EINVAL; +ip4_flow: + addr4 = (struct in_addr *)&fsp->h_u.usr_ip4_spec.ip4src; + mask4 = (struct in_addr *)&fsp->m_u.usr_ip4_spec.ip4src; + if (mask4->s_addr) { + ip_src = __fbnic_ip4_sync(fbd, fbd->ip_src, + addr4, mask4); + if (!ip_src) + return -ENOSPC; + + set_bit(idx, ip_src->act_tcam); + ip_value |= FBNIC_RPC_TCAM_ACT0_IPSRC_VALID | + FIELD_PREP(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX, + ip_src - fbd->ip_src); + ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_IPSRC_VALID | + FBNIC_RPC_TCAM_ACT0_IPSRC_IDX); + } + + addr4 = (struct in_addr *)&fsp->h_u.usr_ip4_spec.ip4dst; + mask4 = (struct in_addr *)&fsp->m_u.usr_ip4_spec.ip4dst; + if (mask4->s_addr) { + ip_dst = __fbnic_ip4_sync(fbd, fbd->ip_dst, + addr4, mask4); + if (!ip_dst) { + if (ip_src && ip_src->state == FBNIC_TCAM_S_ADD) + memset(ip_src, 0, sizeof(*ip_src)); + return -ENOSPC; + } + + set_bit(idx, ip_dst->act_tcam); + ip_value |= FBNIC_RPC_TCAM_ACT0_IPDST_VALID | + FIELD_PREP(FBNIC_RPC_TCAM_ACT0_IPDST_IDX, + ip_dst - fbd->ip_dst); + ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_IPDST_VALID | + FBNIC_RPC_TCAM_ACT0_IPDST_IDX); + } + flow_value |= FBNIC_RPC_TCAM_ACT1_IP_VALID | + FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID; + flow_mask &= ~(FBNIC_RPC_TCAM_ACT1_IP_IS_V6 | + FBNIC_RPC_TCAM_ACT1_IP_VALID | + FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID); + break; + case UDP_V6_FLOW: +udp6_flow: + flow_value |= FBNIC_RPC_TCAM_ACT1_L4_IS_UDP; + fallthrough; + case TCP_V6_FLOW: +tcp6_flow: + flow_value |= FBNIC_RPC_TCAM_ACT1_L4_VALID; + flow_mask &= ~(FBNIC_RPC_TCAM_ACT1_L4_IS_UDP | + FBNIC_RPC_TCAM_ACT1_L4_VALID); + + sport = be16_to_cpu(fsp->h_u.tcp_ip6_spec.psrc); + sport_mask = ~be16_to_cpu(fsp->m_u.tcp_ip6_spec.psrc); + dport = be16_to_cpu(fsp->h_u.tcp_ip6_spec.pdst); + dport_mask = ~be16_to_cpu(fsp->m_u.tcp_ip6_spec.pdst); + goto ipv6_flow; + case IPV6_USER_FLOW: + if (!fsp->m_u.usr_ip6_spec.l4_proto) + goto ipv6_flow; + + if (fsp->m_u.usr_ip6_spec.l4_proto != 0xff) + return -EINVAL; + if (fsp->h_u.usr_ip6_spec.l4_proto == IPPROTO_UDP) + goto udp6_flow; + if (fsp->h_u.usr_ip6_spec.l4_proto == IPPROTO_TCP) + goto tcp6_flow; + if (fsp->h_u.usr_ip6_spec.l4_proto != IPPROTO_IPV6) + return -EINVAL; + + addr6 = (struct in6_addr *)fsp->h_u.usr_ip6_spec.ip6src; + mask6 = (struct in6_addr *)fsp->m_u.usr_ip6_spec.ip6src; + if (!ipv6_addr_any(mask6)) { + ip_src = __fbnic_ip6_sync(fbd, fbd->ipo_src, + addr6, mask6); + if (!ip_src) + return -ENOSPC; + + set_bit(idx, ip_src->act_tcam); + ip_value |= + FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_VALID | + FIELD_PREP(FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX, + ip_src - fbd->ipo_src); + ip_mask &= + ~(FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_VALID | + FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX); + } + + addr6 = (struct in6_addr *)fsp->h_u.usr_ip6_spec.ip6dst; + mask6 = (struct in6_addr *)fsp->m_u.usr_ip6_spec.ip6dst; + if (!ipv6_addr_any(mask6)) { + ip_dst = __fbnic_ip6_sync(fbd, fbd->ipo_dst, + addr6, mask6); + if (!ip_dst) { + if (ip_src && ip_src->state == FBNIC_TCAM_S_ADD) + memset(ip_src, 0, sizeof(*ip_src)); + return -ENOSPC; + } + + set_bit(idx, ip_dst->act_tcam); + ip_value |= + FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_VALID | + FIELD_PREP(FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX, + ip_dst - fbd->ipo_dst); + ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_VALID | + FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX); + } + + flow_value |= FBNIC_RPC_TCAM_ACT1_OUTER_IP_VALID; + flow_mask &= FBNIC_RPC_TCAM_ACT1_OUTER_IP_VALID; +ipv6_flow: + addr6 = (struct in6_addr *)fsp->h_u.usr_ip6_spec.ip6src; + mask6 = (struct in6_addr *)fsp->m_u.usr_ip6_spec.ip6src; + if (!ip_src && !ipv6_addr_any(mask6)) { + ip_src = __fbnic_ip6_sync(fbd, fbd->ip_src, + addr6, mask6); + if (!ip_src) + return -ENOSPC; + + set_bit(idx, ip_src->act_tcam); + ip_value |= FBNIC_RPC_TCAM_ACT0_IPSRC_VALID | + FIELD_PREP(FBNIC_RPC_TCAM_ACT0_IPSRC_IDX, + ip_src - fbd->ip_src); + ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_IPSRC_VALID | + FBNIC_RPC_TCAM_ACT0_IPSRC_IDX); + } + + addr6 = (struct in6_addr *)fsp->h_u.usr_ip6_spec.ip6dst; + mask6 = (struct in6_addr *)fsp->m_u.usr_ip6_spec.ip6dst; + if (!ip_dst && !ipv6_addr_any(mask6)) { + ip_dst = __fbnic_ip6_sync(fbd, fbd->ip_dst, + addr6, mask6); + if (!ip_dst) { + if (ip_src && ip_src->state == FBNIC_TCAM_S_ADD) + memset(ip_src, 0, sizeof(*ip_src)); + return -ENOSPC; + } + + set_bit(idx, ip_dst->act_tcam); + ip_value |= FBNIC_RPC_TCAM_ACT0_IPDST_VALID | + FIELD_PREP(FBNIC_RPC_TCAM_ACT0_IPDST_IDX, + ip_dst - fbd->ip_dst); + ip_mask &= ~(FBNIC_RPC_TCAM_ACT0_IPDST_VALID | + FBNIC_RPC_TCAM_ACT0_IPDST_IDX); + } + + flow_value |= FBNIC_RPC_TCAM_ACT1_IP_IS_V6 | + FBNIC_RPC_TCAM_ACT1_IP_VALID | + FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID; + flow_mask &= ~(FBNIC_RPC_TCAM_ACT1_IP_IS_V6 | + FBNIC_RPC_TCAM_ACT1_IP_VALID | + FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID); + break; + case ETHER_FLOW: + if (!is_zero_ether_addr(fsp->m_u.ether_spec.h_dest)) { + u8 *addr = fsp->h_u.ether_spec.h_dest; + u8 *mask = fsp->m_u.ether_spec.h_dest; + + /* Do not allow MAC addr of 0 */ + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* Only support full MAC address to avoid + * conflicts with other MAC addresses. + */ + if (!is_broadcast_ether_addr(mask)) + return -EINVAL; + + if (is_multicast_ether_addr(addr)) + mac_addr = __fbnic_mc_sync(fbd, addr); + else + mac_addr = __fbnic_uc_sync(fbd, addr); + + if (!mac_addr) + return -ENOSPC; + + set_bit(idx, mac_addr->act_tcam); + flow_value |= + FIELD_PREP(FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX, + mac_addr - fbd->mac_addr); + flow_mask &= ~FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX; + } + + flow_value |= FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID; + flow_mask &= ~FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID; + break; + default: + return -EINVAL; + } + + /* Write action table values */ + act_tcam->dest = dest; + act_tcam->rss_en_mask = fbnic_flow_hash_2_rss_en_mask(fbn, hash_idx); + + /* Write IP Match value/mask to action_tcam[0] */ + act_tcam->value.tcam[0] = ip_value; + act_tcam->mask.tcam[0] = ip_mask; + + /* Write flow type value/mask to action_tcam[1] */ + act_tcam->value.tcam[1] = flow_value; + act_tcam->mask.tcam[1] = flow_mask; + + /* Write error, DSCP, extra L4 matches to action_tcam[2] */ + act_tcam->value.tcam[2] = misc; + act_tcam->mask.tcam[2] = misc_mask; + + /* Write source/destination port values */ + act_tcam->value.tcam[3] = sport; + act_tcam->mask.tcam[3] = sport_mask; + act_tcam->value.tcam[4] = dport; + act_tcam->mask.tcam[4] = dport_mask; + + for (j = 5; j < FBNIC_RPC_TCAM_ACT_WORD_LEN; j++) + act_tcam->mask.tcam[j] = 0xffff; + + act_tcam->state = FBNIC_TCAM_S_UPDATE; + fsp->location = location; + + if (netif_running(fbn->netdev)) { + fbnic_write_rules(fbd); + if (ip_src || ip_dst) + fbnic_write_ip_addr(fbd); + if (mac_addr) + fbnic_write_macda(fbd); + } + + return 0; +} + +static void fbnic_clear_nfc_macda(struct fbnic_net *fbn, + unsigned int tcam_idx) +{ + struct fbnic_dev *fbd = fbn->fbd; + int idx; + + for (idx = ARRAY_SIZE(fbd->mac_addr); idx--;) + __fbnic_xc_unsync(&fbd->mac_addr[idx], tcam_idx); + + /* Write updates to hardware */ + if (netif_running(fbn->netdev)) + fbnic_write_macda(fbd); +} + +static void fbnic_clear_nfc_ip_addr(struct fbnic_net *fbn, + unsigned int tcam_idx) +{ + struct fbnic_dev *fbd = fbn->fbd; + int idx; + + for (idx = ARRAY_SIZE(fbd->ip_src); idx--;) + __fbnic_ip_unsync(&fbd->ip_src[idx], tcam_idx); + for (idx = ARRAY_SIZE(fbd->ip_dst); idx--;) + __fbnic_ip_unsync(&fbd->ip_dst[idx], tcam_idx); + for (idx = ARRAY_SIZE(fbd->ipo_src); idx--;) + __fbnic_ip_unsync(&fbd->ipo_src[idx], tcam_idx); + for (idx = ARRAY_SIZE(fbd->ipo_dst); idx--;) + __fbnic_ip_unsync(&fbd->ipo_dst[idx], tcam_idx); + + /* Write updates to hardware */ + if (netif_running(fbn->netdev)) + fbnic_write_ip_addr(fbd); +} + +static int fbnic_set_cls_rule_del(struct fbnic_net *fbn, + const struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp; + struct fbnic_dev *fbd = fbn->fbd; + struct fbnic_act_tcam *act_tcam; + int idx; + + fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + + if (fsp->location >= FBNIC_RPC_ACT_TBL_NFC_ENTRIES) + return -EINVAL; + + idx = fsp->location + FBNIC_RPC_ACT_TBL_NFC_OFFSET; + act_tcam = &fbd->act_tcam[idx]; + + if (act_tcam->state != FBNIC_TCAM_S_VALID) + return -EINVAL; + + act_tcam->state = FBNIC_TCAM_S_DELETE; + + if ((act_tcam->value.tcam[1] & FBNIC_RPC_TCAM_ACT1_L2_MACDA_VALID) && + (~act_tcam->mask.tcam[1] & FBNIC_RPC_TCAM_ACT1_L2_MACDA_IDX)) + fbnic_clear_nfc_macda(fbn, idx); + + if ((act_tcam->value.tcam[0] & + (FBNIC_RPC_TCAM_ACT0_IPSRC_VALID | + FBNIC_RPC_TCAM_ACT0_IPDST_VALID | + FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_VALID | + FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_VALID)) && + (~act_tcam->mask.tcam[0] & + (FBNIC_RPC_TCAM_ACT0_IPSRC_IDX | + FBNIC_RPC_TCAM_ACT0_IPDST_IDX | + FBNIC_RPC_TCAM_ACT0_OUTER_IPSRC_IDX | + FBNIC_RPC_TCAM_ACT0_OUTER_IPDST_IDX))) + fbnic_clear_nfc_ip_addr(fbn, idx); + + if (netif_running(fbn->netdev)) + fbnic_write_rules(fbd); + + return 0; +} + static int fbnic_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) { struct fbnic_net *fbn = netdev_priv(netdev); @@ -281,6 +921,12 @@ static int fbnic_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) case ETHTOOL_SRXFH: ret = fbnic_set_rss_hash_opts(fbn, cmd); break; + case ETHTOOL_SRXCLSRLINS: + ret = fbnic_set_cls_rule_ins(fbn, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + ret = fbnic_set_cls_rule_del(fbn, cmd); + break; } return ret; @@ -374,6 +1020,61 @@ fbnic_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, return 0; } +static int +fbnic_modify_rxfh_context(struct net_device *netdev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + const u32 *indir = rxfh->indir; + unsigned int changes; + + if (!indir) + indir = ethtool_rxfh_context_indir(ctx); + + changes = fbnic_set_indir(fbn, rxfh->rss_context, indir); + if (changes && netif_running(netdev)) + fbnic_rss_reinit_hw(fbn->fbd, fbn); + + return 0; +} + +static int +fbnic_create_rxfh_context(struct net_device *netdev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + + if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) { + NL_SET_ERR_MSG_MOD(extack, "RSS hash function not supported"); + return -EOPNOTSUPP; + } + ctx->hfunc = ETH_RSS_HASH_TOP; + + if (!rxfh->indir) { + u32 *indir = ethtool_rxfh_context_indir(ctx); + unsigned int num_rx = fbn->num_rx_queues; + unsigned int i; + + for (i = 0; i < FBNIC_RPC_RSS_TBL_SIZE; i++) + indir[i] = ethtool_rxfh_indir_default(i, num_rx); + } + + return fbnic_modify_rxfh_context(netdev, ctx, rxfh, extack); +} + +static int +fbnic_remove_rxfh_context(struct net_device *netdev, + struct ethtool_rxfh_context *ctx, u32 rss_context, + struct netlink_ext_ack *extack) +{ + /* Nothing to do, contexts are allocated statically */ + return 0; +} + static void fbnic_get_channels(struct net_device *netdev, struct ethtool_channels *ch) { @@ -523,14 +1224,14 @@ static void fbnic_get_ts_stats(struct net_device *netdev, unsigned int start; int i; - ts_stats->pkts = fbn->tx_stats.ts_packets; - ts_stats->lost = fbn->tx_stats.ts_lost; + ts_stats->pkts = fbn->tx_stats.twq.ts_packets; + ts_stats->lost = fbn->tx_stats.twq.ts_lost; for (i = 0; i < fbn->num_tx_queues; i++) { ring = fbn->tx[i]; do { start = u64_stats_fetch_begin(&ring->stats.syncp); - ts_packets = ring->stats.ts_packets; - ts_lost = ring->stats.ts_lost; + ts_packets = ring->stats.twq.ts_packets; + ts_lost = ring->stats.twq.ts_lost; } while (u64_stats_fetch_retry(&ring->stats.syncp, start)); ts_stats->pkts += ts_packets; ts_stats->lost += ts_lost; @@ -586,6 +1287,7 @@ fbnic_get_eth_mac_stats(struct net_device *netdev, } static const struct ethtool_ops fbnic_ethtool_ops = { + .rxfh_max_num_contexts = FBNIC_RPC_RSS_TBL_COUNT, .get_drvinfo = fbnic_get_drvinfo, .get_regs_len = fbnic_get_regs_len, .get_regs = fbnic_get_regs, @@ -598,6 +1300,9 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .get_rxfh_indir_size = fbnic_get_rxfh_indir_size, .get_rxfh = fbnic_get_rxfh, .set_rxfh = fbnic_set_rxfh, + .create_rxfh_context = fbnic_create_rxfh_context, + .modify_rxfh_context = fbnic_modify_rxfh_context, + .remove_rxfh_context = fbnic_remove_rxfh_context, .get_channels = fbnic_get_channels, .set_channels = fbnic_set_channels, .get_ts_info = fbnic_get_ts_info, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 7a96b6ee773f..b12672d1607e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -487,8 +487,9 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx, struct fbnic_net *fbn = netdev_priv(dev); struct fbnic_ring *rxr = fbn->rx[idx]; struct fbnic_queue_stats *stats; + u64 bytes, packets, alloc_fail; + u64 csum_complete, csum_none; unsigned int start; - u64 bytes, packets; if (!rxr) return; @@ -498,10 +499,16 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx, start = u64_stats_fetch_begin(&stats->syncp); bytes = stats->bytes; packets = stats->packets; + alloc_fail = stats->rx.alloc_failed; + csum_complete = stats->rx.csum_complete; + csum_none = stats->rx.csum_none; } while (u64_stats_fetch_retry(&stats->syncp, start)); rx->bytes = bytes; rx->packets = packets; + rx->alloc_fail = alloc_fail; + rx->csum_complete = csum_complete; + rx->csum_none = csum_none; } static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, @@ -510,6 +517,7 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, struct fbnic_net *fbn = netdev_priv(dev); struct fbnic_ring *txr = fbn->tx[idx]; struct fbnic_queue_stats *stats; + u64 stop, wake, csum; unsigned int start; u64 bytes, packets; @@ -521,10 +529,16 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, start = u64_stats_fetch_begin(&stats->syncp); bytes = stats->bytes; packets = stats->packets; + csum = stats->twq.csum_partial; + stop = stats->twq.stop; + wake = stats->twq.wake; } while (u64_stats_fetch_retry(&stats->syncp, start)); tx->bytes = bytes; tx->packets = packets; + tx->needs_csum = csum; + tx->stop = stop; + tx->wake = wake; } static void fbnic_get_base_stats(struct net_device *dev, @@ -535,9 +549,15 @@ static void fbnic_get_base_stats(struct net_device *dev, tx->bytes = fbn->tx_stats.bytes; tx->packets = fbn->tx_stats.packets; + tx->needs_csum = fbn->tx_stats.twq.csum_partial; + tx->stop = fbn->tx_stats.twq.stop; + tx->wake = fbn->tx_stats.twq.wake; rx->bytes = fbn->rx_stats.bytes; rx->packets = fbn->rx_stats.packets; + rx->alloc_fail = fbn->rx_stats.rx.alloc_failed; + rx->csum_complete = fbn->rx_stats.rx.csum_complete; + rx->csum_none = fbn->rx_stats.rx.csum_none; } static const struct netdev_stat_ops fbnic_stat_ops = { @@ -628,6 +648,8 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) fbnic_rss_key_fill(fbn->rss_key); fbnic_rss_init_en_mask(fbn); + netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->features |= NETIF_F_RXHASH | NETIF_F_SG | @@ -637,6 +659,7 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) netdev->hw_features |= netdev->features; netdev->vlan_features |= netdev->features; netdev->hw_enc_features |= netdev->features; + netdev->features |= NETIF_F_NTUPLE; netdev->min_mtu = IPV6_MIN_MTU; netdev->max_mtu = FBNIC_MAX_JUMBO_FRAME_SIZE - ETH_HLEN; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c index c25bd300b902..8ff07b5562e3 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c @@ -3,6 +3,7 @@ #include <linux/etherdevice.h> #include <linux/ethtool.h> +#include <net/ipv6.h> #include "fbnic.h" #include "fbnic_netdev.h" @@ -60,7 +61,7 @@ void fbnic_rss_disable_hw(struct fbnic_dev *fbd) #define FBNIC_FH_2_RSSEM_BIT(_fh, _rssem, _val) \ FIELD_PREP(FBNIC_RPC_ACT_TBL1_RSS_ENA_##_rssem, \ FIELD_GET(RXH_##_fh, _val)) -static u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type) +u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type) { u32 flow_hash = fbn->rss_flow_hash[flow_type]; u32 rss_en_mask = 0; @@ -698,6 +699,359 @@ void fbnic_write_tce_tcam(struct fbnic_dev *fbd) __fbnic_write_tce_tcam(fbd); } +struct fbnic_ip_addr *__fbnic_ip4_sync(struct fbnic_dev *fbd, + struct fbnic_ip_addr *ip_addr, + const struct in_addr *addr, + const struct in_addr *mask) +{ + struct fbnic_ip_addr *avail_addr = NULL; + unsigned int i; + + /* Scan from top of list to bottom, filling bottom up. */ + for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES; i++, ip_addr++) { + struct in6_addr *m = &ip_addr->mask; + + if (ip_addr->state == FBNIC_TCAM_S_DISABLED) { + avail_addr = ip_addr; + continue; + } + + if (ip_addr->version != 4) + continue; + + /* Drop avail_addr if mask is a subset of our current mask, + * This prevents us from inserting a longer prefix behind a + * shorter one. + * + * The mask is stored inverted value so as an example: + * m ffff ffff ffff ffff ffff ffff ffff 0000 0000 + * mask 0000 0000 0000 0000 0000 0000 0000 ffff ffff + * + * "m" and "mask" represent typical IPv4 mask stored in + * the TCAM and those provided by the stack. The code below + * should return a non-zero result if there is a 0 stored + * anywhere in "m" where "mask" has a 0. + */ + if (~m->s6_addr32[3] & ~mask->s_addr) { + avail_addr = NULL; + continue; + } + + /* Check to see if the mask actually contains fewer bits than + * our new mask "m". The XOR below should only result in 0 if + * "m" is masking a bit that we are looking for in our new + * "mask", we eliminated the 0^0 case with the check above. + * + * If it contains fewer bits we need to stop here, otherwise + * we might be adding an unreachable rule. + */ + if (~(m->s6_addr32[3] ^ mask->s_addr)) + break; + + if (ip_addr->value.s6_addr32[3] == addr->s_addr) { + avail_addr = ip_addr; + break; + } + } + + if (avail_addr && avail_addr->state == FBNIC_TCAM_S_DISABLED) { + ipv6_addr_set(&avail_addr->value, 0, 0, 0, addr->s_addr); + ipv6_addr_set(&avail_addr->mask, htonl(~0), htonl(~0), + htonl(~0), ~mask->s_addr); + avail_addr->version = 4; + + avail_addr->state = FBNIC_TCAM_S_ADD; + } + + return avail_addr; +} + +struct fbnic_ip_addr *__fbnic_ip6_sync(struct fbnic_dev *fbd, + struct fbnic_ip_addr *ip_addr, + const struct in6_addr *addr, + const struct in6_addr *mask) +{ + struct fbnic_ip_addr *avail_addr = NULL; + unsigned int i; + + ip_addr = &ip_addr[FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES - 1]; + + /* Scan from bottom of list to top, filling top down. */ + for (i = FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES; i--; ip_addr--) { + struct in6_addr *m = &ip_addr->mask; + + if (ip_addr->state == FBNIC_TCAM_S_DISABLED) { + avail_addr = ip_addr; + continue; + } + + if (ip_addr->version != 6) + continue; + + /* Drop avail_addr if mask is a superset of our current mask. + * This prevents us from inserting a longer prefix behind a + * shorter one. + * + * The mask is stored inverted value so as an example: + * m 0000 0000 0000 0000 0000 0000 0000 0000 0000 + * mask ffff ffff ffff ffff ffff ffff ffff ffff ffff + * + * "m" and "mask" represent typical IPv6 mask stored in + * the TCAM and those provided by the stack. The code below + * should return a non-zero result which will cause us + * to drop the avail_addr value that might be cached + * to prevent us from dropping a v6 address behind it. + */ + if ((m->s6_addr32[0] & mask->s6_addr32[0]) | + (m->s6_addr32[1] & mask->s6_addr32[1]) | + (m->s6_addr32[2] & mask->s6_addr32[2]) | + (m->s6_addr32[3] & mask->s6_addr32[3])) { + avail_addr = NULL; + continue; + } + + /* The previous test eliminated any overlap between the + * two values so now we need to check for gaps. + * + * If the mask is equal to our current mask then it should + * result with m ^ mask = ffff ffff, if however the value + * stored in m is bigger then we should see a 0 appear + * somewhere in the mask. + */ + if (~(m->s6_addr32[0] ^ mask->s6_addr32[0]) | + ~(m->s6_addr32[1] ^ mask->s6_addr32[1]) | + ~(m->s6_addr32[2] ^ mask->s6_addr32[2]) | + ~(m->s6_addr32[3] ^ mask->s6_addr32[3])) + break; + + if (ipv6_addr_cmp(&ip_addr->value, addr)) + continue; + + avail_addr = ip_addr; + break; + } + + if (avail_addr && avail_addr->state == FBNIC_TCAM_S_DISABLED) { + memcpy(&avail_addr->value, addr, sizeof(*addr)); + ipv6_addr_set(&avail_addr->mask, + ~mask->s6_addr32[0], ~mask->s6_addr32[1], + ~mask->s6_addr32[2], ~mask->s6_addr32[3]); + avail_addr->version = 6; + + avail_addr->state = FBNIC_TCAM_S_ADD; + } + + return avail_addr; +} + +int __fbnic_ip_unsync(struct fbnic_ip_addr *ip_addr, unsigned int tcam_idx) +{ + if (!test_and_clear_bit(tcam_idx, ip_addr->act_tcam)) + return -ENOENT; + + if (bitmap_empty(ip_addr->act_tcam, FBNIC_RPC_TCAM_ACT_NUM_ENTRIES)) + ip_addr->state = FBNIC_TCAM_S_DELETE; + + return 0; +} + +static void fbnic_clear_ip_src_entry(struct fbnic_dev *fbd, unsigned int idx) +{ + int i; + + /* Invalidate entry and clear addr state info */ + for (i = 0; i <= FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_IPSRC(idx, i), 0); +} + +static void fbnic_clear_ip_dst_entry(struct fbnic_dev *fbd, unsigned int idx) +{ + int i; + + /* Invalidate entry and clear addr state info */ + for (i = 0; i <= FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_IPDST(idx, i), 0); +} + +static void fbnic_clear_ip_outer_src_entry(struct fbnic_dev *fbd, + unsigned int idx) +{ + int i; + + /* Invalidate entry and clear addr state info */ + for (i = 0; i <= FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPSRC(idx, i), 0); +} + +static void fbnic_clear_ip_outer_dst_entry(struct fbnic_dev *fbd, + unsigned int idx) +{ + int i; + + /* Invalidate entry and clear addr state info */ + for (i = 0; i <= FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPDST(idx, i), 0); +} + +static void fbnic_write_ip_src_entry(struct fbnic_dev *fbd, unsigned int idx, + struct fbnic_ip_addr *ip_addr) +{ + __be16 *mask, *value; + int i; + + mask = &ip_addr->mask.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + value = &ip_addr->value.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + + for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_IPSRC(idx, i), + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_MASK, ntohs(*mask--)) | + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_VALUE, ntohs(*value--))); + wrfl(fbd); + + /* Bit 129 is used to flag for v4/v6 */ + wr32(fbd, FBNIC_RPC_TCAM_IPSRC(idx, i), + (ip_addr->version == 6) | FBNIC_RPC_TCAM_VALIDATE); +} + +static void fbnic_write_ip_dst_entry(struct fbnic_dev *fbd, unsigned int idx, + struct fbnic_ip_addr *ip_addr) +{ + __be16 *mask, *value; + int i; + + mask = &ip_addr->mask.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + value = &ip_addr->value.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + + for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_IPDST(idx, i), + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_MASK, ntohs(*mask--)) | + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_VALUE, ntohs(*value--))); + wrfl(fbd); + + /* Bit 129 is used to flag for v4/v6 */ + wr32(fbd, FBNIC_RPC_TCAM_IPDST(idx, i), + (ip_addr->version == 6) | FBNIC_RPC_TCAM_VALIDATE); +} + +static void fbnic_write_ip_outer_src_entry(struct fbnic_dev *fbd, + unsigned int idx, + struct fbnic_ip_addr *ip_addr) +{ + __be16 *mask, *value; + int i; + + mask = &ip_addr->mask.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + value = &ip_addr->value.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + + for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPSRC(idx, i), + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_MASK, ntohs(*mask--)) | + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_VALUE, ntohs(*value--))); + wrfl(fbd); + + wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPSRC(idx, i), FBNIC_RPC_TCAM_VALIDATE); +} + +static void fbnic_write_ip_outer_dst_entry(struct fbnic_dev *fbd, + unsigned int idx, + struct fbnic_ip_addr *ip_addr) +{ + __be16 *mask, *value; + int i; + + mask = &ip_addr->mask.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + value = &ip_addr->value.s6_addr16[FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN - 1]; + + for (i = 0; i < FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN; i++) + wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPDST(idx, i), + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_MASK, ntohs(*mask--)) | + FIELD_PREP(FBNIC_RPC_TCAM_IP_ADDR_VALUE, ntohs(*value--))); + wrfl(fbd); + + wr32(fbd, FBNIC_RPC_TCAM_OUTER_IPDST(idx, i), FBNIC_RPC_TCAM_VALIDATE); +} + +void fbnic_write_ip_addr(struct fbnic_dev *fbd) +{ + int idx; + + for (idx = ARRAY_SIZE(fbd->ip_src); idx--;) { + struct fbnic_ip_addr *ip_addr = &fbd->ip_src[idx]; + + /* Check if update flag is set else skip. */ + if (!(ip_addr->state & FBNIC_TCAM_S_UPDATE)) + continue; + + /* Clear by writing 0s. */ + if (ip_addr->state == FBNIC_TCAM_S_DELETE) { + /* Invalidate entry and clear addr state info */ + fbnic_clear_ip_src_entry(fbd, idx); + memset(ip_addr, 0, sizeof(*ip_addr)); + + continue; + } + + fbnic_write_ip_src_entry(fbd, idx, ip_addr); + + ip_addr->state = FBNIC_TCAM_S_VALID; + } + + /* Repeat process for other IP TCAMs */ + for (idx = ARRAY_SIZE(fbd->ip_dst); idx--;) { + struct fbnic_ip_addr *ip_addr = &fbd->ip_dst[idx]; + + if (!(ip_addr->state & FBNIC_TCAM_S_UPDATE)) + continue; + + if (ip_addr->state == FBNIC_TCAM_S_DELETE) { + fbnic_clear_ip_dst_entry(fbd, idx); + memset(ip_addr, 0, sizeof(*ip_addr)); + + continue; + } + + fbnic_write_ip_dst_entry(fbd, idx, ip_addr); + + ip_addr->state = FBNIC_TCAM_S_VALID; + } + + for (idx = ARRAY_SIZE(fbd->ipo_src); idx--;) { + struct fbnic_ip_addr *ip_addr = &fbd->ipo_src[idx]; + + if (!(ip_addr->state & FBNIC_TCAM_S_UPDATE)) + continue; + + if (ip_addr->state == FBNIC_TCAM_S_DELETE) { + fbnic_clear_ip_outer_src_entry(fbd, idx); + memset(ip_addr, 0, sizeof(*ip_addr)); + + continue; + } + + fbnic_write_ip_outer_src_entry(fbd, idx, ip_addr); + + ip_addr->state = FBNIC_TCAM_S_VALID; + } + + for (idx = ARRAY_SIZE(fbd->ipo_dst); idx--;) { + struct fbnic_ip_addr *ip_addr = &fbd->ipo_dst[idx]; + + if (!(ip_addr->state & FBNIC_TCAM_S_UPDATE)) + continue; + + if (ip_addr->state == FBNIC_TCAM_S_DELETE) { + fbnic_clear_ip_outer_dst_entry(fbd, idx); + memset(ip_addr, 0, sizeof(*ip_addr)); + + continue; + } + + fbnic_write_ip_outer_dst_entry(fbd, idx, ip_addr); + + ip_addr->state = FBNIC_TCAM_S_VALID; + } +} + void fbnic_clear_rules(struct fbnic_dev *fbd) { u32 dest = FIELD_PREP(FBNIC_RPC_ACT_TBL0_DEST_MASK, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h index 0d8285fa5b45..6892414195c3 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h @@ -7,6 +7,8 @@ #include <uapi/linux/in6.h> #include <linux/bitfield.h> +struct in_addr; + /* The TCAM state definitions follow an expected ordering. * They start out disabled, then move through the following states: * Disabled 0 -> Add 2 @@ -32,6 +34,12 @@ enum { #define FBNIC_RPC_TCAM_MACDA_WORD_LEN 3 #define FBNIC_RPC_TCAM_MACDA_NUM_ENTRIES 32 +/* 8 IPSRC and IPDST TCAM Entries each + * 8 registers, Validate each + */ +#define FBNIC_RPC_TCAM_IP_ADDR_WORD_LEN 8 +#define FBNIC_RPC_TCAM_IP_ADDR_NUM_ENTRIES 8 + #define FBNIC_RPC_TCAM_ACT_WORD_LEN 11 #define FBNIC_RPC_TCAM_ACT_NUM_ENTRIES 64 @@ -47,6 +55,13 @@ struct fbnic_mac_addr { DECLARE_BITMAP(act_tcam, FBNIC_RPC_TCAM_ACT_NUM_ENTRIES); }; +struct fbnic_ip_addr { + struct in6_addr mask, value; + unsigned char version; + unsigned char state; + DECLARE_BITMAP(act_tcam, FBNIC_RPC_TCAM_ACT_NUM_ENTRIES); +}; + struct fbnic_act_tcam { struct { u16 tcam[FBNIC_RPC_TCAM_ACT_WORD_LEN]; @@ -81,6 +96,11 @@ enum { #define FBNIC_RPC_ACT_TBL_BMC_OFFSET 0 #define FBNIC_RPC_ACT_TBL_BMC_ALL_MULTI_OFFSET 1 +/* This should leave us with 48 total entries in the TCAM that can be used + * for NFC after also deducting the 14 needed for RSS table programming. + */ +#define FBNIC_RPC_ACT_TBL_NFC_OFFSET 2 + /* We reserve the last 14 entries for RSS rules on the host. The BMC * unicast rule will need to be populated above these and is expected to * use MACDA TCAM entry 23 to store the BMC MAC address. @@ -88,6 +108,9 @@ enum { #define FBNIC_RPC_ACT_TBL_RSS_OFFSET \ (FBNIC_RPC_ACT_TBL_NUM_ENTRIES - FBNIC_RSS_EN_NUM_ENTRIES) +#define FBNIC_RPC_ACT_TBL_NFC_ENTRIES \ + (FBNIC_RPC_ACT_TBL_RSS_OFFSET - FBNIC_RPC_ACT_TBL_NFC_OFFSET) + /* Flags used to identify the owner for this MAC filter. Note that any * flags set for Broadcast thru Promisc indicate that the rule belongs * to the RSS filters for the host. @@ -168,6 +191,7 @@ void fbnic_rss_init_en_mask(struct fbnic_net *fbn); void fbnic_rss_disable_hw(struct fbnic_dev *fbd); void fbnic_rss_reinit_hw(struct fbnic_dev *fbd, struct fbnic_net *fbn); void fbnic_rss_reinit(struct fbnic_dev *fbd, struct fbnic_net *fbn); +u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type); int __fbnic_xc_unsync(struct fbnic_mac_addr *mac_addr, unsigned int tcam_idx); struct fbnic_mac_addr *__fbnic_uc_sync(struct fbnic_dev *fbd, @@ -177,6 +201,17 @@ struct fbnic_mac_addr *__fbnic_mc_sync(struct fbnic_dev *fbd, void fbnic_sift_macda(struct fbnic_dev *fbd); void fbnic_write_macda(struct fbnic_dev *fbd); +struct fbnic_ip_addr *__fbnic_ip4_sync(struct fbnic_dev *fbd, + struct fbnic_ip_addr *ip_addr, + const struct in_addr *addr, + const struct in_addr *mask); +struct fbnic_ip_addr *__fbnic_ip6_sync(struct fbnic_dev *fbd, + struct fbnic_ip_addr *ip_addr, + const struct in6_addr *addr, + const struct in6_addr *mask); +int __fbnic_ip_unsync(struct fbnic_ip_addr *ip_addr, unsigned int tcam_idx); +void fbnic_write_ip_addr(struct fbnic_dev *fbd); + static inline int __fbnic_uc_unsync(struct fbnic_mac_addr *mac_addr) { return __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_UNICAST); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index d4d7027df9a0..24d2b528b66c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -113,6 +113,11 @@ static int fbnic_maybe_stop_tx(const struct net_device *dev, res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size, FBNIC_TX_DESC_WAKEUP); + if (!res) { + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.twq.stop++; + u64_stats_update_end(&ring->stats.syncp); + } return !res; } @@ -191,6 +196,9 @@ fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) skb->csum_offset / 2)); *meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO); + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.twq.csum_partial++; + u64_stats_update_end(&ring->stats.syncp); *meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) | FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2)); @@ -198,12 +206,15 @@ fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) } static void -fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq) +fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq, + u64 *csum_cmpl, u64 *csum_none) { skb_checksum_none_assert(skb); - if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) + if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) { + (*csum_none)++; return; + } if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -212,6 +223,7 @@ fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq) skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = (__force __wsum)csum; + (*csum_cmpl)++; } } @@ -444,7 +456,7 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, if (unlikely(discard)) { u64_stats_update_begin(&ring->stats.syncp); ring->stats.dropped += total_packets; - ring->stats.ts_lost += ts_lost; + ring->stats.twq.ts_lost += ts_lost; u64_stats_update_end(&ring->stats.syncp); netdev_tx_completed_queue(txq, total_packets, total_bytes); @@ -456,9 +468,13 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, ring->stats.packets += total_packets; u64_stats_update_end(&ring->stats.syncp); - netif_txq_completed_wake(txq, total_packets, total_bytes, - fbnic_desc_unused(ring), - FBNIC_TX_DESC_WAKEUP); + if (!netif_txq_completed_wake(txq, total_packets, total_bytes, + fbnic_desc_unused(ring), + FBNIC_TX_DESC_WAKEUP)) { + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.twq.wake++; + u64_stats_update_end(&ring->stats.syncp); + } } static void fbnic_clean_tsq(struct fbnic_napi_vector *nv, @@ -507,7 +523,7 @@ static void fbnic_clean_tsq(struct fbnic_napi_vector *nv, skb_tstamp_tx(skb, &hwtstamp); u64_stats_update_begin(&ring->stats.syncp); - ring->stats.ts_packets++; + ring->stats.twq.ts_packets++; u64_stats_update_end(&ring->stats.syncp); } @@ -661,8 +677,13 @@ static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq) struct page *page; page = page_pool_dev_alloc_pages(nv->page_pool); - if (!page) + if (!page) { + u64_stats_update_begin(&bdq->stats.syncp); + bdq->stats.rx.alloc_failed++; + u64_stats_update_end(&bdq->stats.syncp); + break; + } fbnic_page_pool_init(bdq, i, page); fbnic_bd_prep(bdq, i, page); @@ -875,12 +896,13 @@ static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd, static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv, u64 rcd, struct sk_buff *skb, - struct fbnic_q_triad *qt) + struct fbnic_q_triad *qt, + u64 *csum_cmpl, u64 *csum_none) { struct net_device *netdev = nv->napi.dev; struct fbnic_ring *rcq = &qt->cmpl; - fbnic_rx_csum(rcd, skb, rcq); + fbnic_rx_csum(rcd, skb, rcq, csum_cmpl, csum_none); if (netdev->features & NETIF_F_RXHASH) skb_set_hash(skb, @@ -898,7 +920,8 @@ static bool fbnic_rcd_metadata_err(u64 rcd) static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, int budget) { - unsigned int packets = 0, bytes = 0, dropped = 0; + unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0; + u64 csum_complete = 0, csum_none = 0; struct fbnic_ring *rcq = &qt->cmpl; struct fbnic_pkt_buff *pkt; s32 head0 = -1, head1 = -1; @@ -947,14 +970,22 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, /* Populate skb and invalidate XDP */ if (!IS_ERR_OR_NULL(skb)) { - fbnic_populate_skb_fields(nv, rcd, skb, qt); + fbnic_populate_skb_fields(nv, rcd, skb, qt, + &csum_complete, + &csum_none); packets++; bytes += skb->len; napi_gro_receive(&nv->napi, skb); } else { - dropped++; + if (!skb) { + alloc_failed++; + dropped++; + } else { + dropped++; + } + fbnic_put_pkt_buff(nv, pkt, 1); } @@ -977,6 +1008,9 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, /* Re-add ethernet header length (removed in fbnic_build_skb) */ rcq->stats.bytes += ETH_HLEN * packets; rcq->stats.dropped += dropped; + rcq->stats.rx.alloc_failed += alloc_failed; + rcq->stats.rx.csum_complete += csum_complete; + rcq->stats.rx.csum_none += csum_none; u64_stats_update_end(&rcq->stats.syncp); /* Unmap and free processed buffers */ @@ -1054,6 +1088,11 @@ void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, fbn->rx_stats.bytes += stats->bytes; fbn->rx_stats.packets += stats->packets; fbn->rx_stats.dropped += stats->dropped; + fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed; + fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete; + fbn->rx_stats.rx.csum_none += stats->rx.csum_none; + /* Remember to add new stats here */ + BUILD_BUG_ON(sizeof(fbn->tx_stats.rx) / 8 != 3); } void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, @@ -1065,8 +1104,13 @@ void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, fbn->tx_stats.bytes += stats->bytes; fbn->tx_stats.packets += stats->packets; fbn->tx_stats.dropped += stats->dropped; - fbn->tx_stats.ts_lost += stats->ts_lost; - fbn->tx_stats.ts_packets += stats->ts_packets; + fbn->tx_stats.twq.csum_partial += stats->twq.csum_partial; + fbn->tx_stats.twq.ts_lost += stats->twq.ts_lost; + fbn->tx_stats.twq.ts_packets += stats->twq.ts_packets; + fbn->tx_stats.twq.stop += stats->twq.stop; + fbn->tx_stats.twq.wake += stats->twq.wake; + /* Remember to add new stats here */ + BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 5); } static void fbnic_remove_tx_ring(struct fbnic_net *fbn, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index c2a94f31f71b..b53a7d28ecd3 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -56,9 +56,21 @@ struct fbnic_pkt_buff { struct fbnic_queue_stats { u64 packets; u64 bytes; + union { + struct { + u64 csum_partial; + u64 ts_packets; + u64 ts_lost; + u64 stop; + u64 wake; + } twq; + struct { + u64 alloc_failed; + u64 csum_complete; + u64 csum_none; + } rx; + }; u64 dropped; - u64 ts_packets; - u64 ts_lost; struct u64_stats_sync syncp; }; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c b/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c index 0d6c59d6d4ae..ea6a288c0d5e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_hwmon.c @@ -83,42 +83,12 @@ nfp_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, return 0; } -static u32 nfp_chip_config[] = { - HWMON_C_REGISTER_TZ, - 0 -}; - -static const struct hwmon_channel_info nfp_chip = { - .type = hwmon_chip, - .config = nfp_chip_config, -}; - -static u32 nfp_temp_config[] = { - HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT, - 0 -}; - -static const struct hwmon_channel_info nfp_temp = { - .type = hwmon_temp, - .config = nfp_temp_config, -}; - -static u32 nfp_power_config[] = { - HWMON_P_INPUT | HWMON_P_MAX, - HWMON_P_INPUT, - HWMON_P_INPUT, - 0 -}; - -static const struct hwmon_channel_info nfp_power = { - .type = hwmon_power, - .config = nfp_power_config, -}; - static const struct hwmon_channel_info * const nfp_hwmon_info[] = { - &nfp_chip, - &nfp_temp, - &nfp_power, + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT), + HWMON_CHANNEL_INFO(power, HWMON_P_INPUT | HWMON_P_MAX, + HWMON_P_INPUT, + HWMON_P_INPUT), NULL }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index fa167b1aa019..5222a035fd19 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -3033,7 +3033,7 @@ static void qed_iov_vf_mbx_vport_update(struct qed_hwfn *p_hwfn, u16 length; int rc; - /* Valiate PF can send such a request */ + /* Validate PF can send such a request */ if (!vf->vport_instance) { DP_VERBOSE(p_hwfn, QED_MSG_IOV, @@ -3312,7 +3312,7 @@ static void qed_iov_vf_mbx_ucast_filter(struct qed_hwfn *p_hwfn, goto out; } - /* Determine if the unicast filtering is acceptible by PF */ + /* Determine if the unicast filtering is acceptable by PF */ if ((p_bulletin->valid_bitmap & BIT(VLAN_ADDR_FORCED)) && (params.type == QED_FILTER_VLAN || params.type == QED_FILTER_MAC_VLAN)) { @@ -3729,7 +3729,7 @@ qed_iov_execute_vf_flr_cleanup(struct qed_hwfn *p_hwfn, rc = qed_iov_enable_vf_access(p_hwfn, p_ptt, p_vf); if (rc) { - DP_ERR(p_hwfn, "Failed to re-enable VF[%d] acces\n", + DP_ERR(p_hwfn, "Failed to re-enable VF[%d] access\n", vfid); return rc; } @@ -4480,7 +4480,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled) struct qed_ptt *ptt = qed_ptt_acquire(hwfn); /* Failure to acquire the ptt in 100g creates an odd error - * where the first engine has already relased IOV. + * where the first engine has already released IOV. */ if (!ptt) { DP_ERR(hwfn, "Failed to acquire ptt\n"); diff --git a/drivers/net/ethernet/realtek/Kconfig b/drivers/net/ethernet/realtek/Kconfig index 8a8ea51c639e..fe136f61586f 100644 --- a/drivers/net/ethernet/realtek/Kconfig +++ b/drivers/net/ethernet/realtek/Kconfig @@ -114,7 +114,8 @@ config R8169 will be called r8169. This is recommended. config R8169_LEDS - def_bool R8169 && LEDS_TRIGGER_NETDEV + bool "Support for controlling the NIC LEDs" + depends on R8169 && LEDS_TRIGGER_NETDEV depends on !(R8169=y && LEDS_CLASS=m) help Optional support for controlling the NIC LED's with the netdev diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5a5eba49c651..9fe53322d7bb 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -169,6 +169,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_VDEVICE(REALTEK, 0x8125) }, { PCI_VDEVICE(REALTEK, 0x8126) }, { PCI_VDEVICE(REALTEK, 0x3000) }, + { PCI_VDEVICE(REALTEK, 0x5000) }, {} }; @@ -5222,6 +5223,7 @@ static int r8169_mdio_register(struct rtl8169_private *tp) new_bus->priv = tp; new_bus->parent = &pdev->dev; new_bus->irq[0] = PHY_MAC_INTERRUPT; + new_bus->phy_mask = GENMASK(31, 1); snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x-%x", pci_domain_nr(pdev->bus), pci_dev_id(pdev)); @@ -5251,9 +5253,9 @@ static int r8169_mdio_register(struct rtl8169_private *tp) /* mimic behavior of r8125/r8126 vendor drivers */ if (tp->mac_version == RTL_GIGA_MAC_VER_61) - phy_set_eee_broken(tp->phydev, - ETHTOOL_LINK_MODE_2500baseT_Full_BIT); - phy_set_eee_broken(tp->phydev, ETHTOOL_LINK_MODE_5000baseT_Full_BIT); + phy_disable_eee_mode(tp->phydev, + ETHTOOL_LINK_MODE_2500baseT_Full_BIT); + phy_disable_eee_mode(tp->phydev, ETHTOOL_LINK_MODE_5000baseT_Full_BIT); /* PHY will be woken up in rtl_open() */ phy_suspend(tp->phydev); diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 84d09a8973b7..aba772e14555 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1287,17 +1287,14 @@ static struct device_node *rswitch_get_port_node(struct rswitch_device *rdev) if (!ports) return NULL; - for_each_child_of_node(ports, port) { + for_each_available_child_of_node(ports, port) { err = of_property_read_u32(port, "reg", &index); if (err < 0) { port = NULL; goto out; } - if (index == rdev->etha->index) { - if (!of_device_is_available(port)) - port = NULL; + if (index == rdev->etha->index) break; - } } out: diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 3eb55dcfa8a6..c4c43434f314 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -38,8 +38,9 @@ config SFC_MTD default y help This exposes the on-board flash and/or EEPROM as MTD devices - (e.g. /dev/mtd1). This is required to update the firmware or - the boot configuration under Linux. + (e.g. /dev/mtd1). This is required to update the boot + configuration under Linux, or use some older userland tools to + update the firmware. config SFC_MCDI_MON bool "Solarflare SFC9100-family hwmon support" depends on SFC && HWMON && !(SFC=y && HWMON=m) diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index 8f446b9bd5ee..d99039ec468d 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -7,7 +7,7 @@ sfc-y += efx.o efx_common.o efx_channels.o nic.o \ mcdi_functions.o mcdi_filters.o mcdi_mon.o \ ef100.o ef100_nic.o ef100_netdev.o \ ef100_ethtool.o ef100_rx.o ef100_tx.o \ - efx_devlink.o + efx_devlink.o efx_reflash.o sfc-$(CONFIG_SFC_MTD) += mtd.o sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \ mae.o tc.o tc_bindings.o tc_counters.o \ diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 452009ed7a43..47d78abecf30 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3501,7 +3501,7 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_METADATA_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_METADATA_OUT_LENMAX); const struct efx_ef10_nvram_type_info *info; - size_t size, erase_size, outlen; + size_t size, erase_size, write_size, outlen; int type_idx = 0; bool protected; int rc; @@ -3516,7 +3516,8 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, if (info->port != efx_port_num(efx)) return -ENODEV; - rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &protected); + rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &write_size, + &protected); if (rc) return rc; if (protected && @@ -3561,6 +3562,8 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, if (!erase_size) part->common.mtd.flags |= MTD_NO_ERASE; + part->common.mtd.writesize = write_size; + return 0; } diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index c88ec3e24836..5a14d94163b1 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -1003,6 +1003,7 @@ int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev) INIT_LIST_HEAD(&efx->vf_reps); INIT_WORK(&efx->mac_work, efx_mac_work); init_waitqueue_head(&efx->flush_wq); + mutex_init(&efx->reflash_mutex); efx->tx_queues_per_channel = 1; efx->rxq_entries = EFX_DEFAULT_DMAQ_SIZE; diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c index 3cd750820fdd..d842c60dfc10 100644 --- a/drivers/net/ethernet/sfc/efx_devlink.c +++ b/drivers/net/ethernet/sfc/efx_devlink.c @@ -19,6 +19,7 @@ #include "mae.h" #include "ef100_rep.h" #endif +#include "efx_reflash.h" struct efx_devlink { struct efx_nic *efx; @@ -615,7 +616,19 @@ static int efx_devlink_info_get(struct devlink *devlink, return 0; } +static int efx_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct efx_devlink *devlink_private = devlink_priv(devlink); + struct efx_nic *efx = devlink_private->efx; + + return efx_reflash_flash_firmware(efx, params->fw, extack); +} + static const struct devlink_ops sfc_devlink_ops = { + .supported_flash_update_params = 0, + .flash_update = efx_devlink_flash_update, .info_get = efx_devlink_info_get, }; diff --git a/drivers/net/ethernet/sfc/efx_reflash.c b/drivers/net/ethernet/sfc/efx_reflash.c new file mode 100644 index 000000000000..ddc53740f098 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_reflash.c @@ -0,0 +1,514 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include <linux/crc32.h> +#include <net/devlink.h> +#include "efx_reflash.h" +#include "net_driver.h" +#include "fw_formats.h" +#include "mcdi_pcol.h" +#include "mcdi.h" + +/* Try to parse a Reflash header at the specified offset */ +static bool efx_reflash_parse_reflash_header(const struct firmware *fw, + size_t header_offset, u32 *type, + u32 *subtype, const u8 **data, + size_t *data_size) +{ + size_t header_end, trailer_offset, trailer_end; + u32 magic, version, payload_size, header_len; + const u8 *header, *trailer; + u32 expected_crc, crc; + + if (check_add_overflow(header_offset, EFX_REFLASH_HEADER_LENGTH_OFST + + EFX_REFLASH_HEADER_LENGTH_LEN, + &header_end)) + return false; + if (fw->size < header_end) + return false; + + header = fw->data + header_offset; + magic = get_unaligned_le32(header + EFX_REFLASH_HEADER_MAGIC_OFST); + if (magic != EFX_REFLASH_HEADER_MAGIC_VALUE) + return false; + + version = get_unaligned_le32(header + EFX_REFLASH_HEADER_VERSION_OFST); + if (version != EFX_REFLASH_HEADER_VERSION_VALUE) + return false; + + payload_size = get_unaligned_le32(header + EFX_REFLASH_HEADER_PAYLOAD_SIZE_OFST); + header_len = get_unaligned_le32(header + EFX_REFLASH_HEADER_LENGTH_OFST); + if (check_add_overflow(header_offset, header_len, &trailer_offset) || + check_add_overflow(trailer_offset, payload_size, &trailer_offset) || + check_add_overflow(trailer_offset, EFX_REFLASH_TRAILER_LEN, + &trailer_end)) + return false; + if (fw->size < trailer_end) + return false; + + trailer = fw->data + trailer_offset; + expected_crc = get_unaligned_le32(trailer + EFX_REFLASH_TRAILER_CRC_OFST); + /* Addition could overflow u32, but not size_t since we already + * checked trailer_offset didn't overflow. So cast to size_t first. + */ + crc = crc32_le(0, header, (size_t)header_len + payload_size); + if (crc != expected_crc) + return false; + + *type = get_unaligned_le32(header + EFX_REFLASH_HEADER_FIRMWARE_TYPE_OFST); + *subtype = get_unaligned_le32(header + EFX_REFLASH_HEADER_FIRMWARE_SUBTYPE_OFST); + if (*type == EFX_REFLASH_FIRMWARE_TYPE_BUNDLE) { + /* All the bundle data is written verbatim to NVRAM */ + *data = fw->data; + *data_size = fw->size; + } else { + /* Other payload types strip the reflash header and trailer + * from the data written to NVRAM + */ + *data = header + header_len; + *data_size = payload_size; + } + + return true; +} + +/* Map from FIRMWARE_TYPE to NVRAM_PARTITION_TYPE */ +static int efx_reflash_partition_type(u32 type, u32 subtype, + u32 *partition_type, + u32 *partition_subtype) +{ + int rc = 0; + + switch (type) { + case EFX_REFLASH_FIRMWARE_TYPE_BOOTROM: + *partition_type = NVRAM_PARTITION_TYPE_EXPANSION_ROM; + *partition_subtype = subtype; + break; + case EFX_REFLASH_FIRMWARE_TYPE_BUNDLE: + *partition_type = NVRAM_PARTITION_TYPE_BUNDLE; + *partition_subtype = subtype; + break; + default: + /* Not supported */ + rc = -EINVAL; + } + + return rc; +} + +/* Try to parse a SmartNIC image header at the specified offset */ +static bool efx_reflash_parse_snic_header(const struct firmware *fw, + size_t header_offset, + u32 *partition_type, + u32 *partition_subtype, + const u8 **data, size_t *data_size) +{ + u32 magic, version, payload_size, header_len, expected_crc, crc; + size_t header_end, payload_end; + const u8 *header; + + if (check_add_overflow(header_offset, EFX_SNICIMAGE_HEADER_MINLEN, + &header_end) || + fw->size < header_end) + return false; + + header = fw->data + header_offset; + magic = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_MAGIC_OFST); + if (magic != EFX_SNICIMAGE_HEADER_MAGIC_VALUE) + return false; + + version = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_VERSION_OFST); + if (version != EFX_SNICIMAGE_HEADER_VERSION_VALUE) + return false; + + header_len = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_LENGTH_OFST); + if (check_add_overflow(header_offset, header_len, &header_end)) + return false; + payload_size = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_PAYLOAD_SIZE_OFST); + if (check_add_overflow(header_end, payload_size, &payload_end) || + fw->size < payload_end) + return false; + + expected_crc = get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_CRC_OFST); + + /* Calculate CRC omitting the expected CRC field itself */ + crc = crc32_le(~0, header, EFX_SNICIMAGE_HEADER_CRC_OFST); + crc = ~crc32_le(crc, + header + EFX_SNICIMAGE_HEADER_CRC_OFST + + EFX_SNICIMAGE_HEADER_CRC_LEN, + header_len + payload_size - EFX_SNICIMAGE_HEADER_CRC_OFST - + EFX_SNICIMAGE_HEADER_CRC_LEN); + if (crc != expected_crc) + return false; + + *partition_type = + get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_PARTITION_TYPE_OFST); + *partition_subtype = + get_unaligned_le32(header + EFX_SNICIMAGE_HEADER_PARTITION_SUBTYPE_OFST); + *data = fw->data; + *data_size = fw->size; + return true; +} + +/* Try to parse a SmartNIC bundle header at the specified offset */ +static bool efx_reflash_parse_snic_bundle_header(const struct firmware *fw, + size_t header_offset, + u32 *partition_type, + u32 *partition_subtype, + const u8 **data, + size_t *data_size) +{ + u32 magic, version, bundle_type, header_len, expected_crc, crc; + size_t header_end; + const u8 *header; + + if (check_add_overflow(header_offset, EFX_SNICBUNDLE_HEADER_LEN, + &header_end)) + return false; + if (fw->size < header_end) + return false; + + header = fw->data + header_offset; + magic = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_MAGIC_OFST); + if (magic != EFX_SNICBUNDLE_HEADER_MAGIC_VALUE) + return false; + + version = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_VERSION_OFST); + if (version != EFX_SNICBUNDLE_HEADER_VERSION_VALUE) + return false; + + bundle_type = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_BUNDLE_TYPE_OFST); + if (bundle_type != NVRAM_PARTITION_TYPE_BUNDLE) + return false; + + header_len = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_LENGTH_OFST); + if (header_len != EFX_SNICBUNDLE_HEADER_LEN) + return false; + + expected_crc = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_CRC_OFST); + crc = ~crc32_le(~0, header, EFX_SNICBUNDLE_HEADER_CRC_OFST); + if (crc != expected_crc) + return false; + + *partition_type = NVRAM_PARTITION_TYPE_BUNDLE; + *partition_subtype = get_unaligned_le32(header + EFX_SNICBUNDLE_HEADER_BUNDLE_SUBTYPE_OFST); + *data = fw->data; + *data_size = fw->size; + return true; +} + +/* Try to find a valid firmware payload in the firmware data. + * When we recognise a valid header, we parse it for the partition type + * (so we know where to ask the MC to write it to) and the location of + * the data blob to write. + */ +static int efx_reflash_parse_firmware_data(const struct firmware *fw, + u32 *partition_type, + u32 *partition_subtype, + const u8 **data, size_t *data_size) +{ + size_t header_offset; + u32 type, subtype; + + /* Some packaging formats (such as CMS/PKCS#7 signed images) + * prepend a header for which finding the size is a non-trivial + * task, so step through the firmware data until we find a valid + * header. + * + * The checks are intended to reject firmware data that is clearly not + * in the expected format. They do not need to be exhaustive as the + * running firmware will perform its own comprehensive validity and + * compatibility checks during the update procedure. + * + * Firmware packages may contain multiple reflash images, e.g. a + * bundle containing one or more other images. Only check the + * outermost container by stopping after the first candidate image + * found even it is for an unsupported partition type. + */ + for (header_offset = 0; header_offset < fw->size; header_offset++) { + if (efx_reflash_parse_snic_bundle_header(fw, header_offset, + partition_type, + partition_subtype, + data, data_size)) + return 0; + + if (efx_reflash_parse_snic_header(fw, header_offset, + partition_type, + partition_subtype, data, + data_size)) + return 0; + + if (efx_reflash_parse_reflash_header(fw, header_offset, &type, + &subtype, data, data_size)) + return efx_reflash_partition_type(type, subtype, + partition_type, + partition_subtype); + } + + return -EINVAL; +} + +/* Limit the number of status updates during the erase or write phases */ +#define EFX_DEVLINK_STATUS_UPDATE_COUNT 50 + +/* Expected timeout for the efx_mcdi_nvram_update_finish_polled() */ +#define EFX_DEVLINK_UPDATE_FINISH_TIMEOUT 900 + +/* Ideal erase chunk size. This is a balance between minimising the number of + * MCDI requests to erase an entire partition whilst avoiding tripping the MCDI + * RPC timeout. + */ +#define EFX_NVRAM_ERASE_IDEAL_CHUNK_SIZE (64 * 1024) + +static int efx_reflash_erase_partition(struct efx_nic *efx, + struct netlink_ext_ack *extack, + struct devlink *devlink, u32 type, + size_t partition_size, + size_t align) +{ + size_t chunk, offset, next_update; + int rc; + + /* Partitions that cannot be erased or do not require erase before + * write are advertised with a erase alignment/sector size of zero. + */ + if (align == 0) + /* Nothing to do */ + return 0; + + if (partition_size % align) + return -EINVAL; + + /* Erase the entire NVRAM partition a chunk at a time to avoid + * potentially tripping the MCDI RPC timeout. + */ + if (align >= EFX_NVRAM_ERASE_IDEAL_CHUNK_SIZE) + chunk = align; + else + chunk = rounddown(EFX_NVRAM_ERASE_IDEAL_CHUNK_SIZE, align); + + for (offset = 0, next_update = 0; offset < partition_size; offset += chunk) { + if (offset >= next_update) { + devlink_flash_update_status_notify(devlink, "Erasing", + NULL, offset, + partition_size); + next_update += partition_size / EFX_DEVLINK_STATUS_UPDATE_COUNT; + } + + chunk = min_t(size_t, partition_size - offset, chunk); + rc = efx_mcdi_nvram_erase(efx, type, offset, chunk); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Erase failed for NVRAM partition %#x at %#zx-%#zx", + type, offset, offset + chunk - 1); + return rc; + } + } + + devlink_flash_update_status_notify(devlink, "Erasing", NULL, + partition_size, partition_size); + + return 0; +} + +static int efx_reflash_write_partition(struct efx_nic *efx, + struct netlink_ext_ack *extack, + struct devlink *devlink, u32 type, + const u8 *data, size_t data_size, + size_t align) +{ + size_t write_max, chunk, offset, next_update; + int rc; + + if (align == 0) + return -EINVAL; + + /* Write the NVRAM partition in chunks that are the largest multiple + * of the partition's required write alignment that will fit into the + * MCDI NVRAM_WRITE RPC payload. + */ + if (efx->type->mcdi_max_ver < 2) + write_max = MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_LEN * + MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MAXNUM; + else + write_max = MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_LEN * + MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MAXNUM_MCDI2; + chunk = rounddown(write_max, align); + + for (offset = 0, next_update = 0; offset + chunk <= data_size; offset += chunk) { + if (offset >= next_update) { + devlink_flash_update_status_notify(devlink, "Writing", + NULL, offset, + data_size); + next_update += data_size / EFX_DEVLINK_STATUS_UPDATE_COUNT; + } + + rc = efx_mcdi_nvram_write(efx, type, offset, data + offset, chunk); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Write failed for NVRAM partition %#x at %#zx-%#zx", + type, offset, offset + chunk - 1); + return rc; + } + } + + /* Round up left over data to satisfy write alignment */ + if (offset < data_size) { + size_t remaining = data_size - offset; + u8 *buf; + + if (offset >= next_update) + devlink_flash_update_status_notify(devlink, "Writing", + NULL, offset, + data_size); + + chunk = roundup(remaining, align); + buf = kmalloc(chunk, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memcpy(buf, data + offset, remaining); + memset(buf + remaining, 0xFF, chunk - remaining); + rc = efx_mcdi_nvram_write(efx, type, offset, buf, chunk); + kfree(buf); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Write failed for NVRAM partition %#x at %#zx-%#zx", + type, offset, offset + chunk - 1); + return rc; + } + } + + devlink_flash_update_status_notify(devlink, "Writing", NULL, data_size, + data_size); + + return 0; +} + +int efx_reflash_flash_firmware(struct efx_nic *efx, const struct firmware *fw, + struct netlink_ext_ack *extack) +{ + size_t data_size, size, erase_align, write_align; + struct devlink *devlink = efx->devlink; + u32 type, data_subtype, subtype; + const u8 *data; + bool protected; + int rc; + + if (!efx_has_cap(efx, BUNDLE_UPDATE)) { + NL_SET_ERR_MSG_MOD(extack, "NVRAM bundle updates are not supported by the firmware"); + return -EOPNOTSUPP; + } + + devlink_flash_update_status_notify(devlink, "Checking update", NULL, 0, 0); + + rc = efx_reflash_parse_firmware_data(fw, &type, &data_subtype, &data, + &data_size); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, + "Firmware image validation check failed"); + goto out; + } + + mutex_lock(&efx->reflash_mutex); + + rc = efx_mcdi_nvram_metadata(efx, type, &subtype, NULL, NULL, 0); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Metadata query for NVRAM partition %#x failed", + type); + goto out_unlock; + } + + if (subtype != data_subtype) { + NL_SET_ERR_MSG_MOD(extack, + "Firmware image is not appropriate for this adapter"); + rc = -EINVAL; + goto out_unlock; + } + + rc = efx_mcdi_nvram_info(efx, type, &size, &erase_align, &write_align, + &protected); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Info query for NVRAM partition %#x failed", + type); + goto out_unlock; + } + + if (protected) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "NVRAM partition %#x is protected", + type); + rc = -EPERM; + goto out_unlock; + } + + if (write_align == 0) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "NVRAM partition %#x is not writable", + type); + rc = -EACCES; + goto out_unlock; + } + + if (erase_align != 0 && size % erase_align) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "NVRAM partition %#x has a bad partition table entry, can't erase it", + type); + rc = -EACCES; + goto out_unlock; + } + + if (data_size > size) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Firmware image is too big for NVRAM partition %#x", + type); + rc = -EFBIG; + goto out_unlock; + } + + devlink_flash_update_status_notify(devlink, "Starting update", NULL, 0, 0); + + rc = efx_mcdi_nvram_update_start(efx, type); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Update start request for NVRAM partition %#x failed", + type); + goto out_unlock; + } + + rc = efx_reflash_erase_partition(efx, extack, devlink, type, size, + erase_align); + if (rc) + goto out_update_finish; + + rc = efx_reflash_write_partition(efx, extack, devlink, type, data, + data_size, write_align); + if (rc) + goto out_update_finish; + + devlink_flash_update_timeout_notify(devlink, "Finishing update", NULL, + EFX_DEVLINK_UPDATE_FINISH_TIMEOUT); + +out_update_finish: + if (rc) + /* Don't obscure the return code from an earlier failure */ + efx_mcdi_nvram_update_finish(efx, type, EFX_UPDATE_FINISH_ABORT); + else + rc = efx_mcdi_nvram_update_finish_polled(efx, type); +out_unlock: + mutex_unlock(&efx->reflash_mutex); +out: + devlink_flash_update_status_notify(devlink, rc ? "Update failed" : + "Update complete", + NULL, 0, 0); + return rc; +} diff --git a/drivers/net/ethernet/sfc/efx_reflash.h b/drivers/net/ethernet/sfc/efx_reflash.h new file mode 100644 index 000000000000..3dffac565161 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_reflash.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef _EFX_REFLASH_H +#define _EFX_REFLASH_H + +#include "net_driver.h" +#include <linux/firmware.h> + +int efx_reflash_flash_firmware(struct efx_nic *efx, const struct firmware *fw, + struct netlink_ext_ack *extack); + +#endif /* _EFX_REFLASH_H */ diff --git a/drivers/net/ethernet/sfc/fw_formats.h b/drivers/net/ethernet/sfc/fw_formats.h new file mode 100644 index 000000000000..cbc350c96013 --- /dev/null +++ b/drivers/net/ethernet/sfc/fw_formats.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef _EFX_FW_FORMATS_H +#define _EFX_FW_FORMATS_H + +/* Header layouts of firmware update images recognised by Efx NICs. + * The sources-of-truth for these layouts are AMD internal documents + * and sfregistry headers, neither of which are available externally + * nor usable directly by the driver. + * + * While each format includes a 'magic number', these are at different + * offsets in the various formats, and a legal header for one format + * could have the right value in whichever field occupies that offset + * to match another format's magic. + * Besides, some packaging formats (such as CMS/PKCS#7 signed images) + * prepend a header for which finding the size is a non-trivial task; + * rather than trying to parse those headers, we search byte-by-byte + * through the provided firmware image looking for a valid header. + * Thus, format recognition has to include validation of the checksum + * field, even though the firmware will validate that itself before + * applying the image. + */ + +/* EF10 (Medford2, X2) "reflash" header format. Defined in SF-121352-AN */ +#define EFX_REFLASH_HEADER_MAGIC_OFST 0 +#define EFX_REFLASH_HEADER_MAGIC_LEN 4 +#define EFX_REFLASH_HEADER_MAGIC_VALUE 0x106F1A5 + +#define EFX_REFLASH_HEADER_VERSION_OFST 4 +#define EFX_REFLASH_HEADER_VERSION_LEN 4 +#define EFX_REFLASH_HEADER_VERSION_VALUE 4 + +#define EFX_REFLASH_HEADER_FIRMWARE_TYPE_OFST 8 +#define EFX_REFLASH_HEADER_FIRMWARE_TYPE_LEN 4 +#define EFX_REFLASH_FIRMWARE_TYPE_BOOTROM 0x2 +#define EFX_REFLASH_FIRMWARE_TYPE_BUNDLE 0xd + +#define EFX_REFLASH_HEADER_FIRMWARE_SUBTYPE_OFST 12 +#define EFX_REFLASH_HEADER_FIRMWARE_SUBTYPE_LEN 4 + +#define EFX_REFLASH_HEADER_PAYLOAD_SIZE_OFST 16 +#define EFX_REFLASH_HEADER_PAYLOAD_SIZE_LEN 4 + +#define EFX_REFLASH_HEADER_LENGTH_OFST 20 +#define EFX_REFLASH_HEADER_LENGTH_LEN 4 + +/* Reflash trailer */ +#define EFX_REFLASH_TRAILER_CRC_OFST 0 +#define EFX_REFLASH_TRAILER_CRC_LEN 4 + +#define EFX_REFLASH_TRAILER_LEN \ + (EFX_REFLASH_TRAILER_CRC_OFST + EFX_REFLASH_TRAILER_CRC_LEN) + +/* EF100 "SmartNIC image" header format. + * Defined in sfregistry "src/layout/snic_image_hdr.h". + */ +#define EFX_SNICIMAGE_HEADER_MAGIC_OFST 16 +#define EFX_SNICIMAGE_HEADER_MAGIC_LEN 4 +#define EFX_SNICIMAGE_HEADER_MAGIC_VALUE 0x541C057A + +#define EFX_SNICIMAGE_HEADER_VERSION_OFST 20 +#define EFX_SNICIMAGE_HEADER_VERSION_LEN 4 +#define EFX_SNICIMAGE_HEADER_VERSION_VALUE 1 + +#define EFX_SNICIMAGE_HEADER_LENGTH_OFST 24 +#define EFX_SNICIMAGE_HEADER_LENGTH_LEN 4 + +#define EFX_SNICIMAGE_HEADER_PARTITION_TYPE_OFST 36 +#define EFX_SNICIMAGE_HEADER_PARTITION_TYPE_LEN 4 + +#define EFX_SNICIMAGE_HEADER_PARTITION_SUBTYPE_OFST 40 +#define EFX_SNICIMAGE_HEADER_PARTITION_SUBTYPE_LEN 4 + +#define EFX_SNICIMAGE_HEADER_PAYLOAD_SIZE_OFST 60 +#define EFX_SNICIMAGE_HEADER_PAYLOAD_SIZE_LEN 4 + +#define EFX_SNICIMAGE_HEADER_CRC_OFST 64 +#define EFX_SNICIMAGE_HEADER_CRC_LEN 4 + +#define EFX_SNICIMAGE_HEADER_MINLEN 256 + +/* EF100 "SmartNIC bundle" header format. Defined in SF-122606-TC */ +#define EFX_SNICBUNDLE_HEADER_MAGIC_OFST 0 +#define EFX_SNICBUNDLE_HEADER_MAGIC_LEN 4 +#define EFX_SNICBUNDLE_HEADER_MAGIC_VALUE 0xB1001001 + +#define EFX_SNICBUNDLE_HEADER_VERSION_OFST 4 +#define EFX_SNICBUNDLE_HEADER_VERSION_LEN 4 +#define EFX_SNICBUNDLE_HEADER_VERSION_VALUE 1 + +#define EFX_SNICBUNDLE_HEADER_BUNDLE_TYPE_OFST 8 +#define EFX_SNICBUNDLE_HEADER_BUNDLE_TYPE_LEN 4 + +#define EFX_SNICBUNDLE_HEADER_BUNDLE_SUBTYPE_OFST 12 +#define EFX_SNICBUNDLE_HEADER_BUNDLE_SUBTYPE_LEN 4 + +#define EFX_SNICBUNDLE_HEADER_LENGTH_OFST 20 +#define EFX_SNICBUNDLE_HEADER_LENGTH_LEN 4 + +#define EFX_SNICBUNDLE_HEADER_CRC_OFST 224 +#define EFX_SNICBUNDLE_HEADER_CRC_LEN 4 + +#define EFX_SNICBUNDLE_HEADER_LEN \ + (EFX_SNICBUNDLE_HEADER_CRC_OFST + EFX_SNICBUNDLE_HEADER_CRC_LEN) + +#endif /* _EFX_FW_FORMATS_H */ diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index d461b1a6ce81..dbd2ee915838 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -1625,12 +1625,15 @@ fail: return rc; } +#define EFX_MCDI_NVRAM_DEFAULT_WRITE_LEN 128 + int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type, size_t *size_out, size_t *erase_size_out, - bool *protected_out) + size_t *write_size_out, bool *protected_out) { MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_INFO_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_INFO_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_INFO_V2_OUT_LEN); + size_t write_size = 0; size_t outlen; int rc; @@ -1645,6 +1648,12 @@ int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type, goto fail; } + if (outlen >= MC_CMD_NVRAM_INFO_V2_OUT_LEN) + write_size = MCDI_DWORD(outbuf, NVRAM_INFO_V2_OUT_WRITESIZE); + else + write_size = EFX_MCDI_NVRAM_DEFAULT_WRITE_LEN; + + *write_size_out = write_size; *size_out = MCDI_DWORD(outbuf, NVRAM_INFO_OUT_SIZE); *erase_size_out = MCDI_DWORD(outbuf, NVRAM_INFO_OUT_ERASESIZE); *protected_out = !!(MCDI_DWORD(outbuf, NVRAM_INFO_OUT_FLAGS) & @@ -2163,11 +2172,9 @@ out_free: return rc; } -#ifdef CONFIG_SFC_MTD - #define EFX_MCDI_NVRAM_LEN_MAX 128 -static int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type) +int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type) { MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN); int rc; @@ -2185,6 +2192,8 @@ static int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type) return rc; } +#ifdef CONFIG_SFC_MTD + static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type, loff_t offset, u8 *buffer, size_t length) { @@ -2209,13 +2218,20 @@ static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type, return 0; } -static int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type, - loff_t offset, const u8 *buffer, size_t length) +#endif /* CONFIG_SFC_MTD */ + +int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type, + loff_t offset, const u8 *buffer, size_t length) { - MCDI_DECLARE_BUF(inbuf, - MC_CMD_NVRAM_WRITE_IN_LEN(EFX_MCDI_NVRAM_LEN_MAX)); + efx_dword_t *inbuf; + size_t inlen; int rc; + inlen = ALIGN(MC_CMD_NVRAM_WRITE_IN_LEN(length), 4); + inbuf = kzalloc(inlen, GFP_KERNEL); + if (!inbuf) + return -ENOMEM; + MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_TYPE, type); MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_OFFSET, offset); MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_LENGTH, length); @@ -2223,14 +2239,14 @@ static int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type, BUILD_BUG_ON(MC_CMD_NVRAM_WRITE_OUT_LEN != 0); - rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_WRITE, inbuf, - ALIGN(MC_CMD_NVRAM_WRITE_IN_LEN(length), 4), - NULL, 0, NULL); + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_WRITE, inbuf, inlen, NULL, 0, NULL); + kfree(inbuf); + return rc; } -static int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type, - loff_t offset, size_t length) +int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type, loff_t offset, + size_t length) { MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_ERASE_IN_LEN); int rc; @@ -2246,7 +2262,8 @@ static int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type, return rc; } -static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type) +int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type, + enum efx_update_finish_mode mode) { MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN); @@ -2254,22 +2271,41 @@ static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type) int rc, rc2; MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_FINISH_IN_TYPE, type); - /* Always set this flag. Old firmware ignores it */ - MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_FINISH_V2_IN_FLAGS, + + /* Old firmware doesn't support background update finish and abort + * operations. Fallback to waiting if the requested mode is not + * supported. + */ + if (!efx_has_cap(efx, NVRAM_UPDATE_POLL_VERIFY_RESULT) || + (!efx_has_cap(efx, NVRAM_UPDATE_ABORT_SUPPORTED) && + mode == EFX_UPDATE_FINISH_ABORT)) + mode = EFX_UPDATE_FINISH_WAIT; + + MCDI_POPULATE_DWORD_4(inbuf, NVRAM_UPDATE_FINISH_V2_IN_FLAGS, NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT, - 1); + (mode != EFX_UPDATE_FINISH_ABORT), + NVRAM_UPDATE_FINISH_V2_IN_FLAG_RUN_IN_BACKGROUND, + (mode == EFX_UPDATE_FINISH_BACKGROUND), + NVRAM_UPDATE_FINISH_V2_IN_FLAG_POLL_VERIFY_RESULT, + (mode == EFX_UPDATE_FINISH_POLL), + NVRAM_UPDATE_FINISH_V2_IN_FLAG_ABORT, + (mode == EFX_UPDATE_FINISH_ABORT)); rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_FINISH, inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); if (!rc && outlen >= MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN) { rc2 = MCDI_DWORD(outbuf, NVRAM_UPDATE_FINISH_V2_OUT_RESULT_CODE); - if (rc2 != MC_CMD_NVRAM_VERIFY_RC_SUCCESS) + if (rc2 != MC_CMD_NVRAM_VERIFY_RC_SUCCESS && + rc2 != MC_CMD_NVRAM_VERIFY_RC_PENDING) netif_err(efx, drv, efx->net_dev, "NVRAM update failed verification with code 0x%x\n", rc2); switch (rc2) { case MC_CMD_NVRAM_VERIFY_RC_SUCCESS: break; + case MC_CMD_NVRAM_VERIFY_RC_PENDING: + rc = -EAGAIN; + break; case MC_CMD_NVRAM_VERIFY_RC_CMS_CHECK_FAILED: case MC_CMD_NVRAM_VERIFY_RC_MESSAGE_DIGEST_CHECK_FAILED: case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHECK_FAILED: @@ -2284,6 +2320,8 @@ static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type) case MC_CMD_NVRAM_VERIFY_RC_NO_VALID_SIGNATURES: case MC_CMD_NVRAM_VERIFY_RC_NO_TRUSTED_APPROVERS: case MC_CMD_NVRAM_VERIFY_RC_NO_SIGNATURE_MATCH: + case MC_CMD_NVRAM_VERIFY_RC_REJECT_TEST_SIGNED: + case MC_CMD_NVRAM_VERIFY_RC_SECURITY_LEVEL_DOWNGRADE: rc = -EPERM; break; default: @@ -2296,6 +2334,42 @@ static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type) return rc; } +#define EFX_MCDI_NVRAM_UPDATE_FINISH_INITIAL_POLL_DELAY_MS 5 +#define EFX_MCDI_NVRAM_UPDATE_FINISH_MAX_POLL_DELAY_MS 5000 +#define EFX_MCDI_NVRAM_UPDATE_FINISH_RETRIES 185 + +int efx_mcdi_nvram_update_finish_polled(struct efx_nic *efx, unsigned int type) +{ + unsigned int delay = EFX_MCDI_NVRAM_UPDATE_FINISH_INITIAL_POLL_DELAY_MS; + unsigned int retry = 0; + int rc; + + /* NVRAM updates can take a long time (e.g. up to 1 minute for bundle + * images). Polling for NVRAM update completion ensures that other MCDI + * commands can be issued before the background NVRAM update completes. + * + * The initial call either completes the update synchronously, or + * returns -EAGAIN to indicate processing is continuing. In the latter + * case, we poll for at least 900 seconds, at increasing intervals + * (5ms, 50ms, 500ms, 5s). + */ + rc = efx_mcdi_nvram_update_finish(efx, type, EFX_UPDATE_FINISH_BACKGROUND); + while (rc == -EAGAIN) { + if (retry > EFX_MCDI_NVRAM_UPDATE_FINISH_RETRIES) + return -ETIMEDOUT; + retry++; + + msleep(delay); + if (delay < EFX_MCDI_NVRAM_UPDATE_FINISH_MAX_POLL_DELAY_MS) + delay *= 10; + + rc = efx_mcdi_nvram_update_finish(efx, type, EFX_UPDATE_FINISH_POLL); + } + return rc; +} + +#ifdef CONFIG_SFC_MTD + int efx_mcdi_mtd_read(struct mtd_info *mtd, loff_t start, size_t len, size_t *retlen, u8 *buffer) { @@ -2389,7 +2463,8 @@ int efx_mcdi_mtd_sync(struct mtd_info *mtd) if (part->updating) { part->updating = false; - rc = efx_mcdi_nvram_update_finish(efx, part->nvram_type); + rc = efx_mcdi_nvram_update_finish(efx, part->nvram_type, + EFX_UPDATE_FINISH_WAIT); } return rc; diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index cdb17d7c147f..3755cd3fe1e6 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -392,7 +392,7 @@ int efx_mcdi_log_ctrl(struct efx_nic *efx, bool evq, bool uart, u32 dest_evq); int efx_mcdi_nvram_types(struct efx_nic *efx, u32 *nvram_types_out); int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type, size_t *size_out, size_t *erase_size_out, - bool *protected_out); + size_t *write_size_out, bool *protected_out); int efx_new_mcdi_nvram_test_all(struct efx_nic *efx); int efx_mcdi_nvram_metadata(struct efx_nic *efx, unsigned int type, u32 *subtype, u16 version[4], char *desc, @@ -424,6 +424,26 @@ static inline int efx_mcdi_mon_probe(struct efx_nic *efx) { return 0; } static inline void efx_mcdi_mon_remove(struct efx_nic *efx) {} #endif +int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type); +int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type, + loff_t offset, const u8 *buffer, size_t length); +int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type, + loff_t offset, size_t length); +int efx_mcdi_nvram_metadata(struct efx_nic *efx, unsigned int type, + u32 *subtype, u16 version[4], char *desc, + size_t descsize); + +enum efx_update_finish_mode { + EFX_UPDATE_FINISH_WAIT, + EFX_UPDATE_FINISH_BACKGROUND, + EFX_UPDATE_FINISH_POLL, + EFX_UPDATE_FINISH_ABORT, +}; + +int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type, + enum efx_update_finish_mode mode); +int efx_mcdi_nvram_update_finish_polled(struct efx_nic *efx, unsigned int type); + #ifdef CONFIG_SFC_MTD int efx_mcdi_mtd_read(struct mtd_info *mtd, loff_t start, size_t len, size_t *retlen, u8 *buffer); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index f70a7b7d6345..8b0689f749b5 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1006,6 +1006,7 @@ struct efx_mae; * @dl_port: devlink port associated with the PF * @mem_bar: The BAR that is mapped into membase. * @reg_base: Offset from the start of the bar to the function control window. + * @reflash_mutex: Mutex for serialising firmware reflash operations. * @monitor_work: Hardware monitor workitem * @biu_lock: BIU (bus interface unit) lock * @last_irq_cpu: Last CPU to handle a possible test interrupt. This @@ -1191,6 +1192,7 @@ struct efx_nic { struct devlink_port *dl_port; unsigned int mem_bar; u32 reg_base; + struct mutex reflash_mutex; /* The following fields may be written more often */ diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index e25db747a81a..55053528e498 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -530,6 +530,20 @@ struct dma_features { #define STMMAC_DEFAULT_TWT_LS 0x1E #define STMMAC_ET_MAX 0xFFFFF +/* Common LPI register bits */ +#define LPI_CTRL_STATUS_LPITCSE BIT(21) /* LPI Tx Clock Stop Enable, gmac4, xgmac2 only */ +#define LPI_CTRL_STATUS_LPIATE BIT(20) /* LPI Timer Enable, gmac4 only */ +#define LPI_CTRL_STATUS_LPITXA BIT(19) /* Enable LPI TX Automate */ +#define LPI_CTRL_STATUS_PLSEN BIT(18) /* Enable PHY Link Status */ +#define LPI_CTRL_STATUS_PLS BIT(17) /* PHY Link Status */ +#define LPI_CTRL_STATUS_LPIEN BIT(16) /* LPI Enable */ +#define LPI_CTRL_STATUS_RLPIST BIT(9) /* Receive LPI state, gmac1000 only? */ +#define LPI_CTRL_STATUS_TLPIST BIT(8) /* Transmit LPI state, gmac1000 only? */ +#define LPI_CTRL_STATUS_RLPIEX BIT(3) /* Receive LPI Exit */ +#define LPI_CTRL_STATUS_RLPIEN BIT(2) /* Receive LPI Entry */ +#define LPI_CTRL_STATUS_TLPIEX BIT(1) /* Transmit LPI Exit */ +#define LPI_CTRL_STATUS_TLPIEN BIT(0) /* Transmit LPI Entry */ + #define STMMAC_CHAIN_MODE 0x1 #define STMMAC_RING_MODE 0x2 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index bfe6e2d631bd..79acdf38c525 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -574,6 +574,9 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id if (ret) goto err_disable_device; + plat->tx_fifo_size = SZ_16K * plat->tx_queues_to_use; + plat->rx_fifo_size = SZ_16K * plat->rx_queues_to_use; + if (dev_of_node(&pdev->dev)) ret = loongson_dwmac_dt_config(pdev, plat, &res); else diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index 600fea8f712f..967a16212faf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -59,22 +59,11 @@ enum power_event { /* Energy Efficient Ethernet (EEE) * * LPI status, timer and control register offset + * For LPI control and status bit definitions, see common.h. */ #define LPI_CTRL_STATUS 0x0030 #define LPI_TIMER_CTRL 0x0034 -/* LPI control and status defines */ -#define LPI_CTRL_STATUS_LPITXA 0x00080000 /* Enable LPI TX Automate */ -#define LPI_CTRL_STATUS_PLSEN 0x00040000 /* Enable PHY Link Status */ -#define LPI_CTRL_STATUS_PLS 0x00020000 /* PHY Link Status */ -#define LPI_CTRL_STATUS_LPIEN 0x00010000 /* LPI Enable */ -#define LPI_CTRL_STATUS_RLPIST 0x00000200 /* Receive LPI state */ -#define LPI_CTRL_STATUS_TLPIST 0x00000100 /* Transmit LPI state */ -#define LPI_CTRL_STATUS_RLPIEX 0x00000008 /* Receive LPI Exit */ -#define LPI_CTRL_STATUS_RLPIEN 0x00000004 /* Receive LPI Entry */ -#define LPI_CTRL_STATUS_TLPIEX 0x00000002 /* Transmit LPI Exit */ -#define LPI_CTRL_STATUS_TLPIEN 0x00000001 /* Transmit LPI Entry */ - /* GMAC HW ADDR regs */ #define GMAC_ADDR_HIGH(reg) ((reg > 15) ? 0x00000800 + (reg - 16) * 8 : \ 0x00000040 + (reg * 8)) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 96bcda0856ec..7900bf3effa7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -342,31 +342,24 @@ static int dwmac1000_irq_status(struct mac_device_info *hw, return ret; } -static void dwmac1000_set_eee_mode(struct mac_device_info *hw, - bool en_tx_lpi_clockgating) +static int dwmac1000_set_lpi_mode(struct mac_device_info *hw, + enum stmmac_lpi_mode mode, + bool en_tx_lpi_clockgating, u32 et) { void __iomem *ioaddr = hw->pcsr; u32 value; - /*TODO - en_tx_lpi_clockgating treatment */ + if (mode == STMMAC_LPI_TIMER) + return -EOPNOTSUPP; - /* Enable the link status receive on RGMII, SGMII ore SMII - * receive path and instruct the transmit to enter in LPI - * state. - */ value = readl(ioaddr + LPI_CTRL_STATUS); - value |= LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA; + if (mode == STMMAC_LPI_FORCED) + value |= LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA; + else + value &= ~(LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA); writel(value, ioaddr + LPI_CTRL_STATUS); -} - -static void dwmac1000_reset_eee_mode(struct mac_device_info *hw) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value; - value = readl(ioaddr + LPI_CTRL_STATUS); - value &= ~(LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA); - writel(value, ioaddr + LPI_CTRL_STATUS); + return 0; } static void dwmac1000_set_eee_pls(struct mac_device_info *hw, int link) @@ -509,8 +502,7 @@ const struct stmmac_ops dwmac1000_ops = { .pmt = dwmac1000_pmt, .set_umac_addr = dwmac1000_set_umac_addr, .get_umac_addr = dwmac1000_get_umac_addr, - .set_eee_mode = dwmac1000_set_eee_mode, - .reset_eee_mode = dwmac1000_reset_eee_mode, + .set_lpi_mode = dwmac1000_set_lpi_mode, .set_eee_timer = dwmac1000_set_eee_timer, .set_eee_pls = dwmac1000_set_eee_pls, .debug = dwmac1000_debug, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 184d41a306af..42fe29a4e300 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -177,23 +177,13 @@ enum power_event { /* Energy Efficient Ethernet (EEE) for GMAC4 * * LPI status, timer and control register offset + * For LPI control and status bit definitions, see common.h. */ #define GMAC4_LPI_CTRL_STATUS 0xd0 #define GMAC4_LPI_TIMER_CTRL 0xd4 #define GMAC4_LPI_ENTRY_TIMER 0xd8 #define GMAC4_MAC_ONEUS_TIC_COUNTER 0xdc -/* LPI control and status defines */ -#define GMAC4_LPI_CTRL_STATUS_LPITCSE BIT(21) /* LPI Tx Clock Stop Enable */ -#define GMAC4_LPI_CTRL_STATUS_LPIATE BIT(20) /* LPI Timer Enable */ -#define GMAC4_LPI_CTRL_STATUS_LPITXA BIT(19) /* Enable LPI TX Automate */ -#define GMAC4_LPI_CTRL_STATUS_PLS BIT(17) /* PHY Link Status */ -#define GMAC4_LPI_CTRL_STATUS_LPIEN BIT(16) /* LPI Enable */ -#define GMAC4_LPI_CTRL_STATUS_RLPIEX BIT(3) /* Receive LPI Exit */ -#define GMAC4_LPI_CTRL_STATUS_RLPIEN BIT(2) /* Receive LPI Entry */ -#define GMAC4_LPI_CTRL_STATUS_TLPIEX BIT(1) /* Transmit LPI Exit */ -#define GMAC4_LPI_CTRL_STATUS_TLPIEN BIT(0) /* Transmit LPI Entry */ - /* MAC Debug bitmap */ #define GMAC_DEBUG_TFCSTS_MASK GENMASK(18, 17) #define GMAC_DEBUG_TFCSTS_SHIFT 17 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 9ed8620580a8..cc4ddf608652 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -376,33 +376,46 @@ static void dwmac4_get_umac_addr(struct mac_device_info *hw, GMAC_ADDR_LOW(reg_n)); } -static void dwmac4_set_eee_mode(struct mac_device_info *hw, - bool en_tx_lpi_clockgating) +static int dwmac4_set_lpi_mode(struct mac_device_info *hw, + enum stmmac_lpi_mode mode, + bool en_tx_lpi_clockgating, u32 et) { void __iomem *ioaddr = hw->pcsr; - u32 value; + u32 value, mask; - /* Enable the link status receive on RGMII, SGMII ore SMII - * receive path and instruct the transmit to enter in LPI - * state. - */ - value = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); - value |= GMAC4_LPI_CTRL_STATUS_LPIEN | GMAC4_LPI_CTRL_STATUS_LPITXA; + if (mode == STMMAC_LPI_DISABLE) { + value = 0; + } else { + value = LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA; - if (en_tx_lpi_clockgating) - value |= GMAC4_LPI_CTRL_STATUS_LPITCSE; + if (mode == STMMAC_LPI_TIMER) { + /* Return ERANGE if the timer is larger than the + * register field. + */ + if (et > STMMAC_ET_MAX) + return -ERANGE; - writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS); -} + /* Set the hardware LPI entry timer */ + writel(et, ioaddr + GMAC4_LPI_ENTRY_TIMER); -static void dwmac4_reset_eee_mode(struct mac_device_info *hw) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value; + /* Interpret a zero LPI entry timer to mean + * immediate entry into LPI mode. + */ + if (et) + value |= LPI_CTRL_STATUS_LPIATE; + } - value = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); - value &= ~(GMAC4_LPI_CTRL_STATUS_LPIEN | GMAC4_LPI_CTRL_STATUS_LPITXA); + if (en_tx_lpi_clockgating) + value |= LPI_CTRL_STATUS_LPITCSE; + } + + mask = LPI_CTRL_STATUS_LPIATE | LPI_CTRL_STATUS_LPIEN | + LPI_CTRL_STATUS_LPITXA | LPI_CTRL_STATUS_LPITCSE; + + value |= readl(ioaddr + GMAC4_LPI_CTRL_STATUS) & ~mask; writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS); + + return 0; } static void dwmac4_set_eee_pls(struct mac_device_info *hw, int link) @@ -413,34 +426,13 @@ static void dwmac4_set_eee_pls(struct mac_device_info *hw, int link) value = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); if (link) - value |= GMAC4_LPI_CTRL_STATUS_PLS; + value |= LPI_CTRL_STATUS_PLS; else - value &= ~GMAC4_LPI_CTRL_STATUS_PLS; + value &= ~LPI_CTRL_STATUS_PLS; writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS); } -static void dwmac4_set_eee_lpi_entry_timer(struct mac_device_info *hw, u32 et) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value = et & STMMAC_ET_MAX; - int regval; - - /* Program LPI entry timer value into register */ - writel(value, ioaddr + GMAC4_LPI_ENTRY_TIMER); - - /* Enable/disable LPI entry timer */ - regval = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); - regval |= GMAC4_LPI_CTRL_STATUS_LPIEN | GMAC4_LPI_CTRL_STATUS_LPITXA; - - if (et) - regval |= GMAC4_LPI_CTRL_STATUS_LPIATE; - else - regval &= ~GMAC4_LPI_CTRL_STATUS_LPIATE; - - writel(regval, ioaddr + GMAC4_LPI_CTRL_STATUS); -} - static void dwmac4_set_eee_timer(struct mac_device_info *hw, int ls, int tw) { void __iomem *ioaddr = hw->pcsr; @@ -849,17 +841,17 @@ static int dwmac4_irq_status(struct mac_device_info *hw, /* Clear LPI interrupt by reading MAC_LPI_Control_Status */ u32 status = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); - if (status & GMAC4_LPI_CTRL_STATUS_TLPIEN) { + if (status & LPI_CTRL_STATUS_TLPIEN) { ret |= CORE_IRQ_TX_PATH_IN_LPI_MODE; x->irq_tx_path_in_lpi_mode_n++; } - if (status & GMAC4_LPI_CTRL_STATUS_TLPIEX) { + if (status & LPI_CTRL_STATUS_TLPIEX) { ret |= CORE_IRQ_TX_PATH_EXIT_LPI_MODE; x->irq_tx_path_exit_lpi_mode_n++; } - if (status & GMAC4_LPI_CTRL_STATUS_RLPIEN) + if (status & LPI_CTRL_STATUS_RLPIEN) x->irq_rx_path_in_lpi_mode_n++; - if (status & GMAC4_LPI_CTRL_STATUS_RLPIEX) + if (status & LPI_CTRL_STATUS_RLPIEX) x->irq_rx_path_exit_lpi_mode_n++; } @@ -1201,9 +1193,7 @@ const struct stmmac_ops dwmac4_ops = { .pmt = dwmac4_pmt, .set_umac_addr = dwmac4_set_umac_addr, .get_umac_addr = dwmac4_get_umac_addr, - .set_eee_mode = dwmac4_set_eee_mode, - .reset_eee_mode = dwmac4_reset_eee_mode, - .set_eee_lpi_entry_timer = dwmac4_set_eee_lpi_entry_timer, + .set_lpi_mode = dwmac4_set_lpi_mode, .set_eee_timer = dwmac4_set_eee_timer, .set_eee_pls = dwmac4_set_eee_pls, .pcs_ctrl_ane = dwmac4_ctrl_ane, @@ -1245,9 +1235,7 @@ const struct stmmac_ops dwmac410_ops = { .pmt = dwmac4_pmt, .set_umac_addr = dwmac4_set_umac_addr, .get_umac_addr = dwmac4_get_umac_addr, - .set_eee_mode = dwmac4_set_eee_mode, - .reset_eee_mode = dwmac4_reset_eee_mode, - .set_eee_lpi_entry_timer = dwmac4_set_eee_lpi_entry_timer, + .set_lpi_mode = dwmac4_set_lpi_mode, .set_eee_timer = dwmac4_set_eee_timer, .set_eee_pls = dwmac4_set_eee_pls, .pcs_ctrl_ane = dwmac4_ctrl_ane, @@ -1291,9 +1279,7 @@ const struct stmmac_ops dwmac510_ops = { .pmt = dwmac4_pmt, .set_umac_addr = dwmac4_set_umac_addr, .get_umac_addr = dwmac4_get_umac_addr, - .set_eee_mode = dwmac4_set_eee_mode, - .reset_eee_mode = dwmac4_reset_eee_mode, - .set_eee_lpi_entry_timer = dwmac4_set_eee_lpi_entry_timer, + .set_lpi_mode = dwmac4_set_lpi_mode, .set_eee_timer = dwmac4_set_eee_timer, .set_eee_pls = dwmac4_set_eee_pls, .pcs_ctrl_ane = dwmac4_ctrl_ane, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 20027d3c25a7..a03f5d771566 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -112,14 +112,7 @@ #define XGMAC_MGKPKTEN BIT(1) #define XGMAC_PWRDWN BIT(0) #define XGMAC_LPI_CTRL 0x000000d0 -#define XGMAC_TXCGE BIT(21) -#define XGMAC_LPITXA BIT(19) -#define XGMAC_PLS BIT(17) -#define XGMAC_LPITXEN BIT(16) -#define XGMAC_RLPIEX BIT(3) -#define XGMAC_RLPIEN BIT(2) -#define XGMAC_TLPIEX BIT(1) -#define XGMAC_TLPIEN BIT(0) +/* For definitions, see LPI_CTRL_STATUS_xxx in common.h */ #define XGMAC_LPI_TIMER_CTRL 0x000000d4 #define XGMAC_HW_FEATURE0 0x0000011c #define XGMAC_HWFEAT_EDMA BIT(31) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 9a60a6e8f633..a6d395c6bacd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -316,17 +316,17 @@ static int dwxgmac2_host_irq_status(struct mac_device_info *hw, if (stat & XGMAC_LPIIS) { u32 lpi = readl(ioaddr + XGMAC_LPI_CTRL); - if (lpi & XGMAC_TLPIEN) { + if (lpi & LPI_CTRL_STATUS_TLPIEN) { ret |= CORE_IRQ_TX_PATH_IN_LPI_MODE; x->irq_tx_path_in_lpi_mode_n++; } - if (lpi & XGMAC_TLPIEX) { + if (lpi & LPI_CTRL_STATUS_TLPIEX) { ret |= CORE_IRQ_TX_PATH_EXIT_LPI_MODE; x->irq_tx_path_exit_lpi_mode_n++; } - if (lpi & XGMAC_RLPIEN) + if (lpi & LPI_CTRL_STATUS_RLPIEN) x->irq_rx_path_in_lpi_mode_n++; - if (lpi & XGMAC_RLPIEX) + if (lpi & LPI_CTRL_STATUS_RLPIEX) x->irq_rx_path_exit_lpi_mode_n++; } @@ -425,29 +425,28 @@ static void dwxgmac2_get_umac_addr(struct mac_device_info *hw, addr[5] = (hi_addr >> 8) & 0xff; } -static void dwxgmac2_set_eee_mode(struct mac_device_info *hw, - bool en_tx_lpi_clockgating) +static int dwxgmac2_set_lpi_mode(struct mac_device_info *hw, + enum stmmac_lpi_mode mode, + bool en_tx_lpi_clockgating, u32 et) { void __iomem *ioaddr = hw->pcsr; u32 value; - value = readl(ioaddr + XGMAC_LPI_CTRL); - - value |= XGMAC_LPITXEN | XGMAC_LPITXA; - if (en_tx_lpi_clockgating) - value |= XGMAC_TXCGE; - - writel(value, ioaddr + XGMAC_LPI_CTRL); -} - -static void dwxgmac2_reset_eee_mode(struct mac_device_info *hw) -{ - void __iomem *ioaddr = hw->pcsr; - u32 value; + if (mode == STMMAC_LPI_TIMER) + return -EOPNOTSUPP; value = readl(ioaddr + XGMAC_LPI_CTRL); - value &= ~(XGMAC_LPITXEN | XGMAC_LPITXA | XGMAC_TXCGE); + if (mode == STMMAC_LPI_FORCED) { + value |= LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA; + if (en_tx_lpi_clockgating) + value |= LPI_CTRL_STATUS_LPITCSE; + } else { + value &= ~(LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA | + LPI_CTRL_STATUS_LPITCSE); + } writel(value, ioaddr + XGMAC_LPI_CTRL); + + return 0; } static void dwxgmac2_set_eee_pls(struct mac_device_info *hw, int link) @@ -457,9 +456,9 @@ static void dwxgmac2_set_eee_pls(struct mac_device_info *hw, int link) value = readl(ioaddr + XGMAC_LPI_CTRL); if (link) - value |= XGMAC_PLS; + value |= LPI_CTRL_STATUS_PLS; else - value &= ~XGMAC_PLS; + value &= ~LPI_CTRL_STATUS_PLS; writel(value, ioaddr + XGMAC_LPI_CTRL); } @@ -1525,8 +1524,7 @@ const struct stmmac_ops dwxgmac210_ops = { .pmt = dwxgmac2_pmt, .set_umac_addr = dwxgmac2_set_umac_addr, .get_umac_addr = dwxgmac2_get_umac_addr, - .set_eee_mode = dwxgmac2_set_eee_mode, - .reset_eee_mode = dwxgmac2_reset_eee_mode, + .set_lpi_mode = dwxgmac2_set_lpi_mode, .set_eee_timer = dwxgmac2_set_eee_timer, .set_eee_pls = dwxgmac2_set_eee_pls, .debug = NULL, @@ -1582,8 +1580,7 @@ const struct stmmac_ops dwxlgmac2_ops = { .pmt = dwxgmac2_pmt, .set_umac_addr = dwxgmac2_set_umac_addr, .get_umac_addr = dwxgmac2_get_umac_addr, - .set_eee_mode = dwxgmac2_set_eee_mode, - .reset_eee_mode = dwxgmac2_reset_eee_mode, + .set_lpi_mode = dwxgmac2_set_lpi_mode, .set_eee_timer = dwxgmac2_set_eee_timer, .set_eee_pls = dwxgmac2_set_eee_pls, .debug = NULL, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 0f200b72c225..27c63a9fc163 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -306,6 +306,12 @@ struct stmmac_pps_cfg; struct stmmac_rss; struct stmmac_est; +enum stmmac_lpi_mode { + STMMAC_LPI_DISABLE, + STMMAC_LPI_FORCED, + STMMAC_LPI_TIMER, +}; + /* Helpers to program the MAC core */ struct stmmac_ops { /* MAC core initialization */ @@ -360,10 +366,9 @@ struct stmmac_ops { unsigned int reg_n); void (*get_umac_addr)(struct mac_device_info *hw, unsigned char *addr, unsigned int reg_n); - void (*set_eee_mode)(struct mac_device_info *hw, - bool en_tx_lpi_clockgating); - void (*reset_eee_mode)(struct mac_device_info *hw); - void (*set_eee_lpi_entry_timer)(struct mac_device_info *hw, u32 et); + int (*set_lpi_mode)(struct mac_device_info *hw, + enum stmmac_lpi_mode mode, + bool en_tx_lpi_clockgating, u32 et); void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw); void (*set_eee_pls)(struct mac_device_info *hw, int link); void (*debug)(struct stmmac_priv *priv, void __iomem *ioaddr, @@ -467,12 +472,8 @@ struct stmmac_ops { stmmac_do_void_callback(__priv, mac, set_umac_addr, __args) #define stmmac_get_umac_addr(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, get_umac_addr, __args) -#define stmmac_set_eee_mode(__priv, __args...) \ - stmmac_do_void_callback(__priv, mac, set_eee_mode, __args) -#define stmmac_reset_eee_mode(__priv, __args...) \ - stmmac_do_void_callback(__priv, mac, reset_eee_mode, __args) -#define stmmac_set_eee_lpi_timer(__priv, __args...) \ - stmmac_do_void_callback(__priv, mac, set_eee_lpi_entry_timer, __args) +#define stmmac_set_lpi_mode(__priv, __args...) \ + stmmac_do_callback(__priv, mac, set_lpi_mode, __args) #define stmmac_set_eee_timer(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, set_eee_timer, __args) #define stmmac_set_eee_pls(__priv, __args...) \ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c0ae7db96f46..d99ff4146512 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -390,16 +390,6 @@ static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue) return dirty; } -static void stmmac_disable_hw_lpi_timer(struct stmmac_priv *priv) -{ - stmmac_set_eee_lpi_timer(priv, priv->hw, 0); -} - -static void stmmac_enable_hw_lpi_timer(struct stmmac_priv *priv) -{ - stmmac_set_eee_lpi_timer(priv, priv->hw, priv->tx_lpi_timer); -} - static bool stmmac_eee_tx_busy(struct stmmac_priv *priv) { u32 tx_cnt = priv->plat->tx_queues_to_use; @@ -436,8 +426,9 @@ static void stmmac_try_to_start_sw_lpi(struct stmmac_priv *priv) /* Check and enter in LPI mode */ if (!priv->tx_path_in_lpi_mode) - stmmac_set_eee_mode(priv, priv->hw, - priv->plat->flags & STMMAC_FLAG_EN_TX_LPI_CLOCKGATING); + stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_FORCED, + priv->plat->flags & STMMAC_FLAG_EN_TX_LPI_CLOCKGATING, + 0); } /** @@ -447,8 +438,8 @@ static void stmmac_try_to_start_sw_lpi(struct stmmac_priv *priv) */ static void stmmac_stop_sw_lpi(struct stmmac_priv *priv) { - stmmac_reset_eee_mode(priv, priv->hw); del_timer_sync(&priv->eee_ctrl_timer); + stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_DISABLE, false, 0); priv->tx_path_in_lpi_mode = false; } @@ -466,74 +457,6 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) stmmac_try_to_start_sw_lpi(priv); } -/** - * stmmac_eee_init - init EEE - * @priv: driver private structure - * @active: indicates whether EEE should be enabled. - * Description: - * if the GMAC supports the EEE (from the HW cap reg) and the phy device - * can also manage EEE, this function enable the LPI state and start related - * timer. - */ -static void stmmac_eee_init(struct stmmac_priv *priv, bool active) -{ - priv->eee_active = active; - - /* Check if MAC core supports the EEE feature. */ - if (!priv->dma_cap.eee) { - priv->eee_enabled = false; - return; - } - - mutex_lock(&priv->lock); - - /* Check if it needs to be deactivated */ - if (!priv->eee_active) { - if (priv->eee_enabled) { - netdev_dbg(priv->dev, "disable EEE\n"); - priv->eee_sw_timer_en = false; - stmmac_disable_hw_lpi_timer(priv); - del_timer_sync(&priv->eee_ctrl_timer); - stmmac_set_eee_timer(priv, priv->hw, 0, - STMMAC_DEFAULT_TWT_LS); - if (priv->hw->xpcs) - xpcs_config_eee(priv->hw->xpcs, - priv->plat->mult_fact_100ns, - false); - } - priv->eee_enabled = false; - mutex_unlock(&priv->lock); - return; - } - - if (priv->eee_active && !priv->eee_enabled) { - stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS, - STMMAC_DEFAULT_TWT_LS); - if (priv->hw->xpcs) - xpcs_config_eee(priv->hw->xpcs, - priv->plat->mult_fact_100ns, - true); - } - - if (priv->plat->has_gmac4 && priv->tx_lpi_timer <= STMMAC_ET_MAX) { - /* Use hardware LPI mode */ - del_timer_sync(&priv->eee_ctrl_timer); - priv->tx_path_in_lpi_mode = false; - priv->eee_sw_timer_en = false; - stmmac_enable_hw_lpi_timer(priv); - } else { - /* Use software LPI mode */ - priv->eee_sw_timer_en = true; - stmmac_disable_hw_lpi_timer(priv); - stmmac_restart_sw_lpi_timer(priv); - } - - priv->eee_enabled = true; - - mutex_unlock(&priv->lock); - netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n"); -} - /* stmmac_get_tx_hwtstamp - get HW TX timestamps * @priv: driver private structure * @p : descriptor pointer @@ -1110,16 +1033,60 @@ static void stmmac_mac_disable_tx_lpi(struct phylink_config *config) { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); - stmmac_eee_init(priv, false); + priv->eee_active = false; + + mutex_lock(&priv->lock); + + priv->eee_enabled = false; + + netdev_dbg(priv->dev, "disable EEE\n"); + priv->eee_sw_timer_en = false; + del_timer_sync(&priv->eee_ctrl_timer); + stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_DISABLE, false, 0); + priv->tx_path_in_lpi_mode = false; + + stmmac_set_eee_timer(priv, priv->hw, 0, STMMAC_DEFAULT_TWT_LS); + if (priv->hw->xpcs) + xpcs_config_eee(priv->hw->xpcs, priv->plat->mult_fact_100ns, + false); + + mutex_unlock(&priv->lock); } static int stmmac_mac_enable_tx_lpi(struct phylink_config *config, u32 timer, bool tx_clk_stop) { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + int ret; priv->tx_lpi_timer = timer; - stmmac_eee_init(priv, true); + priv->eee_active = true; + + mutex_lock(&priv->lock); + + priv->eee_enabled = true; + + stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS, + STMMAC_DEFAULT_TWT_LS); + if (priv->hw->xpcs) + xpcs_config_eee(priv->hw->xpcs, priv->plat->mult_fact_100ns, + true); + + /* Try to cnfigure the hardware timer. */ + ret = stmmac_set_lpi_mode(priv, priv->hw, STMMAC_LPI_TIMER, + priv->plat->flags & STMMAC_FLAG_EN_TX_LPI_CLOCKGATING, + priv->tx_lpi_timer); + + if (ret) { + /* Hardware timer mode not supported, or value out of range. + * Fall back to using software LPI mode + */ + priv->eee_sw_timer_en = true; + stmmac_restart_sw_lpi_timer(priv); + } + + mutex_unlock(&priv->lock); + netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n"); return 0; } diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 35d96c633a33..7502214cc7d5 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -28,6 +28,7 @@ config XILINX_AXI_EMAC depends on HAS_IOMEM depends on XILINX_DMA select PHYLINK + select DIMLIB help This driver supports the 10/100/1000 Ethernet from Xilinx for the AXI bus interface used in Xilinx Virtex FPGAs and Soc's. diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index a3f4f3e42587..5ff742103beb 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -9,6 +9,7 @@ #ifndef XILINX_AXIENET_H #define XILINX_AXIENET_H +#include <linux/dim.h> #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/interrupt.h> @@ -112,9 +113,6 @@ #define XAXIDMA_DELAY_MASK 0xFF000000 /* Delay timeout counter */ #define XAXIDMA_COALESCE_MASK 0x00FF0000 /* Coalesce counter */ -#define XAXIDMA_DELAY_SHIFT 24 -#define XAXIDMA_COALESCE_SHIFT 16 - #define XAXIDMA_IRQ_IOC_MASK 0x00001000 /* Completion intr */ #define XAXIDMA_IRQ_DELAY_MASK 0x00002000 /* Delay interrupt */ #define XAXIDMA_IRQ_ERROR_MASK 0x00004000 /* Error interrupt */ @@ -126,8 +124,7 @@ /* Default TX/RX Threshold and delay timer values for SGDMA mode */ #define XAXIDMA_DFT_TX_THRESHOLD 24 #define XAXIDMA_DFT_TX_USEC 50 -#define XAXIDMA_DFT_RX_THRESHOLD 1 -#define XAXIDMA_DFT_RX_USEC 50 +#define XAXIDMA_DFT_RX_USEC 16 #define XAXIDMA_BD_CTRL_TXSOF_MASK 0x08000000 /* First tx packet */ #define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */ @@ -487,7 +484,12 @@ struct skbuf_dma_descriptor { * @regs: Base address for the axienet_local device address space * @dma_regs: Base address for the axidma device address space * @napi_rx: NAPI RX control structure + * @rx_dim: DIM state for the receive queue + * @rx_dim_enabled: Whether DIM is enabled or not + * @rx_irqs: Number of interrupts + * @rx_cr_lock: Lock protecting @rx_dma_cr, its register, and @rx_dma_started * @rx_dma_cr: Nominal content of RX DMA control register + * @rx_dma_started: Set when RX DMA is started * @rx_bd_v: Virtual address of the RX buffer descriptor ring * @rx_bd_p: Physical address(start address) of the RX buffer descr. ring * @rx_bd_num: Size of RX buffer descriptor ring @@ -497,7 +499,9 @@ struct skbuf_dma_descriptor { * @rx_bytes: RX byte count for statistics * @rx_stat_sync: Synchronization object for RX stats * @napi_tx: NAPI TX control structure + * @tx_cr_lock: Lock protecting @tx_dma_cr, its register, and @tx_dma_started * @tx_dma_cr: Nominal content of TX DMA control register + * @tx_dma_started: Set when TX DMA is started * @tx_bd_v: Virtual address of the TX buffer descriptor ring * @tx_bd_p: Physical address(start address) of the TX buffer descr. ring * @tx_bd_num: Size of TX buffer descriptor ring @@ -532,10 +536,6 @@ struct skbuf_dma_descriptor { * supported, the maximum frame size would be 9k. Else it is * 1522 bytes (assuming support for basic VLAN) * @rxmem: Stores rx memory size for jumbo frame handling. - * @coalesce_count_rx: Store the irq coalesce on RX side. - * @coalesce_usec_rx: IRQ coalesce delay for RX - * @coalesce_count_tx: Store the irq coalesce on TX side. - * @coalesce_usec_tx: IRQ coalesce delay for TX * @use_dmaengine: flag to check dmaengine framework usage. * @tx_chan: TX DMA channel. * @rx_chan: RX DMA channel. @@ -569,7 +569,12 @@ struct axienet_local { void __iomem *dma_regs; struct napi_struct napi_rx; + struct dim rx_dim; + bool rx_dim_enabled; + u16 rx_irqs; + spinlock_t rx_cr_lock; u32 rx_dma_cr; + bool rx_dma_started; struct axidma_bd *rx_bd_v; dma_addr_t rx_bd_p; u32 rx_bd_num; @@ -579,7 +584,9 @@ struct axienet_local { struct u64_stats_sync rx_stat_sync; struct napi_struct napi_tx; + spinlock_t tx_cr_lock; u32 tx_dma_cr; + bool tx_dma_started; struct axidma_bd *tx_bd_v; dma_addr_t tx_bd_p; u32 tx_bd_num; @@ -610,10 +617,6 @@ struct axienet_local { u32 max_frm_size; u32 rxmem; - u32 coalesce_count_rx; - u32 coalesce_usec_rx; - u32 coalesce_count_tx; - u32 coalesce_usec_tx; u8 use_dmaengine; struct dma_chan *tx_chan; struct dma_chan *rx_chan; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 9e7fa012e4fa..0673b2694e4c 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -223,23 +223,62 @@ static void axienet_dma_bd_release(struct net_device *ndev) lp->rx_bd_p); } +static u64 axienet_dma_rate(struct axienet_local *lp) +{ + if (lp->axi_clk) + return clk_get_rate(lp->axi_clk); + return 125000000; /* arbitrary guess if no clock rate set */ +} + /** - * axienet_usec_to_timer - Calculate IRQ delay timer value - * @lp: Pointer to the axienet_local structure - * @coalesce_usec: Microseconds to convert into timer value + * axienet_calc_cr() - Calculate control register value + * @lp: Device private data + * @count: Number of completions before an interrupt + * @usec: Microseconds after the last completion before an interrupt + * + * Calculate a control register value based on the coalescing settings. The + * run/stop bit is not set. */ -static u32 axienet_usec_to_timer(struct axienet_local *lp, u32 coalesce_usec) +static u32 axienet_calc_cr(struct axienet_local *lp, u32 count, u32 usec) { - u32 result; - u64 clk_rate = 125000000; /* arbitrary guess if no clock rate set */ + u32 cr; - if (lp->axi_clk) - clk_rate = clk_get_rate(lp->axi_clk); + cr = FIELD_PREP(XAXIDMA_COALESCE_MASK, count) | XAXIDMA_IRQ_IOC_MASK | + XAXIDMA_IRQ_ERROR_MASK; + /* Only set interrupt delay timer if not generating an interrupt on + * the first packet. Otherwise leave at 0 to disable delay interrupt. + */ + if (count > 1) { + u64 clk_rate = axienet_dma_rate(lp); + u32 timer; + + /* 1 Timeout Interval = 125 * (clock period of SG clock) */ + timer = DIV64_U64_ROUND_CLOSEST((u64)usec * clk_rate, + XAXIDMA_DELAY_SCALE); - /* 1 Timeout Interval = 125 * (clock period of SG clock) */ - result = DIV64_U64_ROUND_CLOSEST((u64)coalesce_usec * clk_rate, - XAXIDMA_DELAY_SCALE); - return min(result, FIELD_MAX(XAXIDMA_DELAY_MASK)); + timer = min(timer, FIELD_MAX(XAXIDMA_DELAY_MASK)); + cr |= FIELD_PREP(XAXIDMA_DELAY_MASK, timer) | + XAXIDMA_IRQ_DELAY_MASK; + } + + return cr; +} + +/** + * axienet_coalesce_params() - Extract coalesce parameters from the CR + * @lp: Device private data + * @cr: The control register to parse + * @count: Number of packets before an interrupt + * @usec: Idle time (in usec) before an interrupt + */ +static void axienet_coalesce_params(struct axienet_local *lp, u32 cr, + u32 *count, u32 *usec) +{ + u64 clk_rate = axienet_dma_rate(lp); + u64 timer = FIELD_GET(XAXIDMA_DELAY_MASK, cr); + + *count = FIELD_GET(XAXIDMA_COALESCE_MASK, cr); + *usec = DIV64_U64_ROUND_CLOSEST(timer * XAXIDMA_DELAY_SCALE, clk_rate); } /** @@ -248,30 +287,12 @@ static u32 axienet_usec_to_timer(struct axienet_local *lp, u32 coalesce_usec) */ static void axienet_dma_start(struct axienet_local *lp) { + spin_lock_irq(&lp->rx_cr_lock); + /* Start updating the Rx channel control register */ - lp->rx_dma_cr = (lp->coalesce_count_rx << XAXIDMA_COALESCE_SHIFT) | - XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK; - /* Only set interrupt delay timer if not generating an interrupt on - * the first RX packet. Otherwise leave at 0 to disable delay interrupt. - */ - if (lp->coalesce_count_rx > 1) - lp->rx_dma_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_rx) - << XAXIDMA_DELAY_SHIFT) | - XAXIDMA_IRQ_DELAY_MASK; + lp->rx_dma_cr &= ~XAXIDMA_CR_RUNSTOP_MASK; axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr); - /* Start updating the Tx channel control register */ - lp->tx_dma_cr = (lp->coalesce_count_tx << XAXIDMA_COALESCE_SHIFT) | - XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK; - /* Only set interrupt delay timer if not generating an interrupt on - * the first TX packet. Otherwise leave at 0 to disable delay interrupt. - */ - if (lp->coalesce_count_tx > 1) - lp->tx_dma_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_tx) - << XAXIDMA_DELAY_SHIFT) | - XAXIDMA_IRQ_DELAY_MASK; - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr); - /* Populate the tail pointer and bring the Rx Axi DMA engine out of * halted state. This will make the Rx side ready for reception. */ @@ -280,6 +301,14 @@ static void axienet_dma_start(struct axienet_local *lp) axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr); axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1))); + lp->rx_dma_started = true; + + spin_unlock_irq(&lp->rx_cr_lock); + spin_lock_irq(&lp->tx_cr_lock); + + /* Start updating the Tx channel control register */ + lp->tx_dma_cr &= ~XAXIDMA_CR_RUNSTOP_MASK; + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr); /* Write to the RS (Run-stop) bit in the Tx channel control register. * Tx channel is now ready to run. But only after we write to the @@ -288,6 +317,9 @@ static void axienet_dma_start(struct axienet_local *lp) axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); lp->tx_dma_cr |= XAXIDMA_CR_RUNSTOP_MASK; axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr); + lp->tx_dma_started = true; + + spin_unlock_irq(&lp->tx_cr_lock); } /** @@ -623,14 +655,22 @@ static void axienet_dma_stop(struct axienet_local *lp) int count; u32 cr, sr; - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); + spin_lock_irq(&lp->rx_cr_lock); + + cr = lp->rx_dma_cr & ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + lp->rx_dma_started = false; + + spin_unlock_irq(&lp->rx_cr_lock); synchronize_irq(lp->rx_irq); - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); + spin_lock_irq(&lp->tx_cr_lock); + + cr = lp->tx_dma_cr & ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + lp->tx_dma_started = false; + + spin_unlock_irq(&lp->tx_cr_lock); synchronize_irq(lp->tx_irq); /* Give DMAs a chance to halt gracefully */ @@ -979,7 +1019,9 @@ static int axienet_tx_poll(struct napi_struct *napi, int budget) * cause an immediate interrupt if any TX packets are * already pending. */ + spin_lock_irq(&lp->tx_cr_lock); axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr); + spin_unlock_irq(&lp->tx_cr_lock); } return packets; } @@ -1241,11 +1283,25 @@ static int axienet_rx_poll(struct napi_struct *napi, int budget) axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, tail_p); if (packets < budget && napi_complete_done(napi, packets)) { + if (READ_ONCE(lp->rx_dim_enabled)) { + struct dim_sample sample = { + .time = ktime_get(), + /* Safe because we are the only writer */ + .pkt_ctr = u64_stats_read(&lp->rx_packets), + .byte_ctr = u64_stats_read(&lp->rx_bytes), + .event_ctr = READ_ONCE(lp->rx_irqs), + }; + + net_dim(&lp->rx_dim, &sample); + } + /* Re-enable RX completion interrupts. This should * cause an immediate interrupt if any RX packets are * already pending. */ + spin_lock_irq(&lp->rx_cr_lock); axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr); + spin_unlock_irq(&lp->rx_cr_lock); } return packets; } @@ -1283,11 +1339,14 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) /* Disable further TX completion interrupts and schedule * NAPI to handle the completions. */ - u32 cr = lp->tx_dma_cr; - - cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); if (napi_schedule_prep(&lp->napi_tx)) { + u32 cr; + + spin_lock(&lp->tx_cr_lock); + cr = lp->tx_dma_cr; + cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + spin_unlock(&lp->tx_cr_lock); __napi_schedule(&lp->napi_tx); } } @@ -1328,11 +1387,16 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) /* Disable further RX completion interrupts and schedule * NAPI receive. */ - u32 cr = lp->rx_dma_cr; - - cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); + WRITE_ONCE(lp->rx_irqs, READ_ONCE(lp->rx_irqs) + 1); if (napi_schedule_prep(&lp->napi_rx)) { + u32 cr; + + spin_lock(&lp->rx_cr_lock); + cr = lp->rx_dma_cr; + cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + spin_unlock(&lp->rx_cr_lock); + __napi_schedule(&lp->napi_rx); } } @@ -1625,6 +1689,7 @@ err_free_eth_irq: if (lp->eth_irq > 0) free_irq(lp->eth_irq, ndev); err_phy: + cancel_work_sync(&lp->rx_dim.work); cancel_delayed_work_sync(&lp->stats_work); phylink_stop(lp->phylink); phylink_disconnect_phy(lp->phylink); @@ -1654,6 +1719,7 @@ static int axienet_stop(struct net_device *ndev) napi_disable(&lp->napi_rx); } + cancel_work_sync(&lp->rx_dim.work); cancel_delayed_work_sync(&lp->stats_work); phylink_stop(lp->phylink); @@ -1999,6 +2065,87 @@ axienet_ethtools_set_pauseparam(struct net_device *ndev, } /** + * axienet_update_coalesce_rx() - Set RX CR + * @lp: Device private data + * @cr: Value to write to the RX CR + * @mask: Bits to set from @cr + */ +static void axienet_update_coalesce_rx(struct axienet_local *lp, u32 cr, + u32 mask) +{ + spin_lock_irq(&lp->rx_cr_lock); + lp->rx_dma_cr &= ~mask; + lp->rx_dma_cr |= cr; + /* If DMA isn't started, then the settings will be applied the next + * time dma_start() is called. + */ + if (lp->rx_dma_started) { + u32 reg = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); + + /* Don't enable IRQs if they are disabled by NAPI */ + if (reg & XAXIDMA_IRQ_ALL_MASK) + cr = lp->rx_dma_cr; + else + cr = lp->rx_dma_cr & ~XAXIDMA_IRQ_ALL_MASK; + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + } + spin_unlock_irq(&lp->rx_cr_lock); +} + +/** + * axienet_dim_coalesce_count_rx() - RX coalesce count for DIM + * @lp: Device private data + */ +static u32 axienet_dim_coalesce_count_rx(struct axienet_local *lp) +{ + return min(1 << (lp->rx_dim.profile_ix << 1), 255); +} + +/** + * axienet_rx_dim_work() - Adjust RX DIM settings + * @work: The work struct + */ +static void axienet_rx_dim_work(struct work_struct *work) +{ + struct axienet_local *lp = + container_of(work, struct axienet_local, rx_dim.work); + u32 cr = axienet_calc_cr(lp, axienet_dim_coalesce_count_rx(lp), 0); + u32 mask = XAXIDMA_COALESCE_MASK | XAXIDMA_IRQ_IOC_MASK | + XAXIDMA_IRQ_ERROR_MASK; + + axienet_update_coalesce_rx(lp, cr, mask); + lp->rx_dim.state = DIM_START_MEASURE; +} + +/** + * axienet_update_coalesce_tx() - Set TX CR + * @lp: Device private data + * @cr: Value to write to the TX CR + * @mask: Bits to set from @cr + */ +static void axienet_update_coalesce_tx(struct axienet_local *lp, u32 cr, + u32 mask) +{ + spin_lock_irq(&lp->tx_cr_lock); + lp->tx_dma_cr &= ~mask; + lp->tx_dma_cr |= cr; + /* If DMA isn't started, then the settings will be applied the next + * time dma_start() is called. + */ + if (lp->tx_dma_started) { + u32 reg = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); + + /* Don't enable IRQs if they are disabled by NAPI */ + if (reg & XAXIDMA_IRQ_ALL_MASK) + cr = lp->tx_dma_cr; + else + cr = lp->tx_dma_cr & ~XAXIDMA_IRQ_ALL_MASK; + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + } + spin_unlock_irq(&lp->tx_cr_lock); +} + +/** * axienet_ethtools_get_coalesce - Get DMA interrupt coalescing count. * @ndev: Pointer to net_device structure * @ecoalesce: Pointer to ethtool_coalesce structure @@ -2018,11 +2165,23 @@ axienet_ethtools_get_coalesce(struct net_device *ndev, struct netlink_ext_ack *extack) { struct axienet_local *lp = netdev_priv(ndev); - - ecoalesce->rx_max_coalesced_frames = lp->coalesce_count_rx; - ecoalesce->rx_coalesce_usecs = lp->coalesce_usec_rx; - ecoalesce->tx_max_coalesced_frames = lp->coalesce_count_tx; - ecoalesce->tx_coalesce_usecs = lp->coalesce_usec_tx; + u32 cr; + + ecoalesce->use_adaptive_rx_coalesce = lp->rx_dim_enabled; + + spin_lock_irq(&lp->rx_cr_lock); + cr = lp->rx_dma_cr; + spin_unlock_irq(&lp->rx_cr_lock); + axienet_coalesce_params(lp, cr, + &ecoalesce->rx_max_coalesced_frames, + &ecoalesce->rx_coalesce_usecs); + + spin_lock_irq(&lp->tx_cr_lock); + cr = lp->tx_dma_cr; + spin_unlock_irq(&lp->tx_cr_lock); + axienet_coalesce_params(lp, cr, + &ecoalesce->tx_max_coalesced_frames, + &ecoalesce->tx_coalesce_usecs); return 0; } @@ -2046,12 +2205,9 @@ axienet_ethtools_set_coalesce(struct net_device *ndev, struct netlink_ext_ack *extack) { struct axienet_local *lp = netdev_priv(ndev); - - if (netif_running(ndev)) { - NL_SET_ERR_MSG(extack, - "Please stop netif before applying configuration"); - return -EBUSY; - } + bool new_dim = ecoalesce->use_adaptive_rx_coalesce; + bool old_dim = lp->rx_dim_enabled; + u32 cr, mask = ~XAXIDMA_CR_RUNSTOP_MASK; if (ecoalesce->rx_max_coalesced_frames > 255 || ecoalesce->tx_max_coalesced_frames > 255) { @@ -2065,7 +2221,7 @@ axienet_ethtools_set_coalesce(struct net_device *ndev, return -EINVAL; } - if ((ecoalesce->rx_max_coalesced_frames > 1 && + if (((ecoalesce->rx_max_coalesced_frames > 1 || new_dim) && !ecoalesce->rx_coalesce_usecs) || (ecoalesce->tx_max_coalesced_frames > 1 && !ecoalesce->tx_coalesce_usecs)) { @@ -2074,11 +2230,31 @@ axienet_ethtools_set_coalesce(struct net_device *ndev, return -EINVAL; } - lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames; - lp->coalesce_usec_rx = ecoalesce->rx_coalesce_usecs; - lp->coalesce_count_tx = ecoalesce->tx_max_coalesced_frames; - lp->coalesce_usec_tx = ecoalesce->tx_coalesce_usecs; + if (new_dim && !old_dim) { + cr = axienet_calc_cr(lp, axienet_dim_coalesce_count_rx(lp), + ecoalesce->rx_coalesce_usecs); + } else if (!new_dim) { + if (old_dim) { + WRITE_ONCE(lp->rx_dim_enabled, false); + napi_synchronize(&lp->napi_rx); + flush_work(&lp->rx_dim.work); + } + + cr = axienet_calc_cr(lp, ecoalesce->rx_max_coalesced_frames, + ecoalesce->rx_coalesce_usecs); + } else { + /* Dummy value for count just to calculate timer */ + cr = axienet_calc_cr(lp, 2, ecoalesce->rx_coalesce_usecs); + mask = XAXIDMA_DELAY_MASK | XAXIDMA_IRQ_DELAY_MASK; + } + + axienet_update_coalesce_rx(lp, cr, mask); + if (new_dim && !old_dim) + WRITE_ONCE(lp->rx_dim_enabled, true); + cr = axienet_calc_cr(lp, ecoalesce->tx_max_coalesced_frames, + ecoalesce->tx_coalesce_usecs); + axienet_update_coalesce_tx(lp, cr, ~XAXIDMA_CR_RUNSTOP_MASK); return 0; } @@ -2316,7 +2492,8 @@ axienet_ethtool_get_rmon_stats(struct net_device *dev, static const struct ethtool_ops axienet_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | - ETHTOOL_COALESCE_USECS, + ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_drvinfo = axienet_ethtools_get_drvinfo, .get_regs_len = axienet_ethtools_get_regs_len, .get_regs = axienet_ethtools_get_regs, @@ -2858,10 +3035,15 @@ static int axienet_probe(struct platform_device *pdev) axienet_set_mac_address(ndev, NULL); } - lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; - lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; - lp->coalesce_usec_rx = XAXIDMA_DFT_RX_USEC; - lp->coalesce_usec_tx = XAXIDMA_DFT_TX_USEC; + spin_lock_init(&lp->rx_cr_lock); + spin_lock_init(&lp->tx_cr_lock); + INIT_WORK(&lp->rx_dim.work, axienet_rx_dim_work); + lp->rx_dim_enabled = true; + lp->rx_dim.profile_ix = 1; + lp->rx_dma_cr = axienet_calc_cr(lp, axienet_dim_coalesce_count_rx(lp), + XAXIDMA_DFT_RX_USEC); + lp->tx_dma_cr = axienet_calc_cr(lp, XAXIDMA_DFT_TX_THRESHOLD, + XAXIDMA_DFT_TX_USEC); ret = axienet_mdio_setup(lp); if (ret) diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c index 00ebc25d0b22..f03797103c6a 100644 --- a/drivers/net/hamradio/baycom_par.c +++ b/drivers/net/hamradio/baycom_par.c @@ -427,7 +427,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data, break; case HDLCDRVCTL_GETMODE: - strcpy(hi->data.modename, bc->options ? "par96" : "picpar"); + strscpy(hi->data.modename, bc->options ? "par96" : "picpar"); if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; @@ -439,7 +439,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data, return baycom_setmode(bc, hi->data.modename); case HDLCDRVCTL_MODELIST: - strcpy(hi->data.modename, "par96,picpar"); + strscpy(hi->data.modename, "par96,picpar"); if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c index 799f8ece7824..ee5bd3c12040 100644 --- a/drivers/net/hamradio/baycom_ser_fdx.c +++ b/drivers/net/hamradio/baycom_ser_fdx.c @@ -531,7 +531,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data, return baycom_setmode(bc, hi->data.modename); case HDLCDRVCTL_MODELIST: - strcpy(hi->data.modename, "ser12,ser3,ser24"); + strscpy(hi->data.modename, "ser12,ser3,ser24"); if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c index 5d1ab4840753..05bdad214799 100644 --- a/drivers/net/hamradio/baycom_ser_hdx.c +++ b/drivers/net/hamradio/baycom_ser_hdx.c @@ -570,7 +570,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data, break; case HDLCDRVCTL_GETMODE: - strcpy(hi->data.modename, "ser12"); + strscpy(hi->data.modename, "ser12"); if (bc->opt_dcd <= 0) strcat(hi->data.modename, (!bc->opt_dcd) ? "*" : (bc->opt_dcd == -2) ? "@" : "+"); if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) @@ -584,7 +584,7 @@ static int baycom_ioctl(struct net_device *dev, void __user *data, return baycom_setmode(bc, hi->data.modename); case HDLCDRVCTL_MODELIST: - strcpy(hi->data.modename, "ser12"); + strscpy(hi->data.modename, "ser12"); if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl))) return -EFAULT; return 0; diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 86ab4a42769a..f77eddf22185 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -45,12 +45,12 @@ MODULE_DESCRIPTION("Console driver for network interfaces"); MODULE_LICENSE("GPL"); #define MAX_PARAM_LENGTH 256 -#define MAX_USERDATA_ENTRY_LENGTH 256 -#define MAX_USERDATA_VALUE_LENGTH 200 +#define MAX_EXTRADATA_ENTRY_LEN 256 +#define MAX_EXTRADATA_VALUE_LEN 200 /* The number 3 comes from userdata entry format characters (' ', '=', '\n') */ -#define MAX_USERDATA_NAME_LENGTH (MAX_USERDATA_ENTRY_LENGTH - \ - MAX_USERDATA_VALUE_LENGTH - 3) -#define MAX_USERDATA_ITEMS 16 +#define MAX_EXTRADATA_NAME_LEN (MAX_EXTRADATA_ENTRY_LEN - \ + MAX_EXTRADATA_VALUE_LEN - 3) +#define MAX_EXTRADATA_ITEMS 16 #define MAX_PRINT_CHUNK 1000 static char config[MAX_PARAM_LENGTH]; @@ -97,13 +97,23 @@ struct netconsole_target_stats { struct u64_stats_sync syncp; }; +/* Features enabled in sysdata. Contrary to userdata, this data is populated by + * the kernel. The fields are designed as bitwise flags, allowing multiple + * features to be set in sysdata_fields. + */ +enum sysdata_feature { + /* Populate the CPU that sends the message */ + CPU_NR = BIT(0), +}; + /** * struct netconsole_target - Represents a configured netconsole target. * @list: Links this target into the target_list. * @group: Links us into the configfs subsystem hierarchy. * @userdata_group: Links to the userdata configfs hierarchy - * @userdata_complete: Cached, formatted string of append - * @userdata_length: String length of userdata_complete + * @extradata_complete: Cached, formatted string of append + * @userdata_length: String length of usedata in extradata_complete. + * @sysdata_fields: Sysdata features enabled. * @stats: Packet send stats for the target. Used for debugging. * @enabled: On / off knob to enable / disable target. * Visible from userspace (read-write). @@ -123,20 +133,25 @@ struct netconsole_target_stats { * remote_ip (read-write) * local_mac (read-only) * remote_mac (read-write) + * @buf: The buffer used to send the full msg to the network stack */ struct netconsole_target { struct list_head list; #ifdef CONFIG_NETCONSOLE_DYNAMIC struct config_group group; struct config_group userdata_group; - char userdata_complete[MAX_USERDATA_ENTRY_LENGTH * MAX_USERDATA_ITEMS]; + char extradata_complete[MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS]; size_t userdata_length; + /* bit-wise with sysdata_feature bits */ + u32 sysdata_fields; #endif struct netconsole_target_stats stats; bool enabled; bool extended; bool release; struct netpoll np; + /* protected by target_list_lock */ + char buf[MAX_PRINT_CHUNK]; }; #ifdef CONFIG_NETCONSOLE_DYNAMIC @@ -396,6 +411,19 @@ static ssize_t transmit_errors_show(struct config_item *item, char *buf) return sysfs_emit(buf, "%llu\n", xmit_drop_count + enomem_count); } +/* configfs helper to display if cpu_nr sysdata feature is enabled */ +static ssize_t sysdata_cpu_nr_enabled_show(struct config_item *item, char *buf) +{ + struct netconsole_target *nt = to_target(item->ci_parent); + bool cpu_nr_enabled; + + mutex_lock(&dynamic_netconsole_mutex); + cpu_nr_enabled = !!(nt->sysdata_fields & CPU_NR); + mutex_unlock(&dynamic_netconsole_mutex); + + return sysfs_emit(buf, "%d\n", cpu_nr_enabled); +} + /* * This one is special -- targets created through the configfs interface * are not enabled (and the corresponding netpoll activated) by default. @@ -659,6 +687,24 @@ out_unlock: return ret; } +/* Count number of entries we have in extradata. + * This is important because the extradata_complete only supports + * MAX_EXTRADATA_ITEMS entries. Before enabling any new {user,sys}data + * feature, number of entries needs to checked for available space. + */ +static size_t count_extradata_entries(struct netconsole_target *nt) +{ + size_t entries; + + /* Userdata entries */ + entries = list_count_nodes(&nt->userdata_group.cg_children); + /* Plus sysdata entries */ + if (nt->sysdata_fields & CPU_NR) + entries += 1; + + return entries; +} + static ssize_t remote_mac_store(struct config_item *item, const char *buf, size_t count) { @@ -687,7 +733,7 @@ out_unlock: struct userdatum { struct config_item item; - char value[MAX_USERDATA_VALUE_LENGTH]; + char value[MAX_EXTRADATA_VALUE_LEN]; }; static struct userdatum *to_userdatum(struct config_item *item) @@ -724,13 +770,13 @@ static void update_userdata(struct netconsole_target *nt) /* Clear the current string in case the last userdatum was deleted */ nt->userdata_length = 0; - nt->userdata_complete[0] = 0; + nt->extradata_complete[0] = 0; list_for_each(entry, &nt->userdata_group.cg_children) { struct userdatum *udm_item; struct config_item *item; - if (WARN_ON_ONCE(child_count >= MAX_USERDATA_ITEMS)) + if (WARN_ON_ONCE(child_count >= MAX_EXTRADATA_ITEMS)) break; child_count++; @@ -738,19 +784,19 @@ static void update_userdata(struct netconsole_target *nt) udm_item = to_userdatum(item); /* Skip userdata with no value set */ - if (strnlen(udm_item->value, MAX_USERDATA_VALUE_LENGTH) == 0) + if (strnlen(udm_item->value, MAX_EXTRADATA_VALUE_LEN) == 0) continue; - /* This doesn't overflow userdata_complete since it will write - * one entry length (1/MAX_USERDATA_ITEMS long), entry count is + /* This doesn't overflow extradata_complete since it will write + * one entry length (1/MAX_EXTRADATA_ITEMS long), entry count is * checked to not exceed MAX items with child_count above */ - complete_idx += scnprintf(&nt->userdata_complete[complete_idx], - MAX_USERDATA_ENTRY_LENGTH, " %s=%s\n", + complete_idx += scnprintf(&nt->extradata_complete[complete_idx], + MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n", item->ci_name, udm_item->value); } - nt->userdata_length = strnlen(nt->userdata_complete, - sizeof(nt->userdata_complete)); + nt->userdata_length = strnlen(nt->extradata_complete, + sizeof(nt->extradata_complete)); } static ssize_t userdatum_value_store(struct config_item *item, const char *buf, @@ -761,7 +807,7 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, struct userdata *ud; ssize_t ret; - if (count > MAX_USERDATA_VALUE_LENGTH) + if (count > MAX_EXTRADATA_VALUE_LEN) return -EMSGSIZE; mutex_lock(&dynamic_netconsole_mutex); @@ -780,7 +826,62 @@ out_unlock: return ret; } +/* disable_sysdata_feature - Disable sysdata feature and clean sysdata + * @nt: target that is disabling the feature + * @feature: feature being disabled + */ +static void disable_sysdata_feature(struct netconsole_target *nt, + enum sysdata_feature feature) +{ + nt->sysdata_fields &= ~feature; + nt->extradata_complete[nt->userdata_length] = 0; +} + +/* configfs helper to sysdata cpu_nr feature */ +static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, + const char *buf, size_t count) +{ + struct netconsole_target *nt = to_target(item->ci_parent); + bool cpu_nr_enabled, curr; + ssize_t ret; + + ret = kstrtobool(buf, &cpu_nr_enabled); + if (ret) + return ret; + + mutex_lock(&dynamic_netconsole_mutex); + curr = nt->sysdata_fields & CPU_NR; + if (cpu_nr_enabled == curr) + /* no change requested */ + goto unlock_ok; + + if (cpu_nr_enabled && + count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) { + /* user wants the new feature, but there is no space in the + * buffer. + */ + ret = -ENOSPC; + goto unlock; + } + + if (cpu_nr_enabled) + nt->sysdata_fields |= CPU_NR; + else + /* This is special because extradata_complete might have + * remaining data from previous sysdata, and it needs to be + * cleaned. + */ + disable_sysdata_feature(nt, CPU_NR); + +unlock_ok: + ret = strnlen(buf, count); +unlock: + mutex_unlock(&dynamic_netconsole_mutex); + return ret; +} + CONFIGFS_ATTR(userdatum_, value); +CONFIGFS_ATTR(sysdata_, cpu_nr_enabled); static struct configfs_attribute *userdatum_attrs[] = { &userdatum_attr_value, @@ -808,15 +909,13 @@ static struct config_item *userdatum_make_item(struct config_group *group, struct netconsole_target *nt; struct userdatum *udm; struct userdata *ud; - size_t child_count; - if (strlen(name) > MAX_USERDATA_NAME_LENGTH) + if (strlen(name) > MAX_EXTRADATA_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); ud = to_userdata(&group->cg_item); nt = userdata_to_target(ud); - child_count = list_count_nodes(&nt->userdata_group.cg_children); - if (child_count >= MAX_USERDATA_ITEMS) + if (count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) return ERR_PTR(-ENOSPC); udm = kzalloc(sizeof(*udm), GFP_KERNEL); @@ -842,6 +941,7 @@ static void userdatum_drop(struct config_group *group, struct config_item *item) } static struct configfs_attribute *userdata_attrs[] = { + &sysdata_attr_cpu_nr_enabled, NULL, }; @@ -1017,6 +1117,40 @@ static void populate_configfs_item(struct netconsole_target *nt, init_target_config_group(nt, target_name); } +/* + * prepare_extradata - append sysdata at extradata_complete in runtime + * @nt: target to send message to + */ +static int prepare_extradata(struct netconsole_target *nt) +{ + int sysdata_len, extradata_len; + + /* userdata was appended when configfs write helper was called + * by update_userdata(). + */ + extradata_len = nt->userdata_length; + + if (!(nt->sysdata_fields & CPU_NR)) + goto out; + + /* Append cpu=%d at extradata_complete after userdata str */ + sysdata_len = scnprintf(&nt->extradata_complete[nt->userdata_length], + MAX_EXTRADATA_ENTRY_LEN, " cpu=%u\n", + raw_smp_processor_id()); + + extradata_len += sysdata_len; + + WARN_ON_ONCE(extradata_len > + MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS); + +out: + return extradata_len; +} +#else /* CONFIG_NETCONSOLE_DYNAMIC not set */ +static int prepare_extradata(struct netconsole_target *nt) +{ + return 0; +} #endif /* CONFIG_NETCONSOLE_DYNAMIC */ /* Handle network interface device notifications */ @@ -1117,29 +1251,28 @@ static void send_msg_no_fragmentation(struct netconsole_target *nt, int msg_len, int release_len) { - static char buf[MAX_PRINT_CHUNK]; /* protected by target_list_lock */ - const char *userdata = NULL; + const char *extradata = NULL; const char *release; #ifdef CONFIG_NETCONSOLE_DYNAMIC - userdata = nt->userdata_complete; + extradata = nt->extradata_complete; #endif if (release_len) { release = init_utsname()->release; - scnprintf(buf, MAX_PRINT_CHUNK, "%s,%s", release, msg); + scnprintf(nt->buf, MAX_PRINT_CHUNK, "%s,%s", release, msg); msg_len += release_len; } else { - memcpy(buf, msg, msg_len); + memcpy(nt->buf, msg, msg_len); } - if (userdata) - msg_len += scnprintf(&buf[msg_len], + if (extradata) + msg_len += scnprintf(&nt->buf[msg_len], MAX_PRINT_CHUNK - msg_len, - "%s", userdata); + "%s", extradata); - send_udp(nt, buf, msg_len); + send_udp(nt, nt->buf, msg_len); } static void append_release(char *buf) @@ -1150,28 +1283,27 @@ static void append_release(char *buf) scnprintf(buf, MAX_PRINT_CHUNK, "%s,", release); } -static void send_fragmented_body(struct netconsole_target *nt, char *buf, +static void send_fragmented_body(struct netconsole_target *nt, const char *msgbody, int header_len, - int msgbody_len) + int msgbody_len, int extradata_len) { - const char *userdata = NULL; + int sent_extradata, preceding_bytes; + const char *extradata = NULL; int body_len, offset = 0; - int userdata_len = 0; #ifdef CONFIG_NETCONSOLE_DYNAMIC - userdata = nt->userdata_complete; - userdata_len = nt->userdata_length; + extradata = nt->extradata_complete; #endif /* body_len represents the number of bytes that will be sent. This is * bigger than MAX_PRINT_CHUNK, thus, it will be split in multiple * packets */ - body_len = msgbody_len + userdata_len; + body_len = msgbody_len + extradata_len; /* In each iteration of the while loop below, we send a packet * containing the header and a portion of the body. The body is - * composed of two parts: msgbody and userdata. We keep track of how + * composed of two parts: msgbody and extradata. We keep track of how * many bytes have been sent so far using the offset variable, which * ranges from 0 to the total length of the body. */ @@ -1181,7 +1313,7 @@ static void send_fragmented_body(struct netconsole_target *nt, char *buf, int this_offset = 0; int this_chunk = 0; - this_header += scnprintf(buf + this_header, + this_header += scnprintf(nt->buf + this_header, MAX_PRINT_CHUNK - this_header, ",ncfrag=%d/%d;", offset, body_len); @@ -1192,47 +1324,48 @@ static void send_fragmented_body(struct netconsole_target *nt, char *buf, MAX_PRINT_CHUNK - this_header); if (WARN_ON_ONCE(this_chunk <= 0)) return; - memcpy(buf + this_header, msgbody + offset, this_chunk); + memcpy(nt->buf + this_header, msgbody + offset, + this_chunk); this_offset += this_chunk; } /* msgbody was finally written, either in the previous * messages and/or in the current buf. Time to write - * the userdata. + * the extradata. */ msgbody_written |= offset + this_offset >= msgbody_len; - /* Msg body is fully written and there is pending userdata to - * write, append userdata in this chunk + /* Msg body is fully written and there is pending extradata to + * write, append extradata in this chunk */ if (msgbody_written && offset + this_offset < body_len) { /* Track how much user data was already sent. First * time here, sent_userdata is zero */ - int sent_userdata = (offset + this_offset) - msgbody_len; + sent_extradata = (offset + this_offset) - msgbody_len; /* offset of bytes used in current buf */ - int preceding_bytes = this_chunk + this_header; + preceding_bytes = this_chunk + this_header; - if (WARN_ON_ONCE(sent_userdata < 0)) + if (WARN_ON_ONCE(sent_extradata < 0)) return; - this_chunk = min(userdata_len - sent_userdata, + this_chunk = min(extradata_len - sent_extradata, MAX_PRINT_CHUNK - preceding_bytes); if (WARN_ON_ONCE(this_chunk < 0)) /* this_chunk could be zero if all the previous * message used all the buffer. This is not a - * problem, userdata will be sent in the next + * problem, extradata will be sent in the next * iteration */ return; - memcpy(buf + this_header + this_offset, - userdata + sent_userdata, + memcpy(nt->buf + this_header + this_offset, + extradata + sent_extradata, this_chunk); this_offset += this_chunk; } - send_udp(nt, buf, this_header + this_offset); + send_udp(nt, nt->buf, this_header + this_offset); offset += this_offset; } } @@ -1240,9 +1373,9 @@ static void send_fragmented_body(struct netconsole_target *nt, char *buf, static void send_msg_fragmented(struct netconsole_target *nt, const char *msg, int msg_len, - int release_len) + int release_len, + int extradata_len) { - static char buf[MAX_PRINT_CHUNK]; /* protected by target_list_lock */ int header_len, msgbody_len; const char *msgbody; @@ -1260,16 +1393,17 @@ static void send_msg_fragmented(struct netconsole_target *nt, * "ncfrag=<byte-offset>/<total-bytes>" */ if (release_len) - append_release(buf); + append_release(nt->buf); /* Copy the header into the buffer */ - memcpy(buf + release_len, msg, header_len); + memcpy(nt->buf + release_len, msg, header_len); header_len += release_len; /* for now on, the header will be persisted, and the msgbody * will be replaced */ - send_fragmented_body(nt, buf, msgbody, header_len, msgbody_len); + send_fragmented_body(nt, msgbody, header_len, msgbody_len, + extradata_len); } /** @@ -1285,20 +1419,19 @@ static void send_msg_fragmented(struct netconsole_target *nt, static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg, int msg_len) { - int userdata_len = 0; int release_len = 0; + int extradata_len; -#ifdef CONFIG_NETCONSOLE_DYNAMIC - userdata_len = nt->userdata_length; -#endif + extradata_len = prepare_extradata(nt); if (nt->release) release_len = strlen(init_utsname()->release) + 1; - if (msg_len + release_len + userdata_len <= MAX_PRINT_CHUNK) + if (msg_len + release_len + extradata_len <= MAX_PRINT_CHUNK) return send_msg_no_fragmentation(nt, msg, msg_len, release_len); - return send_msg_fragmented(nt, msg, msg_len, release_len); + return send_msg_fragmented(nt, msg, msg_len, release_len, + extradata_len); } static void write_ext_msg(struct console *con, const char *msg, diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 42f247cbdcee..9b394ddc5206 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -645,8 +645,11 @@ nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx) if (ns->rq_reset_mode > 3) return -EINVAL; - if (ns->rq_reset_mode == 1) + if (ns->rq_reset_mode == 1) { + if (!netif_running(ns->netdev)) + return -ENETDOWN; return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi); + } qmem->rq = nsim_queue_alloc(); if (!qmem->rq) @@ -754,11 +757,6 @@ nsim_qreset_write(struct file *file, const char __user *data, return -EINVAL; rtnl_lock(); - if (!netif_running(ns->netdev)) { - ret = -ENETDOWN; - goto exit_unlock; - } - if (queue >= ns->netdev->real_num_rx_queues) { ret = -EINVAL; goto exit_unlock; diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c index 61944574d087..a808ac7375f5 100644 --- a/drivers/net/pcs/pcs-rzn1-miic.c +++ b/drivers/net/pcs/pcs-rzn1-miic.c @@ -268,17 +268,6 @@ static void miic_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, (MIIC_CONVCTRL_CONV_SPEED | MIIC_CONVCTRL_FULLD), val); } -static int miic_validate(struct phylink_pcs *pcs, unsigned long *supported, - const struct phylink_link_state *state) -{ - if (phy_interface_mode_is_rgmii(state->interface) || - state->interface == PHY_INTERFACE_MODE_RMII || - state->interface == PHY_INTERFACE_MODE_MII) - return 1; - - return -EINVAL; -} - static int miic_pre_init(struct phylink_pcs *pcs) { struct miic_port *miic_port = phylink_pcs_to_miic_port(pcs); @@ -307,7 +296,6 @@ static int miic_pre_init(struct phylink_pcs *pcs) } static const struct phylink_pcs_ops miic_phylink_ops = { - .pcs_validate = miic_validate, .pcs_config = miic_config, .pcs_link_up = miic_link_up, .pcs_pre_init = miic_pre_init, @@ -363,6 +351,10 @@ struct phylink_pcs *miic_create(struct device *dev, struct device_node *np) miic_port->pcs.ops = &miic_phylink_ops; miic_port->pcs.neg_mode = true; + phy_interface_set_rgmii(miic_port->pcs.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_RMII, miic_port->pcs.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_MII, miic_port->pcs.supported_interfaces); + return &miic_port->pcs; } EXPORT_SYMBOL(miic_create); @@ -472,13 +464,10 @@ static int miic_parse_dt(struct device *dev, u32 *mode_cfg) if (of_property_read_u32(np, "renesas,miic-switch-portin", &conf) == 0) dt_val[0] = conf; - for_each_child_of_node(np, conv) { + for_each_available_child_of_node(np, conv) { if (of_property_read_u32(conv, "reg", &port)) continue; - if (!of_device_is_available(conv)) - continue; - if (of_property_read_u32(conv, "renesas,miic-input", &conf) == 0) dt_val[port] = conf; } diff --git a/drivers/net/phy/aquantia/aquantia_hwmon.c b/drivers/net/phy/aquantia/aquantia_hwmon.c index 7b3c49c3bf49..1a714b56b765 100644 --- a/drivers/net/phy/aquantia/aquantia_hwmon.c +++ b/drivers/net/phy/aquantia/aquantia_hwmon.c @@ -172,33 +172,13 @@ static const struct hwmon_ops aqr_hwmon_ops = { .write = aqr_hwmon_write, }; -static u32 aqr_hwmon_chip_config[] = { - HWMON_C_REGISTER_TZ, - 0, -}; - -static const struct hwmon_channel_info aqr_hwmon_chip = { - .type = hwmon_chip, - .config = aqr_hwmon_chip_config, -}; - -static u32 aqr_hwmon_temp_config[] = { - HWMON_T_INPUT | - HWMON_T_MAX | HWMON_T_MIN | - HWMON_T_MAX_ALARM | HWMON_T_MIN_ALARM | - HWMON_T_CRIT | HWMON_T_LCRIT | - HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM, - 0, -}; - -static const struct hwmon_channel_info aqr_hwmon_temp = { - .type = hwmon_temp, - .config = aqr_hwmon_temp_config, -}; - static const struct hwmon_channel_info * const aqr_hwmon_info[] = { - &aqr_hwmon_chip, - &aqr_hwmon_temp, + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | + HWMON_T_MAX | HWMON_T_MIN | + HWMON_T_MAX_ALARM | HWMON_T_MIN_ALARM | + HWMON_T_CRIT | HWMON_T_LCRIT | + HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM), NULL, }; diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 22edb7e4c1a1..13e43fee1906 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -16,7 +16,7 @@ #include <linux/delay.h> #include <linux/module.h> #include <linux/phy.h> -#include <linux/pm_wakeup.h> +#include <linux/device.h> #include <linux/brcmphy.h> #include <linux/of.h> #include <linux/interrupt.h> diff --git a/drivers/net/phy/dp83td510.c b/drivers/net/phy/dp83td510.c index a42af9c168ec..23af1ac194fa 100644 --- a/drivers/net/phy/dp83td510.c +++ b/drivers/net/phy/dp83td510.c @@ -204,10 +204,191 @@ struct dp83td510_priv { #define DP83TD510E_UNKN_030E 0x30e #define DP83TD510E_030E_VAL 0x2520 +#define DP83TD510E_LEDS_CFG_1 0x460 +#define DP83TD510E_LED_FN(idx, val) (((val) & 0xf) << ((idx) * 4)) +#define DP83TD510E_LED_FN_MASK(idx) (0xf << ((idx) * 4)) +/* link OK */ +#define DP83TD510E_LED_MODE_LINK_OK 0x0 +/* TX/RX activity */ +#define DP83TD510E_LED_MODE_TX_RX_ACTIVITY 0x1 +/* TX activity */ +#define DP83TD510E_LED_MODE_TX_ACTIVITY 0x2 +/* RX activity */ +#define DP83TD510E_LED_MODE_RX_ACTIVITY 0x3 +/* LR */ +#define DP83TD510E_LED_MODE_LR 0x4 +/* SR */ +#define DP83TD510E_LED_MODE_SR 0x5 +/* LED SPEED: High for 10Base-T */ +#define DP83TD510E_LED_MODE_LED_SPEED 0x6 +/* Duplex mode */ +#define DP83TD510E_LED_MODE_DUPLEX 0x7 +/* link + blink on activity with stretch option */ +#define DP83TD510E_LED_MODE_LINK_BLINK 0x8 +/* blink on activity with stretch option */ +#define DP83TD510E_LED_MODE_BLINK_ACTIVITY 0x9 +/* blink on tx activity with stretch option */ +#define DP83TD510E_LED_MODE_BLINK_TX 0xa +/* blink on rx activity with stretch option */ +#define DP83TD510E_LED_MODE_BLINK_RX 0xb +/* link_lost */ +#define DP83TD510E_LED_MODE_LINK_LOST 0xc +/* PRBS error: toggles on error */ +#define DP83TD510E_LED_MODE_PRBS_ERROR 0xd +/* XMII TX/RX Error with stretch option */ +#define DP83TD510E_LED_MODE_XMII_ERR 0xe + +#define DP83TD510E_LED_COUNT 4 + +#define DP83TD510E_LEDS_CFG_2 0x469 +#define DP83TD510E_LED_POLARITY(idx) BIT((idx) * 4 + 2) +#define DP83TD510E_LED_DRV_VAL(idx) BIT((idx) * 4 + 1) +#define DP83TD510E_LED_DRV_EN(idx) BIT((idx) * 4) + #define DP83TD510E_ALCD_STAT 0xa9f #define DP83TD510E_ALCD_COMPLETE BIT(15) #define DP83TD510E_ALCD_CABLE_LENGTH GENMASK(10, 0) +static int dp83td510_led_brightness_set(struct phy_device *phydev, u8 index, + enum led_brightness brightness) +{ + u32 val; + + if (index >= DP83TD510E_LED_COUNT) + return -EINVAL; + + val = DP83TD510E_LED_DRV_EN(index); + + if (brightness) + val |= DP83TD510E_LED_DRV_VAL(index); + + return phy_modify_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_2, + DP83TD510E_LED_DRV_VAL(index) | + DP83TD510E_LED_DRV_EN(index), val); +} + +static int dp83td510_led_mode(u8 index, unsigned long rules) +{ + if (index >= DP83TD510E_LED_COUNT) + return -EINVAL; + + switch (rules) { + case BIT(TRIGGER_NETDEV_LINK): + return DP83TD510E_LED_MODE_LINK_OK; + case BIT(TRIGGER_NETDEV_LINK_10): + return DP83TD510E_LED_MODE_LED_SPEED; + case BIT(TRIGGER_NETDEV_FULL_DUPLEX): + return DP83TD510E_LED_MODE_DUPLEX; + case BIT(TRIGGER_NETDEV_TX): + return DP83TD510E_LED_MODE_TX_ACTIVITY; + case BIT(TRIGGER_NETDEV_RX): + return DP83TD510E_LED_MODE_RX_ACTIVITY; + case BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX): + return DP83TD510E_LED_MODE_TX_RX_ACTIVITY; + case BIT(TRIGGER_NETDEV_LINK) | BIT(TRIGGER_NETDEV_TX) | + BIT(TRIGGER_NETDEV_RX): + return DP83TD510E_LED_MODE_LINK_BLINK; + default: + return -EOPNOTSUPP; + } +} + +static int dp83td510_led_hw_is_supported(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + int ret; + + ret = dp83td510_led_mode(index, rules); + if (ret < 0) + return ret; + + return 0; +} + +static int dp83td510_led_hw_control_set(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + int mode, ret; + + mode = dp83td510_led_mode(index, rules); + if (mode < 0) + return mode; + + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_1, + DP83TD510E_LED_FN_MASK(index), + DP83TD510E_LED_FN(index, mode)); + if (ret) + return ret; + + return phy_modify_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_2, + DP83TD510E_LED_DRV_EN(index), 0); +} + +static int dp83td510_led_hw_control_get(struct phy_device *phydev, + u8 index, unsigned long *rules) +{ + int val; + + val = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_1); + if (val < 0) + return val; + + val &= DP83TD510E_LED_FN_MASK(index); + val >>= index * 4; + + switch (val) { + case DP83TD510E_LED_MODE_LINK_OK: + *rules = BIT(TRIGGER_NETDEV_LINK); + break; + /* LED mode: LED SPEED (10BaseT1L indicator) */ + case DP83TD510E_LED_MODE_LED_SPEED: + *rules = BIT(TRIGGER_NETDEV_LINK_10); + break; + case DP83TD510E_LED_MODE_DUPLEX: + *rules = BIT(TRIGGER_NETDEV_FULL_DUPLEX); + break; + case DP83TD510E_LED_MODE_TX_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_TX); + break; + case DP83TD510E_LED_MODE_RX_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_RX); + break; + case DP83TD510E_LED_MODE_TX_RX_ACTIVITY: + *rules = BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX); + break; + case DP83TD510E_LED_MODE_LINK_BLINK: + *rules = BIT(TRIGGER_NETDEV_LINK) | + BIT(TRIGGER_NETDEV_TX) | + BIT(TRIGGER_NETDEV_RX); + break; + default: + *rules = 0; + break; + } + + return 0; +} + +static int dp83td510_led_polarity_set(struct phy_device *phydev, int index, + unsigned long modes) +{ + u16 polarity = DP83TD510E_LED_POLARITY(index); + u32 mode; + + for_each_set_bit(mode, &modes, __PHY_LED_MODES_NUM) { + switch (mode) { + case PHY_LED_ACTIVE_LOW: + polarity = 0; + break; + default: + return -EINVAL; + } + } + + return phy_modify_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_LEDS_CFG_2, + DP83TD510E_LED_POLARITY(index), polarity); +} + /** * dp83td510_update_stats - Update the PHY statistics for the DP83TD510 PHY. * @phydev: Pointer to the phy_device structure. @@ -712,6 +893,12 @@ static struct phy_driver dp83td510_driver[] = { .get_phy_stats = dp83td510_get_phy_stats, .update_stats = dp83td510_update_stats, + .led_brightness_set = dp83td510_led_brightness_set, + .led_hw_is_supported = dp83td510_led_hw_is_supported, + .led_hw_control_set = dp83td510_led_hw_control_set, + .led_hw_control_get = dp83td510_led_hw_control_get, + .led_polarity_set = dp83td510_led_polarity_set, + .suspend = genphy_suspend, .resume = genphy_resume, } }; diff --git a/drivers/net/phy/dp83tg720.c b/drivers/net/phy/dp83tg720.c index 050f4537d140..7e76323409c4 100644 --- a/drivers/net/phy/dp83tg720.c +++ b/drivers/net/phy/dp83tg720.c @@ -4,12 +4,31 @@ */ #include <linux/bitfield.h> #include <linux/ethtool_netlink.h> +#include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/phy.h> +#include <linux/random.h> #include "open_alliance_helpers.h" +/* + * DP83TG720S_POLL_ACTIVE_LINK - Polling interval in milliseconds when the link + * is active. + * DP83TG720S_POLL_NO_LINK_MIN - Minimum polling interval in milliseconds when + * the link is down. + * DP83TG720S_POLL_NO_LINK_MAX - Maximum polling interval in milliseconds when + * the link is down. + * + * These values are not documented or officially recommended by the vendor but + * were determined through empirical testing. They achieve a good balance in + * minimizing the number of reset retries while ensuring reliable link recovery + * within a reasonable timeframe. + */ +#define DP83TG720S_POLL_ACTIVE_LINK 1000 +#define DP83TG720S_POLL_NO_LINK_MIN 100 +#define DP83TG720S_POLL_NO_LINK_MAX 1000 + #define DP83TG720S_PHY_ID 0x2000a284 /* MDIO_MMD_VEND2 registers */ @@ -371,6 +390,13 @@ static int dp83tg720_read_status(struct phy_device *phydev) if (ret) return ret; + /* Sleep 600ms for PHY stabilization post-reset. + * Empirically chosen value (not documented). + * Helps reduce reset bounces with link partners having similar + * issues. + */ + msleep(600); + /* After HW reset we need to restore master/slave configuration. * genphy_c45_pma_baset1_read_master_slave() call will be done * by the dp83tg720_config_aneg() function. @@ -498,6 +524,57 @@ static int dp83tg720_probe(struct phy_device *phydev) return 0; } +/** + * dp83tg720_get_next_update_time - Determine the next update time for PHY + * state + * @phydev: Pointer to the phy_device structure + * + * This function addresses a limitation of the DP83TG720 PHY, which cannot + * reliably detect or report a stable link state. To recover from such + * scenarios, the PHY must be periodically reset when the link is down. However, + * if the link partner also runs Linux with the same driver, synchronized reset + * intervals can lead to a deadlock where the link never establishes due to + * simultaneous resets on both sides. + * + * To avoid this, the function implements randomized polling intervals when the + * link is down. It ensures that reset intervals are desynchronized by + * introducing a random delay between a configured minimum and maximum range. + * When the link is up, a fixed polling interval is used to minimize overhead. + * + * This mechanism guarantees that the link will reestablish within 10 seconds + * in the worst-case scenario. + * + * Return: Time (in jiffies) until the next update event for the PHY state + * machine. + */ +static unsigned int dp83tg720_get_next_update_time(struct phy_device *phydev) +{ + unsigned int next_time_jiffies; + + if (phydev->link) { + /* When the link is up, use a fixed 1000ms interval + * (in jiffies) + */ + next_time_jiffies = + msecs_to_jiffies(DP83TG720S_POLL_ACTIVE_LINK); + } else { + unsigned int min_jiffies, max_jiffies, rand_jiffies; + + /* When the link is down, randomize interval between min/max + * (in jiffies) + */ + min_jiffies = msecs_to_jiffies(DP83TG720S_POLL_NO_LINK_MIN); + max_jiffies = msecs_to_jiffies(DP83TG720S_POLL_NO_LINK_MAX); + + rand_jiffies = min_jiffies + + get_random_u32_below(max_jiffies - min_jiffies + 1); + next_time_jiffies = rand_jiffies; + } + + /* Ensure the polling time is at least one jiffy */ + return max(next_time_jiffies, 1U); +} + static struct phy_driver dp83tg720_driver[] = { { PHY_ID_MATCH_MODEL(DP83TG720S_PHY_ID), @@ -516,6 +593,7 @@ static struct phy_driver dp83tg720_driver[] = { .get_link_stats = dp83tg720_get_link_stats, .get_phy_stats = dp83tg720_get_phy_stats, .update_stats = dp83tg720_update_stats, + .get_next_update_time = dp83tg720_get_next_update_time, .suspend = genphy_suspend, .resume = genphy_resume, diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c index a3996471a1c9..bad5e7b2357d 100644 --- a/drivers/net/phy/marvell-88q2xxx.c +++ b/drivers/net/phy/marvell-88q2xxx.c @@ -8,6 +8,7 @@ */ #include <linux/ethtool_netlink.h> #include <linux/marvell_phy.h> +#include <linux/of.h> #include <linux/phy.h> #include <linux/hwmon.h> @@ -27,6 +28,9 @@ #define MDIO_MMD_AN_MV_STAT2_100BT1 0x2000 #define MDIO_MMD_AN_MV_STAT2_1000BT1 0x4000 +#define MDIO_MMD_PCS_MV_RESET_CTRL 32768 +#define MDIO_MMD_PCS_MV_RESET_CTRL_TX_DISABLE 0x8 + #define MDIO_MMD_PCS_MV_INT_EN 32784 #define MDIO_MMD_PCS_MV_INT_EN_LINK_UP 0x0040 #define MDIO_MMD_PCS_MV_INT_EN_LINK_DOWN 0x0080 @@ -40,6 +44,22 @@ #define MDIO_MMD_PCS_MV_GPIO_INT_CTRL 32787 #define MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS 0x0800 +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL 32790 +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_1_MASK GENMASK(7, 4) +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_0_MASK GENMASK(3, 0) +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK 0x0 /* Link established */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_RX_TX 0x1 /* Link established, blink for rx or tx activity */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_1000BT1 0x2 /* Blink 3x for 1000BT1 link established */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_RX_TX_ON 0x3 /* Receive or transmit activity */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_RX_TX 0x4 /* Blink on receive or transmit activity */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_TX 0x5 /* Transmit activity */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_COPPER 0x6 /* Copper Link established */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_1000BT1_ON 0x7 /* 1000BT1 link established */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_FORCE_OFF 0x8 /* Force off */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_FORCE_ON 0x9 /* Force on */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_FORCE_HIGHZ 0xa /* Force Hi-Z */ +#define MDIO_MMD_PCS_MV_LED_FUNC_CTRL_FORCE_BLINK 0xb /* Force blink */ + #define MDIO_MMD_PCS_MV_TEMP_SENSOR1 32833 #define MDIO_MMD_PCS_MV_TEMP_SENSOR1_RAW_INT 0x0001 #define MDIO_MMD_PCS_MV_TEMP_SENSOR1_INT 0x0040 @@ -95,8 +115,12 @@ #define MDIO_MMD_PCS_MV_TDR_OFF_CUTOFF 65246 +#define MV88Q2XXX_LED_INDEX_TX_ENABLE 0 +#define MV88Q2XXX_LED_INDEX_GPIO 1 + struct mv88q2xxx_priv { bool enable_temp; + bool enable_led0; }; struct mmd_val { @@ -460,6 +484,9 @@ static int mv88q2xxx_config_aneg(struct phy_device *phydev) static int mv88q2xxx_config_init(struct phy_device *phydev) { + struct mv88q2xxx_priv *priv = phydev->priv; + int ret; + /* The 88Q2XXX PHYs do have the extended ability register available, but * register MDIO_PMA_EXTABLE where they should signalize it does not * work according to specification. Therefore, we force it here. @@ -469,10 +496,22 @@ static int mv88q2xxx_config_init(struct phy_device *phydev) /* Configure interrupt with default settings, output is driven low for * active interrupt and high for inactive. */ - if (phy_interrupt_is_valid(phydev)) - return phy_set_bits_mmd(phydev, MDIO_MMD_PCS, - MDIO_MMD_PCS_MV_GPIO_INT_CTRL, - MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS); + if (phy_interrupt_is_valid(phydev)) { + ret = phy_set_bits_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_GPIO_INT_CTRL, + MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS); + if (ret < 0) + return ret; + } + + /* Enable LED function and disable TX disable feature on LED/TX_ENABLE */ + if (priv->enable_led0) { + ret = phy_clear_bits_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_RESET_CTRL, + MDIO_MMD_PCS_MV_RESET_CTRL_TX_DISABLE); + if (ret < 0) + return ret; + } return 0; } @@ -740,15 +779,62 @@ static int mv88q2xxx_hwmon_probe(struct phy_device *phydev) } #endif +#if IS_ENABLED(CONFIG_OF_MDIO) +static int mv88q2xxx_leds_probe(struct phy_device *phydev) +{ + struct device_node *node = phydev->mdio.dev.of_node; + struct mv88q2xxx_priv *priv = phydev->priv; + struct device_node *leds; + int ret = 0; + u32 index; + + if (!node) + return 0; + + leds = of_get_child_by_name(node, "leds"); + if (!leds) + return 0; + + for_each_available_child_of_node_scoped(leds, led) { + ret = of_property_read_u32(led, "reg", &index); + if (ret) + goto exit; + + if (index > MV88Q2XXX_LED_INDEX_GPIO) { + ret = -EINVAL; + goto exit; + } + + if (index == MV88Q2XXX_LED_INDEX_TX_ENABLE) + priv->enable_led0 = true; + } + +exit: + of_node_put(leds); + + return ret; +} + +#else +static int mv88q2xxx_leds_probe(struct phy_device *phydev) +{ + return 0; +} +#endif + static int mv88q2xxx_probe(struct phy_device *phydev) { struct mv88q2xxx_priv *priv; + int ret; priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; phydev->priv = priv; + ret = mv88q2xxx_leds_probe(phydev); + if (ret) + return ret; return mv88q2xxx_hwmon_probe(phydev); } @@ -918,6 +1004,98 @@ static int mv88q222x_cable_test_get_status(struct phy_device *phydev, return 0; } +static int mv88q2xxx_led_mode(u8 index, unsigned long rules) +{ + switch (rules) { + case BIT(TRIGGER_NETDEV_LINK): + return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK; + case BIT(TRIGGER_NETDEV_LINK_1000): + return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_1000BT1_ON; + case BIT(TRIGGER_NETDEV_TX): + return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_TX; + case BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX): + return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_RX_TX; + case BIT(TRIGGER_NETDEV_LINK) | BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX): + return MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_RX_TX; + default: + return -EOPNOTSUPP; + } +} + +static int mv88q2xxx_led_hw_is_supported(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + int mode; + + mode = mv88q2xxx_led_mode(index, rules); + if (mode < 0) + return mode; + + return 0; +} + +static int mv88q2xxx_led_hw_control_set(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + int mode; + + mode = mv88q2xxx_led_mode(index, rules); + if (mode < 0) + return mode; + + if (index == MV88Q2XXX_LED_INDEX_TX_ENABLE) + return phy_modify_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_LED_FUNC_CTRL, + MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_0_MASK, + FIELD_PREP(MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_0_MASK, + mode)); + else + return phy_modify_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_LED_FUNC_CTRL, + MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_1_MASK, + FIELD_PREP(MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_1_MASK, + mode)); +} + +static int mv88q2xxx_led_hw_control_get(struct phy_device *phydev, u8 index, + unsigned long *rules) +{ + int val; + + val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_LED_FUNC_CTRL); + if (val < 0) + return val; + + if (index == MV88Q2XXX_LED_INDEX_TX_ENABLE) + val = FIELD_GET(MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_0_MASK, val); + else + val = FIELD_GET(MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LED_1_MASK, val); + + switch (val) { + case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK: + *rules = BIT(TRIGGER_NETDEV_LINK); + break; + case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_1000BT1_ON: + *rules = BIT(TRIGGER_NETDEV_LINK_1000); + break; + case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_TX: + *rules = BIT(TRIGGER_NETDEV_TX); + break; + case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_RX_TX: + *rules = BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX); + break; + case MDIO_MMD_PCS_MV_LED_FUNC_CTRL_LINK_RX_TX: + *rules = BIT(TRIGGER_NETDEV_LINK) | BIT(TRIGGER_NETDEV_TX) | + BIT(TRIGGER_NETDEV_RX); + break; + default: + *rules = 0; + break; + } + + return 0; +} + static struct phy_driver mv88q2xxx_driver[] = { { .phy_id = MARVELL_PHY_ID_88Q2110, @@ -953,6 +1131,9 @@ static struct phy_driver mv88q2xxx_driver[] = { .get_sqi_max = mv88q2xxx_get_sqi_max, .suspend = mv88q2xxx_suspend, .resume = mv88q2xxx_resume, + .led_hw_is_supported = mv88q2xxx_led_hw_is_supported, + .led_hw_control_set = mv88q2xxx_led_hw_control_set, + .led_hw_control_get = mv88q2xxx_led_hw_control_get, }, }; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 44e1927de499..dd254e36ca8a 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -3124,33 +3124,13 @@ static umode_t marvell_hwmon_is_visible(const void *data, } } -static u32 marvell_hwmon_chip_config[] = { - HWMON_C_REGISTER_TZ, - 0 -}; - -static const struct hwmon_channel_info marvell_hwmon_chip = { - .type = hwmon_chip, - .config = marvell_hwmon_chip_config, -}; - /* we can define HWMON_T_CRIT and HWMON_T_MAX_ALARM even though these are not * defined for all PHYs, because the hwmon code checks whether the attributes * exists via the .is_visible method */ -static u32 marvell_hwmon_temp_config[] = { - HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_MAX_ALARM, - 0 -}; - -static const struct hwmon_channel_info marvell_hwmon_temp = { - .type = hwmon_temp, - .config = marvell_hwmon_temp_config, -}; - static const struct hwmon_channel_info * const marvell_hwmon_info[] = { - &marvell_hwmon_chip, - &marvell_hwmon_temp, + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_MAX_ALARM), NULL }; diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 623bdb8466b8..5354c8895163 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -230,29 +230,9 @@ static const struct hwmon_ops mv3310_hwmon_ops = { .read = mv3310_hwmon_read, }; -static u32 mv3310_hwmon_chip_config[] = { - HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL, - 0, -}; - -static const struct hwmon_channel_info mv3310_hwmon_chip = { - .type = hwmon_chip, - .config = mv3310_hwmon_chip_config, -}; - -static u32 mv3310_hwmon_temp_config[] = { - HWMON_T_INPUT, - 0, -}; - -static const struct hwmon_channel_info mv3310_hwmon_temp = { - .type = hwmon_temp, - .config = mv3310_hwmon_temp_config, -}; - static const struct hwmon_channel_info * const mv3310_hwmon_info[] = { - &mv3310_hwmon_chip, - &mv3310_hwmon_temp, + HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT), NULL, }; diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 0dac08e85304..468d246113a4 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -686,7 +686,7 @@ static int genphy_c45_write_eee_adv(struct phy_device *phydev, __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp); int val, changed = 0; - linkmode_andnot(tmp, adv, phydev->eee_broken_modes); + linkmode_andnot(tmp, adv, phydev->eee_disabled_modes); if (linkmode_intersects(phydev->supported_eee, PHY_EEE_CAP1_FEATURES)) { val = linkmode_to_mii_eee_cap1_t(tmp); diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 6bf3ec985f3d..2fd1d153abc9 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -13,7 +13,7 @@ */ const char *phy_speed_to_str(int speed) { - BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 103, + BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 121, "Enum ethtool_link_mode_bit_indices and phylib are out of sync. " "If a speed or mode has been added please update phy_speed_to_str " "and the PHY settings array.\n"); @@ -169,6 +169,12 @@ static const struct phy_setting settings[] = { PHY_SETTING( 800000, FULL, 800000baseDR8_2_Full ), PHY_SETTING( 800000, FULL, 800000baseSR8_Full ), PHY_SETTING( 800000, FULL, 800000baseVR8_Full ), + PHY_SETTING( 800000, FULL, 800000baseCR4_Full ), + PHY_SETTING( 800000, FULL, 800000baseKR4_Full ), + PHY_SETTING( 800000, FULL, 800000baseDR4_Full ), + PHY_SETTING( 800000, FULL, 800000baseDR4_2_Full ), + PHY_SETTING( 800000, FULL, 800000baseSR4_Full ), + PHY_SETTING( 800000, FULL, 800000baseVR4_Full ), /* 400G */ PHY_SETTING( 400000, FULL, 400000baseCR8_Full ), PHY_SETTING( 400000, FULL, 400000baseKR8_Full ), @@ -180,6 +186,12 @@ static const struct phy_setting settings[] = { PHY_SETTING( 400000, FULL, 400000baseLR4_ER4_FR4_Full ), PHY_SETTING( 400000, FULL, 400000baseDR4_Full ), PHY_SETTING( 400000, FULL, 400000baseSR4_Full ), + PHY_SETTING( 400000, FULL, 400000baseCR2_Full ), + PHY_SETTING( 400000, FULL, 400000baseKR2_Full ), + PHY_SETTING( 400000, FULL, 400000baseDR2_Full ), + PHY_SETTING( 400000, FULL, 400000baseDR2_2_Full ), + PHY_SETTING( 400000, FULL, 400000baseSR2_Full ), + PHY_SETTING( 400000, FULL, 400000baseVR2_Full ), /* 200G */ PHY_SETTING( 200000, FULL, 200000baseCR4_Full ), PHY_SETTING( 200000, FULL, 200000baseKR4_Full ), @@ -191,6 +203,12 @@ static const struct phy_setting settings[] = { PHY_SETTING( 200000, FULL, 200000baseLR2_ER2_FR2_Full ), PHY_SETTING( 200000, FULL, 200000baseDR2_Full ), PHY_SETTING( 200000, FULL, 200000baseSR2_Full ), + PHY_SETTING( 200000, FULL, 200000baseCR_Full ), + PHY_SETTING( 200000, FULL, 200000baseKR_Full ), + PHY_SETTING( 200000, FULL, 200000baseDR_Full ), + PHY_SETTING( 200000, FULL, 200000baseDR_2_Full ), + PHY_SETTING( 200000, FULL, 200000baseSR_Full ), + PHY_SETTING( 200000, FULL, 200000baseVR_Full ), /* 100G */ PHY_SETTING( 100000, FULL, 100000baseCR4_Full ), PHY_SETTING( 100000, FULL, 100000baseKR4_Full ), @@ -388,7 +406,7 @@ void of_set_phy_supported(struct phy_device *phydev) void of_set_phy_eee_broken(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; - unsigned long *modes = phydev->eee_broken_modes; + unsigned long *modes = phydev->eee_disabled_modes; if (!IS_ENABLED(CONFIG_OF_MDIO) || !node) return; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index d0c1718e2b16..347ad1b64497 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1501,6 +1501,24 @@ void phy_free_interrupt(struct phy_device *phydev) } EXPORT_SYMBOL(phy_free_interrupt); +/** + * phy_get_next_update_time - Determine the next PHY update time + * @phydev: Pointer to the phy_device structure + * + * This function queries the PHY driver to get the time for the next polling + * event. If the driver does not implement the callback, a default value is + * used. + * + * Return: The time for the next polling event in jiffies + */ +static unsigned int phy_get_next_update_time(struct phy_device *phydev) +{ + if (phydev->drv && phydev->drv->get_next_update_time) + return phydev->drv->get_next_update_time(phydev); + + return PHY_STATE_TIME; +} + enum phy_state_work { PHY_STATE_WORK_NONE, PHY_STATE_WORK_ANEG, @@ -1580,7 +1598,8 @@ static enum phy_state_work _phy_state_machine(struct phy_device *phydev) * called from phy_disconnect() synchronously. */ if (phy_polling_mode(phydev) && phy_is_started(phydev)) - phy_queue_state_machine(phydev, PHY_STATE_TIME); + phy_queue_state_machine(phydev, + phy_get_next_update_time(phydev)); return state_work; } diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 46713d27412b..9b06ba92f2ed 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2966,7 +2966,7 @@ void phy_disable_eee(struct phy_device *phydev) phydev->eee_cfg.tx_lpi_enabled = false; phydev->eee_cfg.eee_enabled = false; /* don't let userspace re-enable EEE advertisement */ - linkmode_fill(phydev->eee_broken_modes); + linkmode_fill(phydev->eee_disabled_modes); } EXPORT_SYMBOL_GPL(phy_disable_eee); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index b00a315de060..74693307dfbf 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1957,8 +1957,7 @@ struct phylink *phylink_create(struct phylink_config *config, return ERR_PTR(-EINVAL); } - pl->mac_supports_eee_ops = mac_ops->mac_disable_tx_lpi && - mac_ops->mac_enable_tx_lpi; + pl->mac_supports_eee_ops = phylink_mac_implements_lpi(mac_ops); pl->mac_supports_eee = pl->mac_supports_eee_ops && pl->config->lpi_capabilities && !phy_interface_empty(pl->config->lpi_interfaces); diff --git a/drivers/net/phy/realtek/Kconfig b/drivers/net/phy/realtek/Kconfig index 31935f147d87..b05c2a1e9024 100644 --- a/drivers/net/phy/realtek/Kconfig +++ b/drivers/net/phy/realtek/Kconfig @@ -4,8 +4,12 @@ config REALTEK_PHY help Currently supports RTL821x/RTL822x and fast ethernet PHYs +if REALTEK_PHY + config REALTEK_PHY_HWMON - def_bool REALTEK_PHY && HWMON - depends on !(REALTEK_PHY=y && HWMON=m) + bool "HWMON support for Realtek PHYs" + depends on HWMON && !(REALTEK_PHY=y && HWMON=m) help Optional hwmon support for the temperature sensor + +endif # REALTEK_PHY diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 572a933636b0..210fefac44d4 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/delay.h> #include <linux/clk.h> +#include <linux/string_choices.h> #include "realtek.h" @@ -422,11 +423,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) } else if (ret) { dev_dbg(dev, "%s 2ns TX delay (and changing the value from pin-strapping RXD1 or the bootloader)\n", - val_txdly ? "Enabling" : "Disabling"); + str_enable_disable(val_txdly)); } else { dev_dbg(dev, "2ns TX delay was already %s (by pin-strapping RXD1 or bootloader configuration)\n", - val_txdly ? "enabled" : "disabled"); + str_enabled_disabled(val_txdly)); } ret = phy_modify_paged_changed(phydev, 0xd08, 0x15, RTL8211F_RX_DELAY, @@ -437,11 +438,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) } else if (ret) { dev_dbg(dev, "%s 2ns RX delay (and changing the value from pin-strapping RXD0 or the bootloader)\n", - val_rxdly ? "Enabling" : "Disabling"); + str_enable_disable(val_rxdly)); } else { dev_dbg(dev, "2ns RX delay was already %s (by pin-strapping RXD0 or bootloader configuration)\n", - val_rxdly ? "enabled" : "disabled"); + str_enabled_disabled(val_rxdly)); } if (priv->has_phycr2) { diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 5ca6ecf0ce5f..d4ece538f1b2 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -26,74 +26,9 @@ #include <linux/virtio_net.h> #include <linux/skb_array.h> -#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) - -#define TAP_VNET_LE 0x80000000 -#define TAP_VNET_BE 0x40000000 - -#ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool tap_legacy_is_little_endian(struct tap_queue *q) -{ - return q->flags & TAP_VNET_BE ? false : - virtio_legacy_is_little_endian(); -} - -static long tap_get_vnet_be(struct tap_queue *q, int __user *sp) -{ - int s = !!(q->flags & TAP_VNET_BE); - - if (put_user(s, sp)) - return -EFAULT; - - return 0; -} - -static long tap_set_vnet_be(struct tap_queue *q, int __user *sp) -{ - int s; - - if (get_user(s, sp)) - return -EFAULT; - - if (s) - q->flags |= TAP_VNET_BE; - else - q->flags &= ~TAP_VNET_BE; - - return 0; -} -#else -static inline bool tap_legacy_is_little_endian(struct tap_queue *q) -{ - return virtio_legacy_is_little_endian(); -} - -static long tap_get_vnet_be(struct tap_queue *q, int __user *argp) -{ - return -EINVAL; -} - -static long tap_set_vnet_be(struct tap_queue *q, int __user *argp) -{ - return -EINVAL; -} -#endif /* CONFIG_TUN_VNET_CROSS_LE */ - -static inline bool tap_is_little_endian(struct tap_queue *q) -{ - return q->flags & TAP_VNET_LE || - tap_legacy_is_little_endian(q); -} - -static inline u16 tap16_to_cpu(struct tap_queue *q, __virtio16 val) -{ - return __virtio16_to_cpu(tap_is_little_endian(q), val); -} +#include "tun_vnet.h" -static inline __virtio16 cpu_to_tap16(struct tap_queue *q, u16 val) -{ - return __cpu_to_virtio16(tap_is_little_endian(q), val); -} +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) static struct proto tap_proto = { .name = "tap", @@ -645,6 +580,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0; + int hdr_len = 0; int copylen = 0; int depth; bool zerocopy = false; @@ -654,25 +590,13 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (q->flags & IFF_VNET_HDR) { vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); - err = -EINVAL; - if (len < vnet_hdr_len) + hdr_len = tun_vnet_hdr_get(vnet_hdr_len, q->flags, from, &vnet_hdr); + if (hdr_len < 0) { + err = hdr_len; goto err; - len -= vnet_hdr_len; + } - err = -EFAULT; - if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from)) - goto err; - iov_iter_advance(from, vnet_hdr_len - sizeof(vnet_hdr)); - if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tap16_to_cpu(q, vnet_hdr.csum_start) + - tap16_to_cpu(q, vnet_hdr.csum_offset) + 2 > - tap16_to_cpu(q, vnet_hdr.hdr_len)) - vnet_hdr.hdr_len = cpu_to_tap16(q, - tap16_to_cpu(q, vnet_hdr.csum_start) + - tap16_to_cpu(q, vnet_hdr.csum_offset) + 2); - err = -EINVAL; - if (tap16_to_cpu(q, vnet_hdr.hdr_len) > len) - goto err; + len -= vnet_hdr_len; } err = -EINVAL; @@ -682,12 +606,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { struct iov_iter i; - copylen = vnet_hdr.hdr_len ? - tap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN; - if (copylen > good_linear) - copylen = good_linear; - else if (copylen < ETH_HLEN) - copylen = ETH_HLEN; + copylen = clamp(hdr_len ?: GOODCOPY_LEN, ETH_HLEN, good_linear); linear = copylen; i = *from; iov_iter_advance(&i, copylen); @@ -697,11 +616,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (!zerocopy) { copylen = len; - linear = tap16_to_cpu(q, vnet_hdr.hdr_len); - if (linear > good_linear) - linear = good_linear; - else if (linear < ETH_HLEN) - linear = ETH_HLEN; + linear = clamp(hdr_len, ETH_HLEN, good_linear); } skb = tap_alloc_skb(&q->sk, TAP_RESERVE, copylen, @@ -733,8 +648,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, skb->dev = tap->dev; if (vnet_hdr_len) { - err = virtio_net_hdr_to_skb(skb, &vnet_hdr, - tap_is_little_endian(q)); + err = tun_vnet_hdr_to_skb(q->flags, skb, &vnet_hdr); if (err) { rcu_read_unlock(); drop_reason = SKB_DROP_REASON_DEV_HDR; @@ -797,23 +711,17 @@ static ssize_t tap_put_user(struct tap_queue *q, int total; if (q->flags & IFF_VNET_HDR) { - int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; struct virtio_net_hdr vnet_hdr; vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); - if (iov_iter_count(iter) < vnet_hdr_len) - return -EINVAL; - if (virtio_net_hdr_from_skb(skb, &vnet_hdr, - tap_is_little_endian(q), true, - vlan_hlen)) - BUG(); - - if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) != - sizeof(vnet_hdr)) - return -EFAULT; + ret = tun_vnet_hdr_from_skb(q->flags, NULL, skb, &vnet_hdr); + if (ret) + return ret; - iov_iter_advance(iter, vnet_hdr_len - sizeof(vnet_hdr)); + ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr); + if (ret) + return ret; } total = vnet_hdr_len; total += skb->len; @@ -1072,42 +980,6 @@ static long tap_ioctl(struct file *file, unsigned int cmd, q->sk.sk_sndbuf = s; return 0; - case TUNGETVNETHDRSZ: - s = q->vnet_hdr_sz; - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETHDRSZ: - if (get_user(s, sp)) - return -EFAULT; - if (s < (int)sizeof(struct virtio_net_hdr)) - return -EINVAL; - - q->vnet_hdr_sz = s; - return 0; - - case TUNGETVNETLE: - s = !!(q->flags & TAP_VNET_LE); - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETLE: - if (get_user(s, sp)) - return -EFAULT; - if (s) - q->flags |= TAP_VNET_LE; - else - q->flags &= ~TAP_VNET_LE; - return 0; - - case TUNGETVNETBE: - return tap_get_vnet_be(q, sp); - - case TUNSETVNETBE: - return tap_set_vnet_be(q, sp); - case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | @@ -1151,7 +1023,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, return ret; default: - return -EINVAL; + return tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags, cmd, sp); } } @@ -1198,7 +1070,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) skb->protocol = eth_hdr(skb)->h_proto; if (vnet_hdr_len) { - err = virtio_net_hdr_to_skb(skb, gso, tap_is_little_endian(q)); + err = tun_vnet_hdr_to_skb(q->flags, skb, gso); if (err) goto err_kfree; } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index acf96f262488..d8f4d3e996a7 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -83,6 +83,8 @@ #include <linux/uaccess.h> #include <linux/proc_fs.h> +#include "tun_vnet.h" + static void tun_default_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd); @@ -94,9 +96,6 @@ static void tun_default_link_ksettings(struct net_device *dev, * overload it to mean fasync when stored there. */ #define TUN_FASYNC IFF_ATTACH_QUEUE -/* High bits in flags field are unused. */ -#define TUN_VNET_LE 0x80000000 -#define TUN_VNET_BE 0x40000000 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS) @@ -298,70 +297,6 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile) return tfile->napi_frags_enabled; } -#ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) -{ - return tun->flags & TUN_VNET_BE ? false : - virtio_legacy_is_little_endian(); -} - -static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) -{ - int be = !!(tun->flags & TUN_VNET_BE); - - if (put_user(be, argp)) - return -EFAULT; - - return 0; -} - -static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) -{ - int be; - - if (get_user(be, argp)) - return -EFAULT; - - if (be) - tun->flags |= TUN_VNET_BE; - else - tun->flags &= ~TUN_VNET_BE; - - return 0; -} -#else -static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) -{ - return virtio_legacy_is_little_endian(); -} - -static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) -{ - return -EINVAL; -} - -static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) -{ - return -EINVAL; -} -#endif /* CONFIG_TUN_VNET_CROSS_LE */ - -static inline bool tun_is_little_endian(struct tun_struct *tun) -{ - return tun->flags & TUN_VNET_LE || - tun_legacy_is_little_endian(tun); -} - -static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) -{ - return __virtio16_to_cpu(tun_is_little_endian(tun), val); -} - -static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) -{ - return __cpu_to_virtio16(tun_is_little_endian(tun), val); -} - static inline u32 tun_hashfn(u32 rxhash) { return rxhash & TUN_MASK_FLOW_ENTRIES; @@ -1756,6 +1691,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, struct virtio_net_hdr gso = { 0 }; int good_linear; int copylen; + int hdr_len = 0; bool zerocopy = false; int err; u32 rxhash = 0; @@ -1775,26 +1711,16 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); - if (len < vnet_hdr_sz) - return -EINVAL; - len -= vnet_hdr_sz; + hdr_len = tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, from, &gso); + if (hdr_len < 0) + return hdr_len; - if (!copy_from_iter_full(&gso, sizeof(gso), from)) - return -EFAULT; - - if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len)) - gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2); - - if (tun16_to_cpu(tun, gso.hdr_len) > len) - return -EINVAL; - iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); + len -= vnet_hdr_sz; } if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { align += NET_IP_ALIGN; - if (unlikely(len < ETH_HLEN || - (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN))) + if (unlikely(len < ETH_HLEN || (hdr_len && hdr_len < ETH_HLEN))) return -EINVAL; } @@ -1807,9 +1733,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ - copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN; - if (copylen > good_linear) - copylen = good_linear; + copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear); linear = copylen; iov_iter_advance(&i, copylen); if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) @@ -1830,10 +1754,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } else { if (!zerocopy) { copylen = len; - if (tun16_to_cpu(tun, gso.hdr_len) > good_linear) - linear = good_linear; - else - linear = tun16_to_cpu(tun, gso.hdr_len); + linear = min(hdr_len, good_linear); } if (frags) { @@ -1868,7 +1789,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } } - if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) { + if (tun_vnet_hdr_to_skb(tun->flags, skb, &gso)) { atomic_long_inc(&tun->rx_frame_errors); err = -EINVAL; goto free_skb; @@ -2063,18 +1984,15 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun, { int vnet_hdr_sz = 0; size_t size = xdp_frame->len; - size_t ret; + ssize_t ret; if (tun->flags & IFF_VNET_HDR) { struct virtio_net_hdr gso = { 0 }; vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); - if (unlikely(iov_iter_count(iter) < vnet_hdr_sz)) - return -EINVAL; - if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) != - sizeof(gso))) - return -EFAULT; - iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); + if (ret) + return ret; } ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz; @@ -2097,6 +2015,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, int vlan_offset = 0; int vlan_hlen = 0; int vnet_hdr_sz = 0; + int ret; if (skb_vlan_tag_present(skb)) vlan_hlen = VLAN_HLEN; @@ -2123,31 +2042,13 @@ static ssize_t tun_put_user(struct tun_struct *tun, if (vnet_hdr_sz) { struct virtio_net_hdr gso; - if (iov_iter_count(iter) < vnet_hdr_sz) - return -EINVAL; - - if (virtio_net_hdr_from_skb(skb, &gso, - tun_is_little_endian(tun), true, - vlan_hlen)) { - struct skb_shared_info *sinfo = skb_shinfo(skb); - - if (net_ratelimit()) { - netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), - tun16_to_cpu(tun, gso.hdr_len)); - print_hex_dump(KERN_ERR, "tun: ", - DUMP_PREFIX_NONE, - 16, 1, skb->head, - min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); - } - WARN_ON_ONCE(1); - return -EINVAL; - } - - if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso)) - return -EFAULT; + ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso); + if (ret) + return ret; - iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); + if (ret) + return ret; } if (vlan_hlen) { @@ -2507,7 +2408,7 @@ build: skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); - if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) { + if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) { atomic_long_inc(&tun->rx_frame_errors); kfree_skb(skb); ret = -EINVAL; @@ -3091,8 +2992,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, kgid_t group; int ifindex; int sndbuf; - int vnet_hdr_sz; - int le; int ret; bool do_notify = false; @@ -3299,50 +3198,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tun_set_sndbuf(tun); break; - case TUNGETVNETHDRSZ: - vnet_hdr_sz = tun->vnet_hdr_sz; - if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz))) - ret = -EFAULT; - break; - - case TUNSETVNETHDRSZ: - if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) { - ret = -EFAULT; - break; - } - if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) { - ret = -EINVAL; - break; - } - - tun->vnet_hdr_sz = vnet_hdr_sz; - break; - - case TUNGETVNETLE: - le = !!(tun->flags & TUN_VNET_LE); - if (put_user(le, (int __user *)argp)) - ret = -EFAULT; - break; - - case TUNSETVNETLE: - if (get_user(le, (int __user *)argp)) { - ret = -EFAULT; - break; - } - if (le) - tun->flags |= TUN_VNET_LE; - else - tun->flags &= ~TUN_VNET_LE; - break; - - case TUNGETVNETBE: - ret = tun_get_vnet_be(tun, argp); - break; - - case TUNSETVNETBE: - ret = tun_set_vnet_be(tun, argp); - break; - case TUNATTACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; @@ -3398,7 +3253,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break; default: - ret = -EINVAL; + ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp); break; } diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h new file mode 100644 index 000000000000..fd7411c4447f --- /dev/null +++ b/drivers/net/tun_vnet.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef TUN_VNET_H +#define TUN_VNET_H + +/* High bits in flags field are unused. */ +#define TUN_VNET_LE 0x80000000 +#define TUN_VNET_BE 0x40000000 + +static inline bool tun_vnet_legacy_is_little_endian(unsigned int flags) +{ + bool be = IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && + (flags & TUN_VNET_BE); + + return !be && virtio_legacy_is_little_endian(); +} + +static inline long tun_get_vnet_be(unsigned int flags, int __user *argp) +{ + int be = !!(flags & TUN_VNET_BE); + + if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) + return -EINVAL; + + if (put_user(be, argp)) + return -EFAULT; + + return 0; +} + +static inline long tun_set_vnet_be(unsigned int *flags, int __user *argp) +{ + int be; + + if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) + return -EINVAL; + + if (get_user(be, argp)) + return -EFAULT; + + if (be) + *flags |= TUN_VNET_BE; + else + *flags &= ~TUN_VNET_BE; + + return 0; +} + +static inline bool tun_vnet_is_little_endian(unsigned int flags) +{ + return flags & TUN_VNET_LE || tun_vnet_legacy_is_little_endian(flags); +} + +static inline u16 tun_vnet16_to_cpu(unsigned int flags, __virtio16 val) +{ + return __virtio16_to_cpu(tun_vnet_is_little_endian(flags), val); +} + +static inline __virtio16 cpu_to_tun_vnet16(unsigned int flags, u16 val) +{ + return __cpu_to_virtio16(tun_vnet_is_little_endian(flags), val); +} + +static inline long tun_vnet_ioctl(int *vnet_hdr_sz, unsigned int *flags, + unsigned int cmd, int __user *sp) +{ + int s; + + switch (cmd) { + case TUNGETVNETHDRSZ: + s = *vnet_hdr_sz; + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETHDRSZ: + if (get_user(s, sp)) + return -EFAULT; + if (s < (int)sizeof(struct virtio_net_hdr)) + return -EINVAL; + + *vnet_hdr_sz = s; + return 0; + + case TUNGETVNETLE: + s = !!(*flags & TUN_VNET_LE); + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETLE: + if (get_user(s, sp)) + return -EFAULT; + if (s) + *flags |= TUN_VNET_LE; + else + *flags &= ~TUN_VNET_LE; + return 0; + + case TUNGETVNETBE: + return tun_get_vnet_be(*flags, sp); + + case TUNSETVNETBE: + return tun_set_vnet_be(flags, sp); + + default: + return -EINVAL; + } +} + +static inline int tun_vnet_hdr_get(int sz, unsigned int flags, + struct iov_iter *from, + struct virtio_net_hdr *hdr) +{ + u16 hdr_len; + + if (iov_iter_count(from) < sz) + return -EINVAL; + + if (!copy_from_iter_full(hdr, sizeof(*hdr), from)) + return -EFAULT; + + hdr_len = tun_vnet16_to_cpu(flags, hdr->hdr_len); + + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + hdr_len = max(tun_vnet16_to_cpu(flags, hdr->csum_start) + tun_vnet16_to_cpu(flags, hdr->csum_offset) + 2, hdr_len); + hdr->hdr_len = cpu_to_tun_vnet16(flags, hdr_len); + } + + if (hdr_len > iov_iter_count(from)) + return -EINVAL; + + iov_iter_advance(from, sz - sizeof(*hdr)); + + return hdr_len; +} + +static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter, + const struct virtio_net_hdr *hdr) +{ + if (unlikely(iov_iter_count(iter) < sz)) + return -EINVAL; + + if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))) + return -EFAULT; + + iov_iter_advance(iter, sz - sizeof(*hdr)); + + return 0; +} + +static inline int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, + const struct virtio_net_hdr *hdr) +{ + return virtio_net_hdr_to_skb(skb, hdr, tun_vnet_is_little_endian(flags)); +} + +static inline int tun_vnet_hdr_from_skb(unsigned int flags, + const struct net_device *dev, + const struct sk_buff *skb, + struct virtio_net_hdr *hdr) +{ + int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; + + if (virtio_net_hdr_from_skb(skb, hdr, + tun_vnet_is_little_endian(flags), true, + vlan_hlen)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + if (net_ratelimit()) { + netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", + sinfo->gso_type, tun_vnet16_to_cpu(flags, hdr->gso_size), + tun_vnet16_to_cpu(flags, hdr->hdr_len)); + print_hex_dump(KERN_ERR, "tun: ", + DUMP_PREFIX_NONE, + 16, 1, skb->head, + min(tun_vnet16_to_cpu(flags, hdr->hdr_len), 64), true); + } + WARN_ON_ONCE(1); + return -EINVAL; + } + + return 0; +} + +#endif /* TUN_VNET_H */ diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 57d6e5abc30e..da24941a6e44 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -1421,6 +1421,19 @@ static const struct driver_info hg20f9_info = { .data = FLAG_EEPROM_MAC, }; +static const struct driver_info lyconsys_fibergecko100_info = { + .description = "LyconSys FiberGecko 100 USB 2.0 to SFP Adapter", + .bind = ax88178_bind, + .status = asix_status, + .link_reset = ax88178_link_reset, + .reset = ax88178_link_reset, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | + FLAG_MULTI_PACKET, + .rx_fixup = asix_rx_fixup_common, + .tx_fixup = asix_tx_fixup, + .data = 0x20061201, +}; + static const struct usb_device_id products [] = { { // Linksys USB200M @@ -1578,6 +1591,10 @@ static const struct usb_device_id products [] = { // Linux Automation GmbH USB 10Base-T1L USB_DEVICE(0x33f7, 0x0004), .driver_info = (unsigned long) &lxausb_t1l_info, +}, { + /* LyconSys FiberGecko 100 */ + USB_DEVICE(0x1d2a, 0x0801), + .driver_info = (unsigned long) &lyconsys_fibergecko100_info, }, { }, // END }; diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index e13e4920ee9b..88921c13b629 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -660,7 +660,7 @@ static const struct usb_device_id mbim_devs[] = { .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle, }, - /* Telit FN990 */ + /* Telit FN990A */ { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1071, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle, }, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index e9208a8d2bfa..14d1c85c8000 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1360,7 +1360,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ - {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, /* Telit FN920C04 */ @@ -1368,6 +1368,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x10c0, 0)}, /* Telit FE910C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10c4, 0)}, /* Telit FE910C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10c8, 0)}, /* Telit FE910C04 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10d0, 0)}, /* Telit FN990B */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 468c73974046..e1021148d3a6 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10079,6 +10079,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff) }, { USB_DEVICE(VENDOR_ID_TPLINK, 0x0601) }, { USB_DEVICE(VENDOR_ID_DLINK, 0xb301) }, + { USB_DEVICE(VENDOR_ID_DELL, 0xb097) }, { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, {} }; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ca81b212a246..5f21ce1013c4 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1537,14 +1537,12 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) nlmsg_end(skb, nlh); - /* fib_nl_{new,del}rule handling looks for net from skb->sk */ - skb->sk = dev_net(dev)->rtnl; if (add_it) { - err = fib_nl_newrule(skb, nlh, NULL); + err = fib_newrule(dev_net(dev), skb, nlh, NULL, true); if (err == -EEXIST) err = 0; } else { - err = fib_nl_delrule(skb, nlh, NULL); + err = fib_delrule(dev_net(dev), skb, nlh, NULL, true); if (err == -ENOENT) err = 0; } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 92516189e792..ac5bc78edfd7 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -227,9 +227,9 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, be32_to_cpu(fdb->vni))) goto nla_put_failure; - ci.ndm_used = jiffies_to_clock_t(now - fdb->used); + ci.ndm_used = jiffies_to_clock_t(now - READ_ONCE(fdb->used)); ci.ndm_confirmed = 0; - ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated); + ci.ndm_updated = jiffies_to_clock_t(now - READ_ONCE(fdb->updated)); ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) @@ -434,8 +434,12 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, struct vxlan_fdb *f; f = __vxlan_find_mac(vxlan, mac, vni); - if (f && f->used != jiffies) - f->used = jiffies; + if (f) { + unsigned long now = jiffies; + + if (READ_ONCE(f->used) != now) + WRITE_ONCE(f->used, now); + } return f; } @@ -1009,12 +1013,10 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, !(f->flags & NTF_VXLAN_ADDED_BY_USER)) { if (f->state != state) { f->state = state; - f->updated = jiffies; notify = 1; } if (f->flags != fdb_flags) { f->flags = fdb_flags; - f->updated = jiffies; notify = 1; } } @@ -1048,12 +1050,13 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, } if (ndm_flags & NTF_USE) - f->used = jiffies; + WRITE_ONCE(f->updated, jiffies); if (notify) { if (rd == NULL) rd = first_remote_rtnl(f); + WRITE_ONCE(f->updated, jiffies); err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH, swdev_notify, extack); if (err) @@ -1292,7 +1295,7 @@ int __vxlan_fdb_delete(struct vxlan_dev *vxlan, struct vxlan_fdb *f; int err = -ENOENT; - f = vxlan_find_mac(vxlan, addr, src_vni); + f = __vxlan_find_mac(vxlan, addr, src_vni); if (!f) return err; @@ -1459,9 +1462,13 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, ifindex = src_ifindex; #endif - f = vxlan_find_mac(vxlan, src_mac, vni); + f = __vxlan_find_mac(vxlan, src_mac, vni); if (likely(f)) { struct vxlan_rdst *rdst = first_remote_rcu(f); + unsigned long now = jiffies; + + if (READ_ONCE(f->updated) != now) + WRITE_ONCE(f->updated, now); if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) && rdst->remote_ifindex == ifindex)) @@ -1481,7 +1488,6 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, src_mac, &rdst->remote_ip.sa, &src_ip->sa); rdst->remote_ip = *src_ip; - f->updated = jiffies; vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL); } else { u32 hash_index = fdb_head_index(vxlan, src_mac, vni); @@ -1664,7 +1670,6 @@ static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph, return err <= 1; } -/* Callback from net/ipv4/udp.c to receive packets */ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) { struct vxlan_vni_node *vninode = NULL; @@ -1834,7 +1839,6 @@ drop: return 0; } -/* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors */ static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb) { struct vxlan_dev *vxlan; @@ -2852,7 +2856,7 @@ static void vxlan_cleanup(struct timer_list *t) if (f->flags & NTF_EXT_LEARNED) continue; - timeout = f->used + vxlan->cfg.age_interval * HZ; + timeout = READ_ONCE(f->updated) + vxlan->cfg.age_interval * HZ; if (time_before_eq(timeout, jiffies)) { netdev_dbg(vxlan->dev, "garbage collect %pM\n", @@ -4768,7 +4772,7 @@ vxlan_fdb_offloaded_set(struct net_device *dev, spin_lock_bh(&vxlan->hash_lock[hash_index]); - f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); + f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); if (!f) goto out; @@ -4824,7 +4828,7 @@ vxlan_fdb_external_learn_del(struct net_device *dev, hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni); spin_lock_bh(&vxlan->hash_lock[hash_index]); - f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); + f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); if (!f) err = -ENOENT; else if (f->flags & NTF_EXT_LEARNED) diff --git a/drivers/net/wwan/t7xx/t7xx_pci.c b/drivers/net/wwan/t7xx/t7xx_pci.c index 02f2ec7cf4ce..8bf63f2dcbbf 100644 --- a/drivers/net/wwan/t7xx/t7xx_pci.c +++ b/drivers/net/wwan/t7xx/t7xx_pci.c @@ -32,7 +32,6 @@ #include <linux/pci.h> #include <linux/pm.h> #include <linux/pm_runtime.h> -#include <linux/pm_wakeup.h> #include <linux/spinlock.h> #include "t7xx_mhccif.h" diff --git a/drivers/of/base.c b/drivers/of/base.c index af6c68bbb427..e37b088f1fad 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -824,6 +824,33 @@ struct device_node *of_get_child_by_name(const struct device_node *node, } EXPORT_SYMBOL(of_get_child_by_name); +/** + * of_get_available_child_by_name - Find the available child node by name for a given parent + * @node: parent node + * @name: child name to look for. + * + * This function looks for child node for given matching name and checks the + * device's availability for use. + * + * Return: A node pointer if found, with refcount incremented, use + * of_node_put() on it when done. + * Returns NULL if node is not found. + */ +struct device_node *of_get_available_child_by_name(const struct device_node *node, + const char *name) +{ + struct device_node *child; + + child = of_get_child_by_name(node, name); + if (child && !of_device_is_available(child)) { + of_node_put(child); + return NULL; + } + + return child; +} +EXPORT_SYMBOL(of_get_available_child_by_name); + struct device_node *__of_find_node_by_path(const struct device_node *parent, const char *path) { diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index c61e6427384c..9eb9e3c49f81 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -2,15 +2,6 @@ menu "S/390 network device drivers" depends on NETDEVICES && S390 -config LCS - def_tristate m - prompt "Lan Channel Station Interface" - depends on CCW && NETDEVICES && ETHERNET - help - Select this option if you want to use LCS networking on IBM System z. - To compile as a module, choose M. The module name is lcs. - If you do not use LCS, choose N. - config CTCM def_tristate m prompt "CTC and MPC SNA device support" @@ -98,7 +89,7 @@ config QETH_OSX config CCWGROUP tristate - default (LCS || CTCM || QETH || SMC) + default (CTCM || QETH || SMC) config ISM tristate "Support for ISM vPCI Adapter" diff --git a/drivers/s390/net/Makefile b/drivers/s390/net/Makefile index bc55ec316adb..b5aaba290127 100644 --- a/drivers/s390/net/Makefile +++ b/drivers/s390/net/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_CTCM) += ctcm.o fsm.o obj-$(CONFIG_NETIUCV) += netiucv.o fsm.o obj-$(CONFIG_SMSGIUCV) += smsgiucv.o obj-$(CONFIG_SMSGIUCV_EVENT) += smsgiucv_app.o -obj-$(CONFIG_LCS) += lcs.o qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o qeth_ethtool.o obj-$(CONFIG_QETH) += qeth.o qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c deleted file mode 100644 index 88db8378325a..000000000000 --- a/drivers/s390/net/lcs.c +++ /dev/null @@ -1,2385 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Linux for S/390 LAN channel station device driver - * - * Copyright IBM Corp. 1999, 2009 - * Author(s): Original Code written by - * DJ Barrow <djbarrow@de.ibm.com,barrow_dj@yahoo.com> - * Rewritten by - * Frank Pavlic <fpavlic@de.ibm.com> and - * Martin Schwidefsky <schwidefsky@de.ibm.com> - */ - -#define KMSG_COMPONENT "lcs" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/module.h> -#include <linux/if.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/inetdevice.h> -#include <linux/in.h> -#include <linux/igmp.h> -#include <linux/delay.h> -#include <linux/kthread.h> -#include <linux/slab.h> -#include <net/arp.h> -#include <net/ip.h> - -#include <asm/debug.h> -#include <asm/idals.h> -#include <asm/timex.h> -#include <linux/device.h> -#include <asm/ccwgroup.h> - -#include "lcs.h" - - -/* - * initialization string for output - */ - -static char version[] __initdata = "LCS driver"; - -/* - * the root device for lcs group devices - */ -static struct device *lcs_root_dev; - -/* - * Some prototypes. - */ -static void lcs_tasklet(unsigned long); -static void lcs_start_kernel_thread(struct work_struct *); -static void lcs_get_frames_cb(struct lcs_channel *, struct lcs_buffer *); -#ifdef CONFIG_IP_MULTICAST -static int lcs_send_delipm(struct lcs_card *, struct lcs_ipm_list *); -#endif /* CONFIG_IP_MULTICAST */ -static int lcs_recovery(void *ptr); - -/* - * Debug Facility Stuff - */ -static char debug_buffer[255]; -static debug_info_t *lcs_dbf_setup; -static debug_info_t *lcs_dbf_trace; - -/* - * LCS Debug Facility functions - */ -static void -lcs_unregister_debug_facility(void) -{ - debug_unregister(lcs_dbf_setup); - debug_unregister(lcs_dbf_trace); -} - -static int -lcs_register_debug_facility(void) -{ - lcs_dbf_setup = debug_register("lcs_setup", 2, 1, 8); - lcs_dbf_trace = debug_register("lcs_trace", 4, 1, 8); - if (lcs_dbf_setup == NULL || lcs_dbf_trace == NULL) { - pr_err("Not enough memory for debug facility.\n"); - lcs_unregister_debug_facility(); - return -ENOMEM; - } - debug_register_view(lcs_dbf_setup, &debug_hex_ascii_view); - debug_set_level(lcs_dbf_setup, 2); - debug_register_view(lcs_dbf_trace, &debug_hex_ascii_view); - debug_set_level(lcs_dbf_trace, 2); - return 0; -} - -/* - * Allocate io buffers. - */ -static int -lcs_alloc_channel(struct lcs_channel *channel) -{ - int cnt; - - LCS_DBF_TEXT(2, setup, "ichalloc"); - for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) { - /* alloc memory fo iobuffer */ - channel->iob[cnt].data = - kzalloc(LCS_IOBUFFERSIZE, GFP_DMA | GFP_KERNEL); - if (channel->iob[cnt].data == NULL) - break; - channel->iob[cnt].state = LCS_BUF_STATE_EMPTY; - } - if (cnt < LCS_NUM_BUFFS) { - /* Not all io buffers could be allocated. */ - LCS_DBF_TEXT(2, setup, "echalloc"); - while (cnt-- > 0) - kfree(channel->iob[cnt].data); - return -ENOMEM; - } - return 0; -} - -/* - * Free io buffers. - */ -static void -lcs_free_channel(struct lcs_channel *channel) -{ - int cnt; - - LCS_DBF_TEXT(2, setup, "ichfree"); - for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) { - kfree(channel->iob[cnt].data); - channel->iob[cnt].data = NULL; - } -} - -/* - * Cleanup channel. - */ -static void -lcs_cleanup_channel(struct lcs_channel *channel) -{ - LCS_DBF_TEXT(3, setup, "cleanch"); - /* Kill write channel tasklets. */ - tasklet_kill(&channel->irq_tasklet); - /* Free channel buffers. */ - lcs_free_channel(channel); -} - -/* - * LCS free memory for card and channels. - */ -static void -lcs_free_card(struct lcs_card *card) -{ - LCS_DBF_TEXT(2, setup, "remcard"); - LCS_DBF_HEX(2, setup, &card, sizeof(void*)); - kfree(card); -} - -/* - * LCS alloc memory for card and channels - */ -static struct lcs_card * -lcs_alloc_card(void) -{ - struct lcs_card *card; - int rc; - - LCS_DBF_TEXT(2, setup, "alloclcs"); - - card = kzalloc(sizeof(struct lcs_card), GFP_KERNEL | GFP_DMA); - if (card == NULL) - return NULL; - card->lan_type = LCS_FRAME_TYPE_AUTO; - card->pkt_seq = 0; - card->lancmd_timeout = LCS_LANCMD_TIMEOUT_DEFAULT; - /* Allocate io buffers for the read channel. */ - rc = lcs_alloc_channel(&card->read); - if (rc){ - LCS_DBF_TEXT(2, setup, "iccwerr"); - lcs_free_card(card); - return NULL; - } - /* Allocate io buffers for the write channel. */ - rc = lcs_alloc_channel(&card->write); - if (rc) { - LCS_DBF_TEXT(2, setup, "iccwerr"); - lcs_cleanup_channel(&card->read); - lcs_free_card(card); - return NULL; - } - -#ifdef CONFIG_IP_MULTICAST - INIT_LIST_HEAD(&card->ipm_list); -#endif - LCS_DBF_HEX(2, setup, &card, sizeof(void*)); - return card; -} - -/* - * Setup read channel. - */ -static void -lcs_setup_read_ccws(struct lcs_card *card) -{ - int cnt; - - LCS_DBF_TEXT(2, setup, "ireadccw"); - /* Setup read ccws. */ - memset(card->read.ccws, 0, sizeof (struct ccw1) * (LCS_NUM_BUFFS + 1)); - for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) { - card->read.ccws[cnt].cmd_code = LCS_CCW_READ; - card->read.ccws[cnt].count = LCS_IOBUFFERSIZE; - card->read.ccws[cnt].flags = - CCW_FLAG_CC | CCW_FLAG_SLI | CCW_FLAG_PCI; - /* - * Note: we have allocated the buffer with GFP_DMA, so - * we do not need to do set_normalized_cda. - */ - card->read.ccws[cnt].cda = - virt_to_dma32(card->read.iob[cnt].data); - ((struct lcs_header *) - card->read.iob[cnt].data)->offset = LCS_ILLEGAL_OFFSET; - card->read.iob[cnt].callback = lcs_get_frames_cb; - card->read.iob[cnt].state = LCS_BUF_STATE_READY; - card->read.iob[cnt].count = LCS_IOBUFFERSIZE; - } - card->read.ccws[0].flags &= ~CCW_FLAG_PCI; - card->read.ccws[LCS_NUM_BUFFS - 1].flags &= ~CCW_FLAG_PCI; - card->read.ccws[LCS_NUM_BUFFS - 1].flags |= CCW_FLAG_SUSPEND; - /* Last ccw is a tic (transfer in channel). */ - card->read.ccws[LCS_NUM_BUFFS].cmd_code = LCS_CCW_TRANSFER; - card->read.ccws[LCS_NUM_BUFFS].cda = virt_to_dma32(card->read.ccws); - /* Setg initial state of the read channel. */ - card->read.state = LCS_CH_STATE_INIT; - - card->read.io_idx = 0; - card->read.buf_idx = 0; -} - -static void -lcs_setup_read(struct lcs_card *card) -{ - LCS_DBF_TEXT(3, setup, "initread"); - - lcs_setup_read_ccws(card); - /* Initialize read channel tasklet. */ - card->read.irq_tasklet.data = (unsigned long) &card->read; - card->read.irq_tasklet.func = lcs_tasklet; - /* Initialize waitqueue. */ - init_waitqueue_head(&card->read.wait_q); -} - -/* - * Setup write channel. - */ -static void -lcs_setup_write_ccws(struct lcs_card *card) -{ - int cnt; - - LCS_DBF_TEXT(3, setup, "iwritccw"); - /* Setup write ccws. */ - memset(card->write.ccws, 0, sizeof(struct ccw1) * (LCS_NUM_BUFFS + 1)); - for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) { - card->write.ccws[cnt].cmd_code = LCS_CCW_WRITE; - card->write.ccws[cnt].count = 0; - card->write.ccws[cnt].flags = - CCW_FLAG_SUSPEND | CCW_FLAG_CC | CCW_FLAG_SLI; - /* - * Note: we have allocated the buffer with GFP_DMA, so - * we do not need to do set_normalized_cda. - */ - card->write.ccws[cnt].cda = - virt_to_dma32(card->write.iob[cnt].data); - } - /* Last ccw is a tic (transfer in channel). */ - card->write.ccws[LCS_NUM_BUFFS].cmd_code = LCS_CCW_TRANSFER; - card->write.ccws[LCS_NUM_BUFFS].cda = virt_to_dma32(card->write.ccws); - /* Set initial state of the write channel. */ - card->read.state = LCS_CH_STATE_INIT; - - card->write.io_idx = 0; - card->write.buf_idx = 0; -} - -static void -lcs_setup_write(struct lcs_card *card) -{ - LCS_DBF_TEXT(3, setup, "initwrit"); - - lcs_setup_write_ccws(card); - /* Initialize write channel tasklet. */ - card->write.irq_tasklet.data = (unsigned long) &card->write; - card->write.irq_tasklet.func = lcs_tasklet; - /* Initialize waitqueue. */ - init_waitqueue_head(&card->write.wait_q); -} - -static void -lcs_set_allowed_threads(struct lcs_card *card, unsigned long threads) -{ - unsigned long flags; - - spin_lock_irqsave(&card->mask_lock, flags); - card->thread_allowed_mask = threads; - spin_unlock_irqrestore(&card->mask_lock, flags); - wake_up(&card->wait_q); -} -static int lcs_threads_running(struct lcs_card *card, unsigned long threads) -{ - unsigned long flags; - int rc = 0; - - spin_lock_irqsave(&card->mask_lock, flags); - rc = (card->thread_running_mask & threads); - spin_unlock_irqrestore(&card->mask_lock, flags); - return rc; -} - -static int -lcs_wait_for_threads(struct lcs_card *card, unsigned long threads) -{ - return wait_event_interruptible(card->wait_q, - lcs_threads_running(card, threads) == 0); -} - -static int lcs_set_thread_start_bit(struct lcs_card *card, unsigned long thread) -{ - unsigned long flags; - - spin_lock_irqsave(&card->mask_lock, flags); - if ( !(card->thread_allowed_mask & thread) || - (card->thread_start_mask & thread) ) { - spin_unlock_irqrestore(&card->mask_lock, flags); - return -EPERM; - } - card->thread_start_mask |= thread; - spin_unlock_irqrestore(&card->mask_lock, flags); - return 0; -} - -static void -lcs_clear_thread_running_bit(struct lcs_card *card, unsigned long thread) -{ - unsigned long flags; - - spin_lock_irqsave(&card->mask_lock, flags); - card->thread_running_mask &= ~thread; - spin_unlock_irqrestore(&card->mask_lock, flags); - wake_up(&card->wait_q); -} - -static int __lcs_do_run_thread(struct lcs_card *card, unsigned long thread) -{ - unsigned long flags; - int rc = 0; - - spin_lock_irqsave(&card->mask_lock, flags); - if (card->thread_start_mask & thread){ - if ((card->thread_allowed_mask & thread) && - !(card->thread_running_mask & thread)){ - rc = 1; - card->thread_start_mask &= ~thread; - card->thread_running_mask |= thread; - } else - rc = -EPERM; - } - spin_unlock_irqrestore(&card->mask_lock, flags); - return rc; -} - -static int -lcs_do_run_thread(struct lcs_card *card, unsigned long thread) -{ - int rc = 0; - wait_event(card->wait_q, - (rc = __lcs_do_run_thread(card, thread)) >= 0); - return rc; -} - -static int -lcs_do_start_thread(struct lcs_card *card, unsigned long thread) -{ - unsigned long flags; - int rc = 0; - - spin_lock_irqsave(&card->mask_lock, flags); - LCS_DBF_TEXT_(4, trace, " %02x%02x%02x", - (u8) card->thread_start_mask, - (u8) card->thread_allowed_mask, - (u8) card->thread_running_mask); - rc = (card->thread_start_mask & thread); - spin_unlock_irqrestore(&card->mask_lock, flags); - return rc; -} - -/* - * Initialize channels,card and state machines. - */ -static void -lcs_setup_card(struct lcs_card *card) -{ - LCS_DBF_TEXT(2, setup, "initcard"); - LCS_DBF_HEX(2, setup, &card, sizeof(void*)); - - lcs_setup_read(card); - lcs_setup_write(card); - /* Set cards initial state. */ - card->state = DEV_STATE_DOWN; - card->tx_buffer = NULL; - card->tx_emitted = 0; - - init_waitqueue_head(&card->wait_q); - spin_lock_init(&card->lock); - spin_lock_init(&card->ipm_lock); - spin_lock_init(&card->mask_lock); -#ifdef CONFIG_IP_MULTICAST - INIT_LIST_HEAD(&card->ipm_list); -#endif - INIT_LIST_HEAD(&card->lancmd_waiters); -} - -static void lcs_clear_multicast_list(struct lcs_card *card) -{ -#ifdef CONFIG_IP_MULTICAST - struct lcs_ipm_list *ipm; - unsigned long flags; - - /* Free multicast list. */ - LCS_DBF_TEXT(3, setup, "clmclist"); - spin_lock_irqsave(&card->ipm_lock, flags); - while (!list_empty(&card->ipm_list)){ - ipm = list_entry(card->ipm_list.next, - struct lcs_ipm_list, list); - list_del(&ipm->list); - if (ipm->ipm_state != LCS_IPM_STATE_SET_REQUIRED){ - spin_unlock_irqrestore(&card->ipm_lock, flags); - lcs_send_delipm(card, ipm); - spin_lock_irqsave(&card->ipm_lock, flags); - } - kfree(ipm); - } - spin_unlock_irqrestore(&card->ipm_lock, flags); -#endif -} - -/* - * Cleanup channels,card and state machines. - */ -static void -lcs_cleanup_card(struct lcs_card *card) -{ - - LCS_DBF_TEXT(3, setup, "cleancrd"); - LCS_DBF_HEX(2,setup,&card,sizeof(void*)); - - if (card->dev != NULL) - free_netdev(card->dev); - /* Cleanup channels. */ - lcs_cleanup_channel(&card->write); - lcs_cleanup_channel(&card->read); -} - -/* - * Start channel. - */ -static int -lcs_start_channel(struct lcs_channel *channel) -{ - unsigned long flags; - int rc; - - LCS_DBF_TEXT_(4, trace,"ssch%s", dev_name(&channel->ccwdev->dev)); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_start(channel->ccwdev, - channel->ccws + channel->io_idx, 0, 0, - DOIO_DENY_PREFETCH | DOIO_ALLOW_SUSPEND); - if (rc == 0) - channel->state = LCS_CH_STATE_RUNNING; - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - if (rc) { - LCS_DBF_TEXT_(4,trace,"essh%s", - dev_name(&channel->ccwdev->dev)); - dev_err(&channel->ccwdev->dev, - "Starting an LCS device resulted in an error," - " rc=%d!\n", rc); - } - return rc; -} - -static int -lcs_clear_channel(struct lcs_channel *channel) -{ - unsigned long flags; - int rc; - - LCS_DBF_TEXT(4,trace,"clearch"); - LCS_DBF_TEXT_(4, trace, "%s", dev_name(&channel->ccwdev->dev)); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_clear(channel->ccwdev, 0); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - if (rc) { - LCS_DBF_TEXT_(4, trace, "ecsc%s", - dev_name(&channel->ccwdev->dev)); - return rc; - } - wait_event(channel->wait_q, (channel->state == LCS_CH_STATE_CLEARED)); - channel->state = LCS_CH_STATE_STOPPED; - return rc; -} - - -/* - * Stop channel. - */ -static int -lcs_stop_channel(struct lcs_channel *channel) -{ - unsigned long flags; - int rc; - - if (channel->state == LCS_CH_STATE_STOPPED) - return 0; - LCS_DBF_TEXT(4,trace,"haltsch"); - LCS_DBF_TEXT_(4, trace, "%s", dev_name(&channel->ccwdev->dev)); - channel->state = LCS_CH_STATE_INIT; - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_halt(channel->ccwdev, 0); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - if (rc) { - LCS_DBF_TEXT_(4, trace, "ehsc%s", - dev_name(&channel->ccwdev->dev)); - return rc; - } - /* Asynchronous halt initialted. Wait for its completion. */ - wait_event(channel->wait_q, (channel->state == LCS_CH_STATE_HALTED)); - lcs_clear_channel(channel); - return 0; -} - -/* - * start read and write channel - */ -static int -lcs_start_channels(struct lcs_card *card) -{ - int rc; - - LCS_DBF_TEXT(2, trace, "chstart"); - /* start read channel */ - rc = lcs_start_channel(&card->read); - if (rc) - return rc; - /* start write channel */ - rc = lcs_start_channel(&card->write); - if (rc) - lcs_stop_channel(&card->read); - return rc; -} - -/* - * stop read and write channel - */ -static int -lcs_stop_channels(struct lcs_card *card) -{ - LCS_DBF_TEXT(2, trace, "chhalt"); - lcs_stop_channel(&card->read); - lcs_stop_channel(&card->write); - return 0; -} - -/* - * Get empty buffer. - */ -static struct lcs_buffer * -__lcs_get_buffer(struct lcs_channel *channel) -{ - int index; - - LCS_DBF_TEXT(5, trace, "_getbuff"); - index = channel->io_idx; - do { - if (channel->iob[index].state == LCS_BUF_STATE_EMPTY) { - channel->iob[index].state = LCS_BUF_STATE_LOCKED; - return channel->iob + index; - } - index = (index + 1) & (LCS_NUM_BUFFS - 1); - } while (index != channel->io_idx); - return NULL; -} - -static struct lcs_buffer * -lcs_get_buffer(struct lcs_channel *channel) -{ - struct lcs_buffer *buffer; - unsigned long flags; - - LCS_DBF_TEXT(5, trace, "getbuff"); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - buffer = __lcs_get_buffer(channel); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - return buffer; -} - -/* - * Resume channel program if the channel is suspended. - */ -static int -__lcs_resume_channel(struct lcs_channel *channel) -{ - int rc; - - if (channel->state != LCS_CH_STATE_SUSPENDED) - return 0; - if (channel->ccws[channel->io_idx].flags & CCW_FLAG_SUSPEND) - return 0; - LCS_DBF_TEXT_(5, trace, "rsch%s", dev_name(&channel->ccwdev->dev)); - rc = ccw_device_resume(channel->ccwdev); - if (rc) { - LCS_DBF_TEXT_(4, trace, "ersc%s", - dev_name(&channel->ccwdev->dev)); - dev_err(&channel->ccwdev->dev, - "Sending data from the LCS device to the LAN failed" - " with rc=%d\n",rc); - } else - channel->state = LCS_CH_STATE_RUNNING; - return rc; - -} - -/* - * Make a buffer ready for processing. - */ -static void __lcs_ready_buffer_bits(struct lcs_channel *channel, int index) -{ - int prev, next; - - LCS_DBF_TEXT(5, trace, "rdybits"); - prev = (index - 1) & (LCS_NUM_BUFFS - 1); - next = (index + 1) & (LCS_NUM_BUFFS - 1); - /* Check if we may clear the suspend bit of this buffer. */ - if (channel->ccws[next].flags & CCW_FLAG_SUSPEND) { - /* Check if we have to set the PCI bit. */ - if (!(channel->ccws[prev].flags & CCW_FLAG_SUSPEND)) - /* Suspend bit of the previous buffer is not set. */ - channel->ccws[index].flags |= CCW_FLAG_PCI; - /* Suspend bit of the next buffer is set. */ - channel->ccws[index].flags &= ~CCW_FLAG_SUSPEND; - } -} - -static int -lcs_ready_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - unsigned long flags; - int index, rc; - - LCS_DBF_TEXT(5, trace, "rdybuff"); - BUG_ON(buffer->state != LCS_BUF_STATE_LOCKED && - buffer->state != LCS_BUF_STATE_PROCESSED); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - buffer->state = LCS_BUF_STATE_READY; - index = buffer - channel->iob; - /* Set length. */ - channel->ccws[index].count = buffer->count; - /* Check relevant PCI/suspend bits. */ - __lcs_ready_buffer_bits(channel, index); - rc = __lcs_resume_channel(channel); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - return rc; -} - -/* - * Mark the buffer as processed. Take care of the suspend bit - * of the previous buffer. This function is called from - * interrupt context, so the lock must not be taken. - */ -static int -__lcs_processed_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - int index, prev, next; - - LCS_DBF_TEXT(5, trace, "prcsbuff"); - BUG_ON(buffer->state != LCS_BUF_STATE_READY); - buffer->state = LCS_BUF_STATE_PROCESSED; - index = buffer - channel->iob; - prev = (index - 1) & (LCS_NUM_BUFFS - 1); - next = (index + 1) & (LCS_NUM_BUFFS - 1); - /* Set the suspend bit and clear the PCI bit of this buffer. */ - channel->ccws[index].flags |= CCW_FLAG_SUSPEND; - channel->ccws[index].flags &= ~CCW_FLAG_PCI; - /* Check the suspend bit of the previous buffer. */ - if (channel->iob[prev].state == LCS_BUF_STATE_READY) { - /* - * Previous buffer is in state ready. It might have - * happened in lcs_ready_buffer that the suspend bit - * has not been cleared to avoid an endless loop. - * Do it now. - */ - __lcs_ready_buffer_bits(channel, prev); - } - /* Clear PCI bit of next buffer. */ - channel->ccws[next].flags &= ~CCW_FLAG_PCI; - return __lcs_resume_channel(channel); -} - -/* - * Put a processed buffer back to state empty. - */ -static void -lcs_release_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - unsigned long flags; - - LCS_DBF_TEXT(5, trace, "relbuff"); - BUG_ON(buffer->state != LCS_BUF_STATE_LOCKED && - buffer->state != LCS_BUF_STATE_PROCESSED); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - buffer->state = LCS_BUF_STATE_EMPTY; - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); -} - -/* - * Get buffer for a lan command. - */ -static struct lcs_buffer * -lcs_get_lancmd(struct lcs_card *card, int count) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(4, trace, "getlncmd"); - /* Get buffer and wait if none is available. */ - wait_event(card->write.wait_q, - ((buffer = lcs_get_buffer(&card->write)) != NULL)); - count += sizeof(struct lcs_header); - *(__u16 *)(buffer->data + count) = 0; - buffer->count = count + sizeof(__u16); - buffer->callback = lcs_release_buffer; - cmd = (struct lcs_cmd *) buffer->data; - cmd->offset = count; - cmd->type = LCS_FRAME_TYPE_CONTROL; - cmd->slot = 0; - return buffer; -} - - -static void -lcs_get_reply(struct lcs_reply *reply) -{ - refcount_inc(&reply->refcnt); -} - -static void -lcs_put_reply(struct lcs_reply *reply) -{ - if (refcount_dec_and_test(&reply->refcnt)) - kfree(reply); -} - -static struct lcs_reply * -lcs_alloc_reply(struct lcs_cmd *cmd) -{ - struct lcs_reply *reply; - - LCS_DBF_TEXT(4, trace, "getreply"); - - reply = kzalloc(sizeof(struct lcs_reply), GFP_ATOMIC); - if (!reply) - return NULL; - refcount_set(&reply->refcnt, 1); - reply->sequence_no = cmd->sequence_no; - reply->received = 0; - reply->rc = 0; - init_waitqueue_head(&reply->wait_q); - - return reply; -} - -/* - * Notifier function for lancmd replies. Called from read irq. - */ -static void -lcs_notify_lancmd_waiters(struct lcs_card *card, struct lcs_cmd *cmd) -{ - struct list_head *l, *n; - struct lcs_reply *reply; - - LCS_DBF_TEXT(4, trace, "notiwait"); - spin_lock(&card->lock); - list_for_each_safe(l, n, &card->lancmd_waiters) { - reply = list_entry(l, struct lcs_reply, list); - if (reply->sequence_no == cmd->sequence_no) { - lcs_get_reply(reply); - list_del_init(&reply->list); - if (reply->callback != NULL) - reply->callback(card, cmd); - reply->received = 1; - reply->rc = cmd->return_code; - wake_up(&reply->wait_q); - lcs_put_reply(reply); - break; - } - } - spin_unlock(&card->lock); -} - -/* - * Emit buffer of a lan command. - */ -static void -lcs_lancmd_timeout(struct timer_list *t) -{ - struct lcs_reply *reply = from_timer(reply, t, timer); - struct lcs_reply *list_reply, *r; - unsigned long flags; - - LCS_DBF_TEXT(4, trace, "timeout"); - spin_lock_irqsave(&reply->card->lock, flags); - list_for_each_entry_safe(list_reply, r, - &reply->card->lancmd_waiters,list) { - if (reply == list_reply) { - lcs_get_reply(reply); - list_del_init(&reply->list); - spin_unlock_irqrestore(&reply->card->lock, flags); - reply->received = 1; - reply->rc = -ETIME; - wake_up(&reply->wait_q); - lcs_put_reply(reply); - return; - } - } - spin_unlock_irqrestore(&reply->card->lock, flags); -} - -static int -lcs_send_lancmd(struct lcs_card *card, struct lcs_buffer *buffer, - void (*reply_callback)(struct lcs_card *, struct lcs_cmd *)) -{ - struct lcs_reply *reply; - struct lcs_cmd *cmd; - unsigned long flags; - int rc; - - LCS_DBF_TEXT(4, trace, "sendcmd"); - cmd = (struct lcs_cmd *) buffer->data; - cmd->return_code = 0; - cmd->sequence_no = card->sequence_no++; - reply = lcs_alloc_reply(cmd); - if (!reply) - return -ENOMEM; - reply->callback = reply_callback; - reply->card = card; - spin_lock_irqsave(&card->lock, flags); - list_add_tail(&reply->list, &card->lancmd_waiters); - spin_unlock_irqrestore(&card->lock, flags); - - buffer->callback = lcs_release_buffer; - rc = lcs_ready_buffer(&card->write, buffer); - if (rc) - return rc; - timer_setup(&reply->timer, lcs_lancmd_timeout, 0); - mod_timer(&reply->timer, jiffies + HZ * card->lancmd_timeout); - wait_event(reply->wait_q, reply->received); - del_timer_sync(&reply->timer); - LCS_DBF_TEXT_(4, trace, "rc:%d",reply->rc); - rc = reply->rc; - lcs_put_reply(reply); - return rc ? -EIO : 0; -} - -/* - * LCS startup command - */ -static int -lcs_send_startup(struct lcs_card *card, __u8 initiator) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "startup"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_STARTUP; - cmd->initiator = initiator; - cmd->cmd.lcs_startup.buff_size = LCS_IOBUFFERSIZE; - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * LCS shutdown command - */ -static int -lcs_send_shutdown(struct lcs_card *card) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "shutdown"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_SHUTDOWN; - cmd->initiator = LCS_INITIATOR_TCPIP; - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * LCS lanstat command - */ -static void -__lcs_lanstat_cb(struct lcs_card *card, struct lcs_cmd *cmd) -{ - LCS_DBF_TEXT(2, trace, "statcb"); - memcpy(card->mac, cmd->cmd.lcs_lanstat_cmd.mac_addr, LCS_MAC_LENGTH); -} - -static int -lcs_send_lanstat(struct lcs_card *card) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2,trace, "cmdstat"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - /* Setup lanstat command. */ - cmd->cmd_code = LCS_CMD_LANSTAT; - cmd->initiator = LCS_INITIATOR_TCPIP; - cmd->cmd.lcs_std_cmd.lan_type = card->lan_type; - cmd->cmd.lcs_std_cmd.portno = card->portno; - return lcs_send_lancmd(card, buffer, __lcs_lanstat_cb); -} - -/* - * send stoplan command - */ -static int -lcs_send_stoplan(struct lcs_card *card, __u8 initiator) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "cmdstpln"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_STOPLAN; - cmd->initiator = initiator; - cmd->cmd.lcs_std_cmd.lan_type = card->lan_type; - cmd->cmd.lcs_std_cmd.portno = card->portno; - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * send startlan command - */ -static void -__lcs_send_startlan_cb(struct lcs_card *card, struct lcs_cmd *cmd) -{ - LCS_DBF_TEXT(2, trace, "srtlancb"); - card->lan_type = cmd->cmd.lcs_std_cmd.lan_type; - card->portno = cmd->cmd.lcs_std_cmd.portno; -} - -static int -lcs_send_startlan(struct lcs_card *card, __u8 initiator) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "cmdstaln"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_STARTLAN; - cmd->initiator = initiator; - cmd->cmd.lcs_std_cmd.lan_type = card->lan_type; - cmd->cmd.lcs_std_cmd.portno = card->portno; - return lcs_send_lancmd(card, buffer, __lcs_send_startlan_cb); -} - -#ifdef CONFIG_IP_MULTICAST -/* - * send setipm command (Multicast) - */ -static int -lcs_send_setipm(struct lcs_card *card,struct lcs_ipm_list *ipm_list) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "cmdsetim"); - buffer = lcs_get_lancmd(card, LCS_MULTICAST_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_SETIPM; - cmd->initiator = LCS_INITIATOR_TCPIP; - cmd->cmd.lcs_qipassist.lan_type = card->lan_type; - cmd->cmd.lcs_qipassist.portno = card->portno; - cmd->cmd.lcs_qipassist.version = 4; - cmd->cmd.lcs_qipassist.num_ip_pairs = 1; - memcpy(cmd->cmd.lcs_qipassist.lcs_ipass_ctlmsg.ip_mac_pair, - &ipm_list->ipm, sizeof (struct lcs_ip_mac_pair)); - LCS_DBF_TEXT_(2, trace, "%x",ipm_list->ipm.ip_addr); - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * send delipm command (Multicast) - */ -static int -lcs_send_delipm(struct lcs_card *card,struct lcs_ipm_list *ipm_list) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "cmddelim"); - buffer = lcs_get_lancmd(card, LCS_MULTICAST_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_DELIPM; - cmd->initiator = LCS_INITIATOR_TCPIP; - cmd->cmd.lcs_qipassist.lan_type = card->lan_type; - cmd->cmd.lcs_qipassist.portno = card->portno; - cmd->cmd.lcs_qipassist.version = 4; - cmd->cmd.lcs_qipassist.num_ip_pairs = 1; - memcpy(cmd->cmd.lcs_qipassist.lcs_ipass_ctlmsg.ip_mac_pair, - &ipm_list->ipm, sizeof (struct lcs_ip_mac_pair)); - LCS_DBF_TEXT_(2, trace, "%x",ipm_list->ipm.ip_addr); - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * check if multicast is supported by LCS - */ -static void -__lcs_check_multicast_cb(struct lcs_card *card, struct lcs_cmd *cmd) -{ - LCS_DBF_TEXT(2, trace, "chkmccb"); - card->ip_assists_supported = - cmd->cmd.lcs_qipassist.ip_assists_supported; - card->ip_assists_enabled = - cmd->cmd.lcs_qipassist.ip_assists_enabled; -} - -static int -lcs_check_multicast_support(struct lcs_card *card) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - int rc; - - LCS_DBF_TEXT(2, trace, "cmdqipa"); - /* Send query ipassist. */ - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_QIPASSIST; - cmd->initiator = LCS_INITIATOR_TCPIP; - cmd->cmd.lcs_qipassist.lan_type = card->lan_type; - cmd->cmd.lcs_qipassist.portno = card->portno; - cmd->cmd.lcs_qipassist.version = 4; - cmd->cmd.lcs_qipassist.num_ip_pairs = 1; - rc = lcs_send_lancmd(card, buffer, __lcs_check_multicast_cb); - if (rc != 0) { - pr_err("Query IPAssist failed. Assuming unsupported!\n"); - return -EOPNOTSUPP; - } - if (card->ip_assists_supported & LCS_IPASS_MULTICAST_SUPPORT) - return 0; - return -EOPNOTSUPP; -} - -/* - * set or del multicast address on LCS card - */ -static void -lcs_fix_multicast_list(struct lcs_card *card) -{ - struct list_head failed_list; - struct lcs_ipm_list *ipm, *tmp; - unsigned long flags; - int rc; - - LCS_DBF_TEXT(4,trace, "fixipm"); - INIT_LIST_HEAD(&failed_list); - spin_lock_irqsave(&card->ipm_lock, flags); -list_modified: - list_for_each_entry_safe(ipm, tmp, &card->ipm_list, list){ - switch (ipm->ipm_state) { - case LCS_IPM_STATE_SET_REQUIRED: - /* del from ipm_list so no one else can tamper with - * this entry */ - list_del_init(&ipm->list); - spin_unlock_irqrestore(&card->ipm_lock, flags); - rc = lcs_send_setipm(card, ipm); - spin_lock_irqsave(&card->ipm_lock, flags); - if (rc) { - pr_info("Adding multicast address failed." - " Table possibly full!\n"); - /* store ipm in failed list -> will be added - * to ipm_list again, so a retry will be done - * during the next call of this function */ - list_add_tail(&ipm->list, &failed_list); - } else { - ipm->ipm_state = LCS_IPM_STATE_ON_CARD; - /* re-insert into ipm_list */ - list_add_tail(&ipm->list, &card->ipm_list); - } - goto list_modified; - case LCS_IPM_STATE_DEL_REQUIRED: - list_del(&ipm->list); - spin_unlock_irqrestore(&card->ipm_lock, flags); - lcs_send_delipm(card, ipm); - spin_lock_irqsave(&card->ipm_lock, flags); - kfree(ipm); - goto list_modified; - case LCS_IPM_STATE_ON_CARD: - break; - } - } - /* re-insert all entries from the failed_list into ipm_list */ - list_for_each_entry_safe(ipm, tmp, &failed_list, list) - list_move_tail(&ipm->list, &card->ipm_list); - - spin_unlock_irqrestore(&card->ipm_lock, flags); -} - -/* - * get mac address for the relevant Multicast address - */ -static void -lcs_get_mac_for_ipm(__be32 ipm, char *mac, struct net_device *dev) -{ - LCS_DBF_TEXT(4,trace, "getmac"); - ip_eth_mc_map(ipm, mac); -} - -/* - * function called by net device to handle multicast address relevant things - */ -static void lcs_remove_mc_addresses(struct lcs_card *card, - struct in_device *in4_dev) -{ - struct ip_mc_list *im4; - struct list_head *l; - struct lcs_ipm_list *ipm; - unsigned long flags; - char buf[MAX_ADDR_LEN]; - - LCS_DBF_TEXT(4, trace, "remmclst"); - spin_lock_irqsave(&card->ipm_lock, flags); - list_for_each(l, &card->ipm_list) { - ipm = list_entry(l, struct lcs_ipm_list, list); - for (im4 = rcu_dereference(in4_dev->mc_list); - im4 != NULL; im4 = rcu_dereference(im4->next_rcu)) { - lcs_get_mac_for_ipm(im4->multiaddr, buf, card->dev); - if ( (ipm->ipm.ip_addr == im4->multiaddr) && - (memcmp(buf, &ipm->ipm.mac_addr, - LCS_MAC_LENGTH) == 0) ) - break; - } - if (im4 == NULL) - ipm->ipm_state = LCS_IPM_STATE_DEL_REQUIRED; - } - spin_unlock_irqrestore(&card->ipm_lock, flags); -} - -static struct lcs_ipm_list *lcs_check_addr_entry(struct lcs_card *card, - struct ip_mc_list *im4, - char *buf) -{ - struct lcs_ipm_list *tmp, *ipm = NULL; - struct list_head *l; - unsigned long flags; - - LCS_DBF_TEXT(4, trace, "chkmcent"); - spin_lock_irqsave(&card->ipm_lock, flags); - list_for_each(l, &card->ipm_list) { - tmp = list_entry(l, struct lcs_ipm_list, list); - if ( (tmp->ipm.ip_addr == im4->multiaddr) && - (memcmp(buf, &tmp->ipm.mac_addr, - LCS_MAC_LENGTH) == 0) ) { - ipm = tmp; - break; - } - } - spin_unlock_irqrestore(&card->ipm_lock, flags); - return ipm; -} - -static void lcs_set_mc_addresses(struct lcs_card *card, - struct in_device *in4_dev) -{ - - struct ip_mc_list *im4; - struct lcs_ipm_list *ipm; - char buf[MAX_ADDR_LEN]; - unsigned long flags; - - LCS_DBF_TEXT(4, trace, "setmclst"); - for (im4 = rcu_dereference(in4_dev->mc_list); im4 != NULL; - im4 = rcu_dereference(im4->next_rcu)) { - lcs_get_mac_for_ipm(im4->multiaddr, buf, card->dev); - ipm = lcs_check_addr_entry(card, im4, buf); - if (ipm != NULL) - continue; /* Address already in list. */ - ipm = kzalloc(sizeof(struct lcs_ipm_list), GFP_ATOMIC); - if (ipm == NULL) { - pr_info("Not enough memory to add" - " new multicast entry!\n"); - break; - } - memcpy(&ipm->ipm.mac_addr, buf, LCS_MAC_LENGTH); - ipm->ipm.ip_addr = im4->multiaddr; - ipm->ipm_state = LCS_IPM_STATE_SET_REQUIRED; - spin_lock_irqsave(&card->ipm_lock, flags); - LCS_DBF_HEX(2,trace,&ipm->ipm.ip_addr,4); - list_add(&ipm->list, &card->ipm_list); - spin_unlock_irqrestore(&card->ipm_lock, flags); - } -} - -static int -lcs_register_mc_addresses(void *data) -{ - struct lcs_card *card; - struct in_device *in4_dev; - - card = (struct lcs_card *) data; - - if (!lcs_do_run_thread(card, LCS_SET_MC_THREAD)) - return 0; - LCS_DBF_TEXT(4, trace, "regmulti"); - - in4_dev = in_dev_get(card->dev); - if (in4_dev == NULL) - goto out; - rcu_read_lock(); - lcs_remove_mc_addresses(card,in4_dev); - lcs_set_mc_addresses(card, in4_dev); - rcu_read_unlock(); - in_dev_put(in4_dev); - - netif_carrier_off(card->dev); - netif_tx_disable(card->dev); - wait_event(card->write.wait_q, - (card->write.state != LCS_CH_STATE_RUNNING)); - lcs_fix_multicast_list(card); - if (card->state == DEV_STATE_UP) { - netif_carrier_on(card->dev); - netif_wake_queue(card->dev); - } -out: - lcs_clear_thread_running_bit(card, LCS_SET_MC_THREAD); - return 0; -} -#endif /* CONFIG_IP_MULTICAST */ - -/* - * function called by net device to - * handle multicast address relevant things - */ -static void -lcs_set_multicast_list(struct net_device *dev) -{ -#ifdef CONFIG_IP_MULTICAST - struct lcs_card *card; - - LCS_DBF_TEXT(4, trace, "setmulti"); - card = (struct lcs_card *) dev->ml_priv; - - if (!lcs_set_thread_start_bit(card, LCS_SET_MC_THREAD)) - schedule_work(&card->kernel_thread_starter); -#endif /* CONFIG_IP_MULTICAST */ -} - -static long -lcs_check_irb_error(struct ccw_device *cdev, struct irb *irb) -{ - if (!IS_ERR(irb)) - return 0; - - switch (PTR_ERR(irb)) { - case -EIO: - dev_warn(&cdev->dev, - "An I/O-error occurred on the LCS device\n"); - LCS_DBF_TEXT(2, trace, "ckirberr"); - LCS_DBF_TEXT_(2, trace, " rc%d", -EIO); - break; - case -ETIMEDOUT: - dev_warn(&cdev->dev, - "A command timed out on the LCS device\n"); - LCS_DBF_TEXT(2, trace, "ckirberr"); - LCS_DBF_TEXT_(2, trace, " rc%d", -ETIMEDOUT); - break; - default: - dev_warn(&cdev->dev, - "An error occurred on the LCS device, rc=%ld\n", - PTR_ERR(irb)); - LCS_DBF_TEXT(2, trace, "ckirberr"); - LCS_DBF_TEXT(2, trace, " rc???"); - } - return PTR_ERR(irb); -} - -static int -lcs_get_problem(struct ccw_device *cdev, struct irb *irb) -{ - int dstat, cstat; - char *sense; - - sense = (char *) irb->ecw; - cstat = irb->scsw.cmd.cstat; - dstat = irb->scsw.cmd.dstat; - - if (cstat & (SCHN_STAT_CHN_CTRL_CHK | SCHN_STAT_INTF_CTRL_CHK | - SCHN_STAT_CHN_DATA_CHK | SCHN_STAT_CHAIN_CHECK | - SCHN_STAT_PROT_CHECK | SCHN_STAT_PROG_CHECK)) { - LCS_DBF_TEXT(2, trace, "CGENCHK"); - return 1; - } - if (dstat & DEV_STAT_UNIT_CHECK) { - if (sense[LCS_SENSE_BYTE_1] & - LCS_SENSE_RESETTING_EVENT) { - LCS_DBF_TEXT(2, trace, "REVIND"); - return 1; - } - if (sense[LCS_SENSE_BYTE_0] & - LCS_SENSE_CMD_REJECT) { - LCS_DBF_TEXT(2, trace, "CMDREJ"); - return 0; - } - if ((!sense[LCS_SENSE_BYTE_0]) && - (!sense[LCS_SENSE_BYTE_1]) && - (!sense[LCS_SENSE_BYTE_2]) && - (!sense[LCS_SENSE_BYTE_3])) { - LCS_DBF_TEXT(2, trace, "ZEROSEN"); - return 0; - } - LCS_DBF_TEXT(2, trace, "DGENCHK"); - return 1; - } - return 0; -} - -static void -lcs_schedule_recovery(struct lcs_card *card) -{ - LCS_DBF_TEXT(2, trace, "startrec"); - if (!lcs_set_thread_start_bit(card, LCS_RECOVERY_THREAD)) - schedule_work(&card->kernel_thread_starter); -} - -/* - * IRQ Handler for LCS channels - */ -static void -lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) -{ - struct lcs_card *card; - struct lcs_channel *channel; - int rc, index; - int cstat, dstat; - - if (lcs_check_irb_error(cdev, irb)) - return; - - card = CARD_FROM_DEV(cdev); - if (card->read.ccwdev == cdev) - channel = &card->read; - else - channel = &card->write; - - cstat = irb->scsw.cmd.cstat; - dstat = irb->scsw.cmd.dstat; - LCS_DBF_TEXT_(5, trace, "Rint%s", dev_name(&cdev->dev)); - LCS_DBF_TEXT_(5, trace, "%4x%4x", irb->scsw.cmd.cstat, - irb->scsw.cmd.dstat); - LCS_DBF_TEXT_(5, trace, "%4x%4x", irb->scsw.cmd.fctl, - irb->scsw.cmd.actl); - - /* Check for channel and device errors presented */ - rc = lcs_get_problem(cdev, irb); - if (rc || (dstat & DEV_STAT_UNIT_EXCEP)) { - dev_warn(&cdev->dev, - "The LCS device stopped because of an error," - " dstat=0x%X, cstat=0x%X \n", - dstat, cstat); - if (rc) { - channel->state = LCS_CH_STATE_ERROR; - } - } - if (channel->state == LCS_CH_STATE_ERROR) { - lcs_schedule_recovery(card); - wake_up(&card->wait_q); - return; - } - /* How far in the ccw chain have we processed? */ - if ((channel->state != LCS_CH_STATE_INIT) && - (irb->scsw.cmd.fctl & SCSW_FCTL_START_FUNC) && - (irb->scsw.cmd.cpa != 0)) { - index = (struct ccw1 *)dma32_to_virt(irb->scsw.cmd.cpa) - - channel->ccws; - if ((irb->scsw.cmd.actl & SCSW_ACTL_SUSPENDED) || - (irb->scsw.cmd.cstat & SCHN_STAT_PCI)) - /* Bloody io subsystem tells us lies about cpa... */ - index = (index - 1) & (LCS_NUM_BUFFS - 1); - while (channel->io_idx != index) { - __lcs_processed_buffer(channel, - channel->iob + channel->io_idx); - channel->io_idx = - (channel->io_idx + 1) & (LCS_NUM_BUFFS - 1); - } - } - - if ((irb->scsw.cmd.dstat & DEV_STAT_DEV_END) || - (irb->scsw.cmd.dstat & DEV_STAT_CHN_END) || - (irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK)) - /* Mark channel as stopped. */ - channel->state = LCS_CH_STATE_STOPPED; - else if (irb->scsw.cmd.actl & SCSW_ACTL_SUSPENDED) - /* CCW execution stopped on a suspend bit. */ - channel->state = LCS_CH_STATE_SUSPENDED; - if (irb->scsw.cmd.fctl & SCSW_FCTL_HALT_FUNC) { - if (irb->scsw.cmd.cc != 0) { - ccw_device_halt(channel->ccwdev, 0); - return; - } - /* The channel has been stopped by halt_IO. */ - channel->state = LCS_CH_STATE_HALTED; - } - if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC) - channel->state = LCS_CH_STATE_CLEARED; - /* Do the rest in the tasklet. */ - tasklet_schedule(&channel->irq_tasklet); -} - -/* - * Tasklet for IRQ handler - */ -static void -lcs_tasklet(unsigned long data) -{ - unsigned long flags; - struct lcs_channel *channel; - struct lcs_buffer *iob; - int buf_idx; - - channel = (struct lcs_channel *) data; - LCS_DBF_TEXT_(5, trace, "tlet%s", dev_name(&channel->ccwdev->dev)); - - /* Check for processed buffers. */ - iob = channel->iob; - buf_idx = channel->buf_idx; - while (iob[buf_idx].state == LCS_BUF_STATE_PROCESSED) { - /* Do the callback thing. */ - if (iob[buf_idx].callback != NULL) - iob[buf_idx].callback(channel, iob + buf_idx); - buf_idx = (buf_idx + 1) & (LCS_NUM_BUFFS - 1); - } - channel->buf_idx = buf_idx; - - if (channel->state == LCS_CH_STATE_STOPPED) - lcs_start_channel(channel); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - if (channel->state == LCS_CH_STATE_SUSPENDED && - channel->iob[channel->io_idx].state == LCS_BUF_STATE_READY) - __lcs_resume_channel(channel); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - - /* Something happened on the channel. Wake up waiters. */ - wake_up(&channel->wait_q); -} - -/* - * Finish current tx buffer and make it ready for transmit. - */ -static void -__lcs_emit_txbuffer(struct lcs_card *card) -{ - LCS_DBF_TEXT(5, trace, "emittx"); - *(__u16 *)(card->tx_buffer->data + card->tx_buffer->count) = 0; - card->tx_buffer->count += 2; - lcs_ready_buffer(&card->write, card->tx_buffer); - card->tx_buffer = NULL; - card->tx_emitted++; -} - -/* - * Callback for finished tx buffers. - */ -static void -lcs_txbuffer_cb(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - struct lcs_card *card; - - LCS_DBF_TEXT(5, trace, "txbuffcb"); - /* Put buffer back to pool. */ - lcs_release_buffer(channel, buffer); - card = container_of(channel, struct lcs_card, write); - if (netif_queue_stopped(card->dev) && netif_carrier_ok(card->dev)) - netif_wake_queue(card->dev); - spin_lock(&card->lock); - card->tx_emitted--; - if (card->tx_emitted <= 0 && card->tx_buffer != NULL) - /* - * Last running tx buffer has finished. Submit partially - * filled current buffer. - */ - __lcs_emit_txbuffer(card); - spin_unlock(&card->lock); -} - -/* - * Packet transmit function called by network stack - */ -static netdev_tx_t __lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb, - struct net_device *dev) -{ - struct lcs_header *header; - int rc = NETDEV_TX_OK; - - LCS_DBF_TEXT(5, trace, "hardxmit"); - if (skb == NULL) { - card->stats.tx_dropped++; - card->stats.tx_errors++; - return NETDEV_TX_OK; - } - if (card->state != DEV_STATE_UP) { - dev_kfree_skb(skb); - card->stats.tx_dropped++; - card->stats.tx_errors++; - card->stats.tx_carrier_errors++; - return NETDEV_TX_OK; - } - if (skb->protocol == htons(ETH_P_IPV6)) { - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - netif_stop_queue(card->dev); - spin_lock(&card->lock); - if (card->tx_buffer != NULL && - card->tx_buffer->count + sizeof(struct lcs_header) + - skb->len + sizeof(u16) > LCS_IOBUFFERSIZE) - /* skb too big for current tx buffer. */ - __lcs_emit_txbuffer(card); - if (card->tx_buffer == NULL) { - /* Get new tx buffer */ - card->tx_buffer = lcs_get_buffer(&card->write); - if (card->tx_buffer == NULL) { - card->stats.tx_dropped++; - rc = NETDEV_TX_BUSY; - goto out; - } - card->tx_buffer->callback = lcs_txbuffer_cb; - card->tx_buffer->count = 0; - } - header = (struct lcs_header *) - (card->tx_buffer->data + card->tx_buffer->count); - card->tx_buffer->count += skb->len + sizeof(struct lcs_header); - header->offset = card->tx_buffer->count; - header->type = card->lan_type; - header->slot = card->portno; - skb_copy_from_linear_data(skb, header + 1, skb->len); - spin_unlock(&card->lock); - card->stats.tx_bytes += skb->len; - card->stats.tx_packets++; - dev_kfree_skb(skb); - netif_wake_queue(card->dev); - spin_lock(&card->lock); - if (card->tx_emitted <= 0 && card->tx_buffer != NULL) - /* If this is the first tx buffer emit it immediately. */ - __lcs_emit_txbuffer(card); -out: - spin_unlock(&card->lock); - return rc; -} - -static netdev_tx_t lcs_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct lcs_card *card; - int rc; - - LCS_DBF_TEXT(5, trace, "pktxmit"); - card = (struct lcs_card *) dev->ml_priv; - rc = __lcs_start_xmit(card, skb, dev); - return rc; -} - -/* - * send startlan and lanstat command to make LCS device ready - */ -static int -lcs_startlan_auto(struct lcs_card *card) -{ - int rc; - - LCS_DBF_TEXT(2, trace, "strtauto"); - card->lan_type = LCS_FRAME_TYPE_ENET; - rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP); - if (rc == 0) - return 0; - - return -EIO; -} - -static int -lcs_startlan(struct lcs_card *card) -{ - int rc, i; - - LCS_DBF_TEXT(2, trace, "startlan"); - rc = 0; - if (card->portno != LCS_INVALID_PORT_NO) { - if (card->lan_type == LCS_FRAME_TYPE_AUTO) - rc = lcs_startlan_auto(card); - else - rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP); - } else { - for (i = 0; i <= 16; i++) { - card->portno = i; - if (card->lan_type != LCS_FRAME_TYPE_AUTO) - rc = lcs_send_startlan(card, - LCS_INITIATOR_TCPIP); - else - /* autodetecting lan type */ - rc = lcs_startlan_auto(card); - if (rc == 0) - break; - } - } - if (rc == 0) - return lcs_send_lanstat(card); - return rc; -} - -/* - * LCS detect function - * setup channels and make them I/O ready - */ -static int -lcs_detect(struct lcs_card *card) -{ - int rc = 0; - - LCS_DBF_TEXT(2, setup, "lcsdetct"); - /* start/reset card */ - if (card->dev) - netif_stop_queue(card->dev); - rc = lcs_stop_channels(card); - if (rc == 0) { - rc = lcs_start_channels(card); - if (rc == 0) { - rc = lcs_send_startup(card, LCS_INITIATOR_TCPIP); - if (rc == 0) - rc = lcs_startlan(card); - } - } - if (rc == 0) { - card->state = DEV_STATE_UP; - } else { - card->state = DEV_STATE_DOWN; - card->write.state = LCS_CH_STATE_INIT; - card->read.state = LCS_CH_STATE_INIT; - } - return rc; -} - -/* - * LCS Stop card - */ -static int -lcs_stopcard(struct lcs_card *card) -{ - int rc; - - LCS_DBF_TEXT(3, setup, "stopcard"); - - if (card->read.state != LCS_CH_STATE_STOPPED && - card->write.state != LCS_CH_STATE_STOPPED && - card->read.state != LCS_CH_STATE_ERROR && - card->write.state != LCS_CH_STATE_ERROR && - card->state == DEV_STATE_UP) { - lcs_clear_multicast_list(card); - rc = lcs_send_stoplan(card,LCS_INITIATOR_TCPIP); - rc = lcs_send_shutdown(card); - } - rc = lcs_stop_channels(card); - card->state = DEV_STATE_DOWN; - - return rc; -} - -/* - * Kernel Thread helper functions for LGW initiated commands - */ -static void -lcs_start_kernel_thread(struct work_struct *work) -{ - struct lcs_card *card = container_of(work, struct lcs_card, kernel_thread_starter); - LCS_DBF_TEXT(5, trace, "krnthrd"); - if (lcs_do_start_thread(card, LCS_RECOVERY_THREAD)) - kthread_run(lcs_recovery, card, "lcs_recover"); -#ifdef CONFIG_IP_MULTICAST - if (lcs_do_start_thread(card, LCS_SET_MC_THREAD)) - kthread_run(lcs_register_mc_addresses, card, "regipm"); -#endif -} - -/* - * Process control frames. - */ -static void -lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd) -{ - LCS_DBF_TEXT(5, trace, "getctrl"); - if (cmd->initiator == LCS_INITIATOR_LGW) { - switch(cmd->cmd_code) { - case LCS_CMD_STARTUP: - case LCS_CMD_STARTLAN: - lcs_schedule_recovery(card); - break; - case LCS_CMD_STOPLAN: - if (card->dev) { - pr_warn("Stoplan for %s initiated by LGW\n", - card->dev->name); - netif_carrier_off(card->dev); - } - break; - default: - LCS_DBF_TEXT(5, trace, "noLGWcmd"); - break; - } - } else - lcs_notify_lancmd_waiters(card, cmd); -} - -/* - * Unpack network packet. - */ -static void -lcs_get_skb(struct lcs_card *card, char *skb_data, unsigned int skb_len) -{ - struct sk_buff *skb; - - LCS_DBF_TEXT(5, trace, "getskb"); - if (card->dev == NULL || - card->state != DEV_STATE_UP) - /* The card isn't up. Ignore the packet. */ - return; - - skb = dev_alloc_skb(skb_len); - if (skb == NULL) { - dev_err(&card->dev->dev, - " Allocating a socket buffer to interface %s failed\n", - card->dev->name); - card->stats.rx_dropped++; - return; - } - skb_put_data(skb, skb_data, skb_len); - skb->protocol = card->lan_type_trans(skb, card->dev); - card->stats.rx_bytes += skb_len; - card->stats.rx_packets++; - if (skb->protocol == htons(ETH_P_802_2)) - *((__u32 *)skb->cb) = ++card->pkt_seq; - netif_rx(skb); -} - -/* - * LCS main routine to get packets and lancmd replies from the buffers - */ -static void -lcs_get_frames_cb(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - struct lcs_card *card; - struct lcs_header *lcs_hdr; - __u16 offset; - - LCS_DBF_TEXT(5, trace, "lcsgtpkt"); - lcs_hdr = (struct lcs_header *) buffer->data; - if (lcs_hdr->offset == LCS_ILLEGAL_OFFSET) { - LCS_DBF_TEXT(4, trace, "-eiogpkt"); - return; - } - card = container_of(channel, struct lcs_card, read); - offset = 0; - while (lcs_hdr->offset != 0) { - if (lcs_hdr->offset <= 0 || - lcs_hdr->offset > LCS_IOBUFFERSIZE || - lcs_hdr->offset < offset) { - /* Offset invalid. */ - card->stats.rx_length_errors++; - card->stats.rx_errors++; - return; - } - if (lcs_hdr->type == LCS_FRAME_TYPE_CONTROL) - lcs_get_control(card, (struct lcs_cmd *) lcs_hdr); - else if (lcs_hdr->type == LCS_FRAME_TYPE_ENET) - lcs_get_skb(card, (char *)(lcs_hdr + 1), - lcs_hdr->offset - offset - - sizeof(struct lcs_header)); - else - dev_info_once(&card->dev->dev, - "Unknown frame type %d\n", - lcs_hdr->type); - offset = lcs_hdr->offset; - lcs_hdr->offset = LCS_ILLEGAL_OFFSET; - lcs_hdr = (struct lcs_header *) (buffer->data + offset); - } - /* The buffer is now empty. Make it ready again. */ - lcs_ready_buffer(&card->read, buffer); -} - -/* - * get network statistics for ifconfig and other user programs - */ -static struct net_device_stats * -lcs_getstats(struct net_device *dev) -{ - struct lcs_card *card; - - LCS_DBF_TEXT(4, trace, "netstats"); - card = (struct lcs_card *) dev->ml_priv; - return &card->stats; -} - -/* - * stop lcs device - * This function will be called by user doing ifconfig xxx down - */ -static int -lcs_stop_device(struct net_device *dev) -{ - struct lcs_card *card; - int rc; - - LCS_DBF_TEXT(2, trace, "stopdev"); - card = (struct lcs_card *) dev->ml_priv; - netif_carrier_off(dev); - netif_tx_disable(dev); - dev->flags &= ~IFF_UP; - wait_event(card->write.wait_q, - (card->write.state != LCS_CH_STATE_RUNNING)); - rc = lcs_stopcard(card); - if (rc) - dev_err(&card->dev->dev, - " Shutting down the LCS device failed\n"); - return rc; -} - -/* - * start lcs device and make it runnable - * This function will be called by user doing ifconfig xxx up - */ -static int -lcs_open_device(struct net_device *dev) -{ - struct lcs_card *card; - int rc; - - LCS_DBF_TEXT(2, trace, "opendev"); - card = (struct lcs_card *) dev->ml_priv; - /* initialize statistics */ - rc = lcs_detect(card); - if (rc) { - pr_err("Error in opening device!\n"); - - } else { - dev->flags |= IFF_UP; - netif_carrier_on(dev); - netif_wake_queue(dev); - card->state = DEV_STATE_UP; - } - return rc; -} - -/* - * show function for portno called by cat or similar things - */ -static ssize_t -lcs_portno_show (struct device *dev, struct device_attribute *attr, char *buf) -{ - struct lcs_card *card; - - card = dev_get_drvdata(dev); - - if (!card) - return 0; - - return sysfs_emit(buf, "%d\n", card->portno); -} - -/* - * store the value which is piped to file portno - */ -static ssize_t -lcs_portno_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct lcs_card *card; - int rc; - s16 value; - - card = dev_get_drvdata(dev); - - if (!card) - return 0; - - rc = kstrtos16(buf, 0, &value); - if (rc) - return -EINVAL; - /* TODO: sanity checks */ - card->portno = value; - if (card->dev) - card->dev->dev_port = card->portno; - - return count; - -} - -static DEVICE_ATTR(portno, 0644, lcs_portno_show, lcs_portno_store); - -static const char *lcs_type[] = { - "not a channel", - "2216 parallel", - "2216 channel", - "OSA LCS card", - "unknown channel type", - "unsupported channel type", -}; - -static ssize_t -lcs_type_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct ccwgroup_device *cgdev; - - cgdev = to_ccwgroupdev(dev); - if (!cgdev) - return -ENODEV; - - return sysfs_emit(buf, "%s\n", - lcs_type[cgdev->cdev[0]->id.driver_info]); -} - -static DEVICE_ATTR(type, 0444, lcs_type_show, NULL); - -static ssize_t -lcs_timeout_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct lcs_card *card; - - card = dev_get_drvdata(dev); - - return card ? sysfs_emit(buf, "%u\n", card->lancmd_timeout) : 0; -} - -static ssize_t -lcs_timeout_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct lcs_card *card; - unsigned int value; - int rc; - - card = dev_get_drvdata(dev); - - if (!card) - return 0; - - rc = kstrtouint(buf, 0, &value); - if (rc) - return -EINVAL; - /* TODO: sanity checks */ - card->lancmd_timeout = value; - - return count; - -} - -static DEVICE_ATTR(lancmd_timeout, 0644, lcs_timeout_show, lcs_timeout_store); - -static ssize_t -lcs_dev_recover_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct lcs_card *card = dev_get_drvdata(dev); - char *tmp; - int i; - - if (!card) - return -EINVAL; - if (card->state != DEV_STATE_UP) - return -EPERM; - i = simple_strtoul(buf, &tmp, 16); - if (i == 1) - lcs_schedule_recovery(card); - return count; -} - -static DEVICE_ATTR(recover, 0200, NULL, lcs_dev_recover_store); - -static struct attribute * lcs_attrs[] = { - &dev_attr_portno.attr, - &dev_attr_type.attr, - &dev_attr_lancmd_timeout.attr, - &dev_attr_recover.attr, - NULL, -}; -static struct attribute_group lcs_attr_group = { - .attrs = lcs_attrs, -}; -static const struct attribute_group *lcs_attr_groups[] = { - &lcs_attr_group, - NULL, -}; -static const struct device_type lcs_devtype = { - .name = "lcs", - .groups = lcs_attr_groups, -}; - -/* - * lcs_probe_device is called on establishing a new ccwgroup_device. - */ -static int -lcs_probe_device(struct ccwgroup_device *ccwgdev) -{ - struct lcs_card *card; - - if (!get_device(&ccwgdev->dev)) - return -ENODEV; - - LCS_DBF_TEXT(2, setup, "add_dev"); - card = lcs_alloc_card(); - if (!card) { - LCS_DBF_TEXT_(2, setup, " rc%d", -ENOMEM); - put_device(&ccwgdev->dev); - return -ENOMEM; - } - dev_set_drvdata(&ccwgdev->dev, card); - ccwgdev->cdev[0]->handler = lcs_irq; - ccwgdev->cdev[1]->handler = lcs_irq; - card->gdev = ccwgdev; - INIT_WORK(&card->kernel_thread_starter, lcs_start_kernel_thread); - card->thread_start_mask = 0; - card->thread_allowed_mask = 0; - card->thread_running_mask = 0; - ccwgdev->dev.type = &lcs_devtype; - - return 0; -} - -static int -lcs_register_netdev(struct ccwgroup_device *ccwgdev) -{ - struct lcs_card *card; - - LCS_DBF_TEXT(2, setup, "regnetdv"); - card = dev_get_drvdata(&ccwgdev->dev); - if (card->dev->reg_state != NETREG_UNINITIALIZED) - return 0; - SET_NETDEV_DEV(card->dev, &ccwgdev->dev); - return register_netdev(card->dev); -} - -/* - * lcs_new_device will be called by setting the group device online. - */ -static const struct net_device_ops lcs_netdev_ops = { - .ndo_open = lcs_open_device, - .ndo_stop = lcs_stop_device, - .ndo_get_stats = lcs_getstats, - .ndo_start_xmit = lcs_start_xmit, -}; - -static const struct net_device_ops lcs_mc_netdev_ops = { - .ndo_open = lcs_open_device, - .ndo_stop = lcs_stop_device, - .ndo_get_stats = lcs_getstats, - .ndo_start_xmit = lcs_start_xmit, - .ndo_set_rx_mode = lcs_set_multicast_list, -}; - -static int -lcs_new_device(struct ccwgroup_device *ccwgdev) -{ - struct lcs_card *card; - struct net_device *dev=NULL; - enum lcs_dev_states recover_state; - int rc; - - card = dev_get_drvdata(&ccwgdev->dev); - if (!card) - return -ENODEV; - - LCS_DBF_TEXT(2, setup, "newdev"); - LCS_DBF_HEX(3, setup, &card, sizeof(void*)); - card->read.ccwdev = ccwgdev->cdev[0]; - card->write.ccwdev = ccwgdev->cdev[1]; - - recover_state = card->state; - rc = ccw_device_set_online(card->read.ccwdev); - if (rc) - goto out_err; - rc = ccw_device_set_online(card->write.ccwdev); - if (rc) - goto out_werr; - - LCS_DBF_TEXT(3, setup, "lcsnewdv"); - - lcs_setup_card(card); - rc = lcs_detect(card); - if (rc) { - LCS_DBF_TEXT(2, setup, "dtctfail"); - dev_err(&ccwgdev->dev, - "Detecting a network adapter for LCS devices" - " failed with rc=%d (0x%x)\n", rc, rc); - lcs_stopcard(card); - goto out; - } - if (card->dev) { - LCS_DBF_TEXT(2, setup, "samedev"); - LCS_DBF_HEX(3, setup, &card, sizeof(void*)); - goto netdev_out; - } - switch (card->lan_type) { - case LCS_FRAME_TYPE_ENET: - card->lan_type_trans = eth_type_trans; - dev = alloc_etherdev(0); - break; - default: - LCS_DBF_TEXT(3, setup, "errinit"); - pr_err(" Initialization failed\n"); - goto out; - } - if (!dev) - goto out; - card->dev = dev; - card->dev->ml_priv = card; - card->dev->netdev_ops = &lcs_netdev_ops; - card->dev->dev_port = card->portno; - eth_hw_addr_set(card->dev, card->mac); -#ifdef CONFIG_IP_MULTICAST - if (!lcs_check_multicast_support(card)) - card->dev->netdev_ops = &lcs_mc_netdev_ops; -#endif -netdev_out: - lcs_set_allowed_threads(card,0xffffffff); - if (recover_state == DEV_STATE_RECOVER) { - lcs_set_multicast_list(card->dev); - card->dev->flags |= IFF_UP; - netif_carrier_on(card->dev); - netif_wake_queue(card->dev); - card->state = DEV_STATE_UP; - } else { - lcs_stopcard(card); - } - - if (lcs_register_netdev(ccwgdev) != 0) - goto out; - - /* Print out supported assists: IPv6 */ - pr_info("LCS device %s %s IPv6 support\n", card->dev->name, - (card->ip_assists_supported & LCS_IPASS_IPV6_SUPPORT) ? - "with" : "without"); - /* Print out supported assist: Multicast */ - pr_info("LCS device %s %s Multicast support\n", card->dev->name, - (card->ip_assists_supported & LCS_IPASS_MULTICAST_SUPPORT) ? - "with" : "without"); - return 0; -out: - - ccw_device_set_offline(card->write.ccwdev); -out_werr: - ccw_device_set_offline(card->read.ccwdev); -out_err: - return -ENODEV; -} - -/* - * lcs_shutdown_device, called when setting the group device offline. - */ -static int -__lcs_shutdown_device(struct ccwgroup_device *ccwgdev, int recovery_mode) -{ - struct lcs_card *card; - enum lcs_dev_states recover_state; - int ret = 0, ret2 = 0, ret3 = 0; - - LCS_DBF_TEXT(3, setup, "shtdndev"); - card = dev_get_drvdata(&ccwgdev->dev); - if (!card) - return -ENODEV; - if (recovery_mode == 0) { - lcs_set_allowed_threads(card, 0); - if (lcs_wait_for_threads(card, LCS_SET_MC_THREAD)) - return -ERESTARTSYS; - } - LCS_DBF_HEX(3, setup, &card, sizeof(void*)); - recover_state = card->state; - - ret = lcs_stop_device(card->dev); - ret2 = ccw_device_set_offline(card->read.ccwdev); - ret3 = ccw_device_set_offline(card->write.ccwdev); - if (!ret) - ret = (ret2) ? ret2 : ret3; - if (ret) - LCS_DBF_TEXT_(3, setup, "1err:%d", ret); - if (recover_state == DEV_STATE_UP) { - card->state = DEV_STATE_RECOVER; - } - return 0; -} - -static int -lcs_shutdown_device(struct ccwgroup_device *ccwgdev) -{ - return __lcs_shutdown_device(ccwgdev, 0); -} - -/* - * drive lcs recovery after startup and startlan initiated by Lan Gateway - */ -static int -lcs_recovery(void *ptr) -{ - struct lcs_card *card; - struct ccwgroup_device *gdev; - int rc; - - card = (struct lcs_card *) ptr; - - LCS_DBF_TEXT(4, trace, "recover1"); - if (!lcs_do_run_thread(card, LCS_RECOVERY_THREAD)) - return 0; - LCS_DBF_TEXT(4, trace, "recover2"); - gdev = card->gdev; - dev_warn(&gdev->dev, - "A recovery process has been started for the LCS device\n"); - rc = __lcs_shutdown_device(gdev, 1); - rc = lcs_new_device(gdev); - if (!rc) - pr_info("Device %s successfully recovered!\n", - card->dev->name); - else - pr_info("Device %s could not be recovered!\n", - card->dev->name); - lcs_clear_thread_running_bit(card, LCS_RECOVERY_THREAD); - return 0; -} - -/* - * lcs_remove_device, free buffers and card - */ -static void -lcs_remove_device(struct ccwgroup_device *ccwgdev) -{ - struct lcs_card *card; - - card = dev_get_drvdata(&ccwgdev->dev); - if (!card) - return; - - LCS_DBF_TEXT(3, setup, "remdev"); - LCS_DBF_HEX(3, setup, &card, sizeof(void*)); - if (ccwgdev->state == CCWGROUP_ONLINE) { - lcs_shutdown_device(ccwgdev); - } - if (card->dev) - unregister_netdev(card->dev); - lcs_cleanup_card(card); - lcs_free_card(card); - dev_set_drvdata(&ccwgdev->dev, NULL); - put_device(&ccwgdev->dev); -} - -static struct ccw_device_id lcs_ids[] = { - {CCW_DEVICE(0x3088, 0x08), .driver_info = lcs_channel_type_parallel}, - {CCW_DEVICE(0x3088, 0x1f), .driver_info = lcs_channel_type_2216}, - {CCW_DEVICE(0x3088, 0x60), .driver_info = lcs_channel_type_osa2}, - {}, -}; -MODULE_DEVICE_TABLE(ccw, lcs_ids); - -static struct ccw_driver lcs_ccw_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "lcs", - }, - .ids = lcs_ids, - .probe = ccwgroup_probe_ccwdev, - .remove = ccwgroup_remove_ccwdev, - .int_class = IRQIO_LCS, -}; - -/* - * LCS ccwgroup driver registration - */ -static struct ccwgroup_driver lcs_group_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "lcs", - }, - .ccw_driver = &lcs_ccw_driver, - .setup = lcs_probe_device, - .remove = lcs_remove_device, - .set_online = lcs_new_device, - .set_offline = lcs_shutdown_device, -}; - -static ssize_t group_store(struct device_driver *ddrv, const char *buf, - size_t count) -{ - int err; - err = ccwgroup_create_dev(lcs_root_dev, &lcs_group_driver, 2, buf); - return err ? err : count; -} -static DRIVER_ATTR_WO(group); - -static struct attribute *lcs_drv_attrs[] = { - &driver_attr_group.attr, - NULL, -}; -static struct attribute_group lcs_drv_attr_group = { - .attrs = lcs_drv_attrs, -}; -static const struct attribute_group *lcs_drv_attr_groups[] = { - &lcs_drv_attr_group, - NULL, -}; - -/* - * LCS Module/Kernel initialization function - */ -static int -__init lcs_init_module(void) -{ - int rc; - - pr_info("Loading %s\n", version); - rc = lcs_register_debug_facility(); - LCS_DBF_TEXT(0, setup, "lcsinit"); - if (rc) - goto out_err; - lcs_root_dev = root_device_register("lcs"); - rc = PTR_ERR_OR_ZERO(lcs_root_dev); - if (rc) - goto register_err; - rc = ccw_driver_register(&lcs_ccw_driver); - if (rc) - goto ccw_err; - lcs_group_driver.driver.groups = lcs_drv_attr_groups; - rc = ccwgroup_driver_register(&lcs_group_driver); - if (rc) - goto ccwgroup_err; - return 0; - -ccwgroup_err: - ccw_driver_unregister(&lcs_ccw_driver); -ccw_err: - root_device_unregister(lcs_root_dev); -register_err: - lcs_unregister_debug_facility(); -out_err: - pr_err("Initializing the lcs device driver failed\n"); - return rc; -} - - -/* - * LCS module cleanup function - */ -static void -__exit lcs_cleanup_module(void) -{ - pr_info("Terminating lcs module.\n"); - LCS_DBF_TEXT(0, trace, "cleanup"); - ccwgroup_driver_unregister(&lcs_group_driver); - ccw_driver_unregister(&lcs_ccw_driver); - root_device_unregister(lcs_root_dev); - lcs_unregister_debug_facility(); -} - -module_init(lcs_init_module); -module_exit(lcs_cleanup_module); - -MODULE_AUTHOR("Frank Pavlic <fpavlic@de.ibm.com>"); -MODULE_DESCRIPTION("S/390 LAN channel station device driver"); -MODULE_LICENSE("GPL"); - diff --git a/drivers/s390/net/lcs.h b/drivers/s390/net/lcs.h deleted file mode 100644 index a2699b70b050..000000000000 --- a/drivers/s390/net/lcs.h +++ /dev/null @@ -1,342 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/*lcs.h*/ - -#include <linux/interrupt.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/workqueue.h> -#include <linux/refcount.h> -#include <asm/ccwdev.h> - -#define LCS_DBF_TEXT(level, name, text) \ - do { \ - debug_text_event(lcs_dbf_##name, level, text); \ - } while (0) - -#define LCS_DBF_HEX(level,name,addr,len) \ -do { \ - debug_event(lcs_dbf_##name,level,(void*)(addr),len); \ -} while (0) - -#define LCS_DBF_TEXT_(level,name,text...) \ - do { \ - if (debug_level_enabled(lcs_dbf_##name, level)) { \ - scnprintf(debug_buffer, sizeof(debug_buffer), text); \ - debug_text_event(lcs_dbf_##name, level, debug_buffer); \ - } \ - } while (0) - -/** - * sysfs related stuff - */ -#define CARD_FROM_DEV(cdev) \ - (struct lcs_card *) dev_get_drvdata( \ - &((struct ccwgroup_device *)dev_get_drvdata(&cdev->dev))->dev); - -/** - * Enum for classifying detected devices. - */ -enum lcs_channel_types { - /* Device is not a channel */ - lcs_channel_type_none, - - /* Device is a 2216 channel */ - lcs_channel_type_parallel, - - /* Device is a 2216 channel */ - lcs_channel_type_2216, - - /* Device is a OSA2 card */ - lcs_channel_type_osa2 -}; - -/** - * CCW commands used in this driver - */ -#define LCS_CCW_WRITE 0x01 -#define LCS_CCW_READ 0x02 -#define LCS_CCW_TRANSFER 0x08 - -/** - * LCS device status primitives - */ -#define LCS_CMD_STARTLAN 0x01 -#define LCS_CMD_STOPLAN 0x02 -#define LCS_CMD_LANSTAT 0x04 -#define LCS_CMD_STARTUP 0x07 -#define LCS_CMD_SHUTDOWN 0x08 -#define LCS_CMD_QIPASSIST 0xb2 -#define LCS_CMD_SETIPM 0xb4 -#define LCS_CMD_DELIPM 0xb5 - -#define LCS_INITIATOR_TCPIP 0x00 -#define LCS_INITIATOR_LGW 0x01 -#define LCS_STD_CMD_SIZE 16 -#define LCS_MULTICAST_CMD_SIZE 404 - -/** - * LCS IPASSIST MASKS,only used when multicast is switched on - */ -/* Not supported by LCS */ -#define LCS_IPASS_ARP_PROCESSING 0x0001 -#define LCS_IPASS_IN_CHECKSUM_SUPPORT 0x0002 -#define LCS_IPASS_OUT_CHECKSUM_SUPPORT 0x0004 -#define LCS_IPASS_IP_FRAG_REASSEMBLY 0x0008 -#define LCS_IPASS_IP_FILTERING 0x0010 -/* Supported by lcs 3172 */ -#define LCS_IPASS_IPV6_SUPPORT 0x0020 -#define LCS_IPASS_MULTICAST_SUPPORT 0x0040 - -/** - * LCS sense byte definitions - */ -#define LCS_SENSE_BYTE_0 0 -#define LCS_SENSE_BYTE_1 1 -#define LCS_SENSE_BYTE_2 2 -#define LCS_SENSE_BYTE_3 3 -#define LCS_SENSE_INTERFACE_DISCONNECT 0x01 -#define LCS_SENSE_EQUIPMENT_CHECK 0x10 -#define LCS_SENSE_BUS_OUT_CHECK 0x20 -#define LCS_SENSE_INTERVENTION_REQUIRED 0x40 -#define LCS_SENSE_CMD_REJECT 0x80 -#define LCS_SENSE_RESETTING_EVENT 0x80 -#define LCS_SENSE_DEVICE_ONLINE 0x20 - -/** - * LCS packet type definitions - */ -#define LCS_FRAME_TYPE_CONTROL 0 -#define LCS_FRAME_TYPE_ENET 1 -#define LCS_FRAME_TYPE_TR 2 -#define LCS_FRAME_TYPE_FDDI 7 -#define LCS_FRAME_TYPE_AUTO -1 - -/** - * some more definitions,we will sort them later - */ -#define LCS_ILLEGAL_OFFSET 0xffff -#define LCS_IOBUFFERSIZE 0x5000 -#define LCS_NUM_BUFFS 32 /* needs to be power of 2 */ -#define LCS_MAC_LENGTH 6 -#define LCS_INVALID_PORT_NO -1 -#define LCS_LANCMD_TIMEOUT_DEFAULT 5 - -/** - * Multicast state - */ -#define LCS_IPM_STATE_SET_REQUIRED 0 -#define LCS_IPM_STATE_DEL_REQUIRED 1 -#define LCS_IPM_STATE_ON_CARD 2 - -/** - * LCS IP Assist declarations - * seems to be only used for multicast - */ -#define LCS_IPASS_ARP_PROCESSING 0x0001 -#define LCS_IPASS_INBOUND_CSUM_SUPP 0x0002 -#define LCS_IPASS_OUTBOUND_CSUM_SUPP 0x0004 -#define LCS_IPASS_IP_FRAG_REASSEMBLY 0x0008 -#define LCS_IPASS_IP_FILTERING 0x0010 -#define LCS_IPASS_IPV6_SUPPORT 0x0020 -#define LCS_IPASS_MULTICAST_SUPPORT 0x0040 - -/** - * LCS Buffer states - */ -enum lcs_buffer_states { - LCS_BUF_STATE_EMPTY, /* buffer is empty */ - LCS_BUF_STATE_LOCKED, /* buffer is locked, don't touch */ - LCS_BUF_STATE_READY, /* buffer is ready for read/write */ - LCS_BUF_STATE_PROCESSED, -}; - -/** - * LCS Channel State Machine declarations - */ -enum lcs_channel_states { - LCS_CH_STATE_INIT, - LCS_CH_STATE_HALTED, - LCS_CH_STATE_STOPPED, - LCS_CH_STATE_RUNNING, - LCS_CH_STATE_SUSPENDED, - LCS_CH_STATE_CLEARED, - LCS_CH_STATE_ERROR, -}; - -/** - * LCS device state machine - */ -enum lcs_dev_states { - DEV_STATE_DOWN, - DEV_STATE_UP, - DEV_STATE_RECOVER, -}; - -enum lcs_threads { - LCS_SET_MC_THREAD = 1, - LCS_RECOVERY_THREAD = 2, -}; - -/** - * LCS struct declarations - */ -struct lcs_header { - __u16 offset; - __u8 type; - __u8 slot; -} __attribute__ ((packed)); - -struct lcs_ip_mac_pair { - __be32 ip_addr; - __u8 mac_addr[LCS_MAC_LENGTH]; - __u8 reserved[2]; -} __attribute__ ((packed)); - -struct lcs_ipm_list { - struct list_head list; - struct lcs_ip_mac_pair ipm; - __u8 ipm_state; -}; - -struct lcs_cmd { - __u16 offset; - __u8 type; - __u8 slot; - __u8 cmd_code; - __u8 initiator; - __u16 sequence_no; - __u16 return_code; - union { - struct { - __u8 lan_type; - __u8 portno; - __u16 parameter_count; - __u8 operator_flags[3]; - __u8 reserved[3]; - } lcs_std_cmd; - struct { - __u16 unused1; - __u16 buff_size; - __u8 unused2[6]; - } lcs_startup; - struct { - __u8 lan_type; - __u8 portno; - __u8 unused[10]; - __u8 mac_addr[LCS_MAC_LENGTH]; - __u32 num_packets_deblocked; - __u32 num_packets_blocked; - __u32 num_packets_tx_on_lan; - __u32 num_tx_errors_detected; - __u32 num_tx_packets_disgarded; - __u32 num_packets_rx_from_lan; - __u32 num_rx_errors_detected; - __u32 num_rx_discarded_nobuffs_avail; - __u32 num_rx_packets_too_large; - } lcs_lanstat_cmd; -#ifdef CONFIG_IP_MULTICAST - struct { - __u8 lan_type; - __u8 portno; - __u16 num_ip_pairs; - __u16 ip_assists_supported; - __u16 ip_assists_enabled; - __u16 version; - struct { - struct lcs_ip_mac_pair - ip_mac_pair[32]; - __u32 response_data; - } lcs_ipass_ctlmsg __attribute ((packed)); - } lcs_qipassist __attribute__ ((packed)); -#endif /*CONFIG_IP_MULTICAST */ - } cmd __attribute__ ((packed)); -} __attribute__ ((packed)); - -/** - * Forward declarations. - */ -struct lcs_card; -struct lcs_channel; - -/** - * Definition of an lcs buffer. - */ -struct lcs_buffer { - enum lcs_buffer_states state; - void *data; - int count; - /* Callback for completion notification. */ - void (*callback)(struct lcs_channel *, struct lcs_buffer *); -}; - -struct lcs_reply { - struct list_head list; - __u16 sequence_no; - refcount_t refcnt; - /* Callback for completion notification. */ - void (*callback)(struct lcs_card *, struct lcs_cmd *); - wait_queue_head_t wait_q; - struct lcs_card *card; - struct timer_list timer; - int received; - int rc; -}; - -/** - * Definition of an lcs channel - */ -struct lcs_channel { - enum lcs_channel_states state; - struct ccw_device *ccwdev; - struct ccw1 ccws[LCS_NUM_BUFFS + 1]; - wait_queue_head_t wait_q; - struct tasklet_struct irq_tasklet; - struct lcs_buffer iob[LCS_NUM_BUFFS]; - int io_idx; - int buf_idx; -}; - - -/** - * definition of the lcs card - */ -struct lcs_card { - spinlock_t lock; - spinlock_t ipm_lock; - enum lcs_dev_states state; - struct net_device *dev; - struct net_device_stats stats; - __be16 (*lan_type_trans)(struct sk_buff *skb, - struct net_device *dev); - struct ccwgroup_device *gdev; - struct lcs_channel read; - struct lcs_channel write; - struct lcs_buffer *tx_buffer; - int tx_emitted; - struct list_head lancmd_waiters; - int lancmd_timeout; - - struct work_struct kernel_thread_starter; - spinlock_t mask_lock; - unsigned long thread_start_mask; - unsigned long thread_running_mask; - unsigned long thread_allowed_mask; - wait_queue_head_t wait_q; - -#ifdef CONFIG_IP_MULTICAST - struct list_head ipm_list; -#endif - __u8 mac[LCS_MAC_LENGTH]; - __u16 ip_assists_supported; - __u16 ip_assists_enabled; - __s8 lan_type; - __u32 pkt_seq; - __u16 sequence_no; - __s16 portno; - /* Some info copied from probeinfo */ - u8 device_forced; - u8 max_port_no; - u8 hint_port_no; - s16 port_protocol_no; -} __attribute__ ((aligned(8))); - diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 27f42f713c89..87edb7a8173b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1415,7 +1415,6 @@ int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, bool *vlan_offload_disabled); void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, struct _rule_hw *eth_header); -int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index af86097641b0..46bd7550adf8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -54,7 +54,6 @@ #include <linux/mlx5/doorbell.h> #include <linux/mlx5/eq.h> #include <linux/timecounter.h> -#include <linux/ptp_clock_kernel.h> #include <net/devlink.h> #define MLX5_ADEV_NAME "mlx5_core" @@ -679,33 +678,8 @@ struct mlx5_rsvd_gids { struct ida ida; }; -#define MAX_PIN_NUM 8 -struct mlx5_pps { - u8 pin_caps[MAX_PIN_NUM]; - struct work_struct out_work; - u64 start[MAX_PIN_NUM]; - u8 enabled; - u64 min_npps_period; - u64 min_out_pulse_duration_ns; -}; - -struct mlx5_timer { - struct cyclecounter cycles; - struct timecounter tc; - u32 nominal_c_mult; - unsigned long overflow_period; -}; - -struct mlx5_clock { - struct mlx5_nb pps_nb; - seqlock_t lock; - struct hwtstamp_config hwtstamp_config; - struct ptp_clock *ptp; - struct ptp_clock_info ptp_info; - struct mlx5_pps pps_info; - struct mlx5_timer timer; -}; - +struct mlx5_clock; +struct mlx5_clock_dev_state; struct mlx5_dm; struct mlx5_fw_tracer; struct mlx5_vxlan; @@ -789,7 +763,8 @@ struct mlx5_core_dev { #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif - struct mlx5_clock clock; + struct mlx5_clock *clock; + struct mlx5_clock_dev_state *clock_state; struct mlx5_ib_clock_info *clock_info; struct mlx5_fw_tracer *tracer; struct mlx5_rsc_dump *rsc_dump; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index e68d42b8ce65..fd625e0dd869 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -115,9 +115,12 @@ enum mlx5e_ext_link_mode { MLX5E_100GAUI_1_100GBASE_CR_KR = 11, MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, + MLX5E_200GAUI_1_200GBASE_CR1_KR1 = 14, MLX5E_400GAUI_8_400GBASE_CR8 = 15, MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, + MLX5E_400GAUI_2_400GBASE_CR2_KR2 = 17, MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19, + MLX5E_800GAUI_4_800GBASE_CR4_KR4 = 20, MLX5E_EXT_LINK_MODES_NUMBER, }; diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h index 1c1332e4df26..13274c3def66 100644 --- a/include/linux/net/intel/iidc.h +++ b/include/linux/net/intel/iidc.h @@ -78,6 +78,8 @@ int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type); int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos); +int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); +void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); /* Structure representing auxiliary driver tailored information about the core * PCI dev, each auxiliary driver using the IIDC interface will have an diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c0a86afb85da..fccc03cd2164 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -658,6 +658,7 @@ struct netdev_queue { struct Qdisc __rcu *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; + const struct attribute_group **groups; #endif unsigned long tx_maxrate; /* diff --git a/include/linux/of.h b/include/linux/of.h index eaf0e2a2b75c..9d6b8a61607f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -301,6 +301,8 @@ extern struct device_node *of_get_compatible_child(const struct device_node *par const char *compatible); extern struct device_node *of_get_child_by_name(const struct device_node *node, const char *name); +extern struct device_node *of_get_available_child_by_name(const struct device_node *node, + const char *name); /* cache lookup */ extern struct device_node *of_find_next_cache_node(const struct device_node *); @@ -578,6 +580,13 @@ static inline struct device_node *of_get_child_by_name( return NULL; } +static inline struct device_node *of_get_available_child_by_name( + const struct device_node *node, + const char *name) +{ + return NULL; +} + static inline int of_device_is_compatible(const struct device_node *device, const char *name) { diff --git a/include/linux/phy.h b/include/linux/phy.h index 19f076a71f94..64982eba71d1 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -303,9 +303,6 @@ static inline long rgmii_clock(int speed) } } -#define PHY_INIT_TIMEOUT 100000 -#define PHY_FORCE_TIMEOUT 10 - #define PHY_MAX_ADDR 32 /* Used when trying to connect to a specific phy (mii bus id:phy device id) */ @@ -611,7 +608,7 @@ struct macsec_ops; * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host - * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited + * @eee_disabled_modes: Energy efficient ethernet modes not to be advertised * @autoneg: Flag autoneg being used * @rate_matching: Current rate matching mode * @link: Current link state @@ -727,7 +724,7 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); /* Energy efficient ethernet modes which should be prohibited */ - __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_broken_modes); + __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_disabled_modes); bool enable_tx_lpi; bool eee_active; struct eee_config eee_cfg; @@ -1273,6 +1270,19 @@ struct phy_driver { */ int (*led_polarity_set)(struct phy_device *dev, int index, unsigned long modes); + + /** + * @get_next_update_time: Get the time until the next update event + * @dev: PHY device + * + * Callback to determine the time (in jiffies) until the next + * update event for the PHY state machine. Allows PHY drivers to + * dynamically adjust polling intervals based on link state or other + * conditions. + * + * Returns the time in jiffies until the next update event. + */ + unsigned int (*get_next_update_time)(struct phy_device *dev); }; #define to_phy_driver(d) container_of_const(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) @@ -1347,13 +1357,13 @@ void of_set_phy_timing_role(struct phy_device *phydev); int phy_speed_down_core(struct phy_device *phydev); /** - * phy_set_eee_broken - Mark an EEE mode as broken so that it isn't advertised. + * phy_disable_eee_mode - Don't advertise an EEE mode. * @phydev: The phy_device struct - * @link_mode: The broken EEE mode + * @link_mode: The EEE mode to be disabled */ -static inline void phy_set_eee_broken(struct phy_device *phydev, u32 link_mode) +static inline void phy_disable_eee_mode(struct phy_device *phydev, u32 link_mode) { - linkmode_set_bit(link_mode, phydev->eee_broken_modes); + linkmode_set_bit(link_mode, phydev->eee_disabled_modes); } /** diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 898b00451bbf..0de78673172d 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -737,6 +737,18 @@ static inline int phylink_get_link_timer_ns(phy_interface_t interface) } } +/** + * phylink_mac_implements_lpi() - determine if MAC implements LPI ops + * @ops: phylink_mac_ops structure + * + * Returns true if the phylink MAC operations structure indicates that the + * LPI operations have been implemented, false otherwise. + */ +static inline bool phylink_mac_implements_lpi(const struct phylink_mac_ops *ops) +{ + return ops && ops->mac_disable_tx_lpi && ops->mac_enable_tx_lpi; +} + void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, unsigned int neg_mode, u16 bmsr, u16 lpa); void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 4bc2ee0b10b0..ccaaf4c7d5f6 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -43,6 +43,7 @@ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); +extern int rtnl_lock_interruptible(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 836a7e200f39..812011d8b67e 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -222,7 +222,6 @@ struct sctp_datahdr { __be16 stream; __be16 ssn; __u32 ppid; - /* __u8 payload[]; */ }; struct sctp_data_chunk { diff --git a/include/linux/unroll.h b/include/linux/unroll.h index d42fd6366373..863fb69f6a7e 100644 --- a/include/linux/unroll.h +++ b/include/linux/unroll.h @@ -9,6 +9,50 @@ #include <linux/args.h> +#ifdef CONFIG_CC_IS_CLANG +#define __pick_unrolled(x, y) _Pragma(#x) +#elif CONFIG_GCC_VERSION >= 80000 +#define __pick_unrolled(x, y) _Pragma(#y) +#else +#define __pick_unrolled(x, y) /* not supported */ +#endif + +/** + * unrolled - loop attributes to ask the compiler to unroll it + * + * Usage: + * + * #define BATCH 8 + * + * unrolled_count(BATCH) + * for (u32 i = 0; i < BATCH; i++) + * // loop body without cross-iteration dependencies + * + * This is only a hint and the compiler is free to disable unrolling if it + * thinks the count is suboptimal and may hurt performance and/or hugely + * increase object code size. + * Not having any cross-iteration dependencies (i.e. when iter x + 1 depends + * on what iter x will do with variables) is not a strict requirement, but + * provides best performance and object code size. + * Available only on Clang and GCC 8.x onwards. + */ + +/* Ask the compiler to pick an optimal unroll count, Clang only */ +#define unrolled \ + __pick_unrolled(clang loop unroll(enable), /* nothing */) + +/* Unroll each @n iterations of the loop */ +#define unrolled_count(n) \ + __pick_unrolled(clang loop unroll_count(n), GCC unroll n) + +/* Unroll the whole loop */ +#define unrolled_full \ + __pick_unrolled(clang loop unroll(full), GCC unroll 65534) + +/* Never unroll the loop */ +#define unrolled_none \ + __pick_unrolled(clang loop unroll(disable), GCC unroll 1) + #define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args) #define __UNROLL_0(MACRO, args...) diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 33a4c146dc19..2ca60828f28b 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -30,6 +30,7 @@ #define VENDOR_ID_NVIDIA 0x0955 #define VENDOR_ID_TPLINK 0x2357 #define VENDOR_ID_DLINK 0x2001 +#define VENDOR_ID_DELL 0x413c #define VENDOR_ID_ASUS 0x0b05 #if IS_REACHABLE(CONFIG_USB_RTL8152) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index c39a426ebf52..741fa7754700 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -24,6 +24,11 @@ */ #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1)) +static inline bool napi_id_valid(unsigned int napi_id) +{ + return napi_id >= MIN_NAPI_ID; +} + #define BUSY_POLL_BUDGET 8 #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 04383d90a1e3..710caacad9da 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -178,10 +178,10 @@ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, struct netlink_ext_ack *extack); unsigned int fib_rules_seq_read(const struct net *net, int family); -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack); -int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack); +int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held); +int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held); INDIRECT_CALLABLE_DECLARE(int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c7f42844c79a..d9978ffacc97 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -90,6 +90,7 @@ struct inet_connection_sock { struct timer_list icsk_delack_timer; __u32 icsk_rto; __u32 icsk_rto_min; + u32 icsk_rto_max; __u32 icsk_delack_max; __u32 icsk_pmtu_cookie; const struct tcp_congestion_ops *icsk_ca_ops; @@ -189,9 +190,6 @@ static inline void inet_csk_delack_init(struct sock *sk) memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); } -void inet_csk_delete_keepalive_timer(struct sock *sk); -void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); - static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) { struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index b02bb9f109d5..825141d675e5 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -23,6 +23,7 @@ struct netdev_queue_stats_rx { u64 hw_drops; u64 hw_drop_overruns; + u64 csum_complete; u64 csum_unnecessary; u64 csum_none; u64 csum_bad; @@ -117,6 +118,10 @@ struct netdev_stat_ops { * * @ndo_queue_stop: Stop the RX queue at the specified index. The stopped * queue's memory is written at the specified address. + * + * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while + * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only + * be called for an interface which is open. */ struct netdev_queue_mgmt_ops { size_t ndo_queue_mem_size; diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 596836abf7bf..af40842f229d 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -16,6 +16,7 @@ struct netdev_rx_queue { struct rps_dev_flow_table __rcu *rps_flow_table; #endif struct kobject kobj; + const struct attribute_group **groups; struct net_device *dev; netdevice_tracker dev_tracker; diff --git a/include/net/netmem.h b/include/net/netmem.h index 1b58faa4f20f..c61d5b21e7b4 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -24,11 +24,20 @@ struct net_iov { unsigned long __unused_padding; unsigned long pp_magic; struct page_pool *pp; - struct dmabuf_genpool_chunk_owner *owner; + struct net_iov_area *owner; unsigned long dma_addr; atomic_long_t pp_ref_count; }; +struct net_iov_area { + /* Array of net_iovs for this area. */ + struct net_iov *niovs; + size_t num_niovs; + + /* Offset into the dma-buf where this chunk starts. */ + unsigned long base_virtual; +}; + /* These fields in struct page are used by the page_pool and net stack: * * struct { @@ -54,6 +63,16 @@ NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr); NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count); #undef NET_IOV_ASSERT_OFFSET +static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov) +{ + return niov->owner; +} + +static inline unsigned int net_iov_idx(const struct net_iov *niov) +{ + return niov - net_iov_owner(niov)->niovs; +} + /* netmem */ /** diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 46452da35206..45ac125e8aeb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -181,6 +181,7 @@ struct netns_ipv4 { u8 sysctl_tcp_window_scaling; u8 sysctl_tcp_timestamps; int sysctl_tcp_rto_min_us; + int sysctl_tcp_rto_max_ms; u8 sysctl_tcp_recovery; u8 sysctl_tcp_thin_linear_timeouts; u8 sysctl_tcp_slow_start_after_idle; diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h new file mode 100644 index 000000000000..b3e665897767 --- /dev/null +++ b/include/net/page_pool/memory_provider.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NET_PAGE_POOL_MEMORY_PROVIDER_H +#define _NET_PAGE_POOL_MEMORY_PROVIDER_H + +#include <net/netmem.h> +#include <net/page_pool/types.h> + +struct netdev_rx_queue; +struct sk_buff; + +struct memory_provider_ops { + netmem_ref (*alloc_netmems)(struct page_pool *pool, gfp_t gfp); + bool (*release_netmem)(struct page_pool *pool, netmem_ref netmem); + int (*init)(struct page_pool *pool); + void (*destroy)(struct page_pool *pool); + int (*nl_fill)(void *mp_priv, struct sk_buff *rsp, + struct netdev_rx_queue *rxq); + void (*uninstall)(void *mp_priv, struct netdev_rx_queue *rxq); +}; + +bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr); +void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov); +void net_mp_niov_clear_page_pool(struct net_iov *niov); + +int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *p); +void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *old_p); + +/** + * net_mp_netmem_place_in_cache() - give a netmem to a page pool + * @pool: the page pool to place the netmem into + * @netmem: netmem to give + * + * Push an accounted netmem into the page pool's allocation cache. The caller + * must ensure that there is space in the cache. It should only be called off + * the mp_ops->alloc_netmems() path. + */ +static inline void net_mp_netmem_place_in_cache(struct page_pool *pool, + netmem_ref netmem) +{ + pool->alloc.cache[pool->alloc.count++] = netmem; +} + +#endif diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 7f405672b089..36eb57d73abc 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -152,8 +152,11 @@ struct page_pool_stats { */ #define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long)) +struct memory_provider_ops; + struct pp_memory_provider_params { void *mp_priv; + const struct memory_provider_ops *mp_ops; }; struct page_pool { @@ -216,6 +219,7 @@ struct page_pool { struct ptr_ring ring; void *mp_priv; + const struct memory_provider_ops *mp_ops; #ifdef CONFIG_PAGE_POOL_STATS /* recycle stats are per-cpu to avoid locking */ diff --git a/include/net/sock.h b/include/net/sock.h index 8036b3b79cd8..60ebf3c7b229 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -954,6 +954,7 @@ enum sock_flags { SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */ SOCK_RCVPRIORITY, /* Receive SO_PRIORITY ancillary data with packet */ + SOCK_TIMESTAMPING_ANY, /* Copy of sk_tsflags & TSFLAGS_ANY */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -2664,13 +2665,13 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, { #define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_RCVTSTAMP) | \ - (1UL << SOCK_RCVMARK) |\ - (1UL << SOCK_RCVPRIORITY)) + (1UL << SOCK_RCVMARK) | \ + (1UL << SOCK_RCVPRIORITY) | \ + (1UL << SOCK_TIMESTAMPING_ANY)) #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) - if (sk->sk_flags & FLAGS_RECV_CMSGS || - READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY) + if (READ_ONCE(sk->sk_flags) & FLAGS_RECV_CMSGS) __sock_recv_cmsgs(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) sock_write_timestamp(sk, skb->tstamp); diff --git a/include/net/tcp.h b/include/net/tcp.h index 5b2b04835688..7fd2d7fa4532 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -143,8 +143,9 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); #define TCP_DELACK_MIN 4U #define TCP_ATO_MIN 4U #endif -#define TCP_RTO_MAX ((unsigned)(120*HZ)) -#define TCP_RTO_MIN ((unsigned)(HZ/5)) +#define TCP_RTO_MAX_SEC 120 +#define TCP_RTO_MAX ((unsigned)(TCP_RTO_MAX_SEC * HZ)) +#define TCP_RTO_MIN ((unsigned)(HZ / 5)) #define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */ #define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */ @@ -415,6 +416,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); +void tcp_reset_keepalive_timer(struct sock *sk, unsigned long timeout); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, @@ -739,10 +741,14 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu); int tcp_mss_to_mtu(struct sock *sk, int mss); void tcp_mtup_init(struct sock *sk); +static inline unsigned int tcp_rto_max(const struct sock *sk) +{ + return READ_ONCE(inet_csk(sk)->icsk_rto_max); +} + static inline void tcp_bound_rto(struct sock *sk) { - if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) - inet_csk(sk)->icsk_rto = TCP_RTO_MAX; + inet_csk(sk)->icsk_rto = min(inet_csk(sk)->icsk_rto, tcp_rto_max(sk)); } static inline u32 __tcp_set_rto(const struct tcp_sock *tp) @@ -1423,10 +1429,12 @@ static inline unsigned long tcp_pacing_delay(const struct sock *sk) static inline void tcp_reset_xmit_timer(struct sock *sk, const int what, unsigned long when, - const unsigned long max_when) + bool pace_delay) { - inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk), - max_when); + if (pace_delay) + when += tcp_pacing_delay(sk); + inet_csk_reset_xmit_timer(sk, what, when, + tcp_rto_max(sk)); } /* Something is really bad, we could not queue an additional packet, @@ -1455,7 +1463,7 @@ static inline void tcp_check_probe_timer(struct sock *sk) { if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - tcp_probe0_base(sk), TCP_RTO_MAX); + tcp_probe0_base(sk), true); } static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 784cd34f5bba..15086dcf51d8 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -196,6 +196,23 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) return xp_raw_get_data(pool, addr); } +/** + * xsk_buff_raw_get_ctx - get &xdp_desc context + * @pool: XSk buff pool desc address belongs to + * @addr: desc address (from userspace) + * + * Wrapper for xp_raw_get_ctx() to be used in drivers, see its kdoc for + * details. + * + * Return: new &xdp_desc_ctx struct containing desc's DMA address and metadata + * pointer, if it is present and valid (initialized to %NULL otherwise). + */ +static inline struct xdp_desc_ctx +xsk_buff_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr) +{ + return xp_raw_get_ctx(pool, addr); +} + #define XDP_TXMD_FLAGS_VALID ( \ XDP_TXMD_FLAGS_TIMESTAMP | \ XDP_TXMD_FLAGS_CHECKSUM | \ @@ -207,20 +224,27 @@ xsk_buff_valid_tx_metadata(const struct xsk_tx_metadata *meta) return !(meta->flags & ~XDP_TXMD_FLAGS_VALID); } -static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +static inline struct xsk_tx_metadata * +__xsk_buff_get_metadata(const struct xsk_buff_pool *pool, void *data) { struct xsk_tx_metadata *meta; if (!pool->tx_metadata_len) return NULL; - meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len; + meta = data - pool->tx_metadata_len; if (unlikely(!xsk_buff_valid_tx_metadata(meta))) return NULL; /* no way to signal the error to the user */ return meta; } +static inline struct xsk_tx_metadata * +xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +{ + return __xsk_buff_get_metadata(pool, xp_raw_get_data(pool, addr)); +} + static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); @@ -388,12 +412,25 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) return NULL; } +static inline struct xdp_desc_ctx +xsk_buff_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr) +{ + return (struct xdp_desc_ctx){ }; +} + static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta) { return false; } -static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +static inline struct xsk_tx_metadata * +__xsk_buff_get_metadata(const struct xsk_buff_pool *pool, void *data) +{ + return NULL; +} + +static inline struct xsk_tx_metadata * +xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) { return NULL; } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 50779406bc2d..1dcd4d71468a 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -141,6 +141,14 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max); bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count); void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr); dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr); + +struct xdp_desc_ctx { + dma_addr_t dma; + struct xsk_tx_metadata *meta; +}; + +struct xdp_desc_ctx xp_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr); + static inline dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb) { return xskb->dma; diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 9b18c4cfe56f..2feba0929a8a 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2059,6 +2059,24 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100, ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101, ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102, + ETHTOOL_LINK_MODE_200000baseCR_Full_BIT = 103, + ETHTOOL_LINK_MODE_200000baseKR_Full_BIT = 104, + ETHTOOL_LINK_MODE_200000baseDR_Full_BIT = 105, + ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT = 106, + ETHTOOL_LINK_MODE_200000baseSR_Full_BIT = 107, + ETHTOOL_LINK_MODE_200000baseVR_Full_BIT = 108, + ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT = 109, + ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT = 110, + ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT = 111, + ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT = 112, + ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT = 113, + ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT = 114, + ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT = 115, + ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT = 116, + ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT = 117, + ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT = 118, + ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT = 119, + ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT = 120, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e4be227d3ad6..6c6ee183802d 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -87,6 +87,11 @@ enum { }; enum { + __NETDEV_A_IO_URING_PROVIDER_INFO_MAX, + NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1) +}; + +enum { NETDEV_A_PAGE_POOL_ID = 1, NETDEV_A_PAGE_POOL_IFINDEX, NETDEV_A_PAGE_POOL_NAPI_ID, @@ -94,6 +99,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, NETDEV_A_PAGE_POOL_DMABUF, + NETDEV_A_PAGE_POOL_IO_URING, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -136,6 +142,7 @@ enum { NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, + NETDEV_A_QUEUE_IO_URING, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index dbf896f3146c..32a27b4a5020 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -136,6 +136,7 @@ enum { #define TCP_AO_REPAIR 42 /* Get/Set SNEs and ISNs */ #define TCP_IS_MPTCP 43 /* Is MPTCP being used? */ +#define TCP_RTO_MAX_MS 44 /* max rto time in ms */ #define TCP_REPAIR_ON 1 #define TCP_REPAIR_OFF 0 diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1af972a92d06..238321830993 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2557,15 +2557,6 @@ config TEST_BPF If unsure, say N. -config TEST_BLACKHOLE_DEV - tristate "Test blackhole netdev functionality" - depends on m && NET - help - This builds the "test_blackhole_dev" module that validates the - data path through this blackhole netdev. - - If unsure, say N. - config FIND_BIT_BENCHMARK tristate "Test find_bit functions" help @@ -2888,6 +2879,17 @@ config USERCOPY_KUNIT_TEST on the copy_to/from_user infrastructure, making sure basic user/kernel boundary testing is working. +config BLACKHOLE_DEV_KUNIT_TEST + tristate "Test blackhole netdev functionality" if !KUNIT_ALL_TESTS + depends on NET + depends on KUNIT + default KUNIT_ALL_TESTS + help + This builds the "blackhole_dev_kunit" module that validates the + data path through this blackhole netdev. + + If unsure, say N. + config TEST_UDELAY tristate "udelay test driver" help diff --git a/lib/Makefile b/lib/Makefile index d5cfc7afbbb8..19ff6993c2bc 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -102,7 +102,6 @@ obj-$(CONFIG_TEST_RUNTIME) += tests/ obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o -obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o obj-$(CONFIG_TEST_HMM) += test_hmm.o @@ -393,6 +392,7 @@ obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o obj-$(CONFIG_CRC_KUNIT_TEST) += crc_kunit.o obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o obj-$(CONFIG_USERCOPY_KUNIT_TEST) += usercopy_kunit.o +obj-$(CONFIG_BLACKHOLE_DEV_KUNIT_TEST) += blackhole_dev_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/test_blackhole_dev.c b/lib/blackhole_dev_kunit.c index ec290ac2a0d9..06834ab35f43 100644 --- a/lib/test_blackhole_dev.c +++ b/lib/blackhole_dev_kunit.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * This module tests the blackhole_dev that is created during the + * This tests the blackhole_dev that is created during the * net subsystem initialization. The test this module performs is * by injecting an skb into the stack with skb->dev as the * blackhole_dev and expects kernel to behave in a sane manner @@ -9,9 +9,8 @@ * Copyright (c) 2018, Mahesh Bandewar <maheshb@google.com> */ -#include <linux/init.h> +#include <kunit/test.h> #include <linux/module.h> -#include <linux/printk.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/udp.h> @@ -25,17 +24,15 @@ #define UDP_PORT 1234 -static int __init test_blackholedev_init(void) +static void test_blackholedev(struct kunit *test) { struct ipv6hdr *ip6h; struct sk_buff *skb; struct udphdr *uh; int data_len; - int ret; skb = alloc_skb(SKB_SIZE, GFP_KERNEL); - if (!skb) - return -ENOMEM; + KUNIT_ASSERT_NOT_NULL(test, skb); /* Reserve head-room for the headers */ skb_reserve(skb, HEAD_SIZE); @@ -55,7 +52,7 @@ static int __init test_blackholedev_init(void) ip6h = (struct ipv6hdr *)skb_push(skb, sizeof(struct ipv6hdr)); skb_set_network_header(skb, 0); ip6h->hop_limit = 32; - ip6h->payload_len = data_len + sizeof(struct udphdr); + ip6h->payload_len = htons(data_len + sizeof(struct udphdr)); ip6h->nexthdr = IPPROTO_UDP; ip6h->saddr = in6addr_loopback; ip6h->daddr = in6addr_loopback; @@ -68,32 +65,20 @@ static int __init test_blackholedev_init(void) skb->dev = blackhole_netdev; /* Now attempt to send the packet */ - ret = dev_queue_xmit(skb); - - switch (ret) { - case NET_XMIT_SUCCESS: - pr_warn("dev_queue_xmit() returned NET_XMIT_SUCCESS\n"); - break; - case NET_XMIT_DROP: - pr_warn("dev_queue_xmit() returned NET_XMIT_DROP\n"); - break; - case NET_XMIT_CN: - pr_warn("dev_queue_xmit() returned NET_XMIT_CN\n"); - break; - default: - pr_err("dev_queue_xmit() returned UNKNOWN(%d)\n", ret); - } - - return 0; + KUNIT_EXPECT_EQ(test, dev_queue_xmit(skb), NET_XMIT_SUCCESS); } -static void __exit test_blackholedev_exit(void) -{ - pr_warn("test_blackholedev module terminating.\n"); -} +static struct kunit_case blackholedev_cases[] = { + KUNIT_CASE(test_blackholedev), + {}, +}; + +static struct kunit_suite blackholedev_suite = { + .name = "blackholedev", + .test_cases = blackholedev_cases, +}; -module_init(test_blackholedev_init); -module_exit(test_blackholedev_exit); +kunit_test_suite(blackholedev_suite); MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>"); MODULE_DESCRIPTION("module test of the blackhole_dev"); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 1a52a0bca086..7e1ad229e133 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -1040,7 +1040,7 @@ static int br_mdb_add_group(const struct br_mdb_config *cfg, /* host join */ if (!port) { - if (mp->host_joined) { + if (mp->host_joined && !(cfg->nlflags & NLM_F_REPLACE)) { NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host"); return -EEXIST; } diff --git a/net/core/dev.c b/net/core/dev.c index b91658e8aedb..d5ab9a4b318e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -159,6 +159,7 @@ #include <net/netdev_rx_queue.h> #include <net/page_pool/types.h> #include <net/page_pool/helpers.h> +#include <net/page_pool/memory_provider.h> #include <net/rps.h> #include <linux/phy_link_topology.h> @@ -6119,16 +6120,18 @@ EXPORT_SYMBOL(netif_receive_skb_list); static void flush_backlog(struct work_struct *work) { struct sk_buff *skb, *tmp; + struct sk_buff_head list; struct softnet_data *sd; + __skb_queue_head_init(&list); local_bh_disable(); sd = this_cpu_ptr(&softnet_data); backlog_lock_irq_disable(sd); skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { - if (skb->dev->reg_state == NETREG_UNREGISTERING) { + if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); - dev_kfree_skb_irq(skb); + __skb_queue_tail(&list, skb); rps_input_queue_head_incr(sd); } } @@ -6136,14 +6139,16 @@ static void flush_backlog(struct work_struct *work) local_lock_nested_bh(&softnet_data.process_queue_bh_lock); skb_queue_walk_safe(&sd->process_queue, skb, tmp) { - if (skb->dev->reg_state == NETREG_UNREGISTERING) { + if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); - kfree_skb(skb); + __skb_queue_tail(&list, skb); rps_input_queue_head_incr(sd); } } local_unlock_nested_bh(&softnet_data.process_queue_bh_lock); local_bh_enable(); + + __skb_queue_purge_reason(&list, SKB_DROP_REASON_DEV_READY); } static bool flush_required(int cpu) @@ -7071,6 +7076,9 @@ void __netif_napi_del_locked(struct napi_struct *napi) if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) return; + /* Make sure NAPI is disabled (or was never enabled). */ + WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state)); + if (napi->config) { napi->index = -1; napi->config = NULL; @@ -11738,6 +11746,19 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL(unregister_netdevice_queue); +static void dev_memory_provider_uninstall(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->real_num_rx_queues; i++) { + struct netdev_rx_queue *rxq = &dev->_rx[i]; + struct pp_memory_provider_params *p = &rxq->mp_params; + + if (p->mp_ops && p->mp_ops->uninstall) + p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq); + } +} + void unregister_netdevice_many_notify(struct list_head *head, u32 portid, const struct nlmsghdr *nlh) { @@ -11792,7 +11813,7 @@ void unregister_netdevice_many_notify(struct list_head *head, dev_tcx_uninstall(dev); dev_xdp_uninstall(dev); bpf_dev_bound_netdev_unregister(dev); - dev_dmabuf_uninstall(dev); + dev_memory_provider_uninstall(dev); netdev_offload_xstats_disable_all(dev); diff --git a/net/core/dev.h b/net/core/dev.h index a5b166bbd169..caa13e431a6b 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -299,6 +299,18 @@ void xdp_do_check_flushed(struct napi_struct *napi); static inline void xdp_do_check_flushed(struct napi_struct *napi) { } #endif +/* Best effort check that NAPI is not idle (can't be scheduled to run) */ +static inline void napi_assert_will_not_race(const struct napi_struct *napi) +{ + /* uninitialized instance, can't race */ + if (!napi->poll_list.next) + return; + + /* SCHED bit is set on disabled instances */ + WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state)); + WARN_ON(READ_ONCE(napi->list_owner) != -1); +} + void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); #define XMIT_RECURSION_LIMIT 8 diff --git a/net/core/devmem.c b/net/core/devmem.c index 3bba3f018df0..7c6e0b5b6acb 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -16,6 +16,7 @@ #include <net/netdev_queues.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> +#include <net/page_pool/memory_provider.h> #include <trace/events/page_pool.h> #include "devmem.h" @@ -27,20 +28,28 @@ /* Protected by rtnl_lock() */ static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); +static const struct memory_provider_ops dmabuf_devmem_ops; + +bool net_is_devmem_iov(struct net_iov *niov) +{ + return niov->pp->mp_ops == &dmabuf_devmem_ops; +} + static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool, struct gen_pool_chunk *chunk, void *not_used) { struct dmabuf_genpool_chunk_owner *owner = chunk->owner; - kvfree(owner->niovs); + kvfree(owner->area.niovs); kfree(owner); } static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov) { - struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov); + struct dmabuf_genpool_chunk_owner *owner; + owner = net_devmem_iov_to_chunk_owner(niov); return owner->base_dma_addr + ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT); } @@ -83,7 +92,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) offset = dma_addr - owner->base_dma_addr; index = offset / PAGE_SIZE; - niov = &owner->niovs[index]; + niov = &owner->area.niovs[index]; niov->pp_magic = 0; niov->pp = NULL; @@ -94,7 +103,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) void net_devmem_free_dmabuf(struct net_iov *niov) { - struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov); + struct net_devmem_dmabuf_binding *binding = net_devmem_iov_binding(niov); unsigned long dma_addr = net_devmem_get_dma_addr(niov); if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr, @@ -117,6 +126,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) WARN_ON(rxq->mp_params.mp_priv != binding); rxq->mp_params.mp_priv = NULL; + rxq->mp_params.mp_ops = NULL; rxq_idx = get_netdev_rx_queue_index(rxq); @@ -152,7 +162,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, } rxq = __netif_get_rx_queue(dev, rxq_idx); - if (rxq->mp_params.mp_priv) { + if (rxq->mp_params.mp_ops) { NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); return -EEXIST; } @@ -170,6 +180,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return err; rxq->mp_params.mp_priv = binding; + rxq->mp_params.mp_ops = &dmabuf_devmem_ops; err = netdev_rx_queue_restart(dev, rxq_idx); if (err) @@ -179,6 +190,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, err_xa_erase: rxq->mp_params.mp_priv = NULL; + rxq->mp_params.mp_ops = NULL; xa_erase(&binding->bound_rxqs, xa_idx); return err; @@ -261,9 +273,9 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, goto err_free_chunks; } - owner->base_virtual = virtual; + owner->area.base_virtual = virtual; owner->base_dma_addr = dma_addr; - owner->num_niovs = len / PAGE_SIZE; + owner->area.num_niovs = len / PAGE_SIZE; owner->binding = binding; err = gen_pool_add_owner(binding->chunk_pool, dma_addr, @@ -275,17 +287,17 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, goto err_free_chunks; } - owner->niovs = kvmalloc_array(owner->num_niovs, - sizeof(*owner->niovs), - GFP_KERNEL); - if (!owner->niovs) { + owner->area.niovs = kvmalloc_array(owner->area.num_niovs, + sizeof(*owner->area.niovs), + GFP_KERNEL); + if (!owner->area.niovs) { err = -ENOMEM; goto err_free_chunks; } - for (i = 0; i < owner->num_niovs; i++) { - niov = &owner->niovs[i]; - niov->owner = owner; + for (i = 0; i < owner->area.num_niovs; i++) { + niov = &owner->area.niovs[i]; + niov->owner = &owner->area; page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), net_devmem_get_dma_addr(niov)); } @@ -313,26 +325,6 @@ err_put_dmabuf: return ERR_PTR(err); } -void dev_dmabuf_uninstall(struct net_device *dev) -{ - struct net_devmem_dmabuf_binding *binding; - struct netdev_rx_queue *rxq; - unsigned long xa_idx; - unsigned int i; - - for (i = 0; i < dev->real_num_rx_queues; i++) { - binding = dev->_rx[i].mp_params.mp_priv; - if (!binding) - continue; - - xa_for_each(&binding->bound_rxqs, xa_idx, rxq) - if (rxq == &dev->_rx[i]) { - xa_erase(&binding->bound_rxqs, xa_idx); - break; - } - } -} - /*** "Dmabuf devmem memory provider" ***/ int mp_dmabuf_devmem_init(struct page_pool *pool) @@ -398,3 +390,36 @@ bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem) /* We don't want the page pool put_page()ing our net_iovs. */ return false; } + +static int mp_dmabuf_devmem_nl_fill(void *mp_priv, struct sk_buff *rsp, + struct netdev_rx_queue *rxq) +{ + const struct net_devmem_dmabuf_binding *binding = mp_priv; + int type = rxq ? NETDEV_A_QUEUE_DMABUF : NETDEV_A_PAGE_POOL_DMABUF; + + return nla_put_u32(rsp, type, binding->id); +} + +static void mp_dmabuf_devmem_uninstall(void *mp_priv, + struct netdev_rx_queue *rxq) +{ + struct net_devmem_dmabuf_binding *binding = mp_priv; + struct netdev_rx_queue *bound_rxq; + unsigned long xa_idx; + + xa_for_each(&binding->bound_rxqs, xa_idx, bound_rxq) { + if (bound_rxq == rxq) { + xa_erase(&binding->bound_rxqs, xa_idx); + break; + } + } +} + +static const struct memory_provider_ops dmabuf_devmem_ops = { + .init = mp_dmabuf_devmem_init, + .destroy = mp_dmabuf_devmem_destroy, + .alloc_netmems = mp_dmabuf_devmem_alloc_netmems, + .release_netmem = mp_dmabuf_devmem_release_page, + .nl_fill = mp_dmabuf_devmem_nl_fill, + .uninstall = mp_dmabuf_devmem_uninstall, +}; diff --git a/net/core/devmem.h b/net/core/devmem.h index 76099ef9c482..7fc158d52729 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -10,6 +10,8 @@ #ifndef _NET_DEVMEM_H #define _NET_DEVMEM_H +#include <net/netmem.h> + struct netlink_ext_ack; struct net_devmem_dmabuf_binding { @@ -51,17 +53,11 @@ struct net_devmem_dmabuf_binding { * allocations from this chunk. */ struct dmabuf_genpool_chunk_owner { - /* Offset into the dma-buf where this chunk starts. */ - unsigned long base_virtual; + struct net_iov_area area; + struct net_devmem_dmabuf_binding *binding; /* dma_addr of the start of the chunk. */ dma_addr_t base_dma_addr; - - /* Array of net_iovs for this chunk. */ - struct net_iov *niovs; - size_t num_niovs; - - struct net_devmem_dmabuf_binding *binding; }; void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding); @@ -72,38 +68,34 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding); int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, struct net_devmem_dmabuf_binding *binding, struct netlink_ext_ack *extack); -void dev_dmabuf_uninstall(struct net_device *dev); static inline struct dmabuf_genpool_chunk_owner * -net_iov_owner(const struct net_iov *niov) +net_devmem_iov_to_chunk_owner(const struct net_iov *niov) { - return niov->owner; + struct net_iov_area *owner = net_iov_owner(niov); + + return container_of(owner, struct dmabuf_genpool_chunk_owner, area); } -static inline unsigned int net_iov_idx(const struct net_iov *niov) +static inline struct net_devmem_dmabuf_binding * +net_devmem_iov_binding(const struct net_iov *niov) { - return niov - net_iov_owner(niov)->niovs; + return net_devmem_iov_to_chunk_owner(niov)->binding; } -static inline struct net_devmem_dmabuf_binding * -net_iov_binding(const struct net_iov *niov) +static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov) { - return net_iov_owner(niov)->binding; + return net_devmem_iov_binding(niov)->id; } static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov) { - struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov); + struct net_iov_area *owner = net_iov_owner(niov); return owner->base_virtual + ((unsigned long)net_iov_idx(niov) << PAGE_SHIFT); } -static inline u32 net_iov_binding_id(const struct net_iov *niov) -{ - return net_iov_owner(niov)->binding->id; -} - static inline void net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding) { @@ -123,6 +115,8 @@ struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding); void net_devmem_free_dmabuf(struct net_iov *ppiov); +bool net_is_devmem_iov(struct net_iov *niov); + #else struct net_devmem_dmabuf_binding; @@ -152,10 +146,6 @@ net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return -EOPNOTSUPP; } -static inline void dev_dmabuf_uninstall(struct net_device *dev) -{ -} - static inline struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) { @@ -171,10 +161,15 @@ static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov) return 0; } -static inline u32 net_iov_binding_id(const struct net_iov *niov) +static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov) { return 0; } + +static inline bool net_is_devmem_iov(struct net_iov *niov) +{ + return false; +} #endif #endif /* _NET_DEVMEM_H */ diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 94a7872ab231..424b4cd4e9e5 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -373,7 +373,8 @@ static int call_fib_rule_notifiers(struct net *net, .rule = rule, }; - ASSERT_RTNL(); + ASSERT_RTNL_NET(net); + /* Paired with READ_ONCE() in fib_rules_seq() */ WRITE_ONCE(ops->fib_rules_seq, ops->fib_rules_seq + 1); return call_fib_notifiers(net, event_type, &info.info); @@ -461,9 +462,6 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops, if (rule->tun_id && r->tun_id != rule->tun_id) continue; - if (r->fr_net != rule->fr_net) - continue; - if (rule->l3mdev && r->l3mdev != rule->l3mdev) continue; @@ -517,14 +515,13 @@ static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, } #endif -static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, +static int fib_nl2rule(struct net *net, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, struct fib_rules_ops *ops, struct nlattr *tb[], struct fib_rule **rule, bool *user_priority) { - struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rule *nlrule = NULL; int err = -EINVAL; @@ -556,30 +553,18 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[FRA_PRIORITY]) { nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]); *user_priority = true; - } else { - nlrule->pref = fib_default_rule_pref(ops); } nlrule->proto = nla_get_u8_default(tb[FRA_PROTOCOL], RTPROT_UNSPEC); if (tb[FRA_IIFNAME]) { - struct net_device *dev; - nlrule->iifindex = -1; nla_strscpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); - dev = __dev_get_by_name(net, nlrule->iifname); - if (dev) - nlrule->iifindex = dev->ifindex; } if (tb[FRA_OIFNAME]) { - struct net_device *dev; - nlrule->oifindex = -1; nla_strscpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); - dev = __dev_get_by_name(net, nlrule->oifname); - if (dev) - nlrule->oifindex = dev->ifindex; } if (tb[FRA_FWMARK]) { @@ -621,11 +606,6 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, } nlrule->target = nla_get_u32(tb[FRA_GOTO]); - /* Backward jumps are prohibited to avoid endless loops */ - if (nlrule->target <= nlrule->pref) { - NL_SET_ERR_MSG(extack, "Backward goto not supported"); - goto errout_free; - } } else if (nlrule->action == FR_ACT_GOTO) { NL_SET_ERR_MSG(extack, "Missing goto target for action goto"); goto errout_free; @@ -685,6 +665,39 @@ errout: return err; } +static int fib_nl2rule_rtnl(struct fib_rule *nlrule, + struct fib_rules_ops *ops, + struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + if (!tb[FRA_PRIORITY]) + nlrule->pref = fib_default_rule_pref(ops); + + /* Backward jumps are prohibited to avoid endless loops */ + if (tb[FRA_GOTO] && nlrule->target <= nlrule->pref) { + NL_SET_ERR_MSG(extack, "Backward goto not supported"); + return -EINVAL; + } + + if (tb[FRA_IIFNAME]) { + struct net_device *dev; + + dev = __dev_get_by_name(nlrule->fr_net, nlrule->iifname); + if (dev) + nlrule->iifindex = dev->ifindex; + } + + if (tb[FRA_OIFNAME]) { + struct net_device *dev; + + dev = __dev_get_by_name(nlrule->fr_net, nlrule->oifname); + if (dev) + nlrule->oifindex = dev->ifindex; + } + + return 0; +} + static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, struct nlattr **tb, struct fib_rule *rule) { @@ -721,9 +734,6 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, if (r->tun_id != rule->tun_id) continue; - if (r->fr_net != rule->fr_net) - continue; - if (r->l3mdev != rule->l3mdev) continue; @@ -776,15 +786,14 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = { [FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 }, }; -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held) { - struct net *net = sock_net(skb->sk); + struct fib_rule *rule = NULL, *r, *last = NULL; struct fib_rule_hdr *frh = nlmsg_data(nlh); + int err = -EINVAL, unresolved = 0; struct fib_rules_ops *ops = NULL; - struct fib_rule *rule = NULL, *r, *last = NULL; struct nlattr *tb[FRA_MAX + 1]; - int err = -EINVAL, unresolved = 0; bool user_priority = false; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { @@ -806,10 +815,17 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority); + err = fib_nl2rule(net, nlh, extack, ops, tb, &rule, &user_priority); if (err) goto errout; + if (!rtnl_held) + rtnl_net_lock(net); + + err = fib_nl2rule_rtnl(rule, ops, tb, extack); + if (err) + goto errout_free; + if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; @@ -871,29 +887,42 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (rule->tun_id) ip_tunnel_need_metadata(); + fib_rule_get(rule); + + if (!rtnl_held) + rtnl_net_unlock(net); + notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); + fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); return 0; errout_free: + if (!rtnl_held) + rtnl_net_unlock(net); kfree(rule); errout: rules_ops_put(ops); return err; } -EXPORT_SYMBOL_GPL(fib_nl_newrule); +EXPORT_SYMBOL_GPL(fib_newrule); -int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); + return fib_newrule(sock_net(skb->sk), skb, nlh, extack, false); +} + +int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held) +{ + struct fib_rule *rule = NULL, *nlrule = NULL; struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; - struct fib_rule *rule = NULL, *r, *nlrule = NULL; struct nlattr *tb[FRA_MAX+1]; - int err = -EINVAL; bool user_priority = false; + int err = -EINVAL; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { NL_SET_ERR_MSG(extack, "Invalid msg length"); @@ -914,25 +943,32 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority); + err = fib_nl2rule(net, nlh, extack, ops, tb, &nlrule, &user_priority); if (err) goto errout; + if (!rtnl_held) + rtnl_net_lock(net); + + err = fib_nl2rule_rtnl(nlrule, ops, tb, extack); + if (err) + goto errout_free; + rule = rule_find(ops, frh, tb, nlrule, user_priority); if (!rule) { err = -ENOENT; - goto errout; + goto errout_free; } if (rule->flags & FIB_RULE_PERMANENT) { err = -EPERM; - goto errout; + goto errout_free; } if (ops->delete) { err = ops->delete(rule); if (err) - goto errout; + goto errout_free; } if (rule->tun_id) @@ -954,7 +990,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, * current if it is goto rule, have actually been added. */ if (ops->nr_goto_rules > 0) { - struct fib_rule *n; + struct fib_rule *n, *r; n = list_next_entry(rule, list); if (&n->list == &ops->rules_list || n->pref != rule->pref) @@ -968,22 +1004,33 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, } } - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, - NULL); - notify_rule_change(RTM_DELRULE, rule, ops, nlh, - NETLINK_CB(skb).portid); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, NULL); + + if (!rtnl_held) + rtnl_net_unlock(net); + + notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); kfree(nlrule); return 0; -errout: +errout_free: + if (!rtnl_held) + rtnl_net_unlock(net); kfree(nlrule); +errout: rules_ops_put(ops); return err; } -EXPORT_SYMBOL_GPL(fib_nl_delrule); +EXPORT_SYMBOL_GPL(fib_delrule); + +static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + return fib_delrule(sock_net(skb->sk), skb, nlh, extack, false); +} static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, struct fib_rule *rule) @@ -1295,8 +1342,10 @@ static struct pernet_operations fib_rules_net_ops = { }; static const struct rtnl_msg_handler fib_rules_rtnl_msg_handlers[] __initconst = { - {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule}, - {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule}, + {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule, + .flags = RTNL_FLAG_DOIT_PERNET}, + {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule, + .flags = RTNL_FLAG_DOIT_PERNET}, {.msgtype = RTM_GETRULE, .dumpit = fib_nl_dumprule, .flags = RTNL_FLAG_DUMP_UNLOCKED}, }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index bd0251bd74a1..d8dd686b5287 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -832,12 +832,10 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, return -ENOENT; } -static void neigh_parms_destroy(struct neigh_parms *parms); - static inline void neigh_parms_put(struct neigh_parms *parms) { if (refcount_dec_and_test(&parms->refcnt)) - neigh_parms_destroy(parms); + kfree(parms); } /* @@ -1713,11 +1711,6 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) } EXPORT_SYMBOL(neigh_parms_release); -static void neigh_parms_destroy(struct neigh_parms *parms) -{ - kfree(parms); -} - static struct lock_class_key neigh_table_proxy_queue_class; static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 07cb99b114bd..3fe2c521e574 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -42,6 +42,87 @@ static inline int dev_isalive(const struct net_device *dev) return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED; } +/* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active, + * when unregistering a net device and accessing associated sysfs files. The + * potential deadlock is as follow: + * + * CPU 0 CPU 1 + * + * rtnl_lock vfs_read + * unregister_netdevice_many kernfs_seq_start + * device_del / kobject_put kernfs_get_active (kn->active++) + * kernfs_drain sysfs_kf_seq_show + * wait_event( rtnl_lock + * kn->active == KN_DEACTIVATED_BIAS) -> waits on CPU 0 to release + * -> waits on CPU 1 to decrease kn->active the rtnl lock. + * + * The historical fix was to use rtnl_trylock with restart_syscall to bail out + * of sysfs operations when the lock couldn't be taken. This fixed the above + * issue as it allowed CPU 1 to bail out of the ABBA situation. + * + * But it came with performances issues, as syscalls are being restarted in + * loops when there was contention on the rtnl lock, with huge slow downs in + * specific scenarios (e.g. lots of virtual interfaces created and userspace + * daemons querying their attributes). + * + * The idea below is to bail out of the active kernfs_node protection + * (kn->active) while trying to take the rtnl lock. + * + * This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The + * net device is guaranteed to be alive if this returns successfully. + */ +static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr, + struct net_device *ndev) +{ + struct kernfs_node *kn; + int ret = 0; + + /* First, we hold a reference to the net device as the unregistration + * path might run in parallel. This will ensure the net device and the + * associated sysfs objects won't be freed while we try to take the rtnl + * lock. + */ + dev_hold(ndev); + /* sysfs_break_active_protection was introduced to allow self-removal of + * devices and their associated sysfs files by bailing out of the + * sysfs/kernfs protection. We do this here to allow the unregistration + * path to complete in parallel. The following takes a reference on the + * kobject and the kernfs_node being accessed. + * + * This works because we hold a reference onto the net device and the + * unregistration path will wait for us eventually in netdev_run_todo + * (outside an rtnl lock section). + */ + kn = sysfs_break_active_protection(kobj, attr); + /* We can now try to take the rtnl lock. This can't deadlock us as the + * unregistration path is able to drain sysfs files (kernfs_node) thanks + * to the above dance. + */ + if (rtnl_lock_interruptible()) { + ret = -ERESTARTSYS; + goto unbreak; + } + /* Check dismantle on the device hasn't started, otherwise deny the + * operation. + */ + if (!dev_isalive(ndev)) { + rtnl_unlock(); + ret = -ENODEV; + goto unbreak; + } + /* We are now sure the device dismantle hasn't started nor that it can + * start before we exit the locking section as we hold the rtnl lock. + * There's no need to keep unbreaking the sysfs protection nor to hold + * a net device reference from that point; that was only needed to take + * the rtnl lock. + */ +unbreak: + sysfs_unbreak_active_protection(kn); + dev_put(ndev); + + return ret; +} + /* use same locking rules as GIF* ioctl's */ static ssize_t netdev_show(const struct device *dev, struct device_attribute *attr, char *buf, @@ -95,14 +176,14 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, if (ret) goto err; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + goto err; + + ret = (*set)(netdev, new); + if (ret == 0) + ret = len; - if (dev_isalive(netdev)) { - ret = (*set)(netdev, new); - if (ret == 0) - ret = len; - } rtnl_unlock(); err: return ret; @@ -220,7 +301,7 @@ static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, struct net_device *netdev = to_net_dev(dev); /* The check is also done in change_carrier; this helps returning early - * without hitting the trylock/restart in netdev_store. + * without hitting the locking section in netdev_store. */ if (!netdev->netdev_ops->ndo_change_carrier) return -EOPNOTSUPP; @@ -234,8 +315,9 @@ static ssize_t carrier_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; if (netif_running(netdev)) { /* Synchronize carrier state with link watch, @@ -245,8 +327,8 @@ static ssize_t carrier_show(struct device *dev, ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev)); } - rtnl_unlock(); + rtnl_unlock(); return ret; } static DEVICE_ATTR_RW(carrier); @@ -258,13 +340,14 @@ static ssize_t speed_show(struct device *dev, int ret = -EINVAL; /* The check is also done in __ethtool_get_link_ksettings; this helps - * returning early without hitting the trylock/restart below. + * returning early without hitting the locking section below. */ if (!netdev->ethtool_ops->get_link_ksettings) return ret; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; if (netif_running(netdev)) { struct ethtool_link_ksettings cmd; @@ -284,13 +367,14 @@ static ssize_t duplex_show(struct device *dev, int ret = -EINVAL; /* The check is also done in __ethtool_get_link_ksettings; this helps - * returning early without hitting the trylock/restart below. + * returning early without hitting the locking section below. */ if (!netdev->ethtool_ops->get_link_ksettings) return ret; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; if (netif_running(netdev)) { struct ethtool_link_ksettings cmd; @@ -490,16 +574,15 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, if (len > 0 && buf[len - 1] == '\n') --count; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - ret = dev_set_alias(netdev, buf, count); - if (ret < 0) - goto err; - ret = len; - netdev_state_change(netdev); - } + ret = dev_set_alias(netdev, buf, count); + if (ret < 0) + goto err; + ret = len; + netdev_state_change(netdev); err: rtnl_unlock(); @@ -551,24 +634,23 @@ static ssize_t phys_port_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); + struct netdev_phys_item_id ppid; ssize_t ret = -EINVAL; /* The check is also done in dev_get_phys_port_id; this helps returning - * early without hitting the trylock/restart below. + * early without hitting the locking section below. */ if (!netdev->netdev_ops->ndo_get_phys_port_id) return -EOPNOTSUPP; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - struct netdev_phys_item_id ppid; + ret = dev_get_phys_port_id(netdev, &ppid); + if (!ret) + ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - ret = dev_get_phys_port_id(netdev, &ppid); - if (!ret) - ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - } rtnl_unlock(); return ret; @@ -580,24 +662,23 @@ static ssize_t phys_port_name_show(struct device *dev, { struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + char name[IFNAMSIZ]; /* The checks are also done in dev_get_phys_port_name; this helps - * returning early without hitting the trylock/restart below. + * returning early without hitting the locking section below. */ if (!netdev->netdev_ops->ndo_get_phys_port_name && !netdev->devlink_port) return -EOPNOTSUPP; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - char name[IFNAMSIZ]; + ret = dev_get_phys_port_name(netdev, name, sizeof(name)); + if (!ret) + ret = sysfs_emit(buf, "%s\n", name); - ret = dev_get_phys_port_name(netdev, name, sizeof(name)); - if (!ret) - ret = sysfs_emit(buf, "%s\n", name); - } rtnl_unlock(); return ret; @@ -608,26 +689,25 @@ static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); + struct netdev_phys_item_id ppid = { }; ssize_t ret = -EINVAL; /* The checks are also done in dev_get_phys_port_name; this helps - * returning early without hitting the trylock/restart below. This works + * returning early without hitting the locking section below. This works * because recurse is false when calling dev_get_port_parent_id. */ if (!netdev->netdev_ops->ndo_get_port_parent_id && !netdev->devlink_port) return -EOPNOTSUPP; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - struct netdev_phys_item_id ppid = { }; + ret = dev_get_port_parent_id(netdev, &ppid, false); + if (!ret) + ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - ret = dev_get_port_parent_id(netdev, &ppid, false); - if (!ret) - ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - } rtnl_unlock(); return ret; @@ -1108,7 +1188,6 @@ static void rx_queue_get_ownership(const struct kobject *kobj, static const struct kobj_type rx_queue_ktype = { .sysfs_ops = &rx_queue_sysfs_ops, .release = rx_queue_release, - .default_groups = rx_queue_default_groups, .namespace = rx_queue_namespace, .get_ownership = rx_queue_get_ownership, }; @@ -1131,6 +1210,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Rx queues are cleared in rx_queue_release to allow later + * re-registration. This is triggered when their kobj refcount is + * dropped. + * + * If a queue is removed while both a read (or write) operation and a + * the re-addition of the same queue are pending (waiting on rntl_lock) + * it might happen that the re-addition will execute before the read, + * making the initial removal to never happen (queue's kobj refcount + * won't drop enough because of the pending read). In such rare case, + * return to allow the removal operation to complete. + */ + if (unlikely(kobj->state_initialized)) { + netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed"); + return -EAGAIN; + } + /* Kobject_put later will trigger rx_queue_release call which * decreases dev refcount: Take that reference here */ @@ -1142,20 +1237,27 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) if (error) goto err; + queue->groups = rx_queue_default_groups; + error = sysfs_create_groups(kobj, queue->groups); + if (error) + goto err; + if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) - goto err; + goto err_default_groups; } error = rx_queue_default_mask(dev, queue); if (error) - goto err; + goto err_default_groups; kobject_uevent(kobj, KOBJ_ADD); return error; +err_default_groups: + sysfs_remove_groups(kobj, queue->groups); err: kobject_put(kobj); return error; @@ -1200,12 +1302,14 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) } while (--i >= new_num) { - struct kobject *kobj = &dev->_rx[i].kobj; + struct netdev_rx_queue *queue = &dev->_rx[i]; + struct kobject *kobj = &queue->kobj; if (!refcount_read(&dev_net(dev)->ns.count)) kobj->uevent_suppress = 1; if (dev->sysfs_rx_queue_group) sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); + sysfs_remove_groups(kobj, queue->groups); kobject_put(kobj); } @@ -1244,9 +1348,11 @@ static int net_rx_queue_change_owner(struct net_device *dev, int num, */ struct netdev_queue_attribute { struct attribute attr; - ssize_t (*show)(struct netdev_queue *queue, char *buf); - ssize_t (*store)(struct netdev_queue *queue, - const char *buf, size_t len); + ssize_t (*show)(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf); + ssize_t (*store)(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len); }; #define to_netdev_queue_attr(_attr) \ container_of(_attr, struct netdev_queue_attribute, attr) @@ -1263,7 +1369,7 @@ static ssize_t netdev_queue_attr_show(struct kobject *kobj, if (!attribute->show) return -EIO; - return attribute->show(queue, buf); + return attribute->show(kobj, attr, queue, buf); } static ssize_t netdev_queue_attr_store(struct kobject *kobj, @@ -1277,7 +1383,7 @@ static ssize_t netdev_queue_attr_store(struct kobject *kobj, if (!attribute->store) return -EIO; - return attribute->store(queue, buf, count); + return attribute->store(kobj, attr, queue, buf, count); } static const struct sysfs_ops netdev_queue_sysfs_ops = { @@ -1285,7 +1391,8 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = { .store = netdev_queue_attr_store, }; -static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf) +static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout); @@ -1303,18 +1410,18 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue) return i; } -static ssize_t traffic_class_show(struct netdev_queue *queue, - char *buf) +static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; - int num_tc, tc; - int index; + int num_tc, tc, index, ret; if (!netif_is_multiqueue(dev)) return -ENOENT; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(kobj, attr, queue->dev); + if (ret) + return ret; index = get_netdev_queue_index(queue); @@ -1341,24 +1448,25 @@ static ssize_t traffic_class_show(struct netdev_queue *queue, } #ifdef CONFIG_XPS -static ssize_t tx_maxrate_show(struct netdev_queue *queue, - char *buf) +static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { return sysfs_emit(buf, "%lu\n", queue->tx_maxrate); } -static ssize_t tx_maxrate_store(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { - struct net_device *dev = queue->dev; int err, index = get_netdev_queue_index(queue); + struct net_device *dev = queue->dev; u32 rate = 0; if (!capable(CAP_NET_ADMIN)) return -EPERM; /* The check is also done later; this helps returning early without - * hitting the trylock/restart below. + * hitting the locking section below. */ if (!dev->netdev_ops->ndo_set_tx_maxrate) return -EOPNOTSUPP; @@ -1367,18 +1475,21 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue, if (err < 0) return err; - if (!rtnl_trylock()) - return restart_syscall(); + err = sysfs_rtnl_lock(kobj, attr, dev); + if (err) + return err; err = -EOPNOTSUPP; if (dev->netdev_ops->ndo_set_tx_maxrate) err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); - rtnl_unlock(); if (!err) { queue->tx_maxrate = rate; + rtnl_unlock(); return len; } + + rtnl_unlock(); return err; } @@ -1422,16 +1533,17 @@ static ssize_t bql_set(const char *buf, const size_t count, return count; } -static ssize_t bql_show_hold_time(struct netdev_queue *queue, - char *buf) +static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); } -static ssize_t bql_set_hold_time(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t bql_set_hold_time(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { struct dql *dql = &queue->dql; unsigned int value; @@ -1450,15 +1562,17 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init = __ATTR(hold_time, 0644, bql_show_hold_time, bql_set_hold_time); -static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf) +static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs)); } -static ssize_t bql_set_stall_thrs(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { struct dql *dql = &queue->dql; unsigned int value; @@ -1484,13 +1598,15 @@ static ssize_t bql_set_stall_thrs(struct netdev_queue *queue, static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init = __ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs); -static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf) +static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max)); } -static ssize_t bql_set_stall_max(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { WRITE_ONCE(queue->dql.stall_max, 0); return len; @@ -1499,7 +1615,8 @@ static ssize_t bql_set_stall_max(struct netdev_queue *queue, static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init = __ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max); -static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf) +static ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; @@ -1509,8 +1626,8 @@ static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf) static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init = __ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL); -static ssize_t bql_show_inflight(struct netdev_queue *queue, - char *buf) +static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; @@ -1521,13 +1638,16 @@ static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init = __ATTR(inflight, 0444, bql_show_inflight, NULL); #define BQL_ATTR(NAME, FIELD) \ -static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \ - char *buf) \ +static ssize_t bql_show_ ## NAME(struct kobject *kobj, \ + struct attribute *attr, \ + struct netdev_queue *queue, char *buf) \ { \ return bql_show(buf, queue->dql.FIELD); \ } \ \ -static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \ +static ssize_t bql_set_ ## NAME(struct kobject *kobj, \ + struct attribute *attr, \ + struct netdev_queue *queue, \ const char *buf, size_t len) \ { \ return bql_set(buf, len, &queue->dql.FIELD); \ @@ -1613,19 +1733,21 @@ out_no_maps: return len < PAGE_SIZE ? len : -EINVAL; } -static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) +static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; unsigned int index; - int len, tc; + int len, tc, ret; if (!netif_is_multiqueue(dev)) return -ENOENT; index = get_netdev_queue_index(queue); - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(kobj, attr, queue->dev); + if (ret) + return ret; /* If queue belongs to subordinate dev use its map */ dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; @@ -1636,18 +1758,21 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) return -EINVAL; } - /* Make sure the subordinate device can't be freed */ - get_device(&dev->dev); + /* Increase the net device refcnt to make sure it won't be freed while + * xps_queue_show is running. + */ + dev_hold(dev); rtnl_unlock(); len = xps_queue_show(dev, index, tc, buf, XPS_CPUS); - put_device(&dev->dev); + dev_put(dev); return len; } -static ssize_t xps_cpus_store(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { struct net_device *dev = queue->dev; unsigned int index; @@ -1671,9 +1796,10 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, return err; } - if (!rtnl_trylock()) { + err = sysfs_rtnl_lock(kobj, attr, dev); + if (err) { free_cpumask_var(mask); - return restart_syscall(); + return err; } err = netif_set_xps_queue(dev, mask, index); @@ -1687,26 +1813,34 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init = __ATTR_RW(xps_cpus); -static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) +static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; unsigned int index; - int tc; + int tc, ret; index = get_netdev_queue_index(queue); - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(kobj, attr, dev); + if (ret) + return ret; tc = netdev_txq_to_tc(dev, index); + + /* Increase the net device refcnt to make sure it won't be freed while + * xps_queue_show is running. + */ + dev_hold(dev); rtnl_unlock(); - if (tc < 0) - return -EINVAL; - return xps_queue_show(dev, index, tc, buf, XPS_RXQS); + ret = tc >= 0 ? xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL; + dev_put(dev); + return ret; } -static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, +static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, size_t len) { struct net_device *dev = queue->dev; @@ -1730,9 +1864,10 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, return err; } - if (!rtnl_trylock()) { + err = sysfs_rtnl_lock(kobj, attr, dev); + if (err) { bitmap_free(mask); - return restart_syscall(); + return err; } cpus_read_lock(); @@ -1792,7 +1927,6 @@ static void netdev_queue_get_ownership(const struct kobject *kobj, static const struct kobj_type netdev_queue_ktype = { .sysfs_ops = &netdev_queue_sysfs_ops, .release = netdev_queue_release, - .default_groups = netdev_queue_default_groups, .namespace = netdev_queue_namespace, .get_ownership = netdev_queue_get_ownership, }; @@ -1811,6 +1945,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Tx queues are cleared in netdev_queue_release to allow later + * re-registration. This is triggered when their kobj refcount is + * dropped. + * + * If a queue is removed while both a read (or write) operation and a + * the re-addition of the same queue are pending (waiting on rntl_lock) + * it might happen that the re-addition will execute before the read, + * making the initial removal to never happen (queue's kobj refcount + * won't drop enough because of the pending read). In such rare case, + * return to allow the removal operation to complete. + */ + if (unlikely(kobj->state_initialized)) { + netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed"); + return -EAGAIN; + } + /* Kobject_put later will trigger netdev_queue_release call * which decreases dev refcount: Take that reference here */ @@ -1822,15 +1972,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) if (error) goto err; + queue->groups = netdev_queue_default_groups; + error = sysfs_create_groups(kobj, queue->groups); + if (error) + goto err; + if (netdev_uses_bql(dev)) { error = sysfs_create_group(kobj, &dql_group); if (error) - goto err; + goto err_default_groups; } kobject_uevent(kobj, KOBJ_ADD); return 0; +err_default_groups: + sysfs_remove_groups(kobj, queue->groups); err: kobject_put(kobj); return error; @@ -1885,6 +2042,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) if (netdev_uses_bql(dev)) sysfs_remove_group(&queue->kobj, &dql_group); + sysfs_remove_groups(&queue->kobj, queue->groups); kobject_put(&queue->kobj); } diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 715f85c6b62e..c18bb53d13fd 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -10,6 +10,7 @@ #include <net/sock.h> #include <net/xdp.h> #include <net/xdp_sock.h> +#include <net/page_pool/memory_provider.h> #include "dev.h" #include "devmem.h" @@ -364,11 +365,18 @@ int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info) return err; } +static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi) +{ + if (napi && napi_id_valid(napi->napi_id)) + return nla_put_u32(skb, NETDEV_A_QUEUE_NAPI_ID, napi->napi_id); + return 0; +} + static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { - struct net_devmem_dmabuf_binding *binding; + struct pp_memory_provider_params *params; struct netdev_rx_queue *rxq; struct netdev_queue *txq; void *hdr; @@ -385,20 +393,17 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, switch (q_type) { case NETDEV_QUEUE_TYPE_RX: rxq = __netif_get_rx_queue(netdev, q_idx); - if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, - rxq->napi->napi_id)) + if (nla_put_napi_id(rsp, rxq->napi)) goto nla_put_failure; - binding = rxq->mp_params.mp_priv; - if (binding && - nla_put_u32(rsp, NETDEV_A_QUEUE_DMABUF, binding->id)) + params = &rxq->mp_params; + if (params->mp_ops && + params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) goto nla_put_failure; - break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); - if (txq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, - txq->napi->napi_id)) + if (nla_put_napi_id(rsp, txq->napi)) goto nla_put_failure; } @@ -576,6 +581,7 @@ netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) || diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index db82786fa0c4..ddd54e1e7289 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -3,34 +3,34 @@ #include <linux/netdevice.h> #include <net/netdev_queues.h> #include <net/netdev_rx_queue.h> +#include <net/page_pool/memory_provider.h> #include "page_pool_priv.h" int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) { struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, rxq_idx); + const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops; void *new_mem, *old_mem; int err; - if (!dev->queue_mgmt_ops || !dev->queue_mgmt_ops->ndo_queue_stop || - !dev->queue_mgmt_ops->ndo_queue_mem_free || - !dev->queue_mgmt_ops->ndo_queue_mem_alloc || - !dev->queue_mgmt_ops->ndo_queue_start) + if (!qops || !qops->ndo_queue_stop || !qops->ndo_queue_mem_free || + !qops->ndo_queue_mem_alloc || !qops->ndo_queue_start) return -EOPNOTSUPP; ASSERT_RTNL(); - new_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL); + new_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL); if (!new_mem) return -ENOMEM; - old_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL); + old_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL); if (!old_mem) { err = -ENOMEM; goto err_free_new_mem; } - err = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx); + err = qops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx); if (err) goto err_free_old_mem; @@ -38,15 +38,19 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) if (err) goto err_free_new_queue_mem; - err = dev->queue_mgmt_ops->ndo_queue_stop(dev, old_mem, rxq_idx); - if (err) - goto err_free_new_queue_mem; + if (netif_running(dev)) { + err = qops->ndo_queue_stop(dev, old_mem, rxq_idx); + if (err) + goto err_free_new_queue_mem; - err = dev->queue_mgmt_ops->ndo_queue_start(dev, new_mem, rxq_idx); - if (err) - goto err_start_queue; + err = qops->ndo_queue_start(dev, new_mem, rxq_idx); + if (err) + goto err_start_queue; + } else { + swap(new_mem, old_mem); + } - dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem); + qops->ndo_queue_mem_free(dev, old_mem); kvfree(old_mem); kvfree(new_mem); @@ -61,15 +65,15 @@ err_start_queue: * WARN if we fail to recover the old rx queue, and at least free * old_mem so we don't also leak that. */ - if (dev->queue_mgmt_ops->ndo_queue_start(dev, old_mem, rxq_idx)) { + if (qops->ndo_queue_start(dev, old_mem, rxq_idx)) { WARN(1, "Failed to restart old queue in error path. RX queue %d may be unhealthy.", rxq_idx); - dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem); + qops->ndo_queue_mem_free(dev, old_mem); } err_free_new_queue_mem: - dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem); + qops->ndo_queue_mem_free(dev, new_mem); err_free_old_mem: kvfree(old_mem); @@ -80,3 +84,71 @@ err_free_new_mem: return err; } EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL"); + +static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *p) +{ + struct netdev_rx_queue *rxq; + int ret; + + if (ifq_idx >= dev->real_num_rx_queues) + return -EINVAL; + ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues); + + rxq = __netif_get_rx_queue(dev, ifq_idx); + if (rxq->mp_params.mp_ops) + return -EEXIST; + + rxq->mp_params = *p; + ret = netdev_rx_queue_restart(dev, ifq_idx); + if (ret) { + rxq->mp_params.mp_ops = NULL; + rxq->mp_params.mp_priv = NULL; + } + return ret; +} + +int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *p) +{ + int ret; + + rtnl_lock(); + ret = __net_mp_open_rxq(dev, ifq_idx, p); + rtnl_unlock(); + return ret; +} + +static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *old_p) +{ + struct netdev_rx_queue *rxq; + + if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues)) + return; + + rxq = __netif_get_rx_queue(dev, ifq_idx); + + /* Callers holding a netdev ref may get here after we already + * went thru shutdown via dev_memory_provider_uninstall(). + */ + if (dev->reg_state > NETREG_REGISTERED && + !rxq->mp_params.mp_ops) + return; + + if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops || + rxq->mp_params.mp_priv != old_p->mp_priv)) + return; + + rxq->mp_params.mp_ops = NULL; + rxq->mp_params.mp_priv = NULL; + WARN_ON(netdev_rx_queue_restart(dev, ifq_idx)); +} + +void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, + struct pp_memory_provider_params *old_p) +{ + rtnl_lock(); + __net_mp_close_rxq(dev, ifq_idx, old_p); + rtnl_unlock(); +} diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f5e908c9e7ad..1c6fec08bc43 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -13,6 +13,7 @@ #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> +#include <net/page_pool/memory_provider.h> #include <net/xdp.h> #include <linux/dma-direction.h> @@ -25,6 +26,7 @@ #include <trace/events/page_pool.h> +#include "dev.h" #include "mp_dmabuf_devmem.h" #include "netmem_priv.h" #include "page_pool_priv.h" @@ -285,13 +287,19 @@ static int page_pool_init(struct page_pool *pool, rxq = __netif_get_rx_queue(pool->slow.netdev, pool->slow.queue_idx); pool->mp_priv = rxq->mp_params.mp_priv; + pool->mp_ops = rxq->mp_params.mp_ops; } - if (pool->mp_priv) { + if (pool->mp_ops) { if (!pool->dma_map || !pool->dma_sync) return -EOPNOTSUPP; - err = mp_dmabuf_devmem_init(pool); + if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) { + err = -EFAULT; + goto free_ptr_ring; + } + + err = pool->mp_ops->init(pool); if (err) { pr_warn("%s() mem-provider init failed %d\n", __func__, err); @@ -587,8 +595,8 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) return netmem; /* Slow-path: cache empty, do real allocation */ - if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv) - netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp); + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + netmem = pool->mp_ops->alloc_netmems(pool, gfp); else netmem = __page_pool_alloc_pages_slow(pool, gfp); return netmem; @@ -679,8 +687,8 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) bool put; put = true; - if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv) - put = mp_dmabuf_devmem_release_page(pool, netmem); + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + put = pool->mp_ops->release_netmem(pool, netmem); else __page_pool_release_page_dma(pool, netmem); @@ -1048,8 +1056,8 @@ static void __page_pool_destroy(struct page_pool *pool) page_pool_unlist(pool); page_pool_uninit(pool); - if (pool->mp_priv) { - mp_dmabuf_devmem_destroy(pool); + if (pool->mp_ops) { + pool->mp_ops->destroy(pool); static_branch_dec(&page_pool_mem_providers); } @@ -1140,11 +1148,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool) if (!pool->p.napi) return; - /* To avoid races with recycling and additional barriers make sure - * pool and NAPI are unlinked when NAPI is disabled. - */ - WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state)); - WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1); + napi_assert_will_not_race(pool->p.napi); mutex_lock(&page_pools_lock); WRITE_ONCE(pool->p.napi, NULL); @@ -1190,3 +1194,31 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid) } } EXPORT_SYMBOL(page_pool_update_nid); + +bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr) +{ + return page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), addr); +} + +/* Associate a niov with a page pool. Should follow with a matching + * net_mp_niov_clear_page_pool() + */ +void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov) +{ + netmem_ref netmem = net_iov_to_netmem(niov); + + page_pool_set_pp_info(pool, netmem); + + pool->pages_state_hold_cnt++; + trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt); +} + +/* Disassociate a niov from a page pool. Should only be used in the + * ->release_netmem() path. + */ +void net_mp_niov_clear_page_pool(struct net_iov *niov) +{ + netmem_ref netmem = net_iov_to_netmem(niov); + + page_pool_clear_pp_info(netmem); +} diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 6677e0c2e256..9d8a3d8597fa 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -8,9 +8,9 @@ #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> #include <net/page_pool/types.h> +#include <net/page_pool/memory_provider.h> #include <net/sock.h> -#include "devmem.h" #include "page_pool_priv.h" #include "netdev-genl-gen.h" @@ -216,7 +216,6 @@ static int page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, const struct genl_info *info) { - struct net_devmem_dmabuf_binding *binding = pool->mp_priv; size_t inflight, refsz; unsigned int napi_id; void *hdr; @@ -249,7 +248,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, pool->user.detach_time)) goto err_cancel; - if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id)) + if (pool->mp_ops && pool->mp_ops->nl_fill(pool->mp_priv, rsp, NULL)) goto err_cancel; genlmsg_end(rsp, hdr); @@ -356,7 +355,7 @@ void page_pool_unlist(struct page_pool *pool) int page_pool_check_memory_provider(struct net_device *dev, struct netdev_rx_queue *rxq) { - struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv; + void *binding = rxq->mp_params.mp_priv; struct page_pool *pool; struct hlist_node *n; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d1e559fce918..abe1a461ea67 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -80,6 +80,11 @@ void rtnl_lock(void) } EXPORT_SYMBOL(rtnl_lock); +int rtnl_lock_interruptible(void) +{ + return mutex_lock_interruptible(&rtnl_mutex); +} + int rtnl_lock_killable(void) { return mutex_lock_killable(&rtnl_mutex); diff --git a/net/core/sock.c b/net/core/sock.c index eae2ae70a2e0..a197f0a0b878 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -938,6 +938,7 @@ int sock_set_timestamping(struct sock *sk, int optname, WRITE_ONCE(sk->sk_tsflags, val); sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); + sock_valbool_flag(sk, SOCK_TIMESTAMPING_ANY, !!(val & TSFLAGS_ANY)); if (val & SOF_TIMESTAMPING_RX_SOFTWARE) sock_enable_timestamp(sk, diff --git a/net/dsa/user.c b/net/dsa/user.c index 291ab1b4acc4..2296a4ead020 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1243,16 +1243,25 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e) if (!ds->ops->support_eee || !ds->ops->support_eee(ds, dp->index)) return -EOPNOTSUPP; - /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev) - return -ENODEV; + /* If the port is using phylink managed EEE, then an unimplemented + * set_mac_eee() is permissible. + */ + if (!phylink_mac_implements_lpi(ds->phylink_mac_ops)) { + /* Port's PHY and MAC both need to be EEE capable */ + if (!dev->phydev) + return -ENODEV; - if (!ds->ops->set_mac_eee) - return -EOPNOTSUPP; + if (!ds->ops->set_mac_eee) + return -EOPNOTSUPP; - ret = ds->ops->set_mac_eee(ds, dp->index, e); - if (ret) - return ret; + ret = ds->ops->set_mac_eee(ds, dp->index, e); + if (ret) + return ret; + } else if (ds->ops->set_mac_eee) { + ret = ds->ops->set_mac_eee(ds, dp->index, e); + if (ret) + return ret; + } return phylink_ethtool_set_eee(dp->pl, e); } diff --git a/net/ethtool/common.c b/net/ethtool/common.c index d88e9080643b..7149d07e90c6 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -213,6 +213,24 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(10, T1S, Half), __DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half), __DEFINE_LINK_MODE_NAME(10, T1BRR, Full), + __DEFINE_LINK_MODE_NAME(200000, CR, Full), + __DEFINE_LINK_MODE_NAME(200000, KR, Full), + __DEFINE_LINK_MODE_NAME(200000, DR, Full), + __DEFINE_LINK_MODE_NAME(200000, DR_2, Full), + __DEFINE_LINK_MODE_NAME(200000, SR, Full), + __DEFINE_LINK_MODE_NAME(200000, VR, Full), + __DEFINE_LINK_MODE_NAME(400000, CR2, Full), + __DEFINE_LINK_MODE_NAME(400000, KR2, Full), + __DEFINE_LINK_MODE_NAME(400000, DR2, Full), + __DEFINE_LINK_MODE_NAME(400000, DR2_2, Full), + __DEFINE_LINK_MODE_NAME(400000, SR2, Full), + __DEFINE_LINK_MODE_NAME(400000, VR2, Full), + __DEFINE_LINK_MODE_NAME(800000, CR4, Full), + __DEFINE_LINK_MODE_NAME(800000, KR4, Full), + __DEFINE_LINK_MODE_NAME(800000, DR4, Full), + __DEFINE_LINK_MODE_NAME(800000, DR4_2, Full), + __DEFINE_LINK_MODE_NAME(800000, SR4, Full), + __DEFINE_LINK_MODE_NAME(800000, VR4, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -221,8 +239,11 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_CR4 4 #define __LINK_MODE_LANES_CR8 8 #define __LINK_MODE_LANES_DR 1 +#define __LINK_MODE_LANES_DR_2 1 #define __LINK_MODE_LANES_DR2 2 +#define __LINK_MODE_LANES_DR2_2 2 #define __LINK_MODE_LANES_DR4 4 +#define __LINK_MODE_LANES_DR4_2 4 #define __LINK_MODE_LANES_DR8 8 #define __LINK_MODE_LANES_KR 1 #define __LINK_MODE_LANES_KR2 2 @@ -251,6 +272,9 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_T1L 1 #define __LINK_MODE_LANES_T1S 1 #define __LINK_MODE_LANES_T1S_P2MP 1 +#define __LINK_MODE_LANES_VR 1 +#define __LINK_MODE_LANES_VR2 2 +#define __LINK_MODE_LANES_VR4 4 #define __LINK_MODE_LANES_VR8 8 #define __LINK_MODE_LANES_DR8_2 8 #define __LINK_MODE_LANES_T1BRR 1 @@ -378,6 +402,24 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(10, T1S, Half), __DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half), __DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, CR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, KR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, DR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, DR_2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, SR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, VR, Full), + __DEFINE_LINK_MODE_PARAMS(400000, CR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, KR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, DR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, DR2_2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, SR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, VR2, Full), + __DEFINE_LINK_MODE_PARAMS(800000, CR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, KR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, DR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, DR4_2, Full), + __DEFINE_LINK_MODE_PARAMS(800000, SR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, VR4, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 7609ce2b2c5e..98b7dcea207a 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -993,10 +993,14 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, return rc; /* Nonzero ring with RSS only makes sense if NIC adds them together */ - if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS && - !ops->cap_rss_rxnfc_adds && - ethtool_get_flow_spec_ring(info.fs.ring_cookie)) - return -EINVAL; + if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS) { + if (!ops->cap_rss_rxnfc_adds && + ethtool_get_flow_spec_ring(info.fs.ring_cookie)) + return -EINVAL; + + if (!xa_load(&dev->ethtool->rss_ctx, info.rss_context)) + return -EINVAL; + } if (cmd == ETHTOOL_SRXFH && ops->get_rxfh) { struct ethtool_rxfh_param rxfh = {}; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f23a1ec6694c..2c8f98cfe26c 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1064,8 +1064,8 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; return 0; } - if (__in_dev_get_rtnl(dev)) { - IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on); + if (__in_dev_get_rtnl_net(dev)) { + IN_DEV_CONF_SET(__in_dev_get_rtnl_net(dev), PROXY_ARP, on); return 0; } return -ENXIO; @@ -1295,14 +1295,14 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) switch (cmd) { case SIOCDARP: - rtnl_lock(); + rtnl_net_lock(net); err = arp_req_delete(net, &r); - rtnl_unlock(); + rtnl_net_unlock(net); break; case SIOCSARP: - rtnl_lock(); + rtnl_net_lock(net); err = arp_req_set(net, &r); - rtnl_unlock(); + rtnl_net_unlock(net); break; case SIOCGARP: rcu_read_lock(); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 55b8151759bc..754f60fb6e25 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -46,6 +46,7 @@ #include <linux/notifier.h> #include <linux/inetdevice.h> #include <linux/igmp.h> +#include "igmp_internal.h" #include <linux/slab.h> #include <linux/hash.h> #ifdef CONFIG_SYSCTL @@ -107,15 +108,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_PROTO] = { .type = NLA_U8 }, }; -struct inet_fill_args { - u32 portid; - u32 seq; - int event; - unsigned int flags; - int netnsid; - int ifindex; -}; - #define IN4_ADDR_HSIZE_SHIFT 8 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) @@ -1847,9 +1839,38 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, return 0; } -static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, - struct netlink_callback *cb, int *s_ip_idx, - struct inet_fill_args *fillargs) +static int in_dev_dump_ifmcaddr(struct in_device *in_dev, struct sk_buff *skb, + struct netlink_callback *cb, int *s_ip_idx, + struct inet_fill_args *fillargs) +{ + struct ip_mc_list *im; + int ip_idx = 0; + int err; + + for (im = rcu_dereference(in_dev->mc_list); + im; + im = rcu_dereference(im->next_rcu)) { + if (ip_idx < *s_ip_idx) { + ip_idx++; + continue; + } + err = inet_fill_ifmcaddr(skb, in_dev->dev, im, fillargs); + if (err < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + ip_idx++; + } + err = 0; + ip_idx = 0; +done: + *s_ip_idx = ip_idx; + return err; +} + +static int in_dev_dump_ifaddr(struct in_device *in_dev, struct sk_buff *skb, + struct netlink_callback *cb, int *s_ip_idx, + struct inet_fill_args *fillargs) { struct in_ifaddr *ifa; int ip_idx = 0; @@ -1875,6 +1896,21 @@ done: return err; } +static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, + struct netlink_callback *cb, int *s_ip_idx, + struct inet_fill_args *fillargs) +{ + switch (fillargs->event) { + case RTM_NEWADDR: + return in_dev_dump_ifaddr(in_dev, skb, cb, s_ip_idx, fillargs); + case RTM_GETMULTICAST: + return in_dev_dump_ifmcaddr(in_dev, skb, cb, s_ip_idx, + fillargs); + default: + return -EINVAL; + } +} + /* Combine dev_addr_genid and dev_base_seq to detect changes. */ static u32 inet_base_seq(const struct net *net) @@ -1890,13 +1926,14 @@ static u32 inet_base_seq(const struct net *net) return res; } -static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +static int inet_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, + int event) { const struct nlmsghdr *nlh = cb->nlh; struct inet_fill_args fillargs = { .portid = NETLINK_CB(cb->skb).portid, .seq = nlh->nlmsg_seq, - .event = RTM_NEWADDR, + .event = event, .flags = NLM_F_MULTI, .netnsid = -1, }; @@ -1950,6 +1987,16 @@ done: return err; } +static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + return inet_dump_addr(skb, cb, RTM_NEWADDR); +} + +static int inet_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + return inet_dump_addr(skb, cb, RTM_GETMULTICAST); +} + static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid) { @@ -2846,6 +2893,8 @@ static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = { {.protocol = PF_INET, .msgtype = RTM_GETNETCONF, .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf, .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET, .msgtype = RTM_GETMULTICAST, + .dumpit = inet_dump_ifmcaddr, .flags = RTNL_FLAG_DUMP_UNLOCKED}, }; void __init devinet_init(void) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 9517b8667e00..041c46787d94 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -245,9 +245,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct nlattr **tb, struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); + struct fib4_rule *rule4 = (struct fib4_rule *)rule; + struct net *net = rule->fr_net; int err = -EINVAL; - struct fib4_rule *rule4 = (struct fib4_rule *) rule; if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) { NL_SET_ERR_MSG(extack, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 3da126cea884..2c394c364cb9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -81,6 +81,7 @@ #include <linux/skbuff.h> #include <linux/inetdevice.h> #include <linux/igmp.h> +#include "igmp_internal.h" #include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/times.h> @@ -1432,14 +1433,16 @@ static void ip_mc_hash_remove(struct in_device *in_dev, *mc_hash = im->next_hash; } -static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, - const struct ip_mc_list *im, int event) +int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + const struct ip_mc_list *im, + struct inet_fill_args *args) { struct ifa_cacheinfo ci; struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); + nlh = nlmsg_put(skb, args->portid, args->seq, args->event, + sizeof(struct ifaddrmsg), args->flags); if (!nlh) return -EMSGSIZE; @@ -1468,6 +1471,9 @@ static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, static void inet_ifmcaddr_notify(struct net_device *dev, const struct ip_mc_list *im, int event) { + struct inet_fill_args fillargs = { + .event = event, + }; struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOMEM; @@ -1479,7 +1485,7 @@ static void inet_ifmcaddr_notify(struct net_device *dev, if (!skb) goto error; - err = inet_fill_ifmcaddr(skb, dev, im, event); + err = inet_fill_ifmcaddr(skb, dev, im, &fillargs); if (err < 0) { WARN_ON_ONCE(err == -EMSGSIZE); nlmsg_free(skb); diff --git a/net/ipv4/igmp_internal.h b/net/ipv4/igmp_internal.h new file mode 100644 index 000000000000..0a1bcc8ec8e1 --- /dev/null +++ b/net/ipv4/igmp_internal.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_IGMP_INTERNAL_H +#define _LINUX_IGMP_INTERNAL_H + +struct inet_fill_args { + u32 portid; + u32 seq; + int event; + unsigned int flags; + int netnsid; + int ifindex; +}; + +int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + const struct ip_mc_list *im, + struct inet_fill_args *args); +#endif diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e4decfb270fa..2b7775b90a09 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -799,18 +799,6 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk) sk_stop_timer_sync(sk, &sk->sk_timer); } -void inet_csk_delete_keepalive_timer(struct sock *sk) -{ - sk_stop_timer(sk, &sk->sk_timer); -} -EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); - -void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) -{ - sk_reset_timer(sk, &sk->sk_timer, jiffies + len); -} -EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); - struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4, const struct request_sock *req) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ed1b6b44faf8..c9f11a046c26 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -141,7 +141,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info, const struct iphdr *iph; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; - unsigned int data_len = 0; struct ip_tunnel *t; if (tpi->proto == htons(ETH_P_TEB)) @@ -182,7 +181,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info, case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) return 0; - data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ break; case ICMP_REDIRECT: @@ -190,10 +188,16 @@ static int ipgre_err(struct sk_buff *skb, u32 info, } #if IS_ENABLED(CONFIG_IPV6) - if (tpi->proto == htons(ETH_P_IPV6) && - !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, - type, data_len)) - return 0; + if (tpi->proto == htons(ETH_P_IPV6)) { + unsigned int data_len = 0; + + if (type == ICMP_TIME_EXCEEDED) + data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ + + if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, + type, data_len)) + return 0; + } #endif if (t->parms.iph.daddr == 0 || diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 42cb5dc9cb24..3a43010d726f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -28,6 +28,7 @@ static int tcp_adv_win_scale_max = 31; static int tcp_app_win_max = 31; static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS; static int tcp_min_snd_mss_max = 65535; +static int tcp_rto_max_max = TCP_RTO_MAX_SEC * MSEC_PER_SEC; static int ip_privileged_port_min; static int ip_privileged_port_max = 65535; static int ip_ttl_min = 1; @@ -1583,6 +1584,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, }, + { + .procname = "tcp_rto_max_ms", + .data = &init_net.ipv4.sysctl_tcp_rto_max_ms, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE_THOUSAND, + .extra2 = &tcp_rto_max_max, + }, }; static __net_init int ipv4_sysctl_init_net(struct net *net) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0d704bda6c41..992d5c9b2487 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -423,7 +423,7 @@ void tcp_init_sock(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int rto_min_us; + int rto_min_us, rto_max_ms; tp->out_of_order_queue = RB_ROOT; sk->tcp_rtx_queue = RB_ROOT; @@ -432,6 +432,10 @@ void tcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&tp->tsorted_sent_queue); icsk->icsk_rto = TCP_TIMEOUT_INIT; + + rto_max_ms = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_max_ms); + icsk->icsk_rto_max = msecs_to_jiffies(rto_max_ms); + rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us); icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us); icsk->icsk_delack_max = TCP_DELACK_MAX; @@ -2476,6 +2480,11 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, } niov = skb_frag_net_iov(frag); + if (!net_is_devmem_iov(niov)) { + err = -ENODEV; + goto out; + } + end = start + skb_frag_size(frag); copy = end - offset; @@ -2494,7 +2503,7 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, /* Will perform the exchange later */ dmabuf_cmsg.frag_token = tcp_xa_pool.tokens[tcp_xa_pool.idx]; - dmabuf_cmsg.dmabuf_id = net_iov_binding_id(niov); + dmabuf_cmsg.dmabuf_id = net_devmem_iov_binding_id(niov); offset += copy; remaining_len -= copy; @@ -3174,7 +3183,7 @@ adjudge_to_death: const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, + tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); @@ -3627,7 +3636,7 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val) elapsed = tp->keepalive_time - elapsed; else elapsed = 0; - inet_csk_reset_keepalive_timer(sk, elapsed); + tcp_reset_keepalive_timer(sk, elapsed); } return 0; @@ -3802,6 +3811,11 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ)); return 0; + case TCP_RTO_MAX_MS: + if (val < MSEC_PER_SEC || val > TCP_RTO_MAX_SEC * MSEC_PER_SEC) + return -EINVAL; + WRITE_ONCE(inet_csk(sk)->icsk_rto_max, msecs_to_jiffies(val)); + return 0; } sockopt_lock_sock(sk); @@ -4638,6 +4652,9 @@ zerocopy_rcv_out: case TCP_IS_MPTCP: val = 0; break; + case TCP_RTO_MAX_MS: + val = jiffies_to_msecs(tcp_rto_max(sk)); + break; default: return -ENOPROTOOPT; } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 0f523cbfe329..e4c95238df58 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -274,8 +274,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, * because it's been added to the accept queue directly. */ req->timeout = tcp_timeout_init(child); - inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, - req->timeout, TCP_RTO_MAX); + tcp_reset_xmit_timer(child, ICSK_TIME_RETRANS, + req->timeout, false); refcount_set(&req->rsk_refcnt, 2); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb82e01da911..4686783b70de 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2252,8 +2252,7 @@ static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag) unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4), msecs_to_jiffies(10)); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - delay, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, false); *ack_flag &= ~FLAG_SET_XMIT_TIMER; return true; } @@ -3282,8 +3281,7 @@ void tcp_rearm_rto(struct sock *sk) */ rto = usecs_to_jiffies(max_t(int, delta_us, 1)); } - tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, - TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, true); } } @@ -3560,10 +3558,10 @@ static void tcp_ack_probe(struct sock *sk) * This function is not for random using! */ } else { - unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX); + unsigned long when = tcp_probe0_when(sk, tcp_rto_max(sk)); when = tcp_clamp_probe0_to_user_timeout(sk, when); - tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, true); } } @@ -4174,7 +4172,6 @@ u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) } return mss; } -EXPORT_SYMBOL_GPL(tcp_parse_mss_option); /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when @@ -6347,7 +6344,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tp->lsndtime = tcp_jiffies32; if (sock_flag(sk, SOCK_KEEPOPEN)) - inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); + tcp_reset_keepalive_timer(sk, keepalive_time_when(tp)); if (!tp->rx_opt.snd_wscale) __tcp_fast_path_on(tp, tp->snd_wnd); @@ -6470,9 +6467,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { /* Previous FIN/ACK or RST/ACK might be ignored. */ if (icsk->icsk_retransmits == 0) - inet_csk_reset_xmit_timer(sk, - ICSK_TIME_RETRANS, - TCP_TIMEOUT_MIN, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + TCP_TIMEOUT_MIN, false); SKB_DR_SET(reason, TCP_INVALID_ACK_SEQUENCE); goto reset_and_undo; } @@ -6587,8 +6583,8 @@ consume: */ inet_csk_schedule_ack(sk); tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - TCP_DELACK_MAX, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MAX, false); goto consume; } tcp_send_ack(sk); @@ -6922,7 +6918,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); } else if (th->fin || sock_owned_by_user(sk)) { /* Bad case. We could lose such FIN otherwise. * It is not a big problem, but it looks confusing @@ -6930,7 +6926,7 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * if it spins in bh_lock_sock(), but it is really * marginal case. */ - inet_csk_reset_keepalive_timer(sk, tmo); + tcp_reset_keepalive_timer(sk, tmo); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto consume; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cc2b5194a18d..d1fd2128ac6c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -458,15 +458,14 @@ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) icsk->icsk_backoff--; icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT; - icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); + icsk->icsk_rto = inet_csk_rto_backoff(icsk, tcp_rto_max(sk)); tcp_mstamp_refresh(tp); delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb)); remaining = icsk->icsk_rto - usecs_to_jiffies(delta_us); if (remaining > 0) { - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - remaining, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, remaining, false); } else { /* RTO revert clocked out retransmission. * Will retransmit now. @@ -3533,6 +3532,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_pingpong_thresh = 1; net->ipv4.sysctl_tcp_rto_min_us = jiffies_to_usecs(TCP_RTO_MIN); + net->ipv4.sysctl_tcp_rto_max_ms = TCP_RTO_MAX_SEC * MSEC_PER_SEC; return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b089b08e9617..0deb2ac85acf 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -566,8 +566,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1); if (sock_flag(newsk, SOCK_KEEPOPEN)) - inet_csk_reset_keepalive_timer(newsk, - keepalive_time_when(newtp)); + tcp_reset_keepalive_timer(newsk, keepalive_time_when(newtp)); newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; newtp->rx_opt.sack_ok = ireq->sack_ok; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bc95d2a5924f..464232a0d637 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1813,7 +1813,6 @@ void tcp_mtup_init(struct sock *sk) if (icsk->icsk_mtup.enabled) icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } -EXPORT_SYMBOL(tcp_mtup_init); /* This function synchronize snd mss to current pmtu/exthdr set. @@ -2911,7 +2910,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) if (rto_delta_us > 0) timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us)); - tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, true); return true; } @@ -3545,8 +3544,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) } if (rearm_timer) tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, - TCP_RTO_MAX); + inet_csk(sk)->icsk_rto, true); } /* We allow to exceed memory limits for FIN packets to expedite @@ -4163,8 +4161,8 @@ int tcp_connect(struct sock *sk) TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, false); return 0; } EXPORT_SYMBOL(tcp_connect); @@ -4253,11 +4251,11 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) unsigned long delay; delay = TCP_DELACK_MAX << icsk->icsk_ack.retry; - if (delay < TCP_RTO_MAX) + if (delay < tcp_rto_max(sk)) icsk->icsk_ack.retry++; inet_csk_schedule_ack(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, delay, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_DACK, delay, false); return; } @@ -4393,7 +4391,7 @@ void tcp_send_probe0(struct sock *sk) if (err <= 0) { if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2)) icsk->icsk_backoff++; - timeout = tcp_probe0_when(sk, TCP_RTO_MAX); + timeout = tcp_probe0_when(sk, tcp_rto_max(sk)); } else { /* If packet was not sent due to local congestion, * Let senders fight for local resources conservatively. @@ -4402,7 +4400,7 @@ void tcp_send_probe0(struct sock *sk) } timeout = tcp_clamp_probe0_to_user_timeout(sk, timeout); - tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, true); } int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b412ed88ccd9..c0e601e4f39c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -109,7 +109,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ - if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) + if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*tcp_rto_max(sk) || !do_reset) shift++; /* If some dubious ICMP arrived, penalize even more. */ @@ -189,12 +189,12 @@ static unsigned int tcp_model_timeout(struct sock *sk, { unsigned int linear_backoff_thresh, timeout; - linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base); + linear_backoff_thresh = ilog2(tcp_rto_max(sk) / rto_base); if (boundary <= linear_backoff_thresh) timeout = ((2 << boundary) - 1) * rto_base; else timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + - (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + (boundary - linear_backoff_thresh) * tcp_rto_max(sk); return jiffies_to_msecs(timeout); } /** @@ -268,7 +268,7 @@ static int tcp_write_timeout(struct sock *sk) retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { - const bool alive = icsk->icsk_rto < TCP_RTO_MAX; + const bool alive = icsk->icsk_rto < tcp_rto_max(sk); retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || @@ -416,7 +416,8 @@ static void tcp_probe_timer(struct sock *sk) } max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { - const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; + unsigned int rto_max = tcp_rto_max(sk); + const bool alive = inet_csk_rto_backoff(icsk, rto_max) < rto_max; max_probes = tcp_orphan_retries(sk, alive); if (!alive && icsk->icsk_backoff >= max_probes) @@ -481,8 +482,8 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) tcp_update_rto_stats(sk); if (!tp->retrans_stamp) tp->retrans_stamp = tcp_time_stamp_ts(tp); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - req->timeout << req->num_timeout, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + req->timeout << req->num_timeout, false); } static bool tcp_rtx_probe0_timed_out(const struct sock *sk, @@ -492,7 +493,7 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, const struct inet_connection_sock *icsk = inet_csk(sk); u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); const struct tcp_sock *tp = tcp_sk(sk); - int timeout = TCP_RTO_MAX * 2; + int timeout = tcp_rto_max(sk) * 2; s32 rcv_delta; if (user_timeout) { @@ -626,9 +627,9 @@ void tcp_retransmit_timer(struct sock *sk) /* Retransmission failed because of local congestion, * Let senders fight for local resources conservatively. */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - TCP_RESOURCE_PROBE_INTERVAL, - TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + TCP_RESOURCE_PROBE_INTERVAL, + false); goto out; } @@ -665,7 +666,7 @@ out_reset_timer: icsk->icsk_backoff = 0; icsk->icsk_rto = clamp(__tcp_set_rto(tp), tcp_rto_min(sk), - TCP_RTO_MAX); + tcp_rto_max(sk)); } else if (sk->sk_state != TCP_SYN_SENT || tp->total_rto > READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) { @@ -673,10 +674,10 @@ out_reset_timer: * activated. */ icsk->icsk_backoff++; - icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + icsk->icsk_rto = min(icsk->icsk_rto << 1, tcp_rto_max(sk)); } - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + tcp_clamp_rto_to_user_timeout(sk), false); if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0)) __sk_dst_reset(sk); @@ -751,20 +752,29 @@ void tcp_syn_ack_timeout(const struct request_sock *req) } EXPORT_SYMBOL(tcp_syn_ack_timeout); +void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len) +{ + sk_reset_timer(sk, &sk->sk_timer, jiffies + len); +} + +static void tcp_delete_keepalive_timer(struct sock *sk) +{ + sk_stop_timer(sk, &sk->sk_timer); +} + void tcp_set_keepalive(struct sock *sk, int val) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) return; if (val && !sock_flag(sk, SOCK_KEEPOPEN)) - inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); + tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); else if (!val) - inet_csk_delete_keepalive_timer(sk); + tcp_delete_keepalive_timer(sk); } EXPORT_SYMBOL_GPL(tcp_set_keepalive); - -static void tcp_keepalive_timer (struct timer_list *t) +static void tcp_keepalive_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); struct inet_connection_sock *icsk = inet_csk(sk); @@ -775,7 +785,7 @@ static void tcp_keepalive_timer (struct timer_list *t) bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. */ - inet_csk_reset_keepalive_timer (sk, HZ/20); + tcp_reset_keepalive_timer(sk, HZ/20); goto out; } @@ -841,7 +851,7 @@ static void tcp_keepalive_timer (struct timer_list *t) } resched: - inet_csk_reset_keepalive_timer (sk, elapsed); + tcp_reset_keepalive_timer(sk, elapsed); goto out; death: diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 67d39114d9a6..40af8fd6efa7 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -399,9 +399,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct nlattr **tb, struct netlink_ext_ack *extack) { + struct fib6_rule *rule6 = (struct fib6_rule *)rule; + struct net *net = rule->fr_net; int err = -EINVAL; - struct net *net = sock_net(skb->sk); - struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (!inet_validate_dscp(frh->tos)) { NL_SET_ERR_MSG(extack, diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 16c336c51940..b1f36dc1a091 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -10,6 +10,7 @@ #include "protocol.h" #include "mib.h" +#include "mptcp_pm_gen.h" /* path manager command handlers */ @@ -433,14 +434,62 @@ bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_is_backup(msk, &skc_local); } -int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info) +static int mptcp_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, + struct genl_info *info) { if (info->attrs[MPTCP_PM_ATTR_TOKEN]) - return mptcp_userspace_pm_get_addr(skb, info); - return mptcp_pm_nl_get_addr(skb, info); + return mptcp_userspace_pm_get_addr(id, addr, info); + return mptcp_pm_nl_get_addr(id, addr, info); } -int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) +int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct mptcp_pm_addr_entry addr; + struct nlattr *attr; + struct sk_buff *msg; + void *reply; + int ret; + + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) + return -EINVAL; + + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; + ret = mptcp_pm_parse_entry(attr, info, false, &addr); + if (ret < 0) + return ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, + info->genlhdr->cmd); + if (!reply) { + GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); + ret = -EMSGSIZE; + goto fail; + } + + ret = mptcp_pm_get_addr(addr.addr.id, &addr, info); + if (ret) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); + goto fail; + } + + ret = mptcp_nl_fill_addr(msg, &addr); + if (ret) + goto fail; + + genlmsg_end(msg, reply); + ret = genlmsg_reply(msg, info); + return ret; + +fail: + nlmsg_free(msg); + return ret; +} + +static int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) { const struct genl_info *info = genl_info_dump(cb); @@ -449,11 +498,34 @@ int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) return mptcp_pm_nl_dump_addr(msg, cb); } -int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return mptcp_pm_dump_addr(msg, cb); +} + +static int mptcp_pm_set_flags(struct genl_info *info) { + struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, }; + struct nlattr *attr_loc; + int ret = -EINVAL; + + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR)) + return ret; + + attr_loc = info->attrs[MPTCP_PM_ATTR_ADDR]; + ret = mptcp_pm_parse_entry(attr_loc, info, false, &loc); + if (ret < 0) + return ret; + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) - return mptcp_userspace_pm_set_flags(skb, info); - return mptcp_pm_nl_set_flags(skb, info); + return mptcp_userspace_pm_set_flags(&loc, info); + return mptcp_pm_nl_set_flags(&loc, info); +} + +int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) +{ + return mptcp_pm_set_flags(info); } void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 572d160edca3..99705a9c2238 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1393,28 +1393,35 @@ static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; + struct nlattr *attr; int ret; + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) + return -EINVAL; + + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; ret = mptcp_pm_parse_entry(attr, info, true, &addr); if (ret < 0) return ret; if (addr.addr.port && !address_use_port(&addr)) { - GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port"); + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "flags must have signal and not subflow when using port"); return -EINVAL; } if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { - GENL_SET_ERR_MSG(info, "flags mustn't have both signal and fullmesh"); + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "flags mustn't have both signal and fullmesh"); return -EINVAL; } if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { - GENL_SET_ERR_MSG(info, "can't create IMPLICIT endpoint"); + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "can't create IMPLICIT endpoint"); return -EINVAL; } @@ -1587,12 +1594,16 @@ next: int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; unsigned int addr_max; + struct nlattr *attr; int ret; + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) + return -EINVAL; + + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret; @@ -1608,7 +1619,7 @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&pernet->lock); entry = __lookup_addr_by_id(pernet, addr.addr.id); if (!entry) { - GENL_SET_ERR_MSG(info, "address not found"); + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); spin_unlock_bh(&pernet->lock); return -EINVAL; } @@ -1762,61 +1773,24 @@ nla_put_failure: return -EMSGSIZE; } -int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, + struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); - struct mptcp_pm_addr_entry addr, *entry; - struct sk_buff *msg; - void *reply; - int ret; - - ret = mptcp_pm_parse_entry(attr, info, false, &addr); - if (ret < 0) - return ret; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, - info->genlhdr->cmd); - if (!reply) { - GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); - ret = -EMSGSIZE; - goto fail; - } + struct mptcp_pm_addr_entry *entry; + int ret = -EINVAL; rcu_read_lock(); - entry = __lookup_addr_by_id(pernet, addr.addr.id); - if (!entry) { - GENL_SET_ERR_MSG(info, "address not found"); - ret = -EINVAL; - goto unlock_fail; + entry = __lookup_addr_by_id(pernet, id); + if (entry) { + *addr = *entry; + ret = 0; } - - ret = mptcp_nl_fill_addr(msg, entry); - if (ret) - goto unlock_fail; - - genlmsg_end(msg, reply); - ret = genlmsg_reply(msg, info); - rcu_read_unlock(); - return ret; - -unlock_fail: rcu_read_unlock(); -fail: - nlmsg_free(msg); return ret; } -int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) -{ - return mptcp_pm_get_addr(skb, info); -} - int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) { @@ -1860,12 +1834,6 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg, return msg->len; } -int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) -{ - return mptcp_pm_dump_addr(msg, cb); -} - static int parse_limit(struct genl_info *info, int id, unsigned int *limit) { struct nlattr *attr = info->attrs[id]; @@ -1875,7 +1843,9 @@ static int parse_limit(struct genl_info *info, int id, unsigned int *limit) *limit = nla_get_u32(attr); if (*limit > MPTCP_PM_ADDR_MAX) { - GENL_SET_ERR_MSG(info, "limit greater than maximum"); + NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, + "limit greater than maximum (%u)", + MPTCP_PM_ADDR_MAX); return -EINVAL; } return 0; @@ -1981,66 +1951,57 @@ next: return ret; } -int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info) { - struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | MPTCP_PM_ADDR_FLAG_FULLMESH; - struct net *net = sock_net(skb->sk); + struct net *net = genl_info_net(info); struct mptcp_pm_addr_entry *entry; struct pm_nl_pernet *pernet; u8 lookup_by_id = 0; u8 bkup = 0; - int ret; pernet = pm_nl_get_pernet(net); - ret = mptcp_pm_parse_entry(attr, info, false, &addr); - if (ret < 0) - return ret; - - if (addr.addr.family == AF_UNSPEC) { + if (local->addr.family == AF_UNSPEC) { lookup_by_id = 1; - if (!addr.addr.id) { - GENL_SET_ERR_MSG(info, "missing required inputs"); + if (!local->addr.id) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "missing address ID"); return -EOPNOTSUPP; } } - if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + if (local->flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; spin_lock_bh(&pernet->lock); - entry = lookup_by_id ? __lookup_addr_by_id(pernet, addr.addr.id) : - __lookup_addr(pernet, &addr.addr); + entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) : + __lookup_addr(pernet, &local->addr); if (!entry) { spin_unlock_bh(&pernet->lock); - GENL_SET_ERR_MSG(info, "address not found"); + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); return -EINVAL; } - if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && + if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_IMPLICIT))) { spin_unlock_bh(&pernet->lock); - GENL_SET_ERR_MSG(info, "invalid addr flags"); + NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags"); return -EINVAL; } - changed = (addr.flags ^ entry->flags) & mask; - entry->flags = (entry->flags & ~mask) | (addr.flags & mask); - addr = *entry; + changed = (local->flags ^ entry->flags) & mask; + entry->flags = (entry->flags & ~mask) | (local->flags & mask); + *local = *entry; spin_unlock_bh(&pernet->lock); - mptcp_nl_set_flags(net, &addr.addr, bkup, changed); + mptcp_nl_set_flags(net, &local->addr, bkup, changed); return 0; } -int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) -{ - return mptcp_pm_set_flags(skb, info); -} - static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) { genlmsg_multicast_netns(&mptcp_genl_family, net, diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index a3d477059b11..277cf092a870 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -175,14 +175,13 @@ bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *info) { - struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct mptcp_sock *msk; + struct nlattr *token; - if (!token) { - GENL_SET_ERR_MSG(info, "missing required token"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_TOKEN)) return NULL; - } + token = info->attrs[MPTCP_PM_ATTR_TOKEN]; msk = mptcp_token_get_sock(genl_info_net(info), nla_get_u32(token)); if (!msk) { NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); @@ -190,7 +189,8 @@ static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *in } if (!mptcp_pm_is_userspace(msk)) { - GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + NL_SET_ERR_MSG_ATTR(info->extack, token, + "userspace PM not selected"); sock_put((struct sock *)msk); return NULL; } @@ -200,16 +200,14 @@ static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *in int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry addr_val; struct mptcp_sock *msk; + struct nlattr *addr; int err = -EINVAL; struct sock *sk; - if (!addr) { - GENL_SET_ERR_MSG(info, "missing required address"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR)) return err; - } msk = mptcp_userspace_pm_get_sock(info); if (!msk) @@ -217,21 +215,27 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) sk = (struct sock *)msk; + addr = info->attrs[MPTCP_PM_ATTR_ADDR]; err = mptcp_pm_parse_entry(addr, info, true, &addr_val); - if (err < 0) { - GENL_SET_ERR_MSG(info, "error parsing local address"); + if (err < 0) + goto announce_err; + + if (addr_val.addr.id == 0) { + NL_SET_ERR_MSG_ATTR(info->extack, addr, "invalid addr id"); + err = -EINVAL; goto announce_err; } - if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - GENL_SET_ERR_MSG(info, "invalid addr id or flags"); + if (!(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + NL_SET_ERR_MSG_ATTR(info->extack, addr, "invalid addr flags"); err = -EINVAL; goto announce_err; } err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); if (err < 0) { - GENL_SET_ERR_MSG(info, "did not match address and id"); + NL_SET_ERR_MSG_ATTR(info->extack, addr, + "did not match address and id"); goto announce_err; } @@ -253,8 +257,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) return err; } -static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, - struct genl_info *info) +static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk) { struct mptcp_rm_list list = { .nr = 0 }; struct mptcp_subflow_context *subflow; @@ -269,10 +272,8 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, break; } } - if (!has_id_0) { - GENL_SET_ERR_MSG(info, "address with id 0 not found"); + if (!has_id_0) goto remove_err; - } list.ids[list.nr++] = 0; @@ -309,18 +310,17 @@ void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; struct mptcp_pm_addr_entry *match; struct mptcp_sock *msk; + struct nlattr *id; int err = -EINVAL; struct sock *sk; u8 id_val; - if (!id) { - GENL_SET_ERR_MSG(info, "missing required ID"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_LOC_ID)) return err; - } + id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; id_val = nla_get_u8(id); msk = mptcp_userspace_pm_get_sock(info); @@ -330,7 +330,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) sk = (struct sock *)msk; if (id_val == 0) { - err = mptcp_userspace_pm_remove_id_zero_address(msk, info); + err = mptcp_userspace_pm_remove_id_zero_address(msk); goto out; } @@ -339,7 +339,6 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&msk->pm.lock); match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val); if (!match) { - GENL_SET_ERR_MSG(info, "address with specified id not found"); spin_unlock_bh(&msk->pm.lock); release_sock(sk); goto out; @@ -356,25 +355,28 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) err = 0; out: + if (err) + NL_SET_ERR_MSG_ATTR_FMT(info->extack, id, + "address with id %u not found", + id_val); + sock_put(sk); return err; } int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry entry = { 0 }; struct mptcp_addr_info addr_r; + struct nlattr *raddr, *laddr; struct mptcp_pm_local local; struct mptcp_sock *msk; int err = -EINVAL; struct sock *sk; - if (!laddr || !raddr) { - GENL_SET_ERR_MSG(info, "missing required address(es)"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR) || + GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE)) return err; - } msk = mptcp_userspace_pm_get_sock(info); if (!msk) @@ -382,24 +384,22 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) sk = (struct sock *)msk; + laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; err = mptcp_pm_parse_entry(laddr, info, true, &entry); - if (err < 0) { - NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); + if (err < 0) goto create_err; - } if (entry.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { - GENL_SET_ERR_MSG(info, "invalid addr flags"); + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "invalid addr flags"); err = -EINVAL; goto create_err; } entry.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; + raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; err = mptcp_pm_parse_addr(raddr, info, &addr_r); - if (err < 0) { - NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); + if (err < 0) goto create_err; - } if (!mptcp_pm_addr_families_match(sk, &entry.addr, &addr_r)) { GENL_SET_ERR_MSG(info, "families mismatch"); @@ -409,7 +409,8 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) err = mptcp_userspace_pm_append_new_local_addr(msk, &entry, false); if (err < 0) { - GENL_SET_ERR_MSG(info, "did not match address and id"); + NL_SET_ERR_MSG_ATTR(info->extack, laddr, + "did not match address and id"); goto create_err; } @@ -421,6 +422,9 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) err = __mptcp_subflow_connect(sk, &local, &addr_r); release_sock(sk); + if (err) + GENL_SET_ERR_MSG_FMT(info, "connect error: %d", err); + spin_lock_bh(&msk->pm.lock); if (err) mptcp_userspace_pm_delete_local_addr(msk, &entry); @@ -483,18 +487,16 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry addr_l; struct mptcp_addr_info addr_r; + struct nlattr *raddr, *laddr; struct mptcp_sock *msk; struct sock *sk, *ssk; int err = -EINVAL; - if (!laddr || !raddr) { - GENL_SET_ERR_MSG(info, "missing required address(es)"); + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR) || + GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE)) return err; - } msk = mptcp_userspace_pm_get_sock(info); if (!msk) @@ -502,17 +504,15 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info sk = (struct sock *)msk; + laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; err = mptcp_pm_parse_entry(laddr, info, true, &addr_l); - if (err < 0) { - NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); + if (err < 0) goto destroy_err; - } + raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; err = mptcp_pm_parse_addr(raddr, info, &addr_r); - if (err < 0) { - NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); + if (err < 0) goto destroy_err; - } #if IS_ENABLED(CONFIG_MPTCP_IPV6) if (addr_l.addr.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { @@ -530,8 +530,14 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info goto destroy_err; } - if (!addr_l.addr.port || !addr_r.port) { - GENL_SET_ERR_MSG(info, "missing local or remote port"); + if (!addr_l.addr.port) { + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local port"); + err = -EINVAL; + goto destroy_err; + } + + if (!addr_r.port) { + NL_SET_ERR_MSG_ATTR(info->extack, raddr, "missing remote port"); err = -EINVAL; goto destroy_err; } @@ -539,6 +545,7 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info lock_sock(sk); ssk = mptcp_nl_find_ssk(msk, &addr_l.addr, &addr_r); if (!ssk) { + GENL_SET_ERR_MSG(info, "subflow not found"); err = -ESRCH; goto release_sock; } @@ -557,46 +564,51 @@ destroy_err: return err; } -int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) +int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info) { - struct mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, }; - struct mptcp_pm_addr_entry rem = { .addr = { .family = AF_UNSPEC }, }; - struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_addr_info rem = { .family = AF_UNSPEC, }; struct mptcp_pm_addr_entry *entry; + struct nlattr *attr, *attr_rem; struct mptcp_sock *msk; int ret = -EINVAL; struct sock *sk; u8 bkup = 0; + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ATTR_ADDR_REMOTE)) + return ret; + msk = mptcp_userspace_pm_get_sock(info); if (!msk) return ret; sk = (struct sock *)msk; - ret = mptcp_pm_parse_entry(attr, info, false, &loc); - if (ret < 0) + attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + if (local->addr.family == AF_UNSPEC) { + NL_SET_ERR_MSG_ATTR(info->extack, attr, + "invalid local address family"); + ret = -EINVAL; goto set_flags_err; - - if (attr_rem) { - ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem); - if (ret < 0) - goto set_flags_err; } - if (loc.addr.family == AF_UNSPEC || - rem.addr.family == AF_UNSPEC) { - GENL_SET_ERR_MSG(info, "invalid address families"); + attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; + ret = mptcp_pm_parse_addr(attr_rem, info, &rem); + if (ret < 0) + goto set_flags_err; + + if (rem.family == AF_UNSPEC) { + NL_SET_ERR_MSG_ATTR(info->extack, attr_rem, + "invalid remote address family"); ret = -EINVAL; goto set_flags_err; } - if (loc.flags & MPTCP_PM_ADDR_FLAG_BACKUP) + if (local->flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; spin_lock_bh(&msk->pm.lock); - entry = mptcp_userspace_pm_lookup_addr(msk, &loc.addr); + entry = mptcp_userspace_pm_lookup_addr(msk, &local->addr); if (entry) { if (bkup) entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; @@ -606,9 +618,13 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&msk->pm.lock); lock_sock(sk); - ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc.addr, &rem.addr, bkup); + ret = mptcp_pm_nl_mp_prio_send_ack(msk, &local->addr, &rem, bkup); release_sock(sk); + /* mptcp_pm_nl_mp_prio_send_ack() only fails in one case */ + if (ret < 0) + GENL_SET_ERR_MSG(info, "subflow not found"); + set_flags_err: sock_put(sk); return ret; @@ -663,16 +679,13 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, return ret; } -int mptcp_userspace_pm_get_addr(struct sk_buff *skb, +int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; - struct mptcp_pm_addr_entry addr, *entry; + struct mptcp_pm_addr_entry *entry; struct mptcp_sock *msk; - struct sk_buff *msg; int ret = -EINVAL; struct sock *sk; - void *reply; msk = mptcp_userspace_pm_get_sock(info); if (!msk) @@ -680,50 +693,16 @@ int mptcp_userspace_pm_get_addr(struct sk_buff *skb, sk = (struct sock *)msk; - ret = mptcp_pm_parse_entry(attr, info, false, &addr); - if (ret < 0) - goto out; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) { - ret = -ENOMEM; - goto out; - } - - reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, - info->genlhdr->cmd); - if (!reply) { - GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); - ret = -EMSGSIZE; - goto fail; - } - lock_sock(sk); spin_lock_bh(&msk->pm.lock); - entry = mptcp_userspace_pm_lookup_addr_by_id(msk, addr.addr.id); - if (!entry) { - GENL_SET_ERR_MSG(info, "address not found"); - ret = -EINVAL; - goto unlock_fail; + entry = mptcp_userspace_pm_lookup_addr_by_id(msk, id); + if (entry) { + *addr = *entry; + ret = 0; } - - ret = mptcp_nl_fill_addr(msg, entry); - if (ret) - goto unlock_fail; - - genlmsg_end(msg, reply); - ret = genlmsg_reply(msg, info); spin_unlock_bh(&msk->pm.lock); release_sock(sk); - sock_put(sk); - return ret; -unlock_fail: - spin_unlock_bh(&msk->pm.lock); - release_sock(sk); -fail: - nlmsg_free(msg); -out: sock_put(sk); return ret; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f6a207958459..37226cdd9e37 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1038,9 +1038,10 @@ bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, const struct mptcp_addr_info *saddr); bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); -int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info); -int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info); -int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info); +int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local, + struct genl_info *info); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); @@ -1131,14 +1132,13 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_in bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); -int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); -int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info); -int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info); -int mptcp_userspace_pm_get_addr(struct sk_buff *skb, +int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, + struct genl_info *info); +int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, struct genl_info *info); static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 1f7975b49657..c263fb7a68dc 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -699,18 +699,56 @@ void xp_free(struct xdp_buff_xsk *xskb) } EXPORT_SYMBOL(xp_free); -void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr) +static u64 __xp_raw_get_addr(const struct xsk_buff_pool *pool, u64 addr) +{ + return pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr; +} + +static void *__xp_raw_get_data(const struct xsk_buff_pool *pool, u64 addr) { - addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr; return pool->addrs + addr; } + +void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr) +{ + return __xp_raw_get_data(pool, __xp_raw_get_addr(pool, addr)); +} EXPORT_SYMBOL(xp_raw_get_data); -dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) +static dma_addr_t __xp_raw_get_dma(const struct xsk_buff_pool *pool, u64 addr) { - addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr; return (pool->dma_pages[addr >> PAGE_SHIFT] & ~XSK_NEXT_PG_CONTIG_MASK) + (addr & ~PAGE_MASK); } + +dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) +{ + return __xp_raw_get_dma(pool, __xp_raw_get_addr(pool, addr)); +} EXPORT_SYMBOL(xp_raw_get_dma); + +/** + * xp_raw_get_ctx - get &xdp_desc context + * @pool: XSk buff pool desc address belongs to + * @addr: desc address (from userspace) + * + * Helper for getting desc's DMA address and metadata pointer, if present. + * Saves one call on hotpath, double calculation of the actual address, + * and inline checks for metadata presence and sanity. + * + * Return: new &xdp_desc_ctx struct containing desc's DMA address and metadata + * pointer, if it is present and valid (initialized to %NULL otherwise). + */ +struct xdp_desc_ctx xp_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr) +{ + struct xdp_desc_ctx ret; + + addr = __xp_raw_get_addr(pool, addr); + + ret.dma = __xp_raw_get_dma(pool, addr); + ret.meta = __xsk_buff_get_metadata(pool, __xp_raw_get_data(pool, addr)); + + return ret; +} +EXPORT_SYMBOL(xp_raw_get_ctx); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index e4be227d3ad6..6c6ee183802d 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -87,6 +87,11 @@ enum { }; enum { + __NETDEV_A_IO_URING_PROVIDER_INFO_MAX, + NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1) +}; + +enum { NETDEV_A_PAGE_POOL_ID = 1, NETDEV_A_PAGE_POOL_IFINDEX, NETDEV_A_PAGE_POOL_NAPI_ID, @@ -94,6 +99,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, NETDEV_A_PAGE_POOL_DMABUF, + NETDEV_A_PAGE_POOL_IO_URING, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -136,6 +142,7 @@ enum { NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, + NETDEV_A_QUEUE_IO_URING, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 0712b5e82eb7..f3269ce39e5b 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -17,10 +17,13 @@ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h) CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \ - $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) + $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \ + $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h) +CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) +CFLAGS_nl80211:=$(call get_hdr_inc,__LINUX_NL802121_H,nl80211.h) CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h) CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 08f8bf89cfc2..dcc2c6b298d6 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -536,9 +536,11 @@ class YnlFamily(SpecFamily): try: return int(value) except (ValueError, TypeError) as e: - if 'enum' not in attr_spec: - raise e - return self._encode_enum(attr_spec, value) + if 'enum' in attr_spec: + return self._encode_enum(attr_spec, value) + if attr_spec.display_hint: + return self._from_string(value, attr_spec) + raise e def _add_attr(self, space, name, value, search_attrs): try: @@ -571,7 +573,10 @@ class YnlFamily(SpecFamily): if isinstance(value, bytes): attr_payload = value elif isinstance(value, str): - attr_payload = bytes.fromhex(value) + if attr.display_hint: + attr_payload = self._from_string(value, attr) + else: + attr_payload = bytes.fromhex(value) elif isinstance(value, dict) and attr.struct_name: attr_payload = self._encode_struct(attr.struct_name, value) else: @@ -627,6 +632,11 @@ class YnlFamily(SpecFamily): decoded = self._decode_struct(attr.raw, attr_spec.struct_name) elif attr_spec.sub_type: decoded = attr.as_c_array(attr_spec.sub_type) + if 'enum' in attr_spec: + decoded = [ self._decode_enum(x, attr_spec) for x in decoded ] + elif attr_spec.display_hint: + decoded = [ self._formatted_string(x, attr_spec.display_hint) + for x in decoded ] else: decoded = attr.as_bin() if attr_spec.display_hint: @@ -644,15 +654,17 @@ class YnlFamily(SpecFamily): subattrs = self._decode(NlAttrs(item.raw), attr_spec['nested-attributes']) decoded.append({ item.type: subattrs }) elif attr_spec["sub-type"] == 'binary': - subattrs = item.as_bin() + subattr = item.as_bin() if attr_spec.display_hint: - subattrs = self._formatted_string(subattrs, attr_spec.display_hint) - decoded.append(subattrs) + subattr = self._formatted_string(subattr, attr_spec.display_hint) + decoded.append(subattr) elif attr_spec["sub-type"] in NlAttr.type_formats: - subattrs = item.as_scalar(attr_spec['sub-type'], attr_spec.byte_order) - if attr_spec.display_hint: - subattrs = self._formatted_string(subattrs, attr_spec.display_hint) - decoded.append(subattrs) + subattr = item.as_scalar(attr_spec['sub-type'], attr_spec.byte_order) + if 'enum' in attr_spec: + subattr = self._decode_enum(subattr, attr_spec) + elif attr_spec.display_hint: + subattr = self._formatted_string(subattr, attr_spec.display_hint) + decoded.append(subattr) else: raise Exception(f'Unknown {attr_spec["sub-type"]} with name {attr_spec["name"]}') return decoded @@ -899,6 +911,18 @@ class YnlFamily(SpecFamily): formatted = raw return formatted + def _from_string(self, string, attr_spec): + if attr_spec.display_hint in ['ipv4', 'ipv6']: + ip = ipaddress.ip_address(string) + if attr_spec['type'] == 'binary': + raw = ip.packed + else: + raw = int(ip) + else: + raise Exception(f"Display hint '{attr_spec.display_hint}' not implemented" + f" when parsing '{attr_spec['name']}'") + return raw + def handle_ntf(self, decoded): msg = dict() if self.include_raw: diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index c2eabc90dce8..a1427c537030 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -74,6 +74,8 @@ class Type(SpecAttr): self.c_name = c_lower(self.name) if self.c_name in _C_KW: self.c_name += '_' + if self.c_name[0].isdigit(): + self.c_name = '_' + self.c_name # Added by resolve(): self.enum_name = None @@ -100,7 +102,7 @@ class Type(SpecAttr): if isinstance(value, int): return value if value in self.family.consts: - raise Exception("Resolving family constants not implemented, yet") + return self.family.consts[value]["value"] return limit_to_number(value) def get_limit_str(self, limit, default=None, suffix=''): @@ -110,6 +112,9 @@ class Type(SpecAttr): if isinstance(value, int): return str(value) + suffix if value in self.family.consts: + const = self.family.consts[value] + if const.get('header'): + return c_upper(value) return c_upper(f"{self.family['name']}-{value}") return c_upper(value) @@ -683,7 +688,10 @@ class TypeArrayNest(Type): raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet") def _attr_typol(self): - return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' + if self.attr['sub-type'] in scalars: + return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, ' + else: + return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' def _attr_get(self, ri, var): local_vars = ['const struct nlattr *attr2;'] @@ -885,7 +893,7 @@ class AttrSet(SpecAttrSet): elif elem['type'] == 'nest': t = TypeNest(self.family, self, elem, value) elif elem['type'] == 'indexed-array' and 'sub-type' in elem: - if elem["sub-type"] == 'nest': + if elem["sub-type"] in ['nest', 'u32']: t = TypeArrayNest(self.family, self, elem, value) else: raise Exception(f'new_attr: unsupported sub-type {elem["sub-type"]}') @@ -1437,7 +1445,7 @@ class CodeWriter: self._ifdef_block = config_option -scalars = {'u8', 'u16', 'u32', 'u64', 's32', 's64', 'uint', 'sint'} +scalars = {'u8', 'u16', 'u32', 'u64', 's8', 's16', 's32', 's64', 'uint', 'sint'} direction_to_suffix = { 'reply': '_rsp', @@ -1669,6 +1677,9 @@ def _multi_parse(ri, struct, init_lines, local_vars): if aspec["sub-type"] == 'nest': local_vars.append(f'const struct nlattr *attr_{aspec.c_name};') array_nests.add(arg) + elif aspec['sub-type'] in scalars: + local_vars.append(f'const struct nlattr *attr_{aspec.c_name};') + array_nests.add(arg) else: raise Exception(f'Not supported sub-type {aspec["sub-type"]}') if 'multi-attr' in aspec: @@ -1724,11 +1735,17 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));") ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};") ri.cw.p('i = 0;') - ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") + if 'nested-attributes' in aspec: + ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") ri.cw.block_start(line=f"ynl_attr_for_each_nested(attr, attr_{aspec.c_name})") - ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];") - ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))") - ri.cw.p('return YNL_PARSE_CB_ERROR;') + if 'nested-attributes' in aspec: + ri.cw.p(f"parg.data = &dst->{aspec.c_name}[i];") + ri.cw.p(f"if ({aspec.nested_render_name}_parse(&parg, attr, ynl_attr_type(attr)))") + ri.cw.p('return YNL_PARSE_CB_ERROR;') + elif aspec.sub_type in scalars: + ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.sub_type}(attr);") + else: + raise Exception(f"Nest parsing type not supported in {aspec['name']}") ri.cw.p('i++;') ri.cw.block_end() ri.cw.block_end() @@ -2549,6 +2566,9 @@ def render_uapi(family, cw): defines = [] for const in family['definitions']: + if const.get('header'): + continue + if const['type'] != 'const': cw.writes_defines(defines) defines = [] diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 137470bdee0c..28b6d47f812d 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -7,7 +7,9 @@ TEST_INCLUDES := $(wildcard lib/py/*.py) \ TEST_PROGS := \ netcons_basic.sh \ + netcons_fragmented_msg.sh \ netcons_overflow.sh \ + netcons_sysdata.sh \ ping.py \ queues.py \ stats.py \ diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index cb40497faee4..cd477f3440ca 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -100,7 +100,7 @@ def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: check_nic_features(cfg) - cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../net/lib/csum") + cfg.bin_local = cfg.rpath("../../../net/lib/csum") cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) cases = [] diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 19a6969643f4..2bf14ac2b8c6 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -50,7 +50,6 @@ #include <linux/memfd.h> #include <linux/dma-buf.h> #include <linux/udmabuf.h> -#include <libmnl/libmnl.h> #include <linux/types.h> #include <linux/netlink.h> #include <linux/genetlink.h> diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 319aaa004c40..d6e69d7d5e43 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -4,7 +4,8 @@ import datetime import random import re -from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt, ksft_true +from lib.py import ksft_run, ksft_pr, ksft_exit +from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily from lib.py import KsftSkipEx, KsftFailEx @@ -58,6 +59,14 @@ def require_ntuple(cfg): raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"])) +def require_context_cnt(cfg, need_cnt): + # There's no good API to get the context count, so the tests + # which try to add a lot opportunisitically set the count they + # discovered. Careful with test ordering! + if need_cnt and cfg.context_cnt and cfg.context_cnt < need_cnt: + raise KsftSkipEx(f"Test requires at least {need_cnt} contexts, but device only has {cfg.context_cnt}") + + # Get Rx packet counts for all queues, as a simple list of integers # if @prev is specified the prev counts will be subtracted def _get_rx_cnts(cfg, prev=None): @@ -456,6 +465,8 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): raise ksft_pr(f"Failed to create context {i + 1}, trying to test what we got") ctx_cnt = i + if cfg.context_cnt is None: + cfg.context_cnt = ctx_cnt break _rss_key_check(cfg, context=ctx_id) @@ -511,8 +522,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): """ require_ntuple(cfg) - - requested_ctx_cnt = ctx_cnt + require_context_cnt(cfg, 4) # Try to allocate more queues when necessary qcnt = len(_get_rx_cnts(cfg)) @@ -577,9 +587,6 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): remove_ctx(-1) check_traffic() - if requested_ctx_cnt != ctx_cnt: - raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") - def test_rss_context_overlap(cfg, other_ctx=0): """ @@ -588,6 +595,8 @@ def test_rss_context_overlap(cfg, other_ctx=0): """ require_ntuple(cfg) + if other_ctx: + require_context_cnt(cfg, 2) queue_cnt = len(_get_rx_cnts(cfg)) if queue_cnt < 4: @@ -649,6 +658,29 @@ def test_rss_context_overlap2(cfg): test_rss_context_overlap(cfg, True) +def test_flow_add_context_missing(cfg): + """ + Test that we are not allowed to add a rule pointing to an RSS context + which was never created. + """ + + require_ntuple(cfg) + + # Find a context which doesn't exist + for ctx_id in range(1, 100): + try: + get_rss(cfg, context=ctx_id) + except CmdExitFailure: + break + + with ksft_raises(CmdExitFailure) as cm: + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port 1234 context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ethtool(f"-N {cfg.ifname} delete {ntuple_id}") + if cm.exception: + ksft_in('Invalid argument', cm.exception.cmd.stderr) + + def test_delete_rss_context_busy(cfg): """ Test that deletion returns -EBUSY when an rss context is being used @@ -717,6 +749,7 @@ def test_rss_ntuple_addition(cfg): def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.context_cnt = None cfg.ethnl = EthtoolFamily() cfg.netdevnl = NetdevFamily() @@ -726,6 +759,7 @@ def main() -> None: test_rss_context_dump, test_rss_context_queue_reconfigure, test_rss_context_overlap, test_rss_context_overlap2, test_rss_context_out_of_order, test_rss_context4_create_with_cfg, + test_flow_add_context_missing, test_delete_rss_context_busy, test_rss_ntuple_addition], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 987e452d3a45..886b4904613c 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -10,38 +10,58 @@ from lib.py import NetNS, NetdevSimDev from .remote import Remote -def _load_env_file(src_path): - env = os.environ.copy() +class NetDrvEnvBase: + """ + Base class for a NIC / host envirnoments + """ + def __init__(self, src_path): + self.src_path = src_path + self.env = self._load_env_file() - src_dir = Path(src_path).parent.resolve() - if not (src_dir / "net.config").exists(): + def rpath(self, path): + """ + Get an absolute path to a file based on a path relative to the directory + containing the test which constructed env. + + For example, if the test.py is in the same directory as + a binary (built from helper.c), the test can use env.rpath("helper") + to get the absolute path to the binary + """ + src_dir = Path(self.src_path).parent.resolve() + return (src_dir / path).as_posix() + + def _load_env_file(self): + env = os.environ.copy() + + src_dir = Path(self.src_path).parent.resolve() + if not (src_dir / "net.config").exists(): + return ksft_setup(env) + + with open((src_dir / "net.config").as_posix(), 'r') as fp: + for line in fp.readlines(): + full_file = line + # Strip comments + pos = line.find("#") + if pos >= 0: + line = line[:pos] + line = line.strip() + if not line: + continue + pair = line.split('=', maxsplit=1) + if len(pair) != 2: + raise Exception("Can't parse configuration line:", full_file) + env[pair[0]] = pair[1] return ksft_setup(env) - with open((src_dir / "net.config").as_posix(), 'r') as fp: - for line in fp.readlines(): - full_file = line - # Strip comments - pos = line.find("#") - if pos >= 0: - line = line[:pos] - line = line.strip() - if not line: - continue - pair = line.split('=', maxsplit=1) - if len(pair) != 2: - raise Exception("Can't parse configuration line:", full_file) - env[pair[0]] = pair[1] - return ksft_setup(env) - - -class NetDrvEnv: + +class NetDrvEnv(NetDrvEnvBase): """ Class for a single NIC / host env, with no remote end """ def __init__(self, src_path, **kwargs): - self._ns = None + super().__init__(src_path) - self.env = _load_env_file(src_path) + self._ns = None if 'NETIF' in self.env: self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] @@ -68,7 +88,7 @@ class NetDrvEnv: self._ns = None -class NetDrvEpEnv: +class NetDrvEpEnv(NetDrvEnvBase): """ Class for an environment with a local device and "remote endpoint" which can be used to send traffic in. @@ -82,8 +102,7 @@ class NetDrvEpEnv: nsim_v6_pfx = "2001:db8::" def __init__(self, src_path, nsim_test=None): - - self.env = _load_env_file(src_path) + super().__init__(src_path) self._stats_settle_time = None diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 3acaba41ac7b..3c96b022954d 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -110,6 +110,13 @@ function create_dynamic_target() { echo 1 > "${NETCONS_PATH}"/enabled } +# Do not append the release to the header of the message +function disable_release_append() { + echo 0 > "${NETCONS_PATH}"/enabled + echo 0 > "${NETCONS_PATH}"/release + echo 1 > "${NETCONS_PATH}"/enabled +} + function cleanup() { local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" @@ -223,3 +230,20 @@ function check_for_dependencies() { exit "${ksft_skip}" fi } + +function check_for_taskset() { + if ! which taskset > /dev/null ; then + echo "SKIP: taskset(1) is not available" >&2 + exit "${ksft_skip}" + fi +} + +# This is necessary if running multiple tests in a row +function pkill_socat() { + PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" + # socat runs under timeout(1), kill it if it is still alive + # do not fail if socat doesn't exist anymore + set +e + pkill -f "${PROCESS_NAME}" + set -e +} diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh new file mode 100755 index 000000000000..4a71e01a230c --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# Test netconsole's message fragmentation functionality. +# +# When a message exceeds the maximum packet size, netconsole splits it into +# multiple fragments for transmission. This test verifies: +# - Correct fragmentation of large messages +# - Proper reassembly of fragments at the receiver +# - Preservation of userdata across fragments +# - Behavior with and without kernel release version appending +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# set userdata to a long value. In this case, it is "1-2-3-4...50-" +USERDATA_VALUE=$(printf -- '%.2s-' {1..60}) + +# Convert the header string in a regexp, so, we can remove +# the second header as well. +# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If +# release is appended, you might find something like:L +# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;" +function header_to_regex() { + # header is everything before ; + local HEADER="${1}" + REGEX=$(echo "${HEADER}" | cut -d'=' -f1) + echo "${REGEX}=[0-9]*\/[0-9]*;" +} + +# We have two headers in the message. Remove both to get the full message, +# and extract the full message. +function extract_msg() { + local MSGFILE="${1}" + # Extract the header, which is the very first thing that arrives in the + # first list. + HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1) + HEADER_REGEX=$(header_to_regex "${HEADER}") + + # Remove the two headers from the received message + # This will return the message without any header, similarly to what + # was sent. + sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}" +} + +# Validate the message, which has two messages glued together. +# unwrap them to make sure all the characters were transmitted. +# File will look like the following: +# 13,468,514729715,-,ncfrag=0/1135;<message> +# key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key> +function validate_fragmented_result() { + # Discard the netconsole headers, and assemble the full message + RCVMSG=$(extract_msg "${1}") + + # check for the main message + if ! echo "${RCVMSG}" | grep -q "${MSG}"; then + echo "Message body doesn't match." >&2 + echo "msg received=" "${RCVMSG}" >&2 + exit "${ksft_fail}" + fi + + # check userdata + if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then + echo "message userdata doesn't match" >&2 + echo "msg received=" "${RCVMSG}" >&2 + exit "${ksft_fail}" + fi + # test passed. hooray +} + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target +# Set userdata "key" with the "value" value +set_user_data + + +# TEST 1: Send message and userdata. They will fragment +# ======= +MSG=$(printf -- 'MSG%.3s=' {1..150}) + +# Listen for netconsole port inside the namespace and destination interface +listen_port_and_save_to "${OUTPUT_FILE}" & +# Wait for socat to start and listen to the port. +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +# Send the message +echo "${MSG}: ${TARGET}" > /dev/kmsg +# Wait until socat saves the file to disk +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +# Check if the message was not corrupted +validate_fragmented_result "${OUTPUT_FILE}" + +# TEST 2: Test with smaller message, and without release appended +# ======= +MSG=$(printf -- 'FOOBAR%.3s=' {1..100}) +# Let's disable release and test again. +disable_release_append + +listen_port_and_save_to "${OUTPUT_FILE}" & +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +echo "${MSG}: ${TARGET}" > /dev/kmsg +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +validate_fragmented_result "${OUTPUT_FILE}" +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh new file mode 100755 index 000000000000..2b78fd1f5982 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh @@ -0,0 +1,167 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# A test that makes sure that sysdata runtime CPU data is properly set +# when a message is sent. +# +# There are 3 different tests, every time sent using a random CPU. +# - Test #1 +# * Only enable cpu_nr sysdata feature. +# - Test #2 +# * Keep cpu_nr sysdata feature enable and enable userdata. +# - Test #3 +# * keep userdata enabled, and disable sysdata cpu_nr feature. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +# Enable the sysdata cpu_nr feature +function set_cpu_nr() { + if [[ ! -f "${NETCONS_PATH}/userdata/cpu_nr_enabled" ]] + then + echo "Populate CPU configfs path not available in ${NETCONS_PATH}/userdata/cpu_nr_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" +} + +# Disable the sysdata cpu_nr feature +function unset_cpu_nr() { + echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" +} + +# Test if MSG content and `cpu=${CPU}` exists in OUTPUT_FILE +function validate_sysdata_cpu_exists() { + # OUTPUT_FILE will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # userdatakey=userdatavalue + # cpu=X + + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then + echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + # Check if cpu=XX exists in the file and matches the one used + # in taskset(1) + if ! grep -q "cpu=${CPU}\+" "${OUTPUT_FILE}"; then + echo "FAIL: 'cpu=${CPU}' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + rm "${OUTPUT_FILE}" + pkill_socat +} + +# Test if MSG content exists in OUTPUT_FILE but no `cpu=` string +function validate_sysdata_no_cpu() { + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then + echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "cpu=" "${OUTPUT_FILE}"; then + echo "FAIL: 'cpu= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + rm "${OUTPUT_FILE}" +} + +# Start socat, send the message and wait for the file to show up in the file +# system +function runtest { + # Listen for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + taskset -c "${CPU}" echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +} + +# ========== # +# Start here # +# ========== # + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies +# This test also depends on taskset(1). Check for it before starting the test +check_for_taskset + +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target + +#==================================================== +# TEST #1 +# Send message from a random CPU +#==================================================== +# Random CPU in the system +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_1" +MSG="Test #1 from CPU${CPU}" +# Enable the auto population of cpu_nr +set_cpu_nr +runtest +# Make sure the message was received in the dst part +# and exit +validate_sysdata_cpu_exists + +#==================================================== +# TEST #2 +# This test now adds userdata together with sysdata +# =================================================== +# Get a new random CPU +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_2" +MSG="Test #2 from CPU${CPU}" +set_user_data +runtest +validate_sysdata_cpu_exists + +# =================================================== +# TEST #3 +# Unset cpu_nr, so, no CPU should be appended. +# userdata is still set +# =================================================== +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_3" +MSG="Test #3 from CPU${CPU}" +# Enable the auto population of cpu_nr +unset_cpu_nr +runtest +# At this time, cpu= shouldn't be present in the msg +validate_sysdata_no_cpu + +exit "${ksft_pass}" diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 73ee88d6b043..b6271714504d 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -7,7 +7,7 @@ CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES) CFLAGS += -I../ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ - rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh + rtnetlink.sh xfrm_policy.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh @@ -36,6 +36,7 @@ TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += netns-name.sh TEST_PROGS += nl_netdev.py +TEST_PROGS += rtnetlink.py TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index d9d587454d20..8c1597ebc2d3 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -149,7 +149,7 @@ cfg_test_host_common() check_err $? "Failed to add $name host entry" bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null - check_fail $? "Managed to replace $name host entry" + check_err $? "Failed to replace $name host entry" bridge mdb del dev br0 port br0 grp $grp $state vid 10 bridge mdb get dev br0 grp $grp vid 10 &> /dev/null diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 3f9d50f1ef9e..180c5eca556f 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -740,6 +740,8 @@ test_learning() vxlan_flood_test $mac $dst 0 10 0 + # The entry should age out when it only forwards traffic + $MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q & sleep 60 bridge fdb show brport vx1 | grep $mac | grep -q self diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 54d8f5eba810..729457859316 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -5,5 +5,5 @@ from .ksft import * from .netns import NetNS from .nsim import * from .utils import * -from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily +from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily from .ynl import NetshaperFamily diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index ad1e36baee2a..8986c584cb37 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -42,6 +42,10 @@ class RtnlFamily(YnlFamily): super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), schema='', recv_size=recv_size) +class RtnlAddrFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(), + schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): def __init__(self, recv_size=0): diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index 93e8cb671c3d..beaee5e4e2aa 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -35,6 +35,21 @@ def napi_list_check(nf) -> None: comment=f"queue count after reset queue {q} mode {i}") +def nsim_rxq_reset_down(nf) -> None: + """ + Test that the queue API supports resetting a queue + while the interface is down. We should convert this + test to testing real HW once more devices support + queue API. + """ + with NetdevSimDev(queue_count=4) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} down") + for i in [0, 2, 3]: + nsim.dfs_write("queue_reset", f"1 {i}") + + def page_pool_check(nf) -> None: with NetdevSimDev() as nsimdev: nsim = nsimdev.nsims[0] @@ -106,7 +121,8 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() - ksft_run([empty_check, lo_check, page_pool_check, napi_list_check], + ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, + nsim_rxq_reset_down], args=(nf, )) ksft_exit() diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py new file mode 100755 index 000000000000..80950888800b --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_exit, ksft_run, ksft_ge, RtnlAddrFamily +import socket + +IPV4_ALL_HOSTS_MULTICAST = b'\xe0\x00\x00\x01' + +def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None: + """ + Verify that at least one interface has the IPv4 all-hosts multicast address. + At least the loopback interface should have this address. + """ + + addresses = rtnl.getmaddrs({"ifa-family": socket.AF_INET}, dump=True) + + all_host_multicasts = [ + addr for addr in addresses if addr['ifa-multicast'] == IPV4_ALL_HOSTS_MULTICAST + ] + + ksft_ge(len(all_host_multicasts), 1, + "No interface found with the IPv4 all-hosts multicast address") + +def main() -> None: + rtnl = RtnlAddrFamily() + ksft_run([dump_mcaddr_check], args=(rtnl, )) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh deleted file mode 100755 index 3119b80e711f..000000000000 --- a/tools/testing/selftests/net/test_blackhole_dev.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Runs blackhole-dev test using blackhole-dev kernel module - -if /sbin/modprobe -q test_blackhole_dev ; then - /sbin/modprobe -q -r test_blackhole_dev; - echo "test_blackhole_dev: ok"; -else - echo "test_blackhole_dev: [FAIL]"; - exit 1; -fi diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index 12e7cae251be..e907c2751956 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -27,7 +27,8 @@ $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig: $(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig $(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a - $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a + $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \ + GENS="$(YNL_GENS)" RSTS="" libynl.a $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a EXTRA_CLEAN += \ |